1/* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2024 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "backend.h"
26#include "target.h"
27#include "rtl.h"
28#include "tree.h"
29#include "gimple.h"
30#include "ssa.h"
31#include "optabs-tree.h"
32#include "insn-config.h"
33#include "recog.h" /* FIXME: for insn_data */
34#include "cgraph.h"
35#include "dumpfile.h"
36#include "alias.h"
37#include "fold-const.h"
38#include "stor-layout.h"
39#include "tree-eh.h"
40#include "gimplify.h"
41#include "gimple-iterator.h"
42#include "gimplify-me.h"
43#include "tree-cfg.h"
44#include "tree-ssa-loop-manip.h"
45#include "cfgloop.h"
46#include "explow.h"
47#include "tree-ssa-loop.h"
48#include "tree-scalar-evolution.h"
49#include "tree-vectorizer.h"
50#include "builtins.h"
51#include "internal-fn.h"
52#include "tree-vector-builder.h"
53#include "vec-perm-indices.h"
54#include "gimple-range.h"
55#include "tree-ssa-loop-niter.h"
56#include "gimple-fold.h"
57#include "regs.h"
58#include "attribs.h"
59#include "optabs-libfuncs.h"
60
61/* For lang_hooks.types.type_for_mode. */
62#include "langhooks.h"
63
64/* Return the vectorized type for the given statement. */
65
66tree
67stmt_vectype (class _stmt_vec_info *stmt_info)
68{
69 return STMT_VINFO_VECTYPE (stmt_info);
70}
71
72/* Return TRUE iff the given statement is in an inner loop relative to
73 the loop being vectorized. */
74bool
75stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
76{
77 gimple *stmt = STMT_VINFO_STMT (stmt_info);
78 basic_block bb = gimple_bb (g: stmt);
79 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
80 class loop* loop;
81
82 if (!loop_vinfo)
83 return false;
84
85 loop = LOOP_VINFO_LOOP (loop_vinfo);
86
87 return (bb->loop_father == loop->inner);
88}
89
90/* Record the cost of a statement, either by directly informing the
91 target model or by saving it in a vector for later processing.
92 Return a preliminary estimate of the statement's cost. */
93
94static unsigned
95record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
96 enum vect_cost_for_stmt kind,
97 stmt_vec_info stmt_info, slp_tree node,
98 tree vectype, int misalign,
99 enum vect_cost_model_location where)
100{
101 if ((kind == vector_load || kind == unaligned_load)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_gather_load;
104 if ((kind == vector_store || kind == unaligned_store)
105 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
106 kind = vector_scatter_store;
107
108 stmt_info_for_cost si
109 = { .count: count, .kind: kind, .where: where, .stmt_info: stmt_info, .node: node, .vectype: vectype, .misalign: misalign };
110 body_cost_vec->safe_push (obj: si);
111
112 return (unsigned)
113 (builtin_vectorization_cost (type_of_cost: kind, vectype, misalign) * count);
114}
115
116unsigned
117record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
118 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
119 tree vectype, int misalign,
120 enum vect_cost_model_location where)
121{
122 return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
123 vectype, misalign, where);
124}
125
126unsigned
127record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
128 enum vect_cost_for_stmt kind, slp_tree node,
129 tree vectype, int misalign,
130 enum vect_cost_model_location where)
131{
132 return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
133 vectype, misalign, where);
134}
135
136unsigned
137record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
138 enum vect_cost_for_stmt kind,
139 enum vect_cost_model_location where)
140{
141 gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
142 || kind == scalar_stmt);
143 return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
144 NULL_TREE, misalign: 0, where);
145}
146
147/* Return a variable of type ELEM_TYPE[NELEMS]. */
148
149static tree
150create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
151{
152 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
153 "vect_array");
154}
155
156/* ARRAY is an array of vectors created by create_vector_array.
157 Return an SSA_NAME for the vector in index N. The reference
158 is part of the vectorization of STMT_INFO and the vector is associated
159 with scalar destination SCALAR_DEST. */
160
161static tree
162read_vector_array (vec_info *vinfo,
163 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
164 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
165{
166 tree vect_type, vect, vect_name, array_ref;
167 gimple *new_stmt;
168
169 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
170 vect_type = TREE_TYPE (TREE_TYPE (array));
171 vect = vect_create_destination_var (scalar_dest, vect_type);
172 array_ref = build4 (ARRAY_REF, vect_type, array,
173 build_int_cst (size_type_node, n),
174 NULL_TREE, NULL_TREE);
175
176 new_stmt = gimple_build_assign (vect, array_ref);
177 vect_name = make_ssa_name (var: vect, stmt: new_stmt);
178 gimple_assign_set_lhs (gs: new_stmt, lhs: vect_name);
179 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
180
181 return vect_name;
182}
183
184/* ARRAY is an array of vectors created by create_vector_array.
185 Emit code to store SSA_NAME VECT in index N of the array.
186 The store is part of the vectorization of STMT_INFO. */
187
188static void
189write_vector_array (vec_info *vinfo,
190 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
191 tree vect, tree array, unsigned HOST_WIDE_INT n)
192{
193 tree array_ref;
194 gimple *new_stmt;
195
196 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
197 build_int_cst (size_type_node, n),
198 NULL_TREE, NULL_TREE);
199
200 new_stmt = gimple_build_assign (array_ref, vect);
201 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
202}
203
204/* PTR is a pointer to an array of type TYPE. Return a representation
205 of *PTR. The memory reference replaces those in FIRST_DR
206 (and its group). */
207
208static tree
209create_array_ref (tree type, tree ptr, tree alias_ptr_type)
210{
211 tree mem_ref;
212
213 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
214 /* Arrays have the same alignment as their type. */
215 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
216 return mem_ref;
217}
218
219/* Add a clobber of variable VAR to the vectorization of STMT_INFO.
220 Emit the clobber before *GSI. */
221
222static void
223vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
224 gimple_stmt_iterator *gsi, tree var)
225{
226 tree clobber = build_clobber (TREE_TYPE (var));
227 gimple *new_stmt = gimple_build_assign (var, clobber);
228 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
229}
230
231/* Utility functions used by vect_mark_stmts_to_be_vectorized. */
232
233/* Function vect_mark_relevant.
234
235 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
236
237static void
238vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
239 enum vect_relevant relevant, bool live_p)
240{
241 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
242 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
243
244 if (dump_enabled_p ())
245 dump_printf_loc (MSG_NOTE, vect_location,
246 "mark relevant %d, live %d: %G", relevant, live_p,
247 stmt_info->stmt);
248
249 /* If this stmt is an original stmt in a pattern, we might need to mark its
250 related pattern stmt instead of the original stmt. However, such stmts
251 may have their own uses that are not in any pattern, in such cases the
252 stmt itself should be marked. */
253 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
254 {
255 /* This is the last stmt in a sequence that was detected as a
256 pattern that can potentially be vectorized. Don't mark the stmt
257 as relevant/live because it's not going to be vectorized.
258 Instead mark the pattern-stmt that replaces it. */
259
260 if (dump_enabled_p ())
261 dump_printf_loc (MSG_NOTE, vect_location,
262 "last stmt in pattern. don't mark"
263 " relevant/live.\n");
264
265 stmt_vec_info old_stmt_info = stmt_info;
266 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
267 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
268 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
269 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
270
271 if (live_p && relevant == vect_unused_in_scope)
272 {
273 if (dump_enabled_p ())
274 dump_printf_loc (MSG_NOTE, vect_location,
275 "vec_stmt_relevant_p: forcing live pattern stmt "
276 "relevant.\n");
277 relevant = vect_used_only_live;
278 }
279
280 if (dump_enabled_p ())
281 dump_printf_loc (MSG_NOTE, vect_location,
282 "mark relevant %d, live %d: %G", relevant, live_p,
283 stmt_info->stmt);
284 }
285
286 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
287 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
288 STMT_VINFO_RELEVANT (stmt_info) = relevant;
289
290 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
291 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
292 {
293 if (dump_enabled_p ())
294 dump_printf_loc (MSG_NOTE, vect_location,
295 "already marked relevant/live.\n");
296 return;
297 }
298
299 worklist->safe_push (obj: stmt_info);
300}
301
302
303/* Function is_simple_and_all_uses_invariant
304
305 Return true if STMT_INFO is simple and all uses of it are invariant. */
306
307bool
308is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
309 loop_vec_info loop_vinfo)
310{
311 tree op;
312 ssa_op_iter iter;
313
314 gassign *stmt = dyn_cast <gassign *> (p: stmt_info->stmt);
315 if (!stmt)
316 return false;
317
318 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
319 {
320 enum vect_def_type dt = vect_uninitialized_def;
321
322 if (!vect_is_simple_use (op, loop_vinfo, &dt))
323 {
324 if (dump_enabled_p ())
325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
326 "use not simple.\n");
327 return false;
328 }
329
330 if (dt != vect_external_def && dt != vect_constant_def)
331 return false;
332 }
333 return true;
334}
335
336/* Function vect_stmt_relevant_p.
337
338 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
339 is "relevant for vectorization".
340
341 A stmt is considered "relevant for vectorization" if:
342 - it has uses outside the loop.
343 - it has vdefs (it alters memory).
344 - control stmts in the loop (except for the exit condition).
345 - it is an induction and we have multiple exits.
346
347 CHECKME: what other side effects would the vectorizer allow? */
348
349static bool
350vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
351 enum vect_relevant *relevant, bool *live_p)
352{
353 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
354 ssa_op_iter op_iter;
355 imm_use_iterator imm_iter;
356 use_operand_p use_p;
357 def_operand_p def_p;
358
359 *relevant = vect_unused_in_scope;
360 *live_p = false;
361
362 /* cond stmt other than loop exit cond. */
363 gimple *stmt = STMT_VINFO_STMT (stmt_info);
364 if (is_ctrl_stmt (stmt)
365 && LOOP_VINFO_LOOP_IV_COND (loop_vinfo) != stmt
366 && (!loop->inner || gimple_bb (g: stmt)->loop_father == loop))
367 *relevant = vect_used_in_scope;
368
369 /* changing memory. */
370 if (gimple_code (g: stmt_info->stmt) != GIMPLE_PHI)
371 if (gimple_vdef (g: stmt_info->stmt)
372 && !gimple_clobber_p (s: stmt_info->stmt))
373 {
374 if (dump_enabled_p ())
375 dump_printf_loc (MSG_NOTE, vect_location,
376 "vec_stmt_relevant_p: stmt has vdefs.\n");
377 *relevant = vect_used_in_scope;
378 }
379
380 /* uses outside the loop. */
381 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
382 {
383 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
384 {
385 basic_block bb = gimple_bb (USE_STMT (use_p));
386 if (!flow_bb_inside_loop_p (loop, bb))
387 {
388 if (is_gimple_debug (USE_STMT (use_p)))
389 continue;
390
391 if (dump_enabled_p ())
392 dump_printf_loc (MSG_NOTE, vect_location,
393 "vec_stmt_relevant_p: used out of loop.\n");
394
395 /* We expect all such uses to be in the loop exit phis
396 (because of loop closed form) */
397 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
398
399 *live_p = true;
400 }
401 }
402 }
403
404 /* Check if it's an induction and multiple exits. In this case there will be
405 a usage later on after peeling which is needed for the alternate exit. */
406 if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
407 && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
408 {
409 if (dump_enabled_p ())
410 dump_printf_loc (MSG_NOTE, vect_location,
411 "vec_stmt_relevant_p: induction forced for "
412 "early break.\n");
413 *live_p = true;
414
415 }
416
417 if (*live_p && *relevant == vect_unused_in_scope
418 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
419 {
420 if (dump_enabled_p ())
421 dump_printf_loc (MSG_NOTE, vect_location,
422 "vec_stmt_relevant_p: stmt live but not relevant.\n");
423 *relevant = vect_used_only_live;
424 }
425
426 return (*live_p || *relevant);
427}
428
429
430/* Function exist_non_indexing_operands_for_use_p
431
432 USE is one of the uses attached to STMT_INFO. Check if USE is
433 used in STMT_INFO for anything other than indexing an array. */
434
435static bool
436exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
437{
438 tree operand;
439
440 /* USE corresponds to some operand in STMT. If there is no data
441 reference in STMT, then any operand that corresponds to USE
442 is not indexing an array. */
443 if (!STMT_VINFO_DATA_REF (stmt_info))
444 return true;
445
446 /* STMT has a data_ref. FORNOW this means that its of one of
447 the following forms:
448 -1- ARRAY_REF = var
449 -2- var = ARRAY_REF
450 (This should have been verified in analyze_data_refs).
451
452 'var' in the second case corresponds to a def, not a use,
453 so USE cannot correspond to any operands that are not used
454 for array indexing.
455
456 Therefore, all we need to check is if STMT falls into the
457 first case, and whether var corresponds to USE. */
458
459 gassign *assign = dyn_cast <gassign *> (p: stmt_info->stmt);
460 if (!assign || !gimple_assign_copy_p (assign))
461 {
462 gcall *call = dyn_cast <gcall *> (p: stmt_info->stmt);
463 if (call && gimple_call_internal_p (gs: call))
464 {
465 internal_fn ifn = gimple_call_internal_fn (gs: call);
466 int mask_index = internal_fn_mask_index (ifn);
467 if (mask_index >= 0
468 && use == gimple_call_arg (gs: call, index: mask_index))
469 return true;
470 int stored_value_index = internal_fn_stored_value_index (ifn);
471 if (stored_value_index >= 0
472 && use == gimple_call_arg (gs: call, index: stored_value_index))
473 return true;
474 if (internal_gather_scatter_fn_p (ifn)
475 && use == gimple_call_arg (gs: call, index: 1))
476 return true;
477 }
478 return false;
479 }
480
481 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
482 return false;
483 operand = gimple_assign_rhs1 (gs: assign);
484 if (TREE_CODE (operand) != SSA_NAME)
485 return false;
486
487 if (operand == use)
488 return true;
489
490 return false;
491}
492
493
494/*
495 Function process_use.
496
497 Inputs:
498 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
499 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
500 that defined USE. This is done by calling mark_relevant and passing it
501 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
502 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
503 be performed.
504
505 Outputs:
506 Generally, LIVE_P and RELEVANT are used to define the liveness and
507 relevance info of the DEF_STMT of this USE:
508 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
509 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
510 Exceptions:
511 - case 1: If USE is used only for address computations (e.g. array indexing),
512 which does not need to be directly vectorized, then the liveness/relevance
513 of the respective DEF_STMT is left unchanged.
514 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
515 we skip DEF_STMT cause it had already been processed.
516 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
517 "relevant" will be modified accordingly.
518
519 Return true if everything is as expected. Return false otherwise. */
520
521static opt_result
522process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
523 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
524 bool force)
525{
526 stmt_vec_info dstmt_vinfo;
527 enum vect_def_type dt;
528
529 /* case 1: we are only interested in uses that need to be vectorized. Uses
530 that are used for address computation are not considered relevant. */
531 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_info: stmt_vinfo))
532 return opt_result::success ();
533
534 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
535 return opt_result::failure_at (loc: stmt_vinfo->stmt,
536 fmt: "not vectorized:"
537 " unsupported use in stmt.\n");
538
539 if (!dstmt_vinfo)
540 return opt_result::success ();
541
542 basic_block def_bb = gimple_bb (g: dstmt_vinfo->stmt);
543 basic_block bb = gimple_bb (g: stmt_vinfo->stmt);
544
545 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
546 We have to force the stmt live since the epilogue loop needs it to
547 continue computing the reduction. */
548 if (gimple_code (g: stmt_vinfo->stmt) == GIMPLE_PHI
549 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
550 && gimple_code (g: dstmt_vinfo->stmt) != GIMPLE_PHI
551 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
552 && bb->loop_father == def_bb->loop_father)
553 {
554 if (dump_enabled_p ())
555 dump_printf_loc (MSG_NOTE, vect_location,
556 "reduc-stmt defining reduc-phi in the same nest.\n");
557 vect_mark_relevant (worklist, stmt_info: dstmt_vinfo, relevant, live_p: true);
558 return opt_result::success ();
559 }
560
561 /* case 3a: outer-loop stmt defining an inner-loop stmt:
562 outer-loop-header-bb:
563 d = dstmt_vinfo
564 inner-loop:
565 stmt # use (d)
566 outer-loop-tail-bb:
567 ... */
568 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
569 {
570 if (dump_enabled_p ())
571 dump_printf_loc (MSG_NOTE, vect_location,
572 "outer-loop def-stmt defining inner-loop stmt.\n");
573
574 switch (relevant)
575 {
576 case vect_unused_in_scope:
577 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
578 vect_used_in_scope : vect_unused_in_scope;
579 break;
580
581 case vect_used_in_outer_by_reduction:
582 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
583 relevant = vect_used_by_reduction;
584 break;
585
586 case vect_used_in_outer:
587 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
588 relevant = vect_used_in_scope;
589 break;
590
591 case vect_used_in_scope:
592 break;
593
594 default:
595 gcc_unreachable ();
596 }
597 }
598
599 /* case 3b: inner-loop stmt defining an outer-loop stmt:
600 outer-loop-header-bb:
601 ...
602 inner-loop:
603 d = dstmt_vinfo
604 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
605 stmt # use (d) */
606 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
607 {
608 if (dump_enabled_p ())
609 dump_printf_loc (MSG_NOTE, vect_location,
610 "inner-loop def-stmt defining outer-loop stmt.\n");
611
612 switch (relevant)
613 {
614 case vect_unused_in_scope:
615 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
616 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
617 vect_used_in_outer_by_reduction : vect_unused_in_scope;
618 break;
619
620 case vect_used_by_reduction:
621 case vect_used_only_live:
622 relevant = vect_used_in_outer_by_reduction;
623 break;
624
625 case vect_used_in_scope:
626 relevant = vect_used_in_outer;
627 break;
628
629 default:
630 gcc_unreachable ();
631 }
632 }
633 /* We are also not interested in uses on loop PHI backedges that are
634 inductions. Otherwise we'll needlessly vectorize the IV increment
635 and cause hybrid SLP for SLP inductions. Unless the PHI is live
636 of course. */
637 else if (gimple_code (g: stmt_vinfo->stmt) == GIMPLE_PHI
638 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
639 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
640 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
641 loop_latch_edge (bb->loop_father))
642 == use))
643 {
644 if (dump_enabled_p ())
645 dump_printf_loc (MSG_NOTE, vect_location,
646 "induction value on backedge.\n");
647 return opt_result::success ();
648 }
649
650
651 vect_mark_relevant (worklist, stmt_info: dstmt_vinfo, relevant, live_p: false);
652 return opt_result::success ();
653}
654
655
656/* Function vect_mark_stmts_to_be_vectorized.
657
658 Not all stmts in the loop need to be vectorized. For example:
659
660 for i...
661 for j...
662 1. T0 = i + j
663 2. T1 = a[T0]
664
665 3. j = j + 1
666
667 Stmt 1 and 3 do not need to be vectorized, because loop control and
668 addressing of vectorized data-refs are handled differently.
669
670 This pass detects such stmts. */
671
672opt_result
673vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
674{
675 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
676 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
677 unsigned int nbbs = loop->num_nodes;
678 gimple_stmt_iterator si;
679 unsigned int i;
680 basic_block bb;
681 bool live_p;
682 enum vect_relevant relevant;
683
684 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
685
686 auto_vec<stmt_vec_info, 64> worklist;
687
688 /* 1. Init worklist. */
689 for (i = 0; i < nbbs; i++)
690 {
691 bb = bbs[i];
692 for (si = gsi_start_phis (bb); !gsi_end_p (i: si); gsi_next (i: &si))
693 {
694 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (i: si));
695 if (dump_enabled_p ())
696 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
697 phi_info->stmt);
698
699 if (vect_stmt_relevant_p (stmt_info: phi_info, loop_vinfo, relevant: &relevant, live_p: &live_p))
700 vect_mark_relevant (worklist: &worklist, stmt_info: phi_info, relevant, live_p);
701 }
702 for (si = gsi_start_bb (bb); !gsi_end_p (i: si); gsi_next (i: &si))
703 {
704 if (is_gimple_debug (gs: gsi_stmt (i: si)))
705 continue;
706 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (i: si));
707 if (dump_enabled_p ())
708 dump_printf_loc (MSG_NOTE, vect_location,
709 "init: stmt relevant? %G", stmt_info->stmt);
710
711 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, relevant: &relevant, live_p: &live_p))
712 vect_mark_relevant (worklist: &worklist, stmt_info, relevant, live_p);
713 }
714 }
715
716 /* 2. Process_worklist */
717 while (worklist.length () > 0)
718 {
719 use_operand_p use_p;
720 ssa_op_iter iter;
721
722 stmt_vec_info stmt_vinfo = worklist.pop ();
723 if (dump_enabled_p ())
724 dump_printf_loc (MSG_NOTE, vect_location,
725 "worklist: examine stmt: %G", stmt_vinfo->stmt);
726
727 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
728 (DEF_STMT) as relevant/irrelevant according to the relevance property
729 of STMT. */
730 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
731
732 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
733 propagated as is to the DEF_STMTs of its USEs.
734
735 One exception is when STMT has been identified as defining a reduction
736 variable; in this case we set the relevance to vect_used_by_reduction.
737 This is because we distinguish between two kinds of relevant stmts -
738 those that are used by a reduction computation, and those that are
739 (also) used by a regular computation. This allows us later on to
740 identify stmts that are used solely by a reduction, and therefore the
741 order of the results that they produce does not have to be kept. */
742
743 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
744 {
745 case vect_reduction_def:
746 gcc_assert (relevant != vect_unused_in_scope);
747 if (relevant != vect_unused_in_scope
748 && relevant != vect_used_in_scope
749 && relevant != vect_used_by_reduction
750 && relevant != vect_used_only_live)
751 return opt_result::failure_at
752 (loc: stmt_vinfo->stmt, fmt: "unsupported use of reduction.\n");
753 break;
754
755 case vect_nested_cycle:
756 if (relevant != vect_unused_in_scope
757 && relevant != vect_used_in_outer_by_reduction
758 && relevant != vect_used_in_outer)
759 return opt_result::failure_at
760 (loc: stmt_vinfo->stmt, fmt: "unsupported use of nested cycle.\n");
761 break;
762
763 case vect_double_reduction_def:
764 if (relevant != vect_unused_in_scope
765 && relevant != vect_used_by_reduction
766 && relevant != vect_used_only_live)
767 return opt_result::failure_at
768 (loc: stmt_vinfo->stmt, fmt: "unsupported use of double reduction.\n");
769 break;
770
771 default:
772 break;
773 }
774
775 if (is_pattern_stmt_p (stmt_info: stmt_vinfo))
776 {
777 /* Pattern statements are not inserted into the code, so
778 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
779 have to scan the RHS or function arguments instead. */
780 if (gassign *assign = dyn_cast <gassign *> (p: stmt_vinfo->stmt))
781 {
782 enum tree_code rhs_code = gimple_assign_rhs_code (gs: assign);
783 tree op = gimple_assign_rhs1 (gs: assign);
784
785 i = 1;
786 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
787 {
788 opt_result res
789 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
790 loop_vinfo, relevant, worklist: &worklist, force: false);
791 if (!res)
792 return res;
793 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
794 loop_vinfo, relevant, worklist: &worklist, force: false);
795 if (!res)
796 return res;
797 i = 2;
798 }
799 for (; i < gimple_num_ops (gs: assign); i++)
800 {
801 op = gimple_op (gs: assign, i);
802 if (TREE_CODE (op) == SSA_NAME)
803 {
804 opt_result res
805 = process_use (stmt_vinfo, use: op, loop_vinfo, relevant,
806 worklist: &worklist, force: false);
807 if (!res)
808 return res;
809 }
810 }
811 }
812 else if (gcond *cond = dyn_cast <gcond *> (p: stmt_vinfo->stmt))
813 {
814 tree_code rhs_code = gimple_cond_code (gs: cond);
815 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
816 opt_result res
817 = process_use (stmt_vinfo, use: gimple_cond_lhs (gs: cond),
818 loop_vinfo, relevant, worklist: &worklist, force: false);
819 if (!res)
820 return res;
821 res = process_use (stmt_vinfo, use: gimple_cond_rhs (gs: cond),
822 loop_vinfo, relevant, worklist: &worklist, force: false);
823 if (!res)
824 return res;
825 }
826 else if (gcall *call = dyn_cast <gcall *> (p: stmt_vinfo->stmt))
827 {
828 for (i = 0; i < gimple_call_num_args (gs: call); i++)
829 {
830 tree arg = gimple_call_arg (gs: call, index: i);
831 opt_result res
832 = process_use (stmt_vinfo, use: arg, loop_vinfo, relevant,
833 worklist: &worklist, force: false);
834 if (!res)
835 return res;
836 }
837 }
838 else
839 gcc_unreachable ();
840 }
841 else
842 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
843 {
844 tree op = USE_FROM_PTR (use_p);
845 opt_result res
846 = process_use (stmt_vinfo, use: op, loop_vinfo, relevant,
847 worklist: &worklist, force: false);
848 if (!res)
849 return res;
850 }
851
852 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
853 {
854 gather_scatter_info gs_info;
855 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
856 gcc_unreachable ();
857 opt_result res
858 = process_use (stmt_vinfo, use: gs_info.offset, loop_vinfo, relevant,
859 worklist: &worklist, force: true);
860 if (!res)
861 {
862 if (fatal)
863 *fatal = false;
864 return res;
865 }
866 }
867 } /* while worklist */
868
869 return opt_result::success ();
870}
871
872/* Function vect_model_simple_cost.
873
874 Models cost for simple operations, i.e. those that only emit ncopies of a
875 single op. Right now, this does not account for multiple insns that could
876 be generated for the single vector op. We will handle that shortly. */
877
878static void
879vect_model_simple_cost (vec_info *,
880 stmt_vec_info stmt_info, int ncopies,
881 enum vect_def_type *dt,
882 int ndts,
883 slp_tree node,
884 stmt_vector_for_cost *cost_vec,
885 vect_cost_for_stmt kind = vector_stmt)
886{
887 int inside_cost = 0, prologue_cost = 0;
888
889 gcc_assert (cost_vec != NULL);
890
891 /* ??? Somehow we need to fix this at the callers. */
892 if (node)
893 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
894
895 if (!node)
896 /* Cost the "broadcast" of a scalar operand in to a vector operand.
897 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
898 cost model. */
899 for (int i = 0; i < ndts; i++)
900 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
901 prologue_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: scalar_to_vec,
902 stmt_info, misalign: 0, where: vect_prologue);
903
904 /* Pass the inside-of-loop statements to the target-specific cost model. */
905 inside_cost += record_stmt_cost (body_cost_vec: cost_vec, count: ncopies, kind,
906 stmt_info, misalign: 0, where: vect_body);
907
908 if (dump_enabled_p ())
909 dump_printf_loc (MSG_NOTE, vect_location,
910 "vect_model_simple_cost: inside_cost = %d, "
911 "prologue_cost = %d .\n", inside_cost, prologue_cost);
912}
913
914
915/* Model cost for type demotion and promotion operations. PWR is
916 normally zero for single-step promotions and demotions. It will be
917 one if two-step promotion/demotion is required, and so on. NCOPIES
918 is the number of vector results (and thus number of instructions)
919 for the narrowest end of the operation chain. Each additional
920 step doubles the number of instructions required. If WIDEN_ARITH
921 is true the stmt is doing widening arithmetic. */
922
923static void
924vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
925 enum vect_def_type *dt,
926 unsigned int ncopies, int pwr,
927 stmt_vector_for_cost *cost_vec,
928 bool widen_arith)
929{
930 int i;
931 int inside_cost = 0, prologue_cost = 0;
932
933 for (i = 0; i < pwr + 1; i++)
934 {
935 inside_cost += record_stmt_cost (body_cost_vec: cost_vec, count: ncopies,
936 kind: widen_arith
937 ? vector_stmt : vec_promote_demote,
938 stmt_info, misalign: 0, where: vect_body);
939 ncopies *= 2;
940 }
941
942 /* FORNOW: Assuming maximum 2 args per stmts. */
943 for (i = 0; i < 2; i++)
944 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
945 prologue_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: vector_stmt,
946 stmt_info, misalign: 0, where: vect_prologue);
947
948 if (dump_enabled_p ())
949 dump_printf_loc (MSG_NOTE, vect_location,
950 "vect_model_promotion_demotion_cost: inside_cost = %d, "
951 "prologue_cost = %d .\n", inside_cost, prologue_cost);
952}
953
954/* Returns true if the current function returns DECL. */
955
956static bool
957cfun_returns (tree decl)
958{
959 edge_iterator ei;
960 edge e;
961 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
962 {
963 greturn *ret = safe_dyn_cast <greturn *> (p: *gsi_last_bb (bb: e->src));
964 if (!ret)
965 continue;
966 if (gimple_return_retval (gs: ret) == decl)
967 return true;
968 /* We often end up with an aggregate copy to the result decl,
969 handle that case as well. First skip intermediate clobbers
970 though. */
971 gimple *def = ret;
972 do
973 {
974 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
975 }
976 while (gimple_clobber_p (s: def));
977 if (is_a <gassign *> (p: def)
978 && gimple_assign_lhs (gs: def) == gimple_return_retval (gs: ret)
979 && gimple_assign_rhs1 (gs: def) == decl)
980 return true;
981 }
982 return false;
983}
984
985/* Calculate cost of DR's memory access. */
986void
987vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
988 dr_alignment_support alignment_support_scheme,
989 int misalignment,
990 unsigned int *inside_cost,
991 stmt_vector_for_cost *body_cost_vec)
992{
993 switch (alignment_support_scheme)
994 {
995 case dr_aligned:
996 {
997 *inside_cost += record_stmt_cost (body_cost_vec, count: ncopies,
998 kind: vector_store, stmt_info, misalign: 0,
999 where: vect_body);
1000
1001 if (dump_enabled_p ())
1002 dump_printf_loc (MSG_NOTE, vect_location,
1003 "vect_model_store_cost: aligned.\n");
1004 break;
1005 }
1006
1007 case dr_unaligned_supported:
1008 {
1009 /* Here, we assign an additional cost for the unaligned store. */
1010 *inside_cost += record_stmt_cost (body_cost_vec, count: ncopies,
1011 kind: unaligned_store, stmt_info,
1012 misalign: misalignment, where: vect_body);
1013 if (dump_enabled_p ())
1014 dump_printf_loc (MSG_NOTE, vect_location,
1015 "vect_model_store_cost: unaligned supported by "
1016 "hardware.\n");
1017 break;
1018 }
1019
1020 case dr_unaligned_unsupported:
1021 {
1022 *inside_cost = VECT_MAX_COST;
1023
1024 if (dump_enabled_p ())
1025 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1026 "vect_model_store_cost: unsupported access.\n");
1027 break;
1028 }
1029
1030 default:
1031 gcc_unreachable ();
1032 }
1033}
1034
1035/* Calculate cost of DR's memory access. */
1036void
1037vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1038 dr_alignment_support alignment_support_scheme,
1039 int misalignment,
1040 bool add_realign_cost, unsigned int *inside_cost,
1041 unsigned int *prologue_cost,
1042 stmt_vector_for_cost *prologue_cost_vec,
1043 stmt_vector_for_cost *body_cost_vec,
1044 bool record_prologue_costs)
1045{
1046 switch (alignment_support_scheme)
1047 {
1048 case dr_aligned:
1049 {
1050 *inside_cost += record_stmt_cost (body_cost_vec, count: ncopies, kind: vector_load,
1051 stmt_info, misalign: 0, where: vect_body);
1052
1053 if (dump_enabled_p ())
1054 dump_printf_loc (MSG_NOTE, vect_location,
1055 "vect_model_load_cost: aligned.\n");
1056
1057 break;
1058 }
1059 case dr_unaligned_supported:
1060 {
1061 /* Here, we assign an additional cost for the unaligned load. */
1062 *inside_cost += record_stmt_cost (body_cost_vec, count: ncopies,
1063 kind: unaligned_load, stmt_info,
1064 misalign: misalignment, where: vect_body);
1065
1066 if (dump_enabled_p ())
1067 dump_printf_loc (MSG_NOTE, vect_location,
1068 "vect_model_load_cost: unaligned supported by "
1069 "hardware.\n");
1070
1071 break;
1072 }
1073 case dr_explicit_realign:
1074 {
1075 *inside_cost += record_stmt_cost (body_cost_vec, count: ncopies * 2,
1076 kind: vector_load, stmt_info, misalign: 0, where: vect_body);
1077 *inside_cost += record_stmt_cost (body_cost_vec, count: ncopies,
1078 kind: vec_perm, stmt_info, misalign: 0, where: vect_body);
1079
1080 /* FIXME: If the misalignment remains fixed across the iterations of
1081 the containing loop, the following cost should be added to the
1082 prologue costs. */
1083 if (targetm.vectorize.builtin_mask_for_load)
1084 *inside_cost += record_stmt_cost (body_cost_vec, count: 1, kind: vector_stmt,
1085 stmt_info, misalign: 0, where: vect_body);
1086
1087 if (dump_enabled_p ())
1088 dump_printf_loc (MSG_NOTE, vect_location,
1089 "vect_model_load_cost: explicit realign\n");
1090
1091 break;
1092 }
1093 case dr_explicit_realign_optimized:
1094 {
1095 if (dump_enabled_p ())
1096 dump_printf_loc (MSG_NOTE, vect_location,
1097 "vect_model_load_cost: unaligned software "
1098 "pipelined.\n");
1099
1100 /* Unaligned software pipeline has a load of an address, an initial
1101 load, and possibly a mask operation to "prime" the loop. However,
1102 if this is an access in a group of loads, which provide grouped
1103 access, then the above cost should only be considered for one
1104 access in the group. Inside the loop, there is a load op
1105 and a realignment op. */
1106
1107 if (add_realign_cost && record_prologue_costs)
1108 {
1109 *prologue_cost += record_stmt_cost (body_cost_vec: prologue_cost_vec, count: 2,
1110 kind: vector_stmt, stmt_info,
1111 misalign: 0, where: vect_prologue);
1112 if (targetm.vectorize.builtin_mask_for_load)
1113 *prologue_cost += record_stmt_cost (body_cost_vec: prologue_cost_vec, count: 1,
1114 kind: vector_stmt, stmt_info,
1115 misalign: 0, where: vect_prologue);
1116 }
1117
1118 *inside_cost += record_stmt_cost (body_cost_vec, count: ncopies, kind: vector_load,
1119 stmt_info, misalign: 0, where: vect_body);
1120 *inside_cost += record_stmt_cost (body_cost_vec, count: ncopies, kind: vec_perm,
1121 stmt_info, misalign: 0, where: vect_body);
1122
1123 if (dump_enabled_p ())
1124 dump_printf_loc (MSG_NOTE, vect_location,
1125 "vect_model_load_cost: explicit realign optimized"
1126 "\n");
1127
1128 break;
1129 }
1130
1131 case dr_unaligned_unsupported:
1132 {
1133 *inside_cost = VECT_MAX_COST;
1134
1135 if (dump_enabled_p ())
1136 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1137 "vect_model_load_cost: unsupported access.\n");
1138 break;
1139 }
1140
1141 default:
1142 gcc_unreachable ();
1143 }
1144}
1145
1146/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1147 the loop preheader for the vectorized stmt STMT_VINFO. */
1148
1149static void
1150vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1151 gimple_stmt_iterator *gsi)
1152{
1153 if (gsi)
1154 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1155 else
1156 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1157
1158 if (dump_enabled_p ())
1159 dump_printf_loc (MSG_NOTE, vect_location,
1160 "created new init_stmt: %G", new_stmt);
1161}
1162
1163/* Function vect_init_vector.
1164
1165 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1166 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1167 vector type a vector with all elements equal to VAL is created first.
1168 Place the initialization at GSI if it is not NULL. Otherwise, place the
1169 initialization at the loop preheader.
1170 Return the DEF of INIT_STMT.
1171 It will be used in the vectorization of STMT_INFO. */
1172
1173tree
1174vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1175 gimple_stmt_iterator *gsi)
1176{
1177 gimple *init_stmt;
1178 tree new_temp;
1179
1180 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1181 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1182 {
1183 gcc_assert (VECTOR_TYPE_P (type));
1184 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1185 {
1186 /* Scalar boolean value should be transformed into
1187 all zeros or all ones value before building a vector. */
1188 if (VECTOR_BOOLEAN_TYPE_P (type))
1189 {
1190 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1191 tree false_val = build_zero_cst (TREE_TYPE (type));
1192
1193 if (CONSTANT_CLASS_P (val))
1194 val = integer_zerop (val) ? false_val : true_val;
1195 else
1196 {
1197 new_temp = make_ssa_name (TREE_TYPE (type));
1198 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1199 val, true_val, false_val);
1200 vect_init_vector_1 (vinfo, stmt_vinfo: stmt_info, new_stmt: init_stmt, gsi);
1201 val = new_temp;
1202 }
1203 }
1204 else
1205 {
1206 gimple_seq stmts = NULL;
1207 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1208 val = gimple_build (seq: &stmts, code: VIEW_CONVERT_EXPR,
1209 TREE_TYPE (type), ops: val);
1210 else
1211 /* ??? Condition vectorization expects us to do
1212 promotion of invariant/external defs. */
1213 val = gimple_convert (seq: &stmts, TREE_TYPE (type), op: val);
1214 for (gimple_stmt_iterator gsi2 = gsi_start (seq&: stmts);
1215 !gsi_end_p (i: gsi2); )
1216 {
1217 init_stmt = gsi_stmt (i: gsi2);
1218 gsi_remove (&gsi2, false);
1219 vect_init_vector_1 (vinfo, stmt_vinfo: stmt_info, new_stmt: init_stmt, gsi);
1220 }
1221 }
1222 }
1223 val = build_vector_from_val (type, val);
1224 }
1225
1226 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1227 init_stmt = gimple_build_assign (new_temp, val);
1228 vect_init_vector_1 (vinfo, stmt_vinfo: stmt_info, new_stmt: init_stmt, gsi);
1229 return new_temp;
1230}
1231
1232
1233/* Function vect_get_vec_defs_for_operand.
1234
1235 OP is an operand in STMT_VINFO. This function returns a vector of
1236 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1237
1238 In the case that OP is an SSA_NAME which is defined in the loop, then
1239 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1240
1241 In case OP is an invariant or constant, a new stmt that creates a vector def
1242 needs to be introduced. VECTYPE may be used to specify a required type for
1243 vector invariant. */
1244
1245void
1246vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1247 unsigned ncopies,
1248 tree op, vec<tree> *vec_oprnds, tree vectype)
1249{
1250 gimple *def_stmt;
1251 enum vect_def_type dt;
1252 bool is_simple_use;
1253 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
1254
1255 if (dump_enabled_p ())
1256 dump_printf_loc (MSG_NOTE, vect_location,
1257 "vect_get_vec_defs_for_operand: %T\n", op);
1258
1259 stmt_vec_info def_stmt_info;
1260 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1261 &def_stmt_info, &def_stmt);
1262 gcc_assert (is_simple_use);
1263 if (def_stmt && dump_enabled_p ())
1264 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1265
1266 vec_oprnds->create (nelems: ncopies);
1267 if (dt == vect_constant_def || dt == vect_external_def)
1268 {
1269 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1270 tree vector_type;
1271
1272 if (vectype)
1273 vector_type = vectype;
1274 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1275 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1276 vector_type = truth_type_for (stmt_vectype);
1277 else
1278 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1279
1280 gcc_assert (vector_type);
1281 tree vop = vect_init_vector (vinfo, stmt_info: stmt_vinfo, val: op, type: vector_type, NULL);
1282 while (ncopies--)
1283 vec_oprnds->quick_push (obj: vop);
1284 }
1285 else
1286 {
1287 def_stmt_info = vect_stmt_to_vectorize (stmt_info: def_stmt_info);
1288 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1289 for (unsigned i = 0; i < ncopies; ++i)
1290 vec_oprnds->quick_push (obj: gimple_get_lhs
1291 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1292 }
1293}
1294
1295
1296/* Get vectorized definitions for OP0 and OP1. */
1297
1298void
1299vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1300 unsigned ncopies,
1301 tree op0, tree vectype0, vec<tree> *vec_oprnds0,
1302 tree op1, tree vectype1, vec<tree> *vec_oprnds1,
1303 tree op2, tree vectype2, vec<tree> *vec_oprnds2,
1304 tree op3, tree vectype3, vec<tree> *vec_oprnds3)
1305{
1306 if (slp_node)
1307 {
1308 if (op0)
1309 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1310 if (op1)
1311 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1312 if (op2)
1313 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1314 if (op3)
1315 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1316 }
1317 else
1318 {
1319 if (op0)
1320 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info, ncopies,
1321 op: op0, vec_oprnds: vec_oprnds0, vectype: vectype0);
1322 if (op1)
1323 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info, ncopies,
1324 op: op1, vec_oprnds: vec_oprnds1, vectype: vectype1);
1325 if (op2)
1326 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info, ncopies,
1327 op: op2, vec_oprnds: vec_oprnds2, vectype: vectype2);
1328 if (op3)
1329 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info, ncopies,
1330 op: op3, vec_oprnds: vec_oprnds3, vectype: vectype3);
1331 }
1332}
1333
1334void
1335vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1336 unsigned ncopies,
1337 tree op0, vec<tree> *vec_oprnds0,
1338 tree op1, vec<tree> *vec_oprnds1,
1339 tree op2, vec<tree> *vec_oprnds2,
1340 tree op3, vec<tree> *vec_oprnds3)
1341{
1342 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1343 op0, NULL_TREE, vec_oprnds0,
1344 op1, NULL_TREE, vec_oprnds1,
1345 op2, NULL_TREE, vec_oprnds2,
1346 op3, NULL_TREE, vec_oprnds3);
1347}
1348
1349/* Helper function called by vect_finish_replace_stmt and
1350 vect_finish_stmt_generation. Set the location of the new
1351 statement and create and return a stmt_vec_info for it. */
1352
1353static void
1354vect_finish_stmt_generation_1 (vec_info *,
1355 stmt_vec_info stmt_info, gimple *vec_stmt)
1356{
1357 if (dump_enabled_p ())
1358 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1359
1360 if (stmt_info)
1361 {
1362 gimple_set_location (g: vec_stmt, location: gimple_location (g: stmt_info->stmt));
1363
1364 /* While EH edges will generally prevent vectorization, stmt might
1365 e.g. be in a must-not-throw region. Ensure newly created stmts
1366 that could throw are part of the same region. */
1367 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1368 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1369 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1370 }
1371 else
1372 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1373}
1374
1375/* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1376 which sets the same scalar result as STMT_INFO did. Create and return a
1377 stmt_vec_info for VEC_STMT. */
1378
1379void
1380vect_finish_replace_stmt (vec_info *vinfo,
1381 stmt_vec_info stmt_info, gimple *vec_stmt)
1382{
1383 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1384 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1385
1386 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1387 gsi_replace (&gsi, vec_stmt, true);
1388
1389 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1390}
1391
1392/* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1393 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1394
1395void
1396vect_finish_stmt_generation (vec_info *vinfo,
1397 stmt_vec_info stmt_info, gimple *vec_stmt,
1398 gimple_stmt_iterator *gsi)
1399{
1400 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1401
1402 if (!gsi_end_p (i: *gsi)
1403 && gimple_has_mem_ops (g: vec_stmt))
1404 {
1405 gimple *at_stmt = gsi_stmt (i: *gsi);
1406 tree vuse = gimple_vuse (g: at_stmt);
1407 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1408 {
1409 tree vdef = gimple_vdef (g: at_stmt);
1410 gimple_set_vuse (g: vec_stmt, vuse: gimple_vuse (g: at_stmt));
1411 gimple_set_modified (s: vec_stmt, modifiedp: true);
1412 /* If we have an SSA vuse and insert a store, update virtual
1413 SSA form to avoid triggering the renamer. Do so only
1414 if we can easily see all uses - which is what almost always
1415 happens with the way vectorized stmts are inserted. */
1416 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1417 && ((is_gimple_assign (gs: vec_stmt)
1418 && !is_gimple_reg (gimple_assign_lhs (gs: vec_stmt)))
1419 || (is_gimple_call (gs: vec_stmt)
1420 && (!(gimple_call_flags (vec_stmt)
1421 & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
1422 || (gimple_call_lhs (gs: vec_stmt)
1423 && !is_gimple_reg (gimple_call_lhs (gs: vec_stmt)))))))
1424 {
1425 tree new_vdef = copy_ssa_name (var: vuse, stmt: vec_stmt);
1426 gimple_set_vdef (g: vec_stmt, vdef: new_vdef);
1427 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1428 }
1429 }
1430 }
1431 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1432 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1433}
1434
1435/* We want to vectorize a call to combined function CFN with function
1436 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1437 as the types of all inputs. Check whether this is possible using
1438 an internal function, returning its code if so or IFN_LAST if not. */
1439
1440static internal_fn
1441vectorizable_internal_function (combined_fn cfn, tree fndecl,
1442 tree vectype_out, tree vectype_in)
1443{
1444 internal_fn ifn;
1445 if (internal_fn_p (code: cfn))
1446 ifn = as_internal_fn (code: cfn);
1447 else
1448 ifn = associated_internal_fn (fndecl);
1449 if (ifn != IFN_LAST && direct_internal_fn_p (fn: ifn))
1450 {
1451 const direct_internal_fn_info &info = direct_internal_fn (fn: ifn);
1452 if (info.vectorizable)
1453 {
1454 bool same_size_p = TYPE_SIZE (vectype_in) == TYPE_SIZE (vectype_out);
1455 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1456 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1457
1458 /* The type size of both the vectype_in and vectype_out should be
1459 exactly the same when vectype_out isn't participating the optab.
1460 While there is no restriction for type size when vectype_out
1461 is part of the optab query. */
1462 if (type0 != vectype_out && type1 != vectype_out && !same_size_p)
1463 return IFN_LAST;
1464
1465 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1466 OPTIMIZE_FOR_SPEED))
1467 return ifn;
1468 }
1469 }
1470 return IFN_LAST;
1471}
1472
1473
1474static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1475 gimple_stmt_iterator *);
1476
1477/* Check whether a load or store statement in the loop described by
1478 LOOP_VINFO is possible in a loop using partial vectors. This is
1479 testing whether the vectorizer pass has the appropriate support,
1480 as well as whether the target does.
1481
1482 VLS_TYPE says whether the statement is a load or store and VECTYPE
1483 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1484 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1485 says how the load or store is going to be implemented and GROUP_SIZE
1486 is the number of load or store statements in the containing group.
1487 If the access is a gather load or scatter store, GS_INFO describes
1488 its arguments. If the load or store is conditional, SCALAR_MASK is the
1489 condition under which it occurs.
1490
1491 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1492 vectors is not supported, otherwise record the required rgroup control
1493 types. */
1494
1495static void
1496check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1497 slp_tree slp_node,
1498 vec_load_store_type vls_type,
1499 int group_size,
1500 vect_memory_access_type
1501 memory_access_type,
1502 gather_scatter_info *gs_info,
1503 tree scalar_mask)
1504{
1505 /* Invariant loads need no special support. */
1506 if (memory_access_type == VMAT_INVARIANT)
1507 return;
1508
1509 unsigned int nvectors;
1510 if (slp_node)
1511 nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1512 else
1513 nvectors = vect_get_num_copies (loop_vinfo, vectype);
1514
1515 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1516 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1517 machine_mode vecmode = TYPE_MODE (vectype);
1518 bool is_load = (vls_type == VLS_LOAD);
1519 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1520 {
1521 internal_fn ifn
1522 = (is_load ? vect_load_lanes_supported (vectype, group_size, true)
1523 : vect_store_lanes_supported (vectype, group_size, true));
1524 if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
1525 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1526 else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
1527 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1528 scalar_mask);
1529 else
1530 {
1531 if (dump_enabled_p ())
1532 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1533 "can't operate on partial vectors because"
1534 " the target doesn't have an appropriate"
1535 " load/store-lanes instruction.\n");
1536 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1537 }
1538 return;
1539 }
1540
1541 if (memory_access_type == VMAT_GATHER_SCATTER)
1542 {
1543 internal_fn ifn = (is_load
1544 ? IFN_MASK_GATHER_LOAD
1545 : IFN_MASK_SCATTER_STORE);
1546 internal_fn len_ifn = (is_load
1547 ? IFN_MASK_LEN_GATHER_LOAD
1548 : IFN_MASK_LEN_SCATTER_STORE);
1549 if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
1550 gs_info->memory_type,
1551 gs_info->offset_vectype,
1552 gs_info->scale))
1553 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1554 else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
1555 gs_info->memory_type,
1556 gs_info->offset_vectype,
1557 gs_info->scale))
1558 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1559 scalar_mask);
1560 else
1561 {
1562 if (dump_enabled_p ())
1563 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1564 "can't operate on partial vectors because"
1565 " the target doesn't have an appropriate"
1566 " gather load or scatter store instruction.\n");
1567 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1568 }
1569 return;
1570 }
1571
1572 if (memory_access_type != VMAT_CONTIGUOUS
1573 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1574 {
1575 /* Element X of the data must come from iteration i * VF + X of the
1576 scalar loop. We need more work to support other mappings. */
1577 if (dump_enabled_p ())
1578 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1579 "can't operate on partial vectors because an"
1580 " access isn't contiguous.\n");
1581 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1582 return;
1583 }
1584
1585 if (!VECTOR_MODE_P (vecmode))
1586 {
1587 if (dump_enabled_p ())
1588 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1589 "can't operate on partial vectors when emulating"
1590 " vector operations.\n");
1591 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1592 return;
1593 }
1594
1595 /* We might load more scalars than we need for permuting SLP loads.
1596 We checked in get_group_load_store_type that the extra elements
1597 don't leak into a new vector. */
1598 auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
1599 {
1600 unsigned int nvectors;
1601 if (can_div_away_from_zero_p (a: size, b: nunits, quotient: &nvectors))
1602 return nvectors;
1603 gcc_unreachable ();
1604 };
1605
1606 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (node: vectype);
1607 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1608 machine_mode mask_mode;
1609 machine_mode vmode;
1610 bool using_partial_vectors_p = false;
1611 if (get_len_load_store_mode (vecmode, is_load).exists (mode: &vmode))
1612 {
1613 nvectors = group_memory_nvectors (group_size * vf, nunits);
1614 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1615 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1616 using_partial_vectors_p = true;
1617 }
1618 else if (targetm.vectorize.get_mask_mode (vecmode).exists (mode: &mask_mode)
1619 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1620 {
1621 nvectors = group_memory_nvectors (group_size * vf, nunits);
1622 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1623 using_partial_vectors_p = true;
1624 }
1625
1626 if (!using_partial_vectors_p)
1627 {
1628 if (dump_enabled_p ())
1629 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1630 "can't operate on partial vectors because the"
1631 " target doesn't have the appropriate partial"
1632 " vectorization load or store.\n");
1633 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1634 }
1635}
1636
1637/* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1638 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1639 that needs to be applied to all loads and stores in a vectorized loop.
1640 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1641 otherwise return VEC_MASK & LOOP_MASK.
1642
1643 MASK_TYPE is the type of both masks. If new statements are needed,
1644 insert them before GSI. */
1645
1646static tree
1647prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1648 tree vec_mask, gimple_stmt_iterator *gsi)
1649{
1650 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1651 if (!loop_mask)
1652 return vec_mask;
1653
1654 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1655
1656 if (loop_vinfo->vec_cond_masked_set.contains (k: { vec_mask, loop_mask }))
1657 return vec_mask;
1658
1659 tree and_res = make_temp_ssa_name (type: mask_type, NULL, name: "vec_mask_and");
1660 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1661 vec_mask, loop_mask);
1662
1663 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1664 return and_res;
1665}
1666
1667/* Determine whether we can use a gather load or scatter store to vectorize
1668 strided load or store STMT_INFO by truncating the current offset to a
1669 smaller width. We need to be able to construct an offset vector:
1670
1671 { 0, X, X*2, X*3, ... }
1672
1673 without loss of precision, where X is STMT_INFO's DR_STEP.
1674
1675 Return true if this is possible, describing the gather load or scatter
1676 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
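/* For example (illustrative values only): if DR_STEP is 4 bytes and the
   loop is known to run at most 100 iterations, the largest offset needed
   is about 100 * 4 = 400, which fits in a 16-bit unsigned offset type
   even if the original offset type was 64 bits wide. */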
1677
1678static bool
1679vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1680 loop_vec_info loop_vinfo, bool masked_p,
1681 gather_scatter_info *gs_info)
1682{
1683 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1684 data_reference *dr = dr_info->dr;
1685 tree step = DR_STEP (dr);
1686 if (TREE_CODE (step) != INTEGER_CST)
1687 {
1688 /* ??? Perhaps we could use range information here? */
1689 if (dump_enabled_p ())
1690 dump_printf_loc (MSG_NOTE, vect_location,
1691 "cannot truncate variable step.\n");
1692 return false;
1693 }
1694
1695 /* Get the number of bits in an element. */
1696 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1697 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1698 unsigned int element_bits = GET_MODE_BITSIZE (mode: element_mode);
1699
1700 /* Set COUNT to the upper limit on the number of elements - 1.
1701 Start with the maximum vectorization factor. */
1702 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1703
1704 /* Try lowering COUNT to the number of scalar latch iterations. */
1705 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1706 widest_int max_iters;
1707 if (max_loop_iterations (loop, &max_iters)
1708 && max_iters < count)
1709 count = max_iters.to_shwi ();
1710
1711 /* Try scales of 1 and the element size. */
1712 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1713 wi::overflow_type overflow = wi::OVF_NONE;
1714 for (int i = 0; i < 2; ++i)
1715 {
1716 int scale = scales[i];
1717 widest_int factor;
1718 if (!wi::multiple_of_p (x: wi::to_widest (t: step), y: scale, sgn: SIGNED, res: &factor))
1719 continue;
1720
1721 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1722 widest_int range = wi::mul (x: count, y: factor, sgn: SIGNED, overflow: &overflow);
1723 if (overflow)
1724 continue;
1725 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1726 unsigned int min_offset_bits = wi::min_precision (x: range, sgn: sign);
1727
1728 /* Find the narrowest viable offset type. */
1729 unsigned int offset_bits = 1U << ceil_log2 (x: min_offset_bits);
1730 tree offset_type = build_nonstandard_integer_type (offset_bits,
1731 sign == UNSIGNED);
1732
1733 /* See whether the target supports the operation with an offset
1734 no narrower than OFFSET_TYPE. */
1735 tree memory_type = TREE_TYPE (DR_REF (dr));
1736 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1737 vectype, memory_type, offset_type, scale,
1738 &gs_info->ifn, &gs_info->offset_vectype)
1739 || gs_info->ifn == IFN_LAST)
1740 continue;
1741
1742 gs_info->decl = NULL_TREE;
1743 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1744 but we don't need to store that here. */
1745 gs_info->base = NULL_TREE;
1746 gs_info->element_type = TREE_TYPE (vectype);
1747 gs_info->offset = fold_convert (offset_type, step);
1748 gs_info->offset_dt = vect_constant_def;
1749 gs_info->scale = scale;
1750 gs_info->memory_type = memory_type;
1751 return true;
1752 }
1753
1754 if (overflow && dump_enabled_p ())
1755 dump_printf_loc (MSG_NOTE, vect_location,
1756 "truncating gather/scatter offset to %d bits"
1757 " might change its value.\n", element_bits);
1758
1759 return false;
1760}
1761
1762/* Return true if we can use gather/scatter internal functions to
1763 vectorize STMT_INFO, which is a grouped or strided load or store.
1764 MASKED_P is true if load or store is conditional. When returning
1765 true, fill in GS_INFO with the information required to perform the
1766 operation. */
1767
1768static bool
1769vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1770 loop_vec_info loop_vinfo, bool masked_p,
1771 gather_scatter_info *gs_info)
1772{
1773 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1774 || gs_info->ifn == IFN_LAST)
1775 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1776 masked_p, gs_info);
1777
1778 tree old_offset_type = TREE_TYPE (gs_info->offset);
1779 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1780
1781 gcc_assert (TYPE_PRECISION (new_offset_type)
1782 >= TYPE_PRECISION (old_offset_type));
1783 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1784
1785 if (dump_enabled_p ())
1786 dump_printf_loc (MSG_NOTE, vect_location,
1787 "using gather/scatter for strided/grouped access,"
1788 " scale = %d\n", gs_info->scale);
1789
1790 return true;
1791}
1792
1793/* STMT_INFO is a non-strided load or store, meaning that it accesses
1794 elements with a known constant step. Return -1 if that step
1795 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1796
1797static int
1798compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1799{
1800 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1801 return tree_int_cst_compare (t1: vect_dr_behavior (vinfo, dr_info)->step,
1802 size_zero_node);
1803}
1804
1805/* If the target supports a permute mask that reverses the elements in
1806 a vector of type VECTYPE, return that mask, otherwise return null. */
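/* For example, for V4SI the required permutation is { 3, 2, 1, 0 }. */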
1807
1808tree
1809perm_mask_for_reverse (tree vectype)
1810{
1811 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (node: vectype);
1812
1813 /* The encoding has a single stepped pattern. */
1814 vec_perm_builder sel (nunits, 1, 3);
1815 for (int i = 0; i < 3; ++i)
1816 sel.quick_push (obj: nunits - 1 - i);
1817
1818 vec_perm_indices indices (sel, 1, nunits);
1819 if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
1820 indices))
1821 return NULL_TREE;
1822 return vect_gen_perm_mask_checked (vectype, indices);
1823}
1824
1825/* A subroutine of get_load_store_type, with a subset of the same
1826 arguments. Handle the case where STMT_INFO is a load or store that
1827 accesses consecutive elements with a negative step. Sets *POFFSET
1828 to the offset to be applied to the DR for the first access. */
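/* For example, for a V4SI access with 4-byte elements and a negative
   step, the first vector access starts 3 elements (12 bytes) before the
   address of the DR, so *POFFSET is set to -12. */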
1829
1830static vect_memory_access_type
1831get_negative_load_store_type (vec_info *vinfo,
1832 stmt_vec_info stmt_info, tree vectype,
1833 vec_load_store_type vls_type,
1834 unsigned int ncopies, poly_int64 *poffset)
1835{
1836 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1837 dr_alignment_support alignment_support_scheme;
1838
1839 if (ncopies > 1)
1840 {
1841 if (dump_enabled_p ())
1842 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1843 "multiple types with negative step.\n");
1844 return VMAT_ELEMENTWISE;
1845 }
1846
1847 /* For backward running DRs the first access in vectype actually is
1848 N-1 elements before the address of the DR. */
1849 *poffset = ((-TYPE_VECTOR_SUBPARTS (node: vectype) + 1)
1850 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
1851
1852 int misalignment = dr_misalignment (dr_info, vectype, offset: *poffset);
1853 alignment_support_scheme
1854 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
1855 if (alignment_support_scheme != dr_aligned
1856 && alignment_support_scheme != dr_unaligned_supported)
1857 {
1858 if (dump_enabled_p ())
1859 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1860 "negative step but alignment required.\n");
1861 *poffset = 0;
1862 return VMAT_ELEMENTWISE;
1863 }
1864
1865 if (vls_type == VLS_STORE_INVARIANT)
1866 {
1867 if (dump_enabled_p ())
1868 dump_printf_loc (MSG_NOTE, vect_location,
1869 "negative step with invariant source;"
1870 " no permute needed.\n");
1871 return VMAT_CONTIGUOUS_DOWN;
1872 }
1873
1874 if (!perm_mask_for_reverse (vectype))
1875 {
1876 if (dump_enabled_p ())
1877 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1878 "negative step and reversing not supported.\n");
1879 *poffset = 0;
1880 return VMAT_ELEMENTWISE;
1881 }
1882
1883 return VMAT_CONTIGUOUS_REVERSE;
1884}
1885
1886/* STMT_INFO is either a masked or unconditional store. Return the value
1887 being stored. */
1888
1889tree
1890vect_get_store_rhs (stmt_vec_info stmt_info)
1891{
1892 if (gassign *assign = dyn_cast <gassign *> (p: stmt_info->stmt))
1893 {
1894 gcc_assert (gimple_assign_single_p (assign));
1895 return gimple_assign_rhs1 (gs: assign);
1896 }
1897 if (gcall *call = dyn_cast <gcall *> (p: stmt_info->stmt))
1898 {
1899 internal_fn ifn = gimple_call_internal_fn (gs: call);
1900 int index = internal_fn_stored_value_index (ifn);
1901 gcc_assert (index >= 0);
1902 return gimple_call_arg (gs: call, index);
1903 }
1904 gcc_unreachable ();
1905}
1906
1907/* Function VECTOR_VECTOR_COMPOSITION_TYPE
1908
1909 This function returns a vector type which can be composed with NELTS pieces,
1910 whose type is recorded in PTYPE. VTYPE should be a vector type with the same
1911 vector size as the returned type. It first checks whether the target supports
1912 a piece-sized vector mode for the construction; if not, it checks whether a
1913 piece-sized scalar (integer) mode can be used instead. It returns NULL_TREE
1914 if no suitable composition can be found.
1915
1916 For example, for (vtype=V16QI, nelts=4), we can probably get:
1917 - V16QI with PTYPE V4QI.
1918 - V4SI with PTYPE SI.
1919 - NULL_TREE. */
1920
1921static tree
1922vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
1923{
1924 gcc_assert (VECTOR_TYPE_P (vtype));
1925 gcc_assert (known_gt (nelts, 0U));
1926
1927 machine_mode vmode = TYPE_MODE (vtype);
1928 if (!VECTOR_MODE_P (vmode))
1929 return NULL_TREE;
1930
1931 /* When we are asked to compose the vector from its components let
1932 that happen directly. */
1933 if (known_eq (TYPE_VECTOR_SUBPARTS (vtype), nelts))
1934 {
1935 *ptype = TREE_TYPE (vtype);
1936 return vtype;
1937 }
1938
1939 poly_uint64 vbsize = GET_MODE_BITSIZE (mode: vmode);
1940 unsigned int pbsize;
1941 if (constant_multiple_p (a: vbsize, b: nelts, multiple: &pbsize))
1942 {
1943 /* First check if vec_init optab supports construction from
1944 vector pieces directly. */
1945 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
1946 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (mode: elmode);
1947 machine_mode rmode;
1948 if (related_vector_mode (vmode, elmode, inelts).exists (mode: &rmode)
1949 && (convert_optab_handler (op: vec_init_optab, to_mode: vmode, from_mode: rmode)
1950 != CODE_FOR_nothing))
1951 {
1952 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
1953 return vtype;
1954 }
1955
1956 /* Otherwise check whether an integer mode of the piece size exists and
1957 whether the vec_init optab supports construction from it directly. */
1958 if (int_mode_for_size (size: pbsize, limit: 0).exists (mode: &elmode)
1959 && related_vector_mode (vmode, elmode, nelts).exists (mode: &rmode)
1960 && (convert_optab_handler (op: vec_init_optab, to_mode: rmode, from_mode: elmode)
1961 != CODE_FOR_nothing))
1962 {
1963 *ptype = build_nonstandard_integer_type (pbsize, 1);
1964 return build_vector_type (*ptype, nelts);
1965 }
1966 }
1967
1968 return NULL_TREE;
1969}
1970
1971/* A subroutine of get_load_store_type, with a subset of the same
1972 arguments. Handle the case where STMT_INFO is part of a grouped load
1973 or store.
1974
1975 For stores, the statements in the group are all consecutive
1976 and there is no gap at the end. For loads, the statements in the
1977 group might not be consecutive; there can be gaps between statements
1978 as well as at the end. */
1979
1980static bool
1981get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
1982 tree vectype, slp_tree slp_node,
1983 bool masked_p, vec_load_store_type vls_type,
1984 vect_memory_access_type *memory_access_type,
1985 poly_int64 *poffset,
1986 dr_alignment_support *alignment_support_scheme,
1987 int *misalignment,
1988 gather_scatter_info *gs_info,
1989 internal_fn *lanes_ifn)
1990{
1991 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
1992 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1993 stmt_vec_info first_stmt_info;
1994 unsigned int group_size;
1995 unsigned HOST_WIDE_INT gap;
1996 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1997 {
1998 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1999 group_size = DR_GROUP_SIZE (first_stmt_info);
2000 gap = DR_GROUP_GAP (first_stmt_info);
2001 }
2002 else
2003 {
2004 first_stmt_info = stmt_info;
2005 group_size = 1;
2006 gap = 0;
2007 }
2008 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2009 bool single_element_p = (stmt_info == first_stmt_info
2010 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2011 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (node: vectype);
2012
2013 /* True if the vectorized statements would access beyond the last
2014 statement in the group. */
2015 bool overrun_p = false;
2016
2017 /* True if we can cope with such overrun by peeling for gaps, so that
2018 there is at least one final scalar iteration after the vector loop. */
2019 bool can_overrun_p = (!masked_p
2020 && vls_type == VLS_LOAD
2021 && loop_vinfo
2022 && !loop->inner);
2023
2024 /* There can only be a gap at the end of the group if the stride is
2025 known at compile time. */
2026 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2027
2028 /* Stores can't yet have gaps. */
2029 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2030
2031 if (slp_node)
2032 {
2033 /* For SLP vectorization we directly vectorize a subchain
2034 without permutation. */
2035 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2036 first_dr_info
2037 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2038 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2039 {
2040 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2041 separated by the stride, until we have a complete vector.
2042 Fall back to scalar accesses if that isn't possible. */
2043 if (multiple_p (a: nunits, b: group_size))
2044 *memory_access_type = VMAT_STRIDED_SLP;
2045 else
2046 *memory_access_type = VMAT_ELEMENTWISE;
2047 }
2048 else
2049 {
2050 overrun_p = loop_vinfo && gap != 0;
2051 if (overrun_p && vls_type != VLS_LOAD)
2052 {
2053 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2054 "Grouped store with gaps requires"
2055 " non-consecutive accesses\n");
2056 return false;
2057 }
2058 /* An overrun is fine if the trailing elements are smaller
2059 than the alignment boundary B. Every vector access will
2060 be a multiple of B and so we are guaranteed to access a
2061 non-gap element in the same B-sized block. */
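/* For example (illustrative values): with 4-byte elements, a gap of 1
   and a known alignment of 16 bytes, 16 / 4 = 4 > 1, so the trailing
   element lies in the same 16-byte block as an accessed element and
   the overrun is harmless. */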
2062 if (overrun_p
2063 && gap < (vect_known_alignment_in_bytes (dr_info: first_dr_info,
2064 vectype)
2065 / vect_get_scalar_dr_size (dr_info: first_dr_info)))
2066 overrun_p = false;
2067
2068 /* If the gap splits the vector in half and the target
2069 can do half-vector operations avoid the epilogue peeling
2070 by simply loading half of the vector only. Usually
2071 the construction with an upper zero half will be elided. */
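/* For example, a V4SI group of size 4 with a gap of 2 can be handled
   by loading only a V2SI-sized half of the vector. */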
2072 dr_alignment_support alss;
2073 int misalign = dr_misalignment (dr_info: first_dr_info, vectype);
2074 tree half_vtype;
2075 if (overrun_p
2076 && !masked_p
2077 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2078 vectype, misalign)))
2079 == dr_aligned
2080 || alss == dr_unaligned_supported)
2081 && known_eq (nunits, (group_size - gap) * 2)
2082 && known_eq (nunits, group_size)
2083 && (vector_vector_composition_type (vtype: vectype, nelts: 2, ptype: &half_vtype)
2084 != NULL_TREE))
2085 overrun_p = false;
2086
2087 if (overrun_p && !can_overrun_p)
2088 {
2089 if (dump_enabled_p ())
2090 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2091 "Peeling for outer loop is not supported\n");
2092 return false;
2093 }
2094 int cmp = compare_step_with_zero (vinfo, stmt_info);
2095 if (cmp < 0)
2096 {
2097 if (single_element_p)
2098 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2099 only correct for single element "interleaving" SLP. */
2100 *memory_access_type = get_negative_load_store_type
2101 (vinfo, stmt_info, vectype, vls_type, ncopies: 1, poffset);
2102 else
2103 {
2104 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2105 separated by the stride, until we have a complete vector.
2106 Fall back to scalar accesses if that isn't possible. */
2107 if (multiple_p (a: nunits, b: group_size))
2108 *memory_access_type = VMAT_STRIDED_SLP;
2109 else
2110 *memory_access_type = VMAT_ELEMENTWISE;
2111 }
2112 }
2113 else if (cmp == 0 && loop_vinfo)
2114 {
2115 gcc_assert (vls_type == VLS_LOAD);
2116 *memory_access_type = VMAT_INVARIANT;
2117 /* Invariant accesses perform only component accesses, alignment
2118 is irrelevant for them. */
2119 *alignment_support_scheme = dr_unaligned_supported;
2120 }
2121 else
2122 *memory_access_type = VMAT_CONTIGUOUS;
2123
2124 /* When we have a contiguous access across loop iterations
2125 but the access in the loop doesn't cover the full vector
2126 we can end up with no gap recorded but still excess
2127 elements accessed, see PR103116. Make sure we peel for
2128 gaps if necessary and sufficient and give up if not.
2129
2130 If there is a combination of the access not covering the full
2131 vector and a gap recorded then we may need to peel twice. */
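/* For example (illustrative values): with group_size 3, no gap, VF 4
   and 8-element vectors, (3 * 4) % 8 + 3 - 0 = 7 < 8, so a single
   peeled scalar iteration is not enough and the access is rejected
   below. */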
2132 if (loop_vinfo
2133 && *memory_access_type == VMAT_CONTIGUOUS
2134 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2135 && !multiple_p (a: group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2136 b: nunits))
2137 {
2138 unsigned HOST_WIDE_INT cnunits, cvf;
2139 if (!can_overrun_p
2140 || !nunits.is_constant (const_value: &cnunits)
2141 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (const_value: &cvf)
2142 /* Peeling for gaps assumes that a single scalar iteration
2143 is enough to make sure the last vector iteration doesn't
2144 access excess elements.
2145 ??? Enhancements include peeling multiple iterations
2146 or using masked loads with a static mask. */
2147 || (group_size * cvf) % cnunits + group_size - gap < cnunits)
2148 {
2149 if (dump_enabled_p ())
2150 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2151 "peeling for gaps insufficient for "
2152 "access\n");
2153 return false;
2154 }
2155 overrun_p = true;
2156 }
2157 }
2158 }
2159 else
2160 {
2161 /* We can always handle this case using elementwise accesses,
2162 but see if something more efficient is available. */
2163 *memory_access_type = VMAT_ELEMENTWISE;
2164
2165 /* If there is a gap at the end of the group then these optimizations
2166 would access excess elements in the last iteration. */
2167 bool would_overrun_p = (gap != 0);
2168 /* An overrun is fine if the trailing elements are smaller than the
2169 alignment boundary B. Every vector access will be a multiple of B
2170 and so we are guaranteed to access a non-gap element in the
2171 same B-sized block. */
2172 if (would_overrun_p
2173 && !masked_p
2174 && gap < (vect_known_alignment_in_bytes (dr_info: first_dr_info, vectype)
2175 / vect_get_scalar_dr_size (dr_info: first_dr_info)))
2176 would_overrun_p = false;
2177
2178 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2179 && (can_overrun_p || !would_overrun_p)
2180 && compare_step_with_zero (vinfo, stmt_info) > 0)
2181 {
2182 /* First cope with the degenerate case of a single-element
2183 vector. */
2184 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2185 ;
2186
2187 else
2188 {
2189 /* Otherwise try using LOAD/STORE_LANES. */
2190 *lanes_ifn
2191 = vls_type == VLS_LOAD
2192 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2193 : vect_store_lanes_supported (vectype, group_size,
2194 masked_p);
2195 if (*lanes_ifn != IFN_LAST)
2196 {
2197 *memory_access_type = VMAT_LOAD_STORE_LANES;
2198 overrun_p = would_overrun_p;
2199 }
2200
2201 /* If that fails, try using permuting loads. */
2202 else if (vls_type == VLS_LOAD
2203 ? vect_grouped_load_supported (vectype,
2204 single_element_p,
2205 group_size)
2206 : vect_grouped_store_supported (vectype, group_size))
2207 {
2208 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2209 overrun_p = would_overrun_p;
2210 }
2211 }
2212 }
2213
2214 /* As a last resort, try using a gather load or scatter store.
2215
2216 ??? Although the code can handle all group sizes correctly,
2217 it probably isn't a win to use separate strided accesses based
2218 on nearby locations. Or, even if it's a win over scalar code,
2219 it might not be a win over vectorizing at a lower VF, if that
2220 allows us to use contiguous accesses. */
2221 if (*memory_access_type == VMAT_ELEMENTWISE
2222 && single_element_p
2223 && loop_vinfo
2224 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2225 masked_p, gs_info))
2226 *memory_access_type = VMAT_GATHER_SCATTER;
2227 }
2228
2229 if (*memory_access_type == VMAT_GATHER_SCATTER
2230 || *memory_access_type == VMAT_ELEMENTWISE)
2231 {
2232 *alignment_support_scheme = dr_unaligned_supported;
2233 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2234 }
2235 else
2236 {
2237 *misalignment = dr_misalignment (dr_info: first_dr_info, vectype, offset: *poffset);
2238 *alignment_support_scheme
2239 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2240 *misalignment);
2241 }
2242
2243 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2244 {
2245 /* STMT is the leader of the group. Check the operands of all the
2246 stmts of the group. */
2247 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2248 while (next_stmt_info)
2249 {
2250 tree op = vect_get_store_rhs (stmt_info: next_stmt_info);
2251 enum vect_def_type dt;
2252 if (!vect_is_simple_use (op, vinfo, &dt))
2253 {
2254 if (dump_enabled_p ())
2255 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2256 "use not simple.\n");
2257 return false;
2258 }
2259 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2260 }
2261 }
2262
2263 if (overrun_p)
2264 {
2265 gcc_assert (can_overrun_p);
2266 if (dump_enabled_p ())
2267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2268 "Data access with gaps requires scalar "
2269 "epilogue loop\n");
2270 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2271 }
2272
2273 return true;
2274}
2275
2276/* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2277 if there is a memory access type that the vectorized form can use,
2278 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2279 or scatters, fill in GS_INFO accordingly. In addition
2280 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2281 the target does not support the alignment scheme. *MISALIGNMENT
2282 is set according to the alignment of the access (including
2283 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2284
2285 SLP says whether we're performing SLP rather than loop vectorization.
2286 MASKED_P is true if the statement is conditional on a vectorized mask.
2287 VECTYPE is the vector type that the vectorized statements will use.
2288 NCOPIES is the number of vector statements that will be needed. */
2289
2290static bool
2291get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2292 tree vectype, slp_tree slp_node,
2293 bool masked_p, vec_load_store_type vls_type,
2294 unsigned int ncopies,
2295 vect_memory_access_type *memory_access_type,
2296 poly_int64 *poffset,
2297 dr_alignment_support *alignment_support_scheme,
2298 int *misalignment,
2299 gather_scatter_info *gs_info,
2300 internal_fn *lanes_ifn)
2301{
2302 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
2303 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (node: vectype);
2304 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2305 *poffset = 0;
2306 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2307 {
2308 *memory_access_type = VMAT_GATHER_SCATTER;
2309 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2310 gcc_unreachable ();
2311 /* When using internal functions, we rely on pattern recognition
2312 to convert the type of the offset to the type that the target
2313 requires, with the result being a call to an internal function.
2314 If that failed for some reason (e.g. because another pattern
2315 took priority), just handle cases in which the offset already
2316 has the right type. */
2317 else if (gs_info->ifn != IFN_LAST
2318 && !is_gimple_call (gs: stmt_info->stmt)
2319 && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
2320 TREE_TYPE (gs_info->offset_vectype)))
2321 {
2322 if (dump_enabled_p ())
2323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2324 "%s offset requires a conversion\n",
2325 vls_type == VLS_LOAD ? "gather" : "scatter");
2326 return false;
2327 }
2328 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2329 &gs_info->offset_dt,
2330 &gs_info->offset_vectype))
2331 {
2332 if (dump_enabled_p ())
2333 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2334 "%s index use not simple.\n",
2335 vls_type == VLS_LOAD ? "gather" : "scatter");
2336 return false;
2337 }
2338 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2339 {
2340 if (!TYPE_VECTOR_SUBPARTS (node: vectype).is_constant ()
2341 || !TYPE_VECTOR_SUBPARTS (node: gs_info->offset_vectype).is_constant ()
2342 || !constant_multiple_p (a: TYPE_VECTOR_SUBPARTS
2343 (node: gs_info->offset_vectype),
2344 b: TYPE_VECTOR_SUBPARTS (node: vectype)))
2345 {
2346 if (dump_enabled_p ())
2347 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2348 "unsupported vector types for emulated "
2349 "gather.\n");
2350 return false;
2351 }
2352 }
2353 /* Gather-scatter accesses perform only component accesses, alignment
2354 is irrelevant for them. */
2355 *alignment_support_scheme = dr_unaligned_supported;
2356 }
2357 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info) || slp_node)
2358 {
2359 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2360 masked_p,
2361 vls_type, memory_access_type, poffset,
2362 alignment_support_scheme,
2363 misalignment, gs_info, lanes_ifn))
2364 return false;
2365 }
2366 else if (STMT_VINFO_STRIDED_P (stmt_info))
2367 {
2368 gcc_assert (!slp_node);
2369 if (loop_vinfo
2370 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2371 masked_p, gs_info))
2372 *memory_access_type = VMAT_GATHER_SCATTER;
2373 else
2374 *memory_access_type = VMAT_ELEMENTWISE;
2375 /* Alignment is irrelevant here. */
2376 *alignment_support_scheme = dr_unaligned_supported;
2377 }
2378 else
2379 {
2380 int cmp = compare_step_with_zero (vinfo, stmt_info);
2381 if (cmp == 0)
2382 {
2383 gcc_assert (vls_type == VLS_LOAD);
2384 *memory_access_type = VMAT_INVARIANT;
2385 /* Invariant accesses perform only component accesses, alignment
2386 is irrelevant for them. */
2387 *alignment_support_scheme = dr_unaligned_supported;
2388 }
2389 else
2390 {
2391 if (cmp < 0)
2392 *memory_access_type = get_negative_load_store_type
2393 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2394 else
2395 *memory_access_type = VMAT_CONTIGUOUS;
2396 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2397 vectype, offset: *poffset);
2398 *alignment_support_scheme
2399 = vect_supportable_dr_alignment (vinfo,
2400 STMT_VINFO_DR_INFO (stmt_info),
2401 vectype, *misalignment);
2402 }
2403 }
2404
2405 if ((*memory_access_type == VMAT_ELEMENTWISE
2406 || *memory_access_type == VMAT_STRIDED_SLP)
2407 && !nunits.is_constant ())
2408 {
2409 if (dump_enabled_p ())
2410 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2411 "Not using elementwise accesses due to variable "
2412 "vectorization factor.\n");
2413 return false;
2414 }
2415
2416 if (*alignment_support_scheme == dr_unaligned_unsupported)
2417 {
2418 if (dump_enabled_p ())
2419 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2420 "unsupported unaligned access\n");
2421 return false;
2422 }
2423
2424 /* FIXME: At the moment the cost model seems to underestimate the
2425 cost of using elementwise accesses. This check preserves the
2426 traditional behavior until that can be fixed. */
2427 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2428 if (!first_stmt_info)
2429 first_stmt_info = stmt_info;
2430 if (*memory_access_type == VMAT_ELEMENTWISE
2431 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2432 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2433 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2434 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2435 {
2436 if (dump_enabled_p ())
2437 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2438 "not falling back to elementwise accesses\n");
2439 return false;
2440 }
2441 return true;
2442}
2443
2444/* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2445 conditional operation STMT_INFO. When returning true, store the mask
2446 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2447 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2448 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2449
2450static bool
2451vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2452 slp_tree slp_node, unsigned mask_index,
2453 tree *mask, slp_tree *mask_node,
2454 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2455{
2456 enum vect_def_type mask_dt;
2457 tree mask_vectype;
2458 slp_tree mask_node_1;
2459 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2460 mask, &mask_node_1, &mask_dt, &mask_vectype))
2461 {
2462 if (dump_enabled_p ())
2463 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2464 "mask use not simple.\n");
2465 return false;
2466 }
2467
2468 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2469 {
2470 if (dump_enabled_p ())
2471 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2472 "mask argument is not a boolean.\n");
2473 return false;
2474 }
2475
2476 /* If the caller is not prepared to adjust an external/constant
2477 SLP mask vector type, fail. */
2478 if (slp_node
2479 && !mask_node
2480 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2481 {
2482 if (dump_enabled_p ())
2483 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2484 "SLP mask argument is not vectorized.\n");
2485 return false;
2486 }
2487
2488 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2489 if (!mask_vectype)
2490 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype),
2491 mask_node_1);
2492
2493 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2494 {
2495 if (dump_enabled_p ())
2496 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2497 "could not find an appropriate vector mask type.\n");
2498 return false;
2499 }
2500
2501 if (maybe_ne (a: TYPE_VECTOR_SUBPARTS (node: mask_vectype),
2502 b: TYPE_VECTOR_SUBPARTS (node: vectype)))
2503 {
2504 if (dump_enabled_p ())
2505 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2506 "vector mask type %T"
2507 " does not match vector data type %T.\n",
2508 mask_vectype, vectype);
2509
2510 return false;
2511 }
2512
2513 *mask_dt_out = mask_dt;
2514 *mask_vectype_out = mask_vectype;
2515 if (mask_node)
2516 *mask_node = mask_node_1;
2517 return true;
2518}
2519
2520 /* Return true if the stored value is suitable for vectorizing store
2521 statement STMT_INFO. When returning true, store the scalar value being
2522 stored in *RHS and its SLP node in *RHS_NODE, the type of the definition
2523 in *RHS_DT_OUT, the type of the vectorized store value in *RHS_VECTYPE_OUT
2524 and the type of the store in *VLS_TYPE_OUT. */
2525
2526static bool
2527vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2528 slp_tree slp_node, tree *rhs, slp_tree *rhs_node,
2529 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2530 vec_load_store_type *vls_type_out)
2531{
2532 int op_no = 0;
2533 if (gcall *call = dyn_cast <gcall *> (p: stmt_info->stmt))
2534 {
2535 if (gimple_call_internal_p (gs: call)
2536 && internal_store_fn_p (gimple_call_internal_fn (gs: call)))
2537 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (gs: call));
2538 }
2539 if (slp_node)
2540 op_no = vect_slp_child_index_for_operand
2541 (stmt_info->stmt, op: op_no, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
2542
2543 enum vect_def_type rhs_dt;
2544 tree rhs_vectype;
2545 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2546 rhs, rhs_node, &rhs_dt, &rhs_vectype))
2547 {
2548 if (dump_enabled_p ())
2549 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2550 "use not simple.\n");
2551 return false;
2552 }
2553
2554 /* If this is a store from a constant, make sure
2555 native_encode_expr can handle it. */
2556 if (CONSTANT_CLASS_P (*rhs) && native_encode_expr (*rhs, NULL, 64) == 0)
2557 {
2558 if (dump_enabled_p ())
2559 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2560 "cannot encode constant as a byte sequence.\n");
2561 return false;
2562 }
2563
2564 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2565 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2566 {
2567 if (dump_enabled_p ())
2568 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2569 "incompatible vector types.\n");
2570 return false;
2571 }
2572
2573 *rhs_dt_out = rhs_dt;
2574 *rhs_vectype_out = rhs_vectype;
2575 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2576 *vls_type_out = VLS_STORE_INVARIANT;
2577 else
2578 *vls_type_out = VLS_STORE;
2579 return true;
2580}
2581
2582/* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2583 Note that we support masks with floating-point type, in which case the
2584 floats are interpreted as a bitmask. */
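/* For example, for a V4SF mask type this builds a vector of four floats
   whose bit pattern is all ones (a NaN encoding). */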
2585
2586static tree
2587vect_build_all_ones_mask (vec_info *vinfo,
2588 stmt_vec_info stmt_info, tree masktype)
2589{
2590 if (TREE_CODE (masktype) == INTEGER_TYPE)
2591 return build_int_cst (masktype, -1);
2592 else if (VECTOR_BOOLEAN_TYPE_P (masktype)
2593 || TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2594 {
2595 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2596 mask = build_vector_from_val (masktype, mask);
2597 return vect_init_vector (vinfo, stmt_info, val: mask, type: masktype, NULL);
2598 }
2599 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2600 {
2601 REAL_VALUE_TYPE r;
2602 long tmp[6];
2603 for (int j = 0; j < 6; ++j)
2604 tmp[j] = -1;
2605 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2606 tree mask = build_real (TREE_TYPE (masktype), r);
2607 mask = build_vector_from_val (masktype, mask);
2608 return vect_init_vector (vinfo, stmt_info, val: mask, type: masktype, NULL);
2609 }
2610 gcc_unreachable ();
2611}
2612
2613/* Build an all-zero merge value of type VECTYPE while vectorizing
2614 STMT_INFO as a gather load. */
2615
2616static tree
2617vect_build_zero_merge_argument (vec_info *vinfo,
2618 stmt_vec_info stmt_info, tree vectype)
2619{
2620 tree merge;
2621 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2622 merge = build_int_cst (TREE_TYPE (vectype), 0);
2623 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2624 {
2625 REAL_VALUE_TYPE r;
2626 long tmp[6];
2627 for (int j = 0; j < 6; ++j)
2628 tmp[j] = 0;
2629 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2630 merge = build_real (TREE_TYPE (vectype), r);
2631 }
2632 else
2633 gcc_unreachable ();
2634 merge = build_vector_from_val (vectype, merge);
2635 return vect_init_vector (vinfo, stmt_info, val: merge, type: vectype, NULL);
2636}
2637
2638/* Build a gather load call while vectorizing STMT_INFO. Insert new
2639 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2640 the gather load operation. If the load is conditional, MASK is the
2641 vectorized condition, otherwise MASK is null. PTR is the base
2642 pointer and OFFSET is the vectorized offset. */
2643
2644static gimple *
2645vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
2646 gimple_stmt_iterator *gsi,
2647 gather_scatter_info *gs_info,
2648 tree ptr, tree offset, tree mask)
2649{
2650 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2651 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2652 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2653 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2654 /* ptrtype */ arglist = TREE_CHAIN (arglist);
2655 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2656 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2657 tree scaletype = TREE_VALUE (arglist);
2658 tree var;
2659 gcc_checking_assert (types_compatible_p (srctype, rettype)
2660 && (!mask
2661 || TREE_CODE (masktype) == INTEGER_TYPE
2662 || types_compatible_p (srctype, masktype)));
2663
2664 tree op = offset;
2665 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2666 {
2667 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2668 TYPE_VECTOR_SUBPARTS (idxtype)));
2669 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2670 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2671 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2672 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
2673 op = var;
2674 }
2675
2676 tree src_op = NULL_TREE;
2677 tree mask_op = NULL_TREE;
2678 if (mask)
2679 {
2680 if (!useless_type_conversion_p (masktype, TREE_TYPE (mask)))
2681 {
2682 tree utype, optype = TREE_TYPE (mask);
2683 if (VECTOR_TYPE_P (masktype)
2684 || TYPE_MODE (masktype) == TYPE_MODE (optype))
2685 utype = masktype;
2686 else
2687 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2688 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2689 tree mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask);
2690 gassign *new_stmt
2691 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2692 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
2693 mask_arg = var;
2694 if (!useless_type_conversion_p (masktype, utype))
2695 {
2696 gcc_assert (TYPE_PRECISION (utype)
2697 <= TYPE_PRECISION (masktype));
2698 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
2699 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2700 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
2701 mask_arg = var;
2702 }
2703 src_op = build_zero_cst (srctype);
2704 mask_op = mask_arg;
2705 }
2706 else
2707 {
2708 src_op = mask;
2709 mask_op = mask;
2710 }
2711 }
2712 else
2713 {
2714 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, vectype: rettype);
2715 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2716 }
2717
2718 tree scale = build_int_cst (scaletype, gs_info->scale);
2719 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2720 mask_op, scale);
2721
2722 if (!useless_type_conversion_p (vectype, rettype))
2723 {
2724 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2725 TYPE_VECTOR_SUBPARTS (rettype)));
2726 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2727 gimple_call_set_lhs (gs: new_stmt, lhs: op);
2728 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
2729 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2730 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR, op);
2731 }
2732
2733 return new_stmt;
2734}
2735
2736/* Build a scatter store call while vectorizing STMT_INFO. Insert new
2737 instructions before GSI. GS_INFO describes the scatter store operation.
2738 PTR is the base pointer, OFFSET the vectorized offsets and OPRND the
2739 vectorized data to store.
2740 If the store is conditional, MASK is the vectorized condition, otherwise
2741 MASK is null. */
2742
2743static gimple *
2744vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
2745 gimple_stmt_iterator *gsi,
2746 gather_scatter_info *gs_info,
2747 tree ptr, tree offset, tree oprnd, tree mask)
2748{
2749 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2750 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2751 /* tree ptrtype = TREE_VALUE (arglist); */ arglist = TREE_CHAIN (arglist);
2752 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2753 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2754 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2755 tree scaletype = TREE_VALUE (arglist);
2756 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
2757 && TREE_CODE (rettype) == VOID_TYPE);
2758
2759 tree mask_arg = NULL_TREE;
2760 if (mask)
2761 {
2762 mask_arg = mask;
2763 tree optype = TREE_TYPE (mask_arg);
2764 tree utype;
2765 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
2766 utype = masktype;
2767 else
2768 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2769 tree var = vect_get_new_ssa_name (utype, vect_scalar_var);
2770 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
2771 gassign *new_stmt
2772 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2773 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
2774 mask_arg = var;
2775 if (!useless_type_conversion_p (masktype, utype))
2776 {
2777 gcc_assert (TYPE_PRECISION (utype) <= TYPE_PRECISION (masktype));
2778 tree var = vect_get_new_ssa_name (masktype, vect_scalar_var);
2779 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2780 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
2781 mask_arg = var;
2782 }
2783 }
2784 else
2785 {
2786 mask_arg = build_int_cst (masktype, -1);
2787 mask_arg = vect_init_vector (vinfo, stmt_info, val: mask_arg, type: masktype, NULL);
2788 }
2789
2790 tree src = oprnd;
2791 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
2792 {
2793 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
2794 TYPE_VECTOR_SUBPARTS (srctype)));
2795 tree var = vect_get_new_ssa_name (srctype, vect_simple_var);
2796 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
2797 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
2798 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
2799 src = var;
2800 }
2801
2802 tree op = offset;
2803 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2804 {
2805 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2806 TYPE_VECTOR_SUBPARTS (idxtype)));
2807 tree var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2808 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2809 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2810 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
2811 op = var;
2812 }
2813
2814 tree scale = build_int_cst (scaletype, gs_info->scale);
2815 gcall *new_stmt
2816 = gimple_build_call (gs_info->decl, 5, ptr, mask_arg, op, src, scale);
2817 return new_stmt;
2818}
2819
2820/* Prepare the base and offset in GS_INFO for vectorization.
2821 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2822 to the vectorized offset argument for the first copy of STMT_INFO.
2823 STMT_INFO is the statement described by GS_INFO and LOOP is the
2824 containing loop. */
2825
2826static void
2827vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
2828 class loop *loop, stmt_vec_info stmt_info,
2829 slp_tree slp_node, gather_scatter_info *gs_info,
2830 tree *dataref_ptr, vec<tree> *vec_offset)
2831{
2832 gimple_seq stmts = NULL;
2833 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2834 if (stmts != NULL)
2835 {
2836 basic_block new_bb;
2837 edge pe = loop_preheader_edge (loop);
2838 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2839 gcc_assert (!new_bb);
2840 }
2841 if (slp_node)
2842 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
2843 else
2844 {
2845 unsigned ncopies
2846 = vect_get_num_copies (loop_vinfo, vectype: gs_info->offset_vectype);
2847 vect_get_vec_defs_for_operand (vinfo: loop_vinfo, stmt_vinfo: stmt_info, ncopies,
2848 op: gs_info->offset, vec_oprnds: vec_offset,
2849 vectype: gs_info->offset_vectype);
2850 }
2851}
2852
2853/* Prepare to implement a grouped or strided load or store using
2854 the gather load or scatter store operation described by GS_INFO.
2855 STMT_INFO is the load or store statement.
2856
2857 Set *DATAREF_BUMP to the amount that should be added to the base
2858 address after each copy of the vectorized statement. Set *VEC_OFFSET
2859 to an invariant offset vector in which element I has the value
2860 I * DR_STEP / SCALE. */
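/* For example, with DR_STEP 8 and SCALE 4, *VEC_OFFSET becomes
   { 0, 2, 4, 6, ... }. */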
2861
2862static void
2863vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2864 loop_vec_info loop_vinfo,
2865 gimple_stmt_iterator *gsi,
2866 gather_scatter_info *gs_info,
2867 tree *dataref_bump, tree *vec_offset,
2868 vec_loop_lens *loop_lens)
2869{
2870 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2871 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2872
2873 if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
2874 {
2875 /* _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]);
2876 ivtmp_8 = _31 * 16 (step in bytes);
2877 .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
2878 vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */
2879 tree loop_len
2880 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
2881 tree tmp
2882 = fold_build2 (MULT_EXPR, sizetype,
2883 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2884 loop_len);
2885 *dataref_bump = force_gimple_operand_gsi (gsi, tmp, true, NULL_TREE, true,
2886 GSI_SAME_STMT);
2887 }
2888 else
2889 {
2890 tree bump
2891 = size_binop (MULT_EXPR,
2892 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2893 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2894 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
2895 }
2896
2897 /* The offset given in GS_INFO can have pointer type, so use the element
2898 type of the vector instead. */
2899 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
2900
2901 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2902 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
2903 ssize_int (gs_info->scale));
2904 step = fold_convert (offset_type, step);
2905
2906 /* Create {0, X, X*2, X*3, ...}. */
2907 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
2908 build_zero_cst (offset_type), step);
2909 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
2910}
2911
2912 /* Prepare the pointer IVs which need to be updated by a variable amount.
2913 That amount is the outcome of .SELECT_VL, which allows each iteration
2914 to process a flexible number of elements, as long as that number is at
2915 most VF.
2916
2917 Return the data reference pointer increment determined by .SELECT_VL.
2918 If new statements are needed, insert them before GSI. */
2919
2920static tree
2921vect_get_loop_variant_data_ptr_increment (
2922 vec_info *vinfo, tree aggr_type, gimple_stmt_iterator *gsi,
2923 vec_loop_lens *loop_lens, dr_vec_info *dr_info,
2924 vect_memory_access_type memory_access_type)
2925{
2926 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (p: vinfo);
2927 tree step = vect_dr_behavior (vinfo, dr_info)->step;
2928
2929 /* gather/scatter never reach here. */
2930 gcc_assert (memory_access_type != VMAT_GATHER_SCATTER);
2931
2932 /* When the SELECT_VL pattern is supported, we adjust the memory
2933 address dynamically by the .SELECT_VL result.
2934
2935 The result of .SELECT_VL is the number of elements to be
2936 processed in each iteration. So the memory address
2937 adjustment operation should be:
2938
2939 addr = addr + .SELECT_VL (ARG..) * step;
2940 */
2941 tree loop_len
2942 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0);
2943 tree len_type = TREE_TYPE (loop_len);
2944 /* The outcome of .SELECT_VL is a count of elements, so scale it by the
2945 step in bytes to obtain the amount by which the pointer IV must be
2946 advanced. */
2947 tree tmp = fold_build2 (MULT_EXPR, len_type, loop_len,
2948 wide_int_to_tree (len_type, wi::to_widest (step)));
2949 tree bump = make_temp_ssa_name (type: len_type, NULL, name: "ivtmp");
2950 gassign *assign = gimple_build_assign (bump, tmp);
2951 gsi_insert_before (gsi, assign, GSI_SAME_STMT);
2952 return bump;
2953}
2954
2955/* Return the amount that should be added to a vector pointer to move
2956 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2957 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2958 vectorization. */
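/* For example, for a contiguous access the increment is simply the size
   of AGGR_TYPE, negated if the scalar step is negative. */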
2959
2960static tree
2961vect_get_data_ptr_increment (vec_info *vinfo, gimple_stmt_iterator *gsi,
2962 dr_vec_info *dr_info, tree aggr_type,
2963 vect_memory_access_type memory_access_type,
2964 vec_loop_lens *loop_lens = nullptr)
2965{
2966 if (memory_access_type == VMAT_INVARIANT)
2967 return size_zero_node;
2968
2969 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (p: vinfo);
2970 if (loop_vinfo && LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
2971 return vect_get_loop_variant_data_ptr_increment (vinfo, aggr_type, gsi,
2972 loop_lens, dr_info,
2973 memory_access_type);
2974
2975 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2976 tree step = vect_dr_behavior (vinfo, dr_info)->step;
2977 if (tree_int_cst_sgn (step) == -1)
2978 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2979 return iv_step;
2980}
2981
2982/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
2983
2984static bool
2985vectorizable_bswap (vec_info *vinfo,
2986 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
2987 gimple **vec_stmt, slp_tree slp_node,
2988 slp_tree *slp_op,
2989 tree vectype_in, stmt_vector_for_cost *cost_vec)
2990{
2991 tree op, vectype;
2992 gcall *stmt = as_a <gcall *> (p: stmt_info->stmt);
2993 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
2994 unsigned ncopies;
2995
2996 op = gimple_call_arg (gs: stmt, index: 0);
2997 vectype = STMT_VINFO_VECTYPE (stmt_info);
2998 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (node: vectype);
2999
3000 /* Multiple types in SLP are handled by creating the appropriate number of
3001 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3002 case of SLP. */
3003 if (slp_node)
3004 ncopies = 1;
3005 else
3006 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3007
3008 gcc_assert (ncopies >= 1);
3009
3010 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype))
3011 {
3012 if (dump_enabled_p ())
3013 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3014 "mismatched vector sizes %T and %T\n",
3015 vectype_in, vectype);
3016 return false;
3017 }
3018
3019 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3020 if (! char_vectype)
3021 return false;
3022
3023 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (node: char_vectype);
3024 unsigned word_bytes;
3025 if (!constant_multiple_p (a: num_bytes, b: nunits, multiple: &word_bytes))
3026 return false;
3027
3028 /* The encoding uses one stepped pattern for each byte in the word. */
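/* For example, with 4-byte words the selected indices are
   { 3, 2, 1, 0, 7, 6, 5, 4, ... }. */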
3029 vec_perm_builder elts (num_bytes, word_bytes, 3);
3030 for (unsigned i = 0; i < 3; ++i)
3031 for (unsigned j = 0; j < word_bytes; ++j)
3032 elts.quick_push (obj: (i + 1) * word_bytes - j - 1);
3033
3034 vec_perm_indices indices (elts, 1, num_bytes);
3035 machine_mode vmode = TYPE_MODE (char_vectype);
3036 if (!can_vec_perm_const_p (vmode, vmode, indices))
3037 return false;
3038
3039 if (! vec_stmt)
3040 {
3041 if (slp_node
3042 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3043 {
3044 if (dump_enabled_p ())
3045 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3046 "incompatible vector types for invariants\n");
3047 return false;
3048 }
3049
3050 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3051 DUMP_VECT_SCOPE ("vectorizable_bswap");
3052 record_stmt_cost (body_cost_vec: cost_vec,
3053 count: 1, kind: vector_stmt, stmt_info, misalign: 0, where: vect_prologue);
3054 record_stmt_cost (body_cost_vec: cost_vec,
3055 count: slp_node
3056 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3057 kind: vec_perm, stmt_info, misalign: 0, where: vect_body);
3058 return true;
3059 }
3060
3061 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3062
3063 /* Transform. */
3064 vec<tree> vec_oprnds = vNULL;
3065 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3066 op0: op, vec_oprnds0: &vec_oprnds);
3067 /* Arguments are ready. Create the new vector stmt. */
3068 unsigned i;
3069 tree vop;
3070 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3071 {
3072 gimple *new_stmt;
3073 tree tem = make_ssa_name (var: char_vectype);
3074 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3075 char_vectype, vop));
3076 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
3077 tree tem2 = make_ssa_name (var: char_vectype);
3078 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3079 tem, tem, bswap_vconst);
3080 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
3081 tem = make_ssa_name (var: vectype);
3082 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3083 vectype, tem2));
3084 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
3085 if (slp_node)
3086 slp_node->push_vec_def (def: new_stmt);
3087 else
3088 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
3089 }
3090
3091 if (!slp_node)
3092 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3093
3094 vec_oprnds.release ();
3095 return true;
3096}
3097
3098/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3099 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3100 in a single step. On success, store the binary pack code in
3101 *CONVERT_CODE. */
3102
3103static bool
3104simple_integer_narrowing (tree vectype_out, tree vectype_in,
3105 code_helper *convert_code)
3106{
3107 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3108 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3109 return false;
3110
3111 code_helper code;
3112 int multi_step_cvt = 0;
3113 auto_vec <tree, 8> interm_types;
3114 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3115 &code, &multi_step_cvt, &interm_types)
3116 || multi_step_cvt)
3117 return false;
3118
3119 *convert_code = code;
3120 return true;
3121}
3122
3123/* Function vectorizable_call.
3124
3125 Check if STMT_INFO performs a function call that can be vectorized.
3126 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3127 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3128 Return true if STMT_INFO is vectorizable in this way. */
3129
3130static bool
3131vectorizable_call (vec_info *vinfo,
3132 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3133 gimple **vec_stmt, slp_tree slp_node,
3134 stmt_vector_for_cost *cost_vec)
3135{
3136 gcall *stmt;
3137 tree vec_dest;
3138 tree scalar_dest;
3139 tree op;
3140 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3141 tree vectype_out, vectype_in;
3142 poly_uint64 nunits_in;
3143 poly_uint64 nunits_out;
3144 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
3145 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo);
3146 tree fndecl, new_temp, rhs_type;
3147 enum vect_def_type dt[4]
3148 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3149 vect_unknown_def_type };
3150 tree vectypes[ARRAY_SIZE (dt)] = {};
3151 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3152 int ndts = ARRAY_SIZE (dt);
3153 int ncopies, j;
3154 auto_vec<tree, 8> vargs;
3155 enum { NARROW, NONE, WIDEN } modifier;
3156 size_t i, nargs;
3157 tree lhs;
3158 tree clz_ctz_arg1 = NULL_TREE;
3159
3160 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3161 return false;
3162
3163 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3164 && ! vec_stmt)
3165 return false;
3166
3167 /* Is STMT_INFO a vectorizable call? */
3168 stmt = dyn_cast <gcall *> (p: stmt_info->stmt);
3169 if (!stmt)
3170 return false;
3171
3172 if (gimple_call_internal_p (gs: stmt)
3173 && (internal_load_fn_p (gimple_call_internal_fn (gs: stmt))
3174 || internal_store_fn_p (gimple_call_internal_fn (gs: stmt))))
3175 /* Handled by vectorizable_load and vectorizable_store. */
3176 return false;
3177
3178 if (gimple_call_lhs (gs: stmt) == NULL_TREE
3179 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3180 return false;
3181
3182 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3183
3184 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3185
3186 /* Process function arguments. */
3187 rhs_type = NULL_TREE;
3188 vectype_in = NULL_TREE;
3189 nargs = gimple_call_num_args (gs: stmt);
3190
3191 /* Bail out if the function has more than four arguments; we do not have
3192 interesting builtin functions to vectorize with more than two arguments
3193 except for fma. Calls with no arguments are not interesting either. */
3194 if (nargs == 0 || nargs > 4)
3195 return false;
3196
3197 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3198 combined_fn cfn = gimple_call_combined_fn (stmt);
3199 if (cfn == CFN_GOMP_SIMD_LANE)
3200 {
3201 nargs = 0;
3202 rhs_type = unsigned_type_node;
3203 }
3204 /* Similarly, pretend IFN_CLZ and IFN_CTZ only have one argument; the second
3205 argument just says whether the operation is well-defined at zero and what
3206 value should be returned for a zero input. */
3207 if ((cfn == CFN_CLZ || cfn == CFN_CTZ) && nargs == 2)
3208 {
3209 nargs = 1;
3210 clz_ctz_arg1 = gimple_call_arg (gs: stmt, index: 1);
3211 }
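 /* For example, .CLZ (x, 32) means CLZ is well-defined at zero and yields
    32 there; the saved constant is re-appended when the vector call is
    built below.  */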
3212
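 /* For masked internal functions, remember which call argument is the mask
    so it can be checked as a scalar mask below and combined with the loop
    mask during the transform.  */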
3213 int mask_opno = -1;
3214 if (internal_fn_p (code: cfn))
3215 mask_opno = internal_fn_mask_index (as_internal_fn (code: cfn));
3216
3217 for (i = 0; i < nargs; i++)
3218 {
3219 if ((int) i == mask_opno)
3220 {
3221 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index: mask_opno,
3222 mask: &op, mask_node: &slp_op[i], mask_dt_out: &dt[i], mask_vectype_out: &vectypes[i]))
3223 return false;
3224 continue;
3225 }
3226
3227 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3228 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3229 {
3230 if (dump_enabled_p ())
3231 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3232 "use not simple.\n");
3233 return false;
3234 }
3235
3236 /* We can only handle calls with arguments of the same type. */
3237 if (rhs_type
3238 && !types_compatible_p (type1: rhs_type, TREE_TYPE (op)))
3239 {
3240 if (dump_enabled_p ())
3241 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3242 "argument types differ.\n");
3243 return false;
3244 }
3245 if (!rhs_type)
3246 rhs_type = TREE_TYPE (op);
3247
3248 if (!vectype_in)
3249 vectype_in = vectypes[i];
3250 else if (vectypes[i]
3251 && !types_compatible_p (type1: vectypes[i], type2: vectype_in))
3252 {
3253 if (dump_enabled_p ())
3254 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3255 "argument vector types differ.\n");
3256 return false;
3257 }
3258 }
3259 /* If all arguments are external or constant defs, infer the vector type
3260 from the scalar type. */
3261 if (!vectype_in)
3262 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3263 if (vec_stmt)
3264 gcc_assert (vectype_in);
3265 if (!vectype_in)
3266 {
3267 if (dump_enabled_p ())
3268 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3269 "no vectype for scalar type %T\n", rhs_type);
3270
3271 return false;
3272 }
3273
3274 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3275 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3276 {
3277 if (dump_enabled_p ())
3278 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3279 "mixed mask and nonmask vector types\n");
3280 return false;
3281 }
3282
3283 if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
3284 {
3285 if (dump_enabled_p ())
3286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3287 "use emulated vector type for call\n");
3288 return false;
3289 }
3290
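 /* Classify the call by comparing lane counts: NARROW if two input vectors
    combine into one output vector, WIDEN if one input vector produces two
    output vectors, NONE if the counts match.  Other ratios are rejected.  */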
3291 /* FORNOW */
3292 nunits_in = TYPE_VECTOR_SUBPARTS (node: vectype_in);
3293 nunits_out = TYPE_VECTOR_SUBPARTS (node: vectype_out);
3294 if (known_eq (nunits_in * 2, nunits_out))
3295 modifier = NARROW;
3296 else if (known_eq (nunits_out, nunits_in))
3297 modifier = NONE;
3298 else if (known_eq (nunits_out * 2, nunits_in))
3299 modifier = WIDEN;
3300 else
3301 return false;
3302
3303 /* We only handle functions that do not read or clobber memory. */
3304 if (gimple_vuse (g: stmt))
3305 {
3306 if (dump_enabled_p ())
3307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3308 "function reads from or writes to memory.\n");
3309 return false;
3310 }
3311
3312 /* For now, we only vectorize functions if a target specific builtin
3313 is available. TODO -- in some cases, it might be profitable to
3314 insert the calls for pieces of the vector, in order to be able
3315 to vectorize other operations in the loop. */
3316 fndecl = NULL_TREE;
3317 internal_fn ifn = IFN_LAST;
3318 tree callee = gimple_call_fndecl (gs: stmt);
3319
3320 /* First try using an internal function. */
3321 code_helper convert_code = MAX_TREE_CODES;
3322 if (cfn != CFN_LAST
3323 && (modifier == NONE
3324 || (modifier == NARROW
3325 && simple_integer_narrowing (vectype_out, vectype_in,
3326 convert_code: &convert_code))))
3327 ifn = vectorizable_internal_function (cfn, fndecl: callee, vectype_out,
3328 vectype_in);
3329
3330 /* If that fails, try asking for a target-specific built-in function. */
3331 if (ifn == IFN_LAST)
3332 {
3333 if (cfn != CFN_LAST)
3334 fndecl = targetm.vectorize.builtin_vectorized_function
3335 (cfn, vectype_out, vectype_in);
3336 else if (callee && fndecl_built_in_p (node: callee, klass: BUILT_IN_MD))
3337 fndecl = targetm.vectorize.builtin_md_vectorized_function
3338 (callee, vectype_out, vectype_in);
3339 }
3340
3341 if (ifn == IFN_LAST && !fndecl)
3342 {
3343 if (cfn == CFN_GOMP_SIMD_LANE
3344 && !slp_node
3345 && loop_vinfo
3346 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3347 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3348 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3349 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3350 {
3351 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3352 { 0, 1, 2, ... vf - 1 } vector. */
3353 gcc_assert (nargs == 0);
3354 }
3355 else if (modifier == NONE
3356 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3357 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3358 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3359 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3360 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3361 slp_op, vectype_in, cost_vec);
3362 else
3363 {
3364 if (dump_enabled_p ())
3365 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3366 "function is not vectorizable.\n");
3367 return false;
3368 }
3369 }
3370
3371 if (slp_node)
3372 ncopies = 1;
3373 else if (modifier == NARROW && ifn == IFN_LAST)
3374 ncopies = vect_get_num_copies (loop_vinfo, vectype: vectype_out);
3375 else
3376 ncopies = vect_get_num_copies (loop_vinfo, vectype: vectype_in);
3377
3378 /* Sanity check: make sure that at least one copy of the vectorized stmt
3379 needs to be generated. */
3380 gcc_assert (ncopies >= 1);
3381
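 /* Work out how this call interacts with loop masking: the conditional
    (COND_*) and length (COND_LEN_*) variants of the internal function, the
    position of any length operand, and the loop masks/lens that the
    transform phase can draw from.  */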
3382 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3383 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3384 internal_fn cond_len_fn = get_len_internal_fn (ifn);
3385 int len_opno = internal_fn_len_index (cond_len_fn);
3386 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3387 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
3388 if (!vec_stmt) /* transformation not required. */
3389 {
3390 if (slp_node)
3391 for (i = 0; i < nargs; ++i)
3392 if (!vect_maybe_update_slp_op_vectype (slp_op[i],
3393 vectypes[i]
3394 ? vectypes[i] : vectype_in))
3395 {
3396 if (dump_enabled_p ())
3397 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3398 "incompatible vector types for invariants\n");
3399 return false;
3400 }
3401 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3402 DUMP_VECT_SCOPE ("vectorizable_call");
3403 vect_model_simple_cost (vinfo, stmt_info,
3404 ncopies, dt, ndts, node: slp_node, cost_vec);
3405 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3406 record_stmt_cost (body_cost_vec: cost_vec, count: ncopies / 2,
3407 kind: vec_promote_demote, stmt_info, misalign: 0, where: vect_body);
3408
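 /* For loops that may use partial vectors, check that a conditional or
    length form of the internal function exists for reductions, and record
    the loop masks or lengths that the call will need at transform time.  */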
3409 if (loop_vinfo
3410 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3411 && (reduc_idx >= 0 || mask_opno >= 0))
3412 {
3413 if (reduc_idx >= 0
3414 && (cond_fn == IFN_LAST
3415 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3416 OPTIMIZE_FOR_SPEED))
3417 && (cond_len_fn == IFN_LAST
3418 || !direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3419 OPTIMIZE_FOR_SPEED)))
3420 {
3421 if (dump_enabled_p ())
3422 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3423 "can't use a fully-masked loop because no"
3424 " conditional operation is available.\n");
3425 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3426 }
3427 else
3428 {
3429 unsigned int nvectors
3430 = (slp_node
3431 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3432 : ncopies);
3433 tree scalar_mask = NULL_TREE;
3434 if (mask_opno >= 0)
3435 scalar_mask = gimple_call_arg (gs: stmt_info->stmt, index: mask_opno);
3436 if (cond_len_fn != IFN_LAST
3437 && direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3438 OPTIMIZE_FOR_SPEED))
3439 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype_out,
3440 1);
3441 else
3442 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out,
3443 scalar_mask);
3444 }
3445 }
3446 return true;
3447 }
3448
3449 /* Transform. */
3450
3451 if (dump_enabled_p ())
3452 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3453
3454 /* Handle def. */
3455 scalar_dest = gimple_call_lhs (gs: stmt);
3456 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3457
3458 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3459 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
3460 unsigned int vect_nargs = nargs;
3461 if (len_loop_p)
3462 {
3463 if (len_opno >= 0)
3464 {
3465 ifn = cond_len_fn;
3466 /* COND_* -> COND_LEN_* takes 2 extra arguments: LEN, BIAS. */
3467 vect_nargs += 2;
3468 }
3469 else if (reduc_idx >= 0)
3470 gcc_unreachable ();
3471 }
3472 else if (masked_loop_p && reduc_idx >= 0)
3473 {
3474 ifn = cond_fn;
3475 vect_nargs += 2;
3476 }
3477 if (clz_ctz_arg1)
3478 ++vect_nargs;
3479
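 /* Code generation for same-width calls and for internal functions,
    including the NARROW internal-function case where pairs of intermediate
    results are packed into one narrower vector with CONVERT_CODE.  */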
3480 if (modifier == NONE || ifn != IFN_LAST)
3481 {
3482 tree prev_res = NULL_TREE;
3483 vargs.safe_grow (len: vect_nargs, exact: true);
3484 auto_vec<vec<tree> > vec_defs (nargs);
3485 for (j = 0; j < ncopies; ++j)
3486 {
3487 /* Build argument list for the vectorized call. */
3488 if (slp_node)
3489 {
3490 vec<tree> vec_oprnds0;
3491
3492 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3493 vec_oprnds0 = vec_defs[0];
3494
3495 /* Arguments are ready. Create the new vector stmt. */
3496 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3497 {
3498 int varg = 0;
3499 if (masked_loop_p && reduc_idx >= 0)
3500 {
3501 unsigned int vec_num = vec_oprnds0.length ();
3502 /* Always true for SLP. */
3503 gcc_assert (ncopies == 1);
3504 vargs[varg++] = vect_get_loop_mask (loop_vinfo,
3505 gsi, masks, vec_num,
3506 vectype_out, i);
3507 }
3508 size_t k;
3509 for (k = 0; k < nargs; k++)
3510 {
3511 vec<tree> vec_oprndsk = vec_defs[k];
3512 vargs[varg++] = vec_oprndsk[i];
3513 }
3514 if (masked_loop_p && reduc_idx >= 0)
3515 vargs[varg++] = vargs[reduc_idx + 1];
3516 if (clz_ctz_arg1)
3517 vargs[varg++] = clz_ctz_arg1;
3518
3519 gimple *new_stmt;
3520 if (modifier == NARROW)
3521 {
3522 /* We don't define any narrowing conditional functions
3523 at present. */
3524 gcc_assert (mask_opno < 0);
3525 tree half_res = make_ssa_name (var: vectype_in);
3526 gcall *call
3527 = gimple_build_call_internal_vec (ifn, vargs);
3528 gimple_call_set_lhs (gs: call, lhs: half_res);
3529 gimple_call_set_nothrow (s: call, nothrow_p: true);
3530 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi);
3531 if ((i & 1) == 0)
3532 {
3533 prev_res = half_res;
3534 continue;
3535 }
3536 new_temp = make_ssa_name (var: vec_dest);
3537 new_stmt = vect_gimple_build (new_temp, convert_code,
3538 prev_res, half_res);
3539 vect_finish_stmt_generation (vinfo, stmt_info,
3540 vec_stmt: new_stmt, gsi);
3541 }
3542 else
3543 {
3544 if (len_opno >= 0 && len_loop_p)
3545 {
3546 unsigned int vec_num = vec_oprnds0.length ();
3547 /* Always true for SLP. */
3548 gcc_assert (ncopies == 1);
3549 tree len
3550 = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num,
3551 vectype_out, i, 1);
3552 signed char biasval
3553 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3554 tree bias = build_int_cst (intQI_type_node, biasval);
3555 vargs[len_opno] = len;
3556 vargs[len_opno + 1] = bias;
3557 }
3558 else if (mask_opno >= 0 && masked_loop_p)
3559 {
3560 unsigned int vec_num = vec_oprnds0.length ();
3561 /* Always true for SLP. */
3562 gcc_assert (ncopies == 1);
3563 tree mask = vect_get_loop_mask (loop_vinfo,
3564 gsi, masks, vec_num,
3565 vectype_out, i);
3566 vargs[mask_opno] = prepare_vec_mask
3567 (loop_vinfo, TREE_TYPE (mask), loop_mask: mask,
3568 vec_mask: vargs[mask_opno], gsi);
3569 }
3570
3571 gcall *call;
3572 if (ifn != IFN_LAST)
3573 call = gimple_build_call_internal_vec (ifn, vargs);
3574 else
3575 call = gimple_build_call_vec (fndecl, vargs);
3576 new_temp = make_ssa_name (var: vec_dest, stmt: call);
3577 gimple_call_set_lhs (gs: call, lhs: new_temp);
3578 gimple_call_set_nothrow (s: call, nothrow_p: true);
3579 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi);
3580 new_stmt = call;
3581 }
3582 slp_node->push_vec_def (def: new_stmt);
3583 }
3584 continue;
3585 }
3586
3587 int varg = 0;
3588 if (masked_loop_p && reduc_idx >= 0)
3589 vargs[varg++] = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3590 vectype_out, j);
3591 for (i = 0; i < nargs; i++)
3592 {
3593 op = gimple_call_arg (gs: stmt, index: i);
3594 if (j == 0)
3595 {
3596 vec_defs.quick_push (obj: vNULL);
3597 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info, ncopies,
3598 op, vec_oprnds: &vec_defs[i],
3599 vectype: vectypes[i]);
3600 }
3601 vargs[varg++] = vec_defs[i][j];
3602 }
3603 if (masked_loop_p && reduc_idx >= 0)
3604 vargs[varg++] = vargs[reduc_idx + 1];
3605 if (clz_ctz_arg1)
3606 vargs[varg++] = clz_ctz_arg1;
3607
3608 if (len_opno >= 0 && len_loop_p)
3609 {
3610 tree len = vect_get_loop_len (loop_vinfo, gsi, lens, ncopies,
3611 vectype_out, j, 1);
3612 signed char biasval
3613 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3614 tree bias = build_int_cst (intQI_type_node, biasval);
3615 vargs[len_opno] = len;
3616 vargs[len_opno + 1] = bias;
3617 }
3618 else if (mask_opno >= 0 && masked_loop_p)
3619 {
3620 tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3621 vectype_out, j);
3622 vargs[mask_opno]
3623 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), loop_mask: mask,
3624 vec_mask: vargs[mask_opno], gsi);
3625 }
3626
3627 gimple *new_stmt;
3628 if (cfn == CFN_GOMP_SIMD_LANE)
3629 {
3630 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3631 tree new_var
3632 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3633 gimple *init_stmt = gimple_build_assign (new_var, cst);
3634 vect_init_vector_1 (vinfo, stmt_vinfo: stmt_info, new_stmt: init_stmt, NULL);
3635 new_temp = make_ssa_name (var: vec_dest);
3636 new_stmt = gimple_build_assign (new_temp, new_var);
3637 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
3638 }
3639 else if (modifier == NARROW)
3640 {
3641 /* We don't define any narrowing conditional functions at
3642 present. */
3643 gcc_assert (mask_opno < 0);
3644 tree half_res = make_ssa_name (var: vectype_in);
3645 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3646 gimple_call_set_lhs (gs: call, lhs: half_res);
3647 gimple_call_set_nothrow (s: call, nothrow_p: true);
3648 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi);
3649 if ((j & 1) == 0)
3650 {
3651 prev_res = half_res;
3652 continue;
3653 }
3654 new_temp = make_ssa_name (var: vec_dest);
3655 new_stmt = vect_gimple_build (new_temp, convert_code, prev_res,
3656 half_res);
3657 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
3658 }
3659 else
3660 {
3661 gcall *call;
3662 if (ifn != IFN_LAST)
3663 call = gimple_build_call_internal_vec (ifn, vargs);
3664 else
3665 call = gimple_build_call_vec (fndecl, vargs);
3666 new_temp = make_ssa_name (var: vec_dest, stmt: call);
3667 gimple_call_set_lhs (gs: call, lhs: new_temp);
3668 gimple_call_set_nothrow (s: call, nothrow_p: true);
3669 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi);
3670 new_stmt = call;
3671 }
3672
3673 if (j == (modifier == NARROW ? 1 : 0))
3674 *vec_stmt = new_stmt;
3675 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
3676 }
3677 for (i = 0; i < nargs; i++)
3678 {
3679 vec<tree> vec_oprndsi = vec_defs[i];
3680 vec_oprndsi.release ();
3681 }
3682 }
3683 else if (modifier == NARROW)
3684 {
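 /* NARROW case using a target builtin: each scalar argument contributes
    two input vectors per call and the builtin produces one narrower output
    vector.  */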
3685 auto_vec<vec<tree> > vec_defs (nargs);
3686 /* We don't define any narrowing conditional functions at present. */
3687 gcc_assert (mask_opno < 0);
3688 for (j = 0; j < ncopies; ++j)
3689 {
3690 /* Build argument list for the vectorized call. */
3691 if (j == 0)
3692 vargs.create (nelems: nargs * 2);
3693 else
3694 vargs.truncate (size: 0);
3695
3696 if (slp_node)
3697 {
3698 vec<tree> vec_oprnds0;
3699
3700 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3701 vec_oprnds0 = vec_defs[0];
3702
3703 /* Arguments are ready. Create the new vector stmt. */
3704 for (i = 0; vec_oprnds0.iterate (ix: i, ptr: &vec_oprnd0); i += 2)
3705 {
3706 size_t k;
3707 vargs.truncate (size: 0);
3708 for (k = 0; k < nargs; k++)
3709 {
3710 vec<tree> vec_oprndsk = vec_defs[k];
3711 vargs.quick_push (obj: vec_oprndsk[i]);
3712 vargs.quick_push (obj: vec_oprndsk[i + 1]);
3713 }
3714 gcall *call;
3715 if (ifn != IFN_LAST)
3716 call = gimple_build_call_internal_vec (ifn, vargs);
3717 else
3718 call = gimple_build_call_vec (fndecl, vargs);
3719 new_temp = make_ssa_name (var: vec_dest, stmt: call);
3720 gimple_call_set_lhs (gs: call, lhs: new_temp);
3721 gimple_call_set_nothrow (s: call, nothrow_p: true);
3722 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi);
3723 slp_node->push_vec_def (def: call);
3724 }
3725 continue;
3726 }
3727
3728 for (i = 0; i < nargs; i++)
3729 {
3730 op = gimple_call_arg (gs: stmt, index: i);
3731 if (j == 0)
3732 {
3733 vec_defs.quick_push (obj: vNULL);
3734 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info, ncopies: 2 * ncopies,
3735 op, vec_oprnds: &vec_defs[i], vectype: vectypes[i]);
3736 }
3737 vec_oprnd0 = vec_defs[i][2*j];
3738 vec_oprnd1 = vec_defs[i][2*j+1];
3739
3740 vargs.quick_push (obj: vec_oprnd0);
3741 vargs.quick_push (obj: vec_oprnd1);
3742 }
3743
3744 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3745 new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt);
3746 gimple_call_set_lhs (gs: new_stmt, lhs: new_temp);
3747 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
3748
3749 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
3750 }
3751
3752 if (!slp_node)
3753 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3754
3755 for (i = 0; i < nargs; i++)
3756 {
3757 vec<tree> vec_oprndsi = vec_defs[i];
3758 vec_oprndsi.release ();
3759 }
3760 }
3761 else
3762 /* No current target implements this case. */
3763 return false;
3764
3765 vargs.release ();
3766
3767 /* The call in STMT might prevent it from being removed by DCE.
3768 We cannot remove it here, however, due to the way the SSA name
3769 it defines is mapped to the new definition.  So just replace the
3770 rhs of the statement with something harmless. */
3771
3772 if (slp_node)
3773 return true;
3774
3775 stmt_info = vect_orig_stmt (stmt_info);
3776 lhs = gimple_get_lhs (stmt_info->stmt);
3777
3778 gassign *new_stmt
3779 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3780 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3781
3782 return true;
3783}
3784
3785
3786struct simd_call_arg_info
3787{
3788 tree vectype;
3789 tree op;
3790 HOST_WIDE_INT linear_step;
3791 enum vect_def_type dt;
3792 unsigned int align;
3793 bool simd_lane_linear;
3794};
3795
3796/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
3797 is linear within a simd lane (but not within the whole loop), note it in
3798 *ARGINFO. */
3799
3800static void
3801vect_simd_lane_linear (tree op, class loop *loop,
3802 struct simd_call_arg_info *arginfo)
3803{
3804 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3805
3806 if (!is_gimple_assign (gs: def_stmt)
3807 || gimple_assign_rhs_code (gs: def_stmt) != POINTER_PLUS_EXPR
3808 || !is_gimple_min_invariant (gimple_assign_rhs1 (gs: def_stmt)))
3809 return;
3810
3811 tree base = gimple_assign_rhs1 (gs: def_stmt);
3812 HOST_WIDE_INT linear_step = 0;
3813 tree v = gimple_assign_rhs2 (gs: def_stmt);
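 /* Walk V's definition chain, folding constant PLUS_EXPR addends into BASE
    and a single constant MULT_EXPR factor into LINEAR_STEP, looking through
    non-narrowing conversions, until we reach the IFN_GOMP_SIMD_LANE call
    for this loop's simduid.  */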
3814 while (TREE_CODE (v) == SSA_NAME)
3815 {
3816 tree t;
3817 def_stmt = SSA_NAME_DEF_STMT (v);
3818 if (is_gimple_assign (gs: def_stmt))
3819 switch (gimple_assign_rhs_code (gs: def_stmt))
3820 {
3821 case PLUS_EXPR:
3822 t = gimple_assign_rhs2 (gs: def_stmt);
3823 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3824 return;
3825 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3826 v = gimple_assign_rhs1 (gs: def_stmt);
3827 continue;
3828 case MULT_EXPR:
3829 t = gimple_assign_rhs2 (gs: def_stmt);
3830 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3831 return;
3832 linear_step = tree_to_shwi (t);
3833 v = gimple_assign_rhs1 (gs: def_stmt);
3834 continue;
3835 CASE_CONVERT:
3836 t = gimple_assign_rhs1 (gs: def_stmt);
3837 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3838 || (TYPE_PRECISION (TREE_TYPE (v))
3839 < TYPE_PRECISION (TREE_TYPE (t))))
3840 return;
3841 if (!linear_step)
3842 linear_step = 1;
3843 v = t;
3844 continue;
3845 default:
3846 return;
3847 }
3848 else if (gimple_call_internal_p (gs: def_stmt, fn: IFN_GOMP_SIMD_LANE)
3849 && loop->simduid
3850 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3851 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3852 == loop->simduid))
3853 {
3854 if (!linear_step)
3855 linear_step = 1;
3856 arginfo->linear_step = linear_step;
3857 arginfo->op = base;
3858 arginfo->simd_lane_linear = true;
3859 return;
3860 }
3861 }
3862}
3863
3864/* Function vectorizable_simd_clone_call.
3865
3866 Check if STMT_INFO performs a function call that can be vectorized
3867 by calling a simd clone of the function.
3868 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3869 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3870 Return true if STMT_INFO is vectorizable in this way. */
3871
3872static bool
3873vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3874 gimple_stmt_iterator *gsi,
3875 gimple **vec_stmt, slp_tree slp_node,
3876 stmt_vector_for_cost *)
3877{
3878 tree vec_dest;
3879 tree scalar_dest;
3880 tree op, type;
3881 tree vec_oprnd0 = NULL_TREE;
3882 tree vectype;
3883 poly_uint64 nunits;
3884 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
3885 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo);
3886 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3887 tree fndecl, new_temp;
3888 int ncopies, j;
3889 auto_vec<simd_call_arg_info> arginfo;
3890 vec<tree> vargs = vNULL;
3891 size_t i, nargs;
3892 tree lhs, rtype, ratype;
3893 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3894 int masked_call_offset = 0;
3895
3896 /* Is STMT a vectorizable call? */
3897 gcall *stmt = dyn_cast <gcall *> (p: stmt_info->stmt);
3898 if (!stmt)
3899 return false;
3900
3901 fndecl = gimple_call_fndecl (gs: stmt);
3902 if (fndecl == NULL_TREE
3903 && gimple_call_internal_p (gs: stmt, fn: IFN_MASK_CALL))
3904 {
3905 fndecl = gimple_call_arg (gs: stmt, index: 0);
3906 gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
3907 fndecl = TREE_OPERAND (fndecl, 0);
3908 gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
3909 masked_call_offset = 1;
3910 }
3911 if (fndecl == NULL_TREE)
3912 return false;
3913
3914 struct cgraph_node *node = cgraph_node::get (decl: fndecl);
3915 if (node == NULL || node->simd_clones == NULL)
3916 return false;
3917
3918 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3919 return false;
3920
3921 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3922 && ! vec_stmt)
3923 return false;
3924
3925 if (gimple_call_lhs (gs: stmt)
3926 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3927 return false;
3928
3929 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3930
3931 vectype = STMT_VINFO_VECTYPE (stmt_info);
3932
3933 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3934 return false;
3935
3936 /* Process function arguments. */
3937 nargs = gimple_call_num_args (gs: stmt) - masked_call_offset;
3938
3939 /* Bail out if the function has zero arguments. */
3940 if (nargs == 0)
3941 return false;
3942
3943 vec<tree>& simd_clone_info = (slp_node ? SLP_TREE_SIMD_CLONE_INFO (slp_node)
3944 : STMT_VINFO_SIMD_CLONE_INFO (stmt_info));
3945 arginfo.reserve (nelems: nargs, exact: true);
3946 auto_vec<slp_tree> slp_op;
3947 slp_op.safe_grow_cleared (len: nargs);
3948
3949 for (i = 0; i < nargs; i++)
3950 {
3951 simd_call_arg_info thisarginfo;
3952 affine_iv iv;
3953
3954 thisarginfo.linear_step = 0;
3955 thisarginfo.align = 0;
3956 thisarginfo.op = NULL_TREE;
3957 thisarginfo.simd_lane_linear = false;
3958
3959 int op_no = i + masked_call_offset;
3960 if (slp_node)
3961 op_no = vect_slp_child_index_for_operand (stmt, op: op_no, false);
3962 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3963 op_no, &op, &slp_op[i],
3964 &thisarginfo.dt, &thisarginfo.vectype)
3965 || thisarginfo.dt == vect_uninitialized_def)
3966 {
3967 if (dump_enabled_p ())
3968 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3969 "use not simple.\n");
3970 return false;
3971 }
3972
3973 if (thisarginfo.dt == vect_constant_def
3974 || thisarginfo.dt == vect_external_def)
3975 {
3976 /* With SLP we determine the vector type of constants/externals
3977 at analysis time, handling conflicts via
3978 vect_maybe_update_slp_op_vectype. At transform time
3979 we have a vector type recorded for SLP. */
3980 gcc_assert (!vec_stmt
3981 || !slp_node
3982 || thisarginfo.vectype != NULL_TREE);
3983 if (!vec_stmt)
3984 thisarginfo.vectype = get_vectype_for_scalar_type (vinfo,
3985 TREE_TYPE (op),
3986 slp_node);
3987 }
3988 else
3989 gcc_assert (thisarginfo.vectype != NULL_TREE);
3990
3991 /* For linear arguments, the analyze phase should have saved
3992 the base and step in {STMT_VINFO,SLP_TREE}_SIMD_CLONE_INFO. */
3993 if (i * 3 + 4 <= simd_clone_info.length ()
3994 && simd_clone_info[i * 3 + 2])
3995 {
3996 gcc_assert (vec_stmt);
3997 thisarginfo.linear_step = tree_to_shwi (simd_clone_info[i * 3 + 2]);
3998 thisarginfo.op = simd_clone_info[i * 3 + 1];
3999 thisarginfo.simd_lane_linear
4000 = (simd_clone_info[i * 3 + 3] == boolean_true_node);
4001 /* If the loop has been peeled for alignment, we need to adjust it. */
4002 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4003 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4004 if (n1 != n2 && !thisarginfo.simd_lane_linear)
4005 {
4006 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4007 tree step = simd_clone_info[i * 3 + 2];
4008 tree opt = TREE_TYPE (thisarginfo.op);
4009 bias = fold_convert (TREE_TYPE (step), bias);
4010 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4011 thisarginfo.op
4012 = fold_build2 (POINTER_TYPE_P (opt)
4013 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4014 thisarginfo.op, bias);
4015 }
4016 }
4017 else if (!vec_stmt
4018 && thisarginfo.dt != vect_constant_def
4019 && thisarginfo.dt != vect_external_def
4020 && loop_vinfo
4021 && TREE_CODE (op) == SSA_NAME
4022 && simple_iv (loop, loop_containing_stmt (stmt), op,
4023 &iv, false)
4024 && tree_fits_shwi_p (iv.step))
4025 {
4026 thisarginfo.linear_step = tree_to_shwi (iv.step);
4027 thisarginfo.op = iv.base;
4028 }
4029 else if ((thisarginfo.dt == vect_constant_def
4030 || thisarginfo.dt == vect_external_def)
4031 && POINTER_TYPE_P (TREE_TYPE (op)))
4032 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4033 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4034 linear too. */
4035 if (POINTER_TYPE_P (TREE_TYPE (op))
4036 && !thisarginfo.linear_step
4037 && !vec_stmt
4038 && thisarginfo.dt != vect_constant_def
4039 && thisarginfo.dt != vect_external_def
4040 && loop_vinfo
4041 && TREE_CODE (op) == SSA_NAME)
4042 vect_simd_lane_linear (op, loop, arginfo: &thisarginfo);
4043
4044 arginfo.quick_push (obj: thisarginfo);
4045 }
4046
4047 poly_uint64 vf = loop_vinfo ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
4048 unsigned group_size = slp_node ? SLP_TREE_LANES (slp_node) : 1;
4049 unsigned int badness = 0;
4050 struct cgraph_node *bestn = NULL;
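 /* Pick the simd clone to use: reuse the one recorded during analysis if
    there is one, otherwise score every clone (penalizing extra calls,
    inbranch clones, target preferences, and argument kind, mask-mode and
    alignment mismatches) and keep the one with the lowest badness.  */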
4051 if (simd_clone_info.exists ())
4052 bestn = cgraph_node::get (decl: simd_clone_info[0]);
4053 else
4054 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4055 n = n->simdclone->next_clone)
4056 {
4057 unsigned int this_badness = 0;
4058 unsigned int num_calls;
4059 /* The number of arguments in the call and the number of parameters in
4060 the simdclone should match.  However, an 'inbranch' simdclone can
4061 have one more parameter than nargs when it is used to implement a
4062 non-inbranch call, either in a non-masked loop using an all-true
4063 constant mask, or inside a masked
4064 loop using its mask. */
4065 size_t simd_nargs = n->simdclone->nargs;
4066 if (!masked_call_offset && n->simdclone->inbranch)
4067 simd_nargs--;
4068 if (!constant_multiple_p (a: vf * group_size, b: n->simdclone->simdlen,
4069 multiple: &num_calls)
4070 || (!n->simdclone->inbranch && (masked_call_offset > 0))
4071 || (nargs != simd_nargs))
4072 continue;
4073 if (num_calls != 1)
4074 this_badness += floor_log2 (x: num_calls) * 4096;
4075 if (n->simdclone->inbranch)
4076 this_badness += 8192;
4077 int target_badness = targetm.simd_clone.usable (n);
4078 if (target_badness < 0)
4079 continue;
4080 this_badness += target_badness * 512;
4081 for (i = 0; i < nargs; i++)
4082 {
4083 switch (n->simdclone->args[i].arg_type)
4084 {
4085 case SIMD_CLONE_ARG_TYPE_VECTOR:
4086 if (!useless_type_conversion_p
4087 (n->simdclone->args[i].orig_type,
4088 TREE_TYPE (gimple_call_arg (stmt,
4089 i + masked_call_offset))))
4090 i = -1;
4091 else if (arginfo[i].dt == vect_constant_def
4092 || arginfo[i].dt == vect_external_def
4093 || arginfo[i].linear_step)
4094 this_badness += 64;
4095 break;
4096 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4097 if (arginfo[i].dt != vect_constant_def
4098 && arginfo[i].dt != vect_external_def)
4099 i = -1;
4100 break;
4101 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4102 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4103 if (arginfo[i].dt == vect_constant_def
4104 || arginfo[i].dt == vect_external_def
4105 || (arginfo[i].linear_step
4106 != n->simdclone->args[i].linear_step))
4107 i = -1;
4108 break;
4109 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4110 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4111 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4112 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4113 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4114 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4115 /* FORNOW */
4116 i = -1;
4117 break;
4118 case SIMD_CLONE_ARG_TYPE_MASK:
4119 /* While we can create a traditional data vector from
4120 an incoming integer mode mask, we have no good way to
4121 forcibly generate an integer mode mask from a traditional
4122 boolean vector input. */
4123 if (SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4124 && !SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
4125 i = -1;
4126 else if (!SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4127 && SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
4128 this_badness += 2048;
4129 break;
4130 }
4131 if (i == (size_t) -1)
4132 break;
4133 if (n->simdclone->args[i].alignment > arginfo[i].align)
4134 {
4135 i = -1;
4136 break;
4137 }
4138 if (arginfo[i].align)
4139 this_badness += (exact_log2 (x: arginfo[i].align)
4140 - exact_log2 (x: n->simdclone->args[i].alignment));
4141 }
4142 if (i == (size_t) -1)
4143 continue;
4144 if (masked_call_offset == 0
4145 && n->simdclone->inbranch
4146 && n->simdclone->nargs > nargs)
4147 {
4148 gcc_assert (n->simdclone->args[n->simdclone->nargs - 1].arg_type ==
4149 SIMD_CLONE_ARG_TYPE_MASK);
4150 /* Penalize using a masked SIMD clone in a non-masked loop for a call that
4151 is not in a branch, as we'd have to construct an all-true mask. */
4152 if (!loop_vinfo || !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4153 this_badness += 64;
4154 }
4155 if (bestn == NULL || this_badness < badness)
4156 {
4157 bestn = n;
4158 badness = this_badness;
4159 }
4160 }
4161
4162 if (bestn == NULL)
4163 return false;
4164
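 /* For clones with an integer mask mode the simdlen is divided evenly
    among all mask arguments, so count them before checking lane counts
    below.  */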
4165 unsigned int num_mask_args = 0;
4166 if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4167 for (i = 0; i < nargs; i++)
4168 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
4169 num_mask_args++;
4170
4171 for (i = 0; i < nargs; i++)
4172 {
4173 if ((arginfo[i].dt == vect_constant_def
4174 || arginfo[i].dt == vect_external_def)
4175 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4176 {
4177 tree arg_type = TREE_TYPE (gimple_call_arg (stmt,
4178 i + masked_call_offset));
4179 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4180 slp_node);
4181 if (arginfo[i].vectype == NULL
4182 || !constant_multiple_p (a: bestn->simdclone->simdlen,
4183 b: TYPE_VECTOR_SUBPARTS (node: arginfo[i].vectype)))
4184 return false;
4185 }
4186
4187 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR
4188 && VECTOR_BOOLEAN_TYPE_P (bestn->simdclone->args[i].vector_type))
4189 {
4190 if (dump_enabled_p ())
4191 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4192 "vector mask arguments are not supported.\n");
4193 return false;
4194 }
4195
4196 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
4197 {
4198 tree clone_arg_vectype = bestn->simdclone->args[i].vector_type;
4199 if (bestn->simdclone->mask_mode == VOIDmode)
4200 {
4201 if (maybe_ne (a: TYPE_VECTOR_SUBPARTS (node: clone_arg_vectype),
4202 b: TYPE_VECTOR_SUBPARTS (node: arginfo[i].vectype)))
4203 {
4204 /* FORNOW we only have partial support for vector-type masks
4205 that can't hold all of simdlen. */
4206 if (dump_enabled_p ())
4207 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4208 vect_location,
4209 "in-branch vector clones are not yet"
4210 " supported for mismatched vector sizes.\n");
4211 return false;
4212 }
4213 if (!expand_vec_cond_expr_p (clone_arg_vectype,
4214 arginfo[i].vectype, ERROR_MARK))
4215 {
4216 if (dump_enabled_p ())
4217 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4218 vect_location,
4219 "cannot compute mask argument for"
4220 " in-branch vector clones.\n");
4221 return false;
4222 }
4223 }
4224 else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4225 {
4226 if (!SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype))
4227 || maybe_ne (a: exact_div (a: bestn->simdclone->simdlen,
4228 b: num_mask_args),
4229 b: TYPE_VECTOR_SUBPARTS (node: arginfo[i].vectype)))
4230 {
4231 /* FORNOW we only have partial support for integer-type masks
4232 that represent the same number of lanes as the
4233 vectorized mask inputs. */
4234 if (dump_enabled_p ())
4235 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4236 vect_location,
4237 "in-branch vector clones are not yet "
4238 "supported for mismatched vector sizes.\n");
4239 return false;
4240 }
4241 }
4242 else
4243 {
4244 if (dump_enabled_p ())
4245 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4246 vect_location,
4247 "in-branch vector clones not supported"
4248 " on this target.\n");
4249 return false;
4250 }
4251 }
4252 }
4253
4254 fndecl = bestn->decl;
4255 nunits = bestn->simdclone->simdlen;
4256 if (slp_node)
4257 ncopies = vector_unroll_factor (vf * group_size, nunits);
4258 else
4259 ncopies = vector_unroll_factor (vf, nunits);
4260
4261 /* If the function isn't const, only allow it in simd loops where the user
4262 has asserted that at least nunits consecutive iterations can be
4263 performed using SIMD instructions. */
4264 if ((loop == NULL || maybe_lt (a: (unsigned) loop->safelen, b: nunits))
4265 && gimple_vuse (g: stmt))
4266 return false;
4267
4268 /* Sanity check: make sure that at least one copy of the vectorized stmt
4269 needs to be generated. */
4270 gcc_assert (ncopies >= 1);
4271
4272 if (!vec_stmt) /* transformation not required. */
4273 {
4274 if (slp_node)
4275 for (unsigned i = 0; i < nargs; ++i)
4276 if (!vect_maybe_update_slp_op_vectype (slp_op[i], arginfo[i].vectype))
4277 {
4278 if (dump_enabled_p ())
4279 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4280 "incompatible vector types for invariants\n");
4281 return false;
4282 }
4283 /* When the original call is pure or const but the SIMD ABI dictates
4284 an aggregate return we will have to use a virtual definition and
4285 in a loop eventually even need to add a virtual PHI.  That's
4286 not straightforward, so allow fixing this up via renaming. */
4287 if (gimple_call_lhs (gs: stmt)
4288 && !gimple_vdef (g: stmt)
4289 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
4290 vinfo->any_known_not_updated_vssa = true;
4291 /* ??? For SLP code-gen we end up inserting after the last
4292 vector argument def rather than at the original call position
4293 so automagic virtual operand updating doesn't work. */
4294 if (gimple_vuse (g: stmt) && slp_node)
4295 vinfo->any_known_not_updated_vssa = true;
4296 simd_clone_info.safe_push (obj: bestn->decl);
4297 for (i = 0; i < bestn->simdclone->nargs; i++)
4298 {
4299 switch (bestn->simdclone->args[i].arg_type)
4300 {
4301 default:
4302 continue;
4303 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4304 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4305 {
4306 simd_clone_info.safe_grow_cleared (len: i * 3 + 1, exact: true);
4307 simd_clone_info.safe_push (obj: arginfo[i].op);
4308 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4309 ? size_type_node : TREE_TYPE (arginfo[i].op);
4310 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4311 simd_clone_info.safe_push (obj: ls);
4312 tree sll = arginfo[i].simd_lane_linear
4313 ? boolean_true_node : boolean_false_node;
4314 simd_clone_info.safe_push (obj: sll);
4315 }
4316 break;
4317 case SIMD_CLONE_ARG_TYPE_MASK:
4318 if (loop_vinfo
4319 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
4320 vect_record_loop_mask (loop_vinfo,
4321 &LOOP_VINFO_MASKS (loop_vinfo),
4322 ncopies, vectype, op);
4323
4324 break;
4325 }
4326 }
4327
4328 if (!bestn->simdclone->inbranch && loop_vinfo)
4329 {
4330 if (dump_enabled_p ()
4331 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
4332 dump_printf_loc (MSG_NOTE, vect_location,
4333 "can't use a fully-masked loop because a"
4334 " non-masked simd clone was selected.\n");
4335 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
4336 }
4337
4338 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4339 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4340/* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4341 dt, slp_node, cost_vec); */
4342 return true;
4343 }
4344
4345 /* Transform. */
4346
4347 if (dump_enabled_p ())
4348 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4349
4350 /* Handle def. */
4351 scalar_dest = gimple_call_lhs (gs: stmt);
4352 vec_dest = NULL_TREE;
4353 rtype = NULL_TREE;
4354 ratype = NULL_TREE;
4355 if (scalar_dest)
4356 {
4357 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4358 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4359 if (TREE_CODE (rtype) == ARRAY_TYPE)
4360 {
4361 ratype = rtype;
4362 rtype = TREE_TYPE (ratype);
4363 }
4364 }
4365
4366 auto_vec<vec<tree> > vec_oprnds;
4367 auto_vec<unsigned> vec_oprnds_i;
4368 vec_oprnds_i.safe_grow_cleared (len: nargs, exact: true);
4369 if (slp_node)
4370 {
4371 vec_oprnds.reserve_exact (nelems: nargs);
4372 vect_get_slp_defs (vinfo, slp_node, &vec_oprnds);
4373 }
4374 else
4375 vec_oprnds.safe_grow_cleared (len: nargs, exact: true);
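 /* Generate NCOPIES calls to the chosen clone.  For each copy build the
    argument list by splitting or concatenating the vectorized operands so
    they match the clone's parameter vector types, then distribute the
    clone's return value back into vectors of VECTYPE.  */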
4376 for (j = 0; j < ncopies; ++j)
4377 {
4378 poly_uint64 callee_nelements;
4379 poly_uint64 caller_nelements;
4380 /* Build argument list for the vectorized call. */
4381 if (j == 0)
4382 vargs.create (nelems: nargs);
4383 else
4384 vargs.truncate (size: 0);
4385
4386 for (i = 0; i < nargs; i++)
4387 {
4388 unsigned int k, l, m, o;
4389 tree atype;
4390 op = gimple_call_arg (gs: stmt, index: i + masked_call_offset);
4391 switch (bestn->simdclone->args[i].arg_type)
4392 {
4393 case SIMD_CLONE_ARG_TYPE_VECTOR:
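	    /* A vector argument is re-chopped to the clone's vector type:
	       pieces are extracted with BIT_FIELD_REFs when the clone's
	       vectors are narrower than ours, and glued together with a
	       CONSTRUCTOR (or a VIEW_CONVERT for a single piece) when they
	       are wider.  */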
4394 atype = bestn->simdclone->args[i].vector_type;
4395 caller_nelements = TYPE_VECTOR_SUBPARTS (node: arginfo[i].vectype);
4396 callee_nelements = TYPE_VECTOR_SUBPARTS (node: atype);
4397 o = vector_unroll_factor (nunits, callee_nelements);
4398 for (m = j * o; m < (j + 1) * o; m++)
4399 {
4400 if (known_lt (callee_nelements, caller_nelements))
4401 {
4402 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4403 if (!constant_multiple_p (a: caller_nelements,
4404 b: callee_nelements, multiple: &k))
4405 gcc_unreachable ();
4406
4407 gcc_assert ((k & (k - 1)) == 0);
4408 if (m == 0)
4409 {
4410 if (!slp_node)
4411 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info,
4412 ncopies: ncopies * o / k, op,
4413 vec_oprnds: &vec_oprnds[i]);
4414 vec_oprnds_i[i] = 0;
4415 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4416 }
4417 else
4418 {
4419 vec_oprnd0 = arginfo[i].op;
4420 if ((m & (k - 1)) == 0)
4421 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4422 }
4423 arginfo[i].op = vec_oprnd0;
4424 vec_oprnd0
4425 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4426 bitsize_int (prec),
4427 bitsize_int ((m & (k - 1)) * prec));
4428 gassign *new_stmt
4429 = gimple_build_assign (make_ssa_name (var: atype),
4430 vec_oprnd0);
4431 vect_finish_stmt_generation (vinfo, stmt_info,
4432 vec_stmt: new_stmt, gsi);
4433 vargs.safe_push (obj: gimple_assign_lhs (gs: new_stmt));
4434 }
4435 else
4436 {
4437 if (!constant_multiple_p (a: callee_nelements,
4438 b: caller_nelements, multiple: &k))
4439 gcc_unreachable ();
4440 gcc_assert ((k & (k - 1)) == 0);
4441 vec<constructor_elt, va_gc> *ctor_elts;
4442 if (k != 1)
4443 vec_alloc (v&: ctor_elts, nelems: k);
4444 else
4445 ctor_elts = NULL;
4446 for (l = 0; l < k; l++)
4447 {
4448 if (m == 0 && l == 0)
4449 {
4450 if (!slp_node)
4451 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info,
4452 ncopies: k * o * ncopies,
4453 op,
4454 vec_oprnds: &vec_oprnds[i]);
4455 vec_oprnds_i[i] = 0;
4456 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4457 }
4458 else
4459 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4460 arginfo[i].op = vec_oprnd0;
4461 if (k == 1)
4462 break;
4463 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4464 vec_oprnd0);
4465 }
4466 if (k == 1)
4467 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4468 atype))
4469 {
4470 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, atype,
4471 vec_oprnd0);
4472 gassign *new_stmt
4473 = gimple_build_assign (make_ssa_name (var: atype),
4474 vec_oprnd0);
4475 vect_finish_stmt_generation (vinfo, stmt_info,
4476 vec_stmt: new_stmt, gsi);
4477 vargs.safe_push (obj: gimple_get_lhs (new_stmt));
4478 }
4479 else
4480 vargs.safe_push (obj: vec_oprnd0);
4481 else
4482 {
4483 vec_oprnd0 = build_constructor (atype, ctor_elts);
4484 gassign *new_stmt
4485 = gimple_build_assign (make_ssa_name (var: atype),
4486 vec_oprnd0);
4487 vect_finish_stmt_generation (vinfo, stmt_info,
4488 vec_stmt: new_stmt, gsi);
4489 vargs.safe_push (obj: gimple_assign_lhs (gs: new_stmt));
4490 }
4491 }
4492 }
4493 break;
4494 case SIMD_CLONE_ARG_TYPE_MASK:
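	    /* Mask arguments: for clones with a vector mask mode, fold in
	       the loop mask (if the loop is fully masked) and build a 0/1
	       data vector via VEC_COND_EXPR; for integer mask modes,
	       view-convert the boolean vector to an integer of the same
	       width.  */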
4495 if (bestn->simdclone->mask_mode == VOIDmode)
4496 {
4497 atype = bestn->simdclone->args[i].vector_type;
4498 tree elt_type = TREE_TYPE (atype);
4499 tree one = fold_convert (elt_type, integer_one_node);
4500 tree zero = fold_convert (elt_type, integer_zero_node);
4501 callee_nelements = TYPE_VECTOR_SUBPARTS (node: atype);
4502 caller_nelements = TYPE_VECTOR_SUBPARTS (node: arginfo[i].vectype);
4503 o = vector_unroll_factor (nunits, callee_nelements);
4504 for (m = j * o; m < (j + 1) * o; m++)
4505 {
4506 if (maybe_lt (a: callee_nelements, b: caller_nelements))
4507 {
4508 /* The mask type has fewer elements than simdlen. */
4509
4510 /* FORNOW */
4511 gcc_unreachable ();
4512 }
4513 else if (known_eq (callee_nelements, caller_nelements))
4514 {
4515 /* The SIMD clone function has the same number of
4516 elements as the current function. */
4517 if (m == 0)
4518 {
4519 if (!slp_node)
4520 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info,
4521 ncopies: o * ncopies,
4522 op,
4523 vec_oprnds: &vec_oprnds[i]);
4524 vec_oprnds_i[i] = 0;
4525 }
4526 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4527 if (loop_vinfo
4528 && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4529 {
4530 vec_loop_masks *loop_masks
4531 = &LOOP_VINFO_MASKS (loop_vinfo);
4532 tree loop_mask
4533 = vect_get_loop_mask (loop_vinfo, gsi,
4534 loop_masks, ncopies,
4535 vectype, j);
4536 vec_oprnd0
4537 = prepare_vec_mask (loop_vinfo,
4538 TREE_TYPE (loop_mask),
4539 loop_mask, vec_mask: vec_oprnd0,
4540 gsi);
4541 loop_vinfo->vec_cond_masked_set.add (k: { vec_oprnd0,
4542 loop_mask });
4543
4544 }
4545 vec_oprnd0
4546 = build3 (VEC_COND_EXPR, atype, vec_oprnd0,
4547 build_vector_from_val (atype, one),
4548 build_vector_from_val (atype, zero));
4549 gassign *new_stmt
4550 = gimple_build_assign (make_ssa_name (var: atype),
4551 vec_oprnd0);
4552 vect_finish_stmt_generation (vinfo, stmt_info,
4553 vec_stmt: new_stmt, gsi);
4554 vargs.safe_push (obj: gimple_assign_lhs (gs: new_stmt));
4555 }
4556 else
4557 {
4558 /* The mask type has more elements than simdlen. */
4559
4560 /* FORNOW */
4561 gcc_unreachable ();
4562 }
4563 }
4564 }
4565 else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4566 {
4567 atype = bestn->simdclone->args[i].vector_type;
4568 /* Guess the number of lanes represented by atype. */
4569 poly_uint64 atype_subparts
4570 = exact_div (a: bestn->simdclone->simdlen,
4571 b: num_mask_args);
4572 o = vector_unroll_factor (nunits, atype_subparts);
4573 for (m = j * o; m < (j + 1) * o; m++)
4574 {
4575 if (m == 0)
4576 {
4577 if (!slp_node)
4578 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info,
4579 ncopies: o * ncopies,
4580 op,
4581 vec_oprnds: &vec_oprnds[i]);
4582 vec_oprnds_i[i] = 0;
4583 }
4584 if (maybe_lt (a: atype_subparts,
4585 b: TYPE_VECTOR_SUBPARTS (node: arginfo[i].vectype)))
4586 {
4587 /* The mask argument has fewer elements than the
4588 input vector. */
4589 /* FORNOW */
4590 gcc_unreachable ();
4591 }
4592 else if (known_eq (atype_subparts,
4593 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4594 {
4595 /* The vector mask argument matches the input
4596 in the number of lanes, but not necessarily
4597 in the mode. */
4598 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4599 tree st = lang_hooks.types.type_for_mode
4600 (TYPE_MODE (TREE_TYPE (vec_oprnd0)), 1);
4601 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, st,
4602 vec_oprnd0);
4603 gassign *new_stmt
4604 = gimple_build_assign (make_ssa_name (var: st),
4605 vec_oprnd0);
4606 vect_finish_stmt_generation (vinfo, stmt_info,
4607 vec_stmt: new_stmt, gsi);
4608 if (!types_compatible_p (type1: atype, type2: st))
4609 {
4610 new_stmt
4611 = gimple_build_assign (make_ssa_name (var: atype),
4612 NOP_EXPR,
4613 gimple_assign_lhs
4614 (gs: new_stmt));
4615 vect_finish_stmt_generation (vinfo, stmt_info,
4616 vec_stmt: new_stmt, gsi);
4617 }
4618 vargs.safe_push (obj: gimple_assign_lhs (gs: new_stmt));
4619 }
4620 else
4621 {
4622 /* The mask argument has more elements than the
4623 input vector. */
4624 /* FORNOW */
4625 gcc_unreachable ();
4626 }
4627 }
4628 }
4629 else
4630 gcc_unreachable ();
4631 break;
4632 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4633 vargs.safe_push (obj: op);
4634 break;
4635 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4636 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
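	    /* A linear argument becomes a PHI that starts at the invariant
	       base and advances by STEP * NCOPIES * NUNITS per loop
	       iteration; copies after the first add STEP * J * NUNITS to the
	       PHI result.  Simd-lane-linear arguments are passed their base
	       value directly.  */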
4637 if (j == 0)
4638 {
4639 gimple_seq stmts;
4640 arginfo[i].op
4641 = force_gimple_operand (unshare_expr (arginfo[i].op),
4642 &stmts, true, NULL_TREE);
4643 if (stmts != NULL)
4644 {
4645 basic_block new_bb;
4646 edge pe = loop_preheader_edge (loop);
4647 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4648 gcc_assert (!new_bb);
4649 }
4650 if (arginfo[i].simd_lane_linear)
4651 {
4652 vargs.safe_push (obj: arginfo[i].op);
4653 break;
4654 }
4655 tree phi_res = copy_ssa_name (var: op);
4656 gphi *new_phi = create_phi_node (phi_res, loop->header);
4657 add_phi_arg (new_phi, arginfo[i].op,
4658 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4659 enum tree_code code
4660 = POINTER_TYPE_P (TREE_TYPE (op))
4661 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4662 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4663 ? sizetype : TREE_TYPE (op);
4664 poly_widest_int cst
4665 = wi::mul (a: bestn->simdclone->args[i].linear_step,
4666 b: ncopies * nunits);
4667 tree tcst = wide_int_to_tree (type, cst);
4668 tree phi_arg = copy_ssa_name (var: op);
4669 gassign *new_stmt
4670 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4671 gimple_stmt_iterator si = gsi_after_labels (bb: loop->header);
4672 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4673 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4674 UNKNOWN_LOCATION);
4675 arginfo[i].op = phi_res;
4676 vargs.safe_push (obj: phi_res);
4677 }
4678 else
4679 {
4680 enum tree_code code
4681 = POINTER_TYPE_P (TREE_TYPE (op))
4682 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4683 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4684 ? sizetype : TREE_TYPE (op);
4685 poly_widest_int cst
4686 = wi::mul (a: bestn->simdclone->args[i].linear_step,
4687 b: j * nunits);
4688 tree tcst = wide_int_to_tree (type, cst);
4689 new_temp = make_ssa_name (TREE_TYPE (op));
4690 gassign *new_stmt
4691 = gimple_build_assign (new_temp, code,
4692 arginfo[i].op, tcst);
4693 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
4694 vargs.safe_push (obj: new_temp);
4695 }
4696 break;
4697 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4698 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4699 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4700 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4701 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4702 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4703 default:
4704 gcc_unreachable ();
4705 }
4706 }
4707
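      /* If an inbranch clone was chosen for an unconditional call, append
	 the trailing mask argument: the loop mask in a fully-masked loop,
	 otherwise an all-ones mask, converted to the clone's mask mode.  */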
4708 if (masked_call_offset == 0
4709 && bestn->simdclone->inbranch
4710 && bestn->simdclone->nargs > nargs)
4711 {
4712 unsigned long m, o;
4713 size_t mask_i = bestn->simdclone->nargs - 1;
4714 tree mask;
4715 gcc_assert (bestn->simdclone->args[mask_i].arg_type ==
4716 SIMD_CLONE_ARG_TYPE_MASK);
4717
4718 tree masktype = bestn->simdclone->args[mask_i].vector_type;
4719 callee_nelements = TYPE_VECTOR_SUBPARTS (node: masktype);
4720 o = vector_unroll_factor (nunits, callee_nelements);
4721 for (m = j * o; m < (j + 1) * o; m++)
4722 {
4723 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4724 {
4725 vec_loop_masks *loop_masks = &LOOP_VINFO_MASKS (loop_vinfo);
4726 mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
4727 ncopies, vectype, j);
4728 }
4729 else
4730 mask = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
4731
4732 gassign *new_stmt;
4733 if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4734 {
4735 /* This means we are dealing with integer mask modes.
4736 First convert to an integer type with the same size as
4737 the current vector type. */
4738 unsigned HOST_WIDE_INT intermediate_size
4739 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (mask)));
4740 tree mid_int_type =
4741 build_nonstandard_integer_type (intermediate_size, 1);
4742 mask = build1 (VIEW_CONVERT_EXPR, mid_int_type, mask);
4743 new_stmt
4744 = gimple_build_assign (make_ssa_name (var: mid_int_type),
4745 mask);
4746 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4747 /* Then zero-extend to the mask mode. */
4748 mask = fold_build1 (NOP_EXPR, masktype,
4749 gimple_get_lhs (new_stmt));
4750 }
4751 else if (bestn->simdclone->mask_mode == VOIDmode)
4752 {
4753 tree one = fold_convert (TREE_TYPE (masktype),
4754 integer_one_node);
4755 tree zero = fold_convert (TREE_TYPE (masktype),
4756 integer_zero_node);
4757 mask = build3 (VEC_COND_EXPR, masktype, mask,
4758 build_vector_from_val (masktype, one),
4759 build_vector_from_val (masktype, zero));
4760 }
4761 else
4762 gcc_unreachable ();
4763
4764 new_stmt = gimple_build_assign (make_ssa_name (var: masktype), mask);
4765 vect_finish_stmt_generation (vinfo, stmt_info,
4766 vec_stmt: new_stmt, gsi);
4767 mask = gimple_assign_lhs (gs: new_stmt);
4768 vargs.safe_push (obj: mask);
4769 }
4770 }
4771
4772 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4773 if (vec_dest)
4774 {
4775 gcc_assert (ratype
4776 || known_eq (TYPE_VECTOR_SUBPARTS (rtype), nunits));
4777 if (ratype)
4778 new_temp = create_tmp_var (ratype);
4779 else if (useless_type_conversion_p (vectype, rtype))
4780 new_temp = make_ssa_name (var: vec_dest, stmt: new_call);
4781 else
4782 new_temp = make_ssa_name (var: rtype, stmt: new_call);
4783 gimple_call_set_lhs (gs: new_call, lhs: new_temp);
4784 }
4785 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_call, gsi);
4786 gimple *new_stmt = new_call;
4787
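      /* Map the clone's return value onto vectors of VECTYPE: split a wider
	 return (or a return-by-array) into pieces, gather narrower returns
	 into a CONSTRUCTOR once enough pieces have accumulated, or
	 view-convert when only the vector mode differs.  */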
4788 if (vec_dest)
4789 {
4790 if (!multiple_p (a: TYPE_VECTOR_SUBPARTS (node: vectype), b: nunits))
4791 {
4792 unsigned int k, l;
4793 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4794 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4795 k = vector_unroll_factor (nunits,
4796 TYPE_VECTOR_SUBPARTS (vectype));
4797 gcc_assert ((k & (k - 1)) == 0);
4798 for (l = 0; l < k; l++)
4799 {
4800 tree t;
4801 if (ratype)
4802 {
4803 t = build_fold_addr_expr (new_temp);
4804 t = build2 (MEM_REF, vectype, t,
4805 build_int_cst (TREE_TYPE (t), l * bytes));
4806 }
4807 else
4808 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4809 bitsize_int (prec), bitsize_int (l * prec));
4810 new_stmt = gimple_build_assign (make_ssa_name (var: vectype), t);
4811 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
4812
4813 if (j == 0 && l == 0)
4814 *vec_stmt = new_stmt;
4815 if (slp_node)
4816 SLP_TREE_VEC_DEFS (slp_node)
4817 .quick_push (obj: gimple_assign_lhs (gs: new_stmt));
4818 else
4819 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
4820 }
4821
4822 if (ratype)
4823 vect_clobber_variable (vinfo, stmt_info, gsi, var: new_temp);
4824 continue;
4825 }
4826 else if (!multiple_p (a: nunits, b: TYPE_VECTOR_SUBPARTS (node: vectype)))
4827 {
4828 unsigned int k;
4829 if (!constant_multiple_p (a: TYPE_VECTOR_SUBPARTS (node: vectype),
4830 b: TYPE_VECTOR_SUBPARTS (node: rtype), multiple: &k))
4831 gcc_unreachable ();
4832 gcc_assert ((k & (k - 1)) == 0);
4833 if ((j & (k - 1)) == 0)
4834 vec_alloc (v&: ret_ctor_elts, nelems: k);
4835 if (ratype)
4836 {
4837 unsigned int m, o;
4838 o = vector_unroll_factor (nunits,
4839 TYPE_VECTOR_SUBPARTS (rtype));
4840 for (m = 0; m < o; m++)
4841 {
4842 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4843 size_int (m), NULL_TREE, NULL_TREE);
4844 new_stmt = gimple_build_assign (make_ssa_name (var: rtype),
4845 tem);
4846 vect_finish_stmt_generation (vinfo, stmt_info,
4847 vec_stmt: new_stmt, gsi);
4848 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4849 gimple_assign_lhs (new_stmt));
4850 }
4851 vect_clobber_variable (vinfo, stmt_info, gsi, var: new_temp);
4852 }
4853 else
4854 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4855 if ((j & (k - 1)) != k - 1)
4856 continue;
4857 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4858 new_stmt
4859 = gimple_build_assign (make_ssa_name (var: vec_dest), vec_oprnd0);
4860 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
4861
4862 if ((unsigned) j == k - 1)
4863 *vec_stmt = new_stmt;
4864 if (slp_node)
4865 SLP_TREE_VEC_DEFS (slp_node)
4866 .quick_push (obj: gimple_assign_lhs (gs: new_stmt));
4867 else
4868 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
4869 continue;
4870 }
4871 else if (ratype)
4872 {
4873 tree t = build_fold_addr_expr (new_temp);
4874 t = build2 (MEM_REF, vectype, t,
4875 build_int_cst (TREE_TYPE (t), 0));
4876 new_stmt = gimple_build_assign (make_ssa_name (var: vec_dest), t);
4877 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
4878 vect_clobber_variable (vinfo, stmt_info, gsi, var: new_temp);
4879 }
4880 else if (!useless_type_conversion_p (vectype, rtype))
4881 {
4882 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4883 new_stmt
4884 = gimple_build_assign (make_ssa_name (var: vec_dest), vec_oprnd0);
4885 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
4886 }
4887 }
4888
4889 if (j == 0)
4890 *vec_stmt = new_stmt;
4891 if (slp_node)
4892 SLP_TREE_VEC_DEFS (slp_node).quick_push (obj: gimple_get_lhs (new_stmt));
4893 else
4894 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
4895 }
4896
4897 for (i = 0; i < nargs; ++i)
4898 {
4899 vec<tree> oprndsi = vec_oprnds[i];
4900 oprndsi.release ();
4901 }
4902 vargs.release ();
4903
4904 /* Mark the clone as no longer being a candidate for GC. */
4905 bestn->gc_candidate = false;
4906
4907 /* The call in STMT might prevent it from being removed by DCE.
4908 We cannot remove it here, however, due to the way the SSA name
4909 it defines is mapped to the new definition.  So just replace the
4910 rhs of the statement with something harmless. */
4911
4912 if (slp_node)
4913 return true;
4914
4915 gimple *new_stmt;
4916 if (scalar_dest)
4917 {
4918 type = TREE_TYPE (scalar_dest);
4919 lhs = gimple_call_lhs (gs: vect_orig_stmt (stmt_info)->stmt);
4920 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4921 }
4922 else
4923 new_stmt = gimple_build_nop ();
4924 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4925 unlink_stmt_vdef (stmt);
4926
4927 return true;
4928}
4929
4930
4931/* Function vect_gen_widened_results_half
4932
4933 Create a vector stmt whose code, type, number of arguments, and result
4934 variable are CH, OP_TYPE, and VEC_DEST, and its arguments are
4935 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4936 In the case that CH is an internal function, a call to that function is
4937 created instead of an assignment.
4938 STMT_INFO is the original scalar stmt that we are vectorizing. */
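/* For example, when widening a V8HI multiplication into V4SI results this
   helper is typically invoked twice, once per half of the widened result
   (a sketch; the exact codes and the hi/lo order depend on the target and
   on endianness):

     vect_lo = VEC_WIDEN_MULT_LO_EXPR <vx, vy>;   V8HI x V8HI -> V4SI
     vect_hi = VEC_WIDEN_MULT_HI_EXPR <vx, vy>;   V8HI x V8HI -> V4SI  */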
4939
4940static gimple *
4941vect_gen_widened_results_half (vec_info *vinfo, code_helper ch,
4942 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4943 tree vec_dest, gimple_stmt_iterator *gsi,
4944 stmt_vec_info stmt_info)
4945{
4946 gimple *new_stmt;
4947 tree new_temp;
4948
4949 /* Generate half of the widened result: */
4950 if (op_type != binary_op)
4951 vec_oprnd1 = NULL;
4952 new_stmt = vect_gimple_build (vec_dest, ch, vec_oprnd0, vec_oprnd1);
4953 new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt);
4954 gimple_set_lhs (new_stmt, new_temp);
4955 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
4956
4957 return new_stmt;
4958}
4959
4960
4961/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4962 For multi-step conversions store the resulting vectors and call the function
4963 recursively. When NARROW_SRC_P is true, there is still a conversion after
4964 the narrowing, so do not store the vectors in the SLP_NODE or in the vector
4965 info of the scalar statement (or in the STMT_VINFO_RELATED_STMT chain). */
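/* For example, demoting two V4SI operands into a single V8HI result is done
   with a packing operation (a sketch; the actual code is whatever
   supportable_narrowing_operation selected):

     vect_res = VEC_PACK_TRUNC_EXPR <vop0, vop1>;   V4SI, V4SI -> V8HI

   An int -> char conversion chains two such levels, which is the multi-step
   case handled by the recursion below.  */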
4966
4967static void
4968vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4969 int multi_step_cvt,
4970 stmt_vec_info stmt_info,
4971 vec<tree> &vec_dsts,
4972 gimple_stmt_iterator *gsi,
4973 slp_tree slp_node, code_helper code,
4974 bool narrow_src_p)
4975{
4976 unsigned int i;
4977 tree vop0, vop1, new_tmp, vec_dest;
4978
4979 vec_dest = vec_dsts.pop ();
4980
4981 for (i = 0; i < vec_oprnds->length (); i += 2)
4982 {
4983 /* Create demotion operation. */
4984 vop0 = (*vec_oprnds)[i];
4985 vop1 = (*vec_oprnds)[i + 1];
4986 gimple *new_stmt = vect_gimple_build (vec_dest, code, vop0, vop1);
4987 new_tmp = make_ssa_name (var: vec_dest, stmt: new_stmt);
4988 gimple_set_lhs (new_stmt, new_tmp);
4989 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
4990 if (multi_step_cvt || narrow_src_p)
4991 /* Store the resulting vector for next recursive call,
4992 or return the resulting vector_tmp for NARROW FLOAT_EXPR. */
4993 (*vec_oprnds)[i/2] = new_tmp;
4994 else
4995 {
4996 /* This is the last step of the conversion sequence. Store the
4997 vectors in SLP_NODE or in vector info of the scalar statement
4998 (or in STMT_VINFO_RELATED_STMT chain). */
4999 if (slp_node)
5000 slp_node->push_vec_def (def: new_stmt);
5001 else
5002 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
5003 }
5004 }
5005
5006 /* For multi-step demotion operations we first generate demotion operations
5007 from the source type to the intermediate types, and then combine the
5008 results (stored in VEC_OPRNDS) with a demotion operation to the
5009 destination type. */
5010 if (multi_step_cvt)
5011 {
5012 /* At each level of recursion we have half of the operands we had at the
5013 previous level. */
5014 vec_oprnds->truncate (size: (i+1)/2);
5015 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
5016 multi_step_cvt: multi_step_cvt - 1,
5017 stmt_info, vec_dsts, gsi,
5018 slp_node, code: VEC_PACK_TRUNC_EXPR,
5019 narrow_src_p);
5020 }
5021
5022 vec_dsts.quick_push (obj: vec_dest);
5023}
5024
5025
5026/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
5027 and VEC_OPRNDS1, for a binary operation associated with scalar statement
5028 STMT_INFO. For multi-step conversions store the resulting vectors and
5029 call the function recursively. */
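/* For example, promoting V16QI inputs to V4SI results takes two unpacking
   levels, each one doubling the element width and halving the element count
   (a sketch; the actual codes come from supportable_widening_operation):

     V16QI --unpack lo/hi--> 2 x V8HI --unpack lo/hi--> 4 x V4SI

   Each level corresponds to one invocation of this function, with the
   results of the previous level collected in VEC_OPRNDS0.  */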
5030
5031static void
5032vect_create_vectorized_promotion_stmts (vec_info *vinfo,
5033 vec<tree> *vec_oprnds0,
5034 vec<tree> *vec_oprnds1,
5035 stmt_vec_info stmt_info, tree vec_dest,
5036 gimple_stmt_iterator *gsi,
5037 code_helper ch1,
5038 code_helper ch2, int op_type)
5039{
5040 int i;
5041 tree vop0, vop1, new_tmp1, new_tmp2;
5042 gimple *new_stmt1, *new_stmt2;
5043 vec<tree> vec_tmp = vNULL;
5044
5045 vec_tmp.create (nelems: vec_oprnds0->length () * 2);
5046 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5047 {
5048 if (op_type == binary_op)
5049 vop1 = (*vec_oprnds1)[i];
5050 else
5051 vop1 = NULL_TREE;
5052
5053 /* Generate the two halves of promotion operation. */
5054 new_stmt1 = vect_gen_widened_results_half (vinfo, ch: ch1, vec_oprnd0: vop0, vec_oprnd1: vop1,
5055 op_type, vec_dest, gsi,
5056 stmt_info);
5057 new_stmt2 = vect_gen_widened_results_half (vinfo, ch: ch2, vec_oprnd0: vop0, vec_oprnd1: vop1,
5058 op_type, vec_dest, gsi,
5059 stmt_info);
5060 if (is_gimple_call (gs: new_stmt1))
5061 {
5062 new_tmp1 = gimple_call_lhs (gs: new_stmt1);
5063 new_tmp2 = gimple_call_lhs (gs: new_stmt2);
5064 }
5065 else
5066 {
5067 new_tmp1 = gimple_assign_lhs (gs: new_stmt1);
5068 new_tmp2 = gimple_assign_lhs (gs: new_stmt2);
5069 }
5070
5071 /* Store the results for the next step. */
5072 vec_tmp.quick_push (obj: new_tmp1);
5073 vec_tmp.quick_push (obj: new_tmp2);
5074 }
5075
5076 vec_oprnds0->release ();
5077 *vec_oprnds0 = vec_tmp;
5078}
5079
5080/* Create vectorized promotion stmts for widening stmts using only half the
5081 potential vector size for input. */
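/* For example, for a V2SI x V2SI -> V2DI widening multiply the inputs are
   first extended to the output vector type and the operation is then done
   at full width (a sketch of the generated sequence):

     tmp0 = (V2DI) vop0;
     tmp1 = (V2DI) vop1;
     res = tmp0 * tmp1;

   The input vector type uses only half of the full vector width (V2SI
   rather than V4SI here), hence "half widening".  */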
5082static void
5083vect_create_half_widening_stmts (vec_info *vinfo,
5084 vec<tree> *vec_oprnds0,
5085 vec<tree> *vec_oprnds1,
5086 stmt_vec_info stmt_info, tree vec_dest,
5087 gimple_stmt_iterator *gsi,
5088 code_helper code1,
5089 int op_type)
5090{
5091 int i;
5092 tree vop0, vop1;
5093 gimple *new_stmt1;
5094 gimple *new_stmt2;
5095 gimple *new_stmt3;
5096 vec<tree> vec_tmp = vNULL;
5097
5098 vec_tmp.create (nelems: vec_oprnds0->length ());
5099 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5100 {
5101 tree new_tmp1, new_tmp2, new_tmp3, out_type;
5102
5103 gcc_assert (op_type == binary_op);
5104 vop1 = (*vec_oprnds1)[i];
5105
5106 /* Widen the first vector input. */
5107 out_type = TREE_TYPE (vec_dest);
5108 new_tmp1 = make_ssa_name (var: out_type);
5109 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
5110 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt1, gsi);
5111 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
5112 {
5113 /* Widen the second vector input. */
5114 new_tmp2 = make_ssa_name (var: out_type);
5115 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
5116 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt2, gsi);
5117 /* Perform the operation with both vector inputs widened. */
5118 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, new_tmp2);
5119 }
5120 else
5121 {
5122 /* Perform the operation with the single vector input widened. */
5123 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, vop1);
5124 }
5125
5126 new_tmp3 = make_ssa_name (var: vec_dest, stmt: new_stmt3);
5127 gimple_assign_set_lhs (gs: new_stmt3, lhs: new_tmp3);
5128 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt3, gsi);
5129
5130 /* Store the results for the next step. */
5131 vec_tmp.quick_push (obj: new_tmp3);
5132 }
5133
5134 vec_oprnds0->release ();
5135 *vec_oprnds0 = vec_tmp;
5136}
5137
5138
5139/* Check if STMT_INFO performs a conversion operation that can be vectorized.
5140 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5141 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5142 Return true if STMT_INFO is vectorizable in this way. */
5143
5144static bool
5145vectorizable_conversion (vec_info *vinfo,
5146 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5147 gimple **vec_stmt, slp_tree slp_node,
5148 stmt_vector_for_cost *cost_vec)
5149{
5150 tree vec_dest, cvt_op = NULL_TREE;
5151 tree scalar_dest;
5152 tree op0, op1 = NULL_TREE;
5153 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
5154 tree_code tc1, tc2;
5155 code_helper code, code1, code2;
5156 code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
5157 tree new_temp;
5158 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5159 int ndts = 2;
5160 poly_uint64 nunits_in;
5161 poly_uint64 nunits_out;
5162 tree vectype_out, vectype_in;
5163 int ncopies, i;
5164 tree lhs_type, rhs_type;
5165 /* For conversions between floating point and integer types there are two
5166 NARROW cases. NARROW_SRC is used for FLOAT_EXPR and means
5167 integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
5168 This is safe when the range of the source integer fits into the lower
5169 precision. NARROW_DST is used for FIX_TRUNC_EXPR and means
5170 floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> integer.
5171 For other narrowing conversions NARROW_DST is used by
5172 default. */
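/* For example (a sketch, assuming the value ranges are known):

     float f = (float) i64;    i64 has a known 16-bit range
       -> NARROW_SRC: demote the long vector to a narrower integer
          vector first, then convert that to float.

     short s = (short) d;      d is a double
       -> NARROW_DST: FIX_TRUNC d to an integer vector first, then
          demote (pack) the integer vector down to short.  */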
5173 enum { NARROW_SRC, NARROW_DST, NONE, WIDEN } modifier;
5174 vec<tree> vec_oprnds0 = vNULL;
5175 vec<tree> vec_oprnds1 = vNULL;
5176 tree vop0;
5177 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo);
5178 int multi_step_cvt = 0;
5179 vec<tree> interm_types = vNULL;
5180 tree intermediate_type, cvt_type = NULL_TREE;
5181 int op_type;
5182 unsigned short fltsz;
5183
5184 /* Is STMT a vectorizable conversion? */
5185
5186 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5187 return false;
5188
5189 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5190 && ! vec_stmt)
5191 return false;
5192
5193 gimple* stmt = stmt_info->stmt;
5194 if (!(is_gimple_assign (gs: stmt) || is_gimple_call (gs: stmt)))
5195 return false;
5196
5197 if (gimple_get_lhs (stmt) == NULL_TREE
5198 || TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
5199 return false;
5203
5204 if (is_gimple_assign (gs: stmt))
5205 {
5206 code = gimple_assign_rhs_code (gs: stmt);
5207 op_type = TREE_CODE_LENGTH ((tree_code) code);
5208 }
5209 else if (gimple_call_internal_p (gs: stmt))
5210 {
5211 code = gimple_call_internal_fn (gs: stmt);
5212 op_type = gimple_call_num_args (gs: stmt);
5213 }
5214 else
5215 return false;
5216
5217 bool widen_arith = (code == WIDEN_MULT_EXPR
5218 || code == WIDEN_LSHIFT_EXPR
5219 || widening_fn_p (code));
5220
5221 if (!widen_arith
5222 && !CONVERT_EXPR_CODE_P (code)
5223 && code != FIX_TRUNC_EXPR
5224 && code != FLOAT_EXPR)
5225 return false;
5226
5227 /* Check types of lhs and rhs. */
5228 scalar_dest = gimple_get_lhs (stmt);
5229 lhs_type = TREE_TYPE (scalar_dest);
5230 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5231
5232 /* Check the operands of the operation. */
5233 slp_tree slp_op0, slp_op1 = NULL;
5234 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5235 0, &op0, &slp_op0, &dt[0], &vectype_in))
5236 {
5237 if (dump_enabled_p ())
5238 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5239 "use not simple.\n");
5240 return false;
5241 }
5242
5243 rhs_type = TREE_TYPE (op0);
5244 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
5245 && !((INTEGRAL_TYPE_P (lhs_type)
5246 && INTEGRAL_TYPE_P (rhs_type))
5247 || (SCALAR_FLOAT_TYPE_P (lhs_type)
5248 && SCALAR_FLOAT_TYPE_P (rhs_type))))
5249 return false;
5250
5251 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5252 && ((INTEGRAL_TYPE_P (lhs_type)
5253 && !type_has_mode_precision_p (t: lhs_type))
5254 || (INTEGRAL_TYPE_P (rhs_type)
5255 && !type_has_mode_precision_p (t: rhs_type))))
5256 {
5257 if (dump_enabled_p ())
5258 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5259 "type conversion to/from bit-precision unsupported."
5260 "\n");
5261 return false;
5262 }
5263
5264 if (op_type == binary_op)
5265 {
5266 gcc_assert (code == WIDEN_MULT_EXPR
5267 || code == WIDEN_LSHIFT_EXPR
5268 || widening_fn_p (code));
5269
5270 op1 = is_gimple_assign (gs: stmt) ? gimple_assign_rhs2 (gs: stmt) :
5271 gimple_call_arg (gs: stmt, index: 0);
5272 tree vectype1_in;
5273 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
5274 &op1, &slp_op1, &dt[1], &vectype1_in))
5275 {
5276 if (dump_enabled_p ())
5277 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5278 "use not simple.\n");
5279 return false;
5280 }
5281 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5282 OP1. */
5283 if (!vectype_in)
5284 vectype_in = vectype1_in;
5285 }
5286
5287 /* If op0 is an external or constant def, infer the vector type
5288 from the scalar type. */
5289 if (!vectype_in)
5290 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
5291 if (vec_stmt)
5292 gcc_assert (vectype_in);
5293 if (!vectype_in)
5294 {
5295 if (dump_enabled_p ())
5296 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5297 "no vectype for scalar type %T\n", rhs_type);
5298
5299 return false;
5300 }
5301
5302 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
5303 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5304 {
5305 if (dump_enabled_p ())
5306 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5307 "can't convert between boolean and non "
5308 "boolean vectors %T\n", rhs_type);
5309
5310 return false;
5311 }
5312
5313 nunits_in = TYPE_VECTOR_SUBPARTS (node: vectype_in);
5314 nunits_out = TYPE_VECTOR_SUBPARTS (node: vectype_out);
5315 if (known_eq (nunits_out, nunits_in))
5316 modifier = widen_arith ? WIDEN : NONE;
5320 else if (multiple_p (a: nunits_out, b: nunits_in))
5321 modifier = NARROW_DST;
5322 else
5323 {
5324 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
5325 modifier = WIDEN;
5326 }
5327
5328 /* Multiple types in SLP are handled by creating the appropriate number of
5329 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5330 case of SLP. */
5331 if (slp_node)
5332 ncopies = 1;
5333 else if (modifier == NARROW_DST)
5334 ncopies = vect_get_num_copies (loop_vinfo, vectype: vectype_out);
5335 else
5336 ncopies = vect_get_num_copies (loop_vinfo, vectype: vectype_in);
5337
5338 /* Sanity check: make sure that at least one copy of the vectorized stmt
5339 needs to be generated. */
5340 gcc_assert (ncopies >= 1);
5341
5342 bool found_mode = false;
5343 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5344 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5345 opt_scalar_mode rhs_mode_iter;
5346
5347 /* Supportable by target? */
5348 switch (modifier)
5349 {
5350 case NONE:
5351 if (code != FIX_TRUNC_EXPR
5352 && code != FLOAT_EXPR
5353 && !CONVERT_EXPR_CODE_P (code))
5354 return false;
5355 gcc_assert (code.is_tree_code ());
5356 if (supportable_convert_operation ((tree_code) code, vectype_out,
5357 vectype_in, &tc1))
5358 {
5359 code1 = tc1;
5360 break;
5361 }
5362
5363 /* For conversions between float and integer types try whether
5364 we can use intermediate signed integer types to support the
5365 conversion. */
5366 if (GET_MODE_SIZE (mode: lhs_mode) != GET_MODE_SIZE (mode: rhs_mode)
5367 && (code == FLOAT_EXPR ||
5368 (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
5369 {
5370 bool demotion = GET_MODE_SIZE (mode: rhs_mode) > GET_MODE_SIZE (mode: lhs_mode);
5371 bool float_expr_p = code == FLOAT_EXPR;
5372 unsigned short target_size;
5373 scalar_mode intermediate_mode;
5374 if (demotion)
5375 {
5376 intermediate_mode = lhs_mode;
5377 target_size = GET_MODE_SIZE (mode: rhs_mode);
5378 }
5379 else
5380 {
5381 target_size = GET_MODE_SIZE (mode: lhs_mode);
5382 if (!int_mode_for_size
5383 (size: GET_MODE_BITSIZE (mode: rhs_mode), limit: 0).exists (mode: &intermediate_mode))
5384 goto unsupported;
5385 }
5386 code1 = float_expr_p ? code : NOP_EXPR;
5387 codecvt1 = float_expr_p ? NOP_EXPR : code;
5388 opt_scalar_mode mode_iter;
5389 FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
5390 {
5391 intermediate_mode = mode_iter.require ();
5392
5393 if (GET_MODE_SIZE (mode: intermediate_mode) > target_size)
5394 break;
5395
5396 scalar_mode cvt_mode;
5397 if (!int_mode_for_size
5398 (size: GET_MODE_BITSIZE (mode: intermediate_mode), limit: 0).exists (mode: &cvt_mode))
5399 break;
5400
5401 cvt_type = build_nonstandard_integer_type
5402 (GET_MODE_BITSIZE (mode: cvt_mode), 0);
5403
5404 /* Check if the intermediate type can hold OP0's range.
5405 When converting from float to integer this is not necessary
5406 because values that do not fit the (smaller) target type are
5407 unspecified anyway. */
5408 if (demotion && float_expr_p)
5409 {
5410 wide_int op_min_value, op_max_value;
5411 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5412 break;
5413
5414 if (cvt_type == NULL_TREE
5415 || (wi::min_precision (x: op_max_value, sgn: SIGNED)
5416 > TYPE_PRECISION (cvt_type))
5417 || (wi::min_precision (x: op_min_value, sgn: SIGNED)
5418 > TYPE_PRECISION (cvt_type)))
5419 continue;
5420 }
5421
5422 cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node);
5423 /* This can only happen for SLP, as long as the loop vectorizer
5424 only supports same-sized vectors. */
5425 if (cvt_type == NULL_TREE
5426 || maybe_ne (a: TYPE_VECTOR_SUBPARTS (node: cvt_type), b: nunits_in)
5427 || !supportable_convert_operation ((tree_code) code1,
5428 vectype_out,
5429 cvt_type, &tc1)
5430 || !supportable_convert_operation ((tree_code) codecvt1,
5431 cvt_type,
5432 vectype_in, &tc2))
5433 continue;
5434
5435 found_mode = true;
5436 break;
5437 }
5438
5439 if (found_mode)
5440 {
5441 multi_step_cvt++;
5442 interm_types.safe_push (obj: cvt_type);
5443 cvt_type = NULL_TREE;
5444 code1 = tc1;
5445 codecvt1 = tc2;
5446 break;
5447 }
5448 }
5449 /* FALLTHRU */
5450 unsupported:
5451 if (dump_enabled_p ())
5452 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5453 "conversion not supported by target.\n");
5454 return false;
5455
5456 case WIDEN:
5457 if (known_eq (nunits_in, nunits_out))
5458 {
5459 if (!(code.is_tree_code ()
5460 && supportable_half_widening_operation ((tree_code) code,
5461 vectype_out, vectype_in,
5462 &tc1)))
5463 goto unsupported;
5464 code1 = tc1;
5465 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5466 break;
5467 }
5468 if (supportable_widening_operation (vinfo, code, stmt_info,
5469 vectype_out, vectype_in, &code1,
5470 &code2, &multi_step_cvt,
5471 &interm_types))
5472 {
5473 /* Binary widening operation can only be supported directly by the
5474 architecture. */
5475 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5476 break;
5477 }
5478
5479 if (code != FLOAT_EXPR
5480 || GET_MODE_SIZE (mode: lhs_mode) <= GET_MODE_SIZE (mode: rhs_mode))
5481 goto unsupported;
5482
5483 fltsz = GET_MODE_SIZE (mode: lhs_mode);
5484 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5485 {
5486 rhs_mode = rhs_mode_iter.require ();
5487 if (GET_MODE_SIZE (mode: rhs_mode) > fltsz)
5488 break;
5489
5490 cvt_type
5491 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode: rhs_mode), 0);
5492 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5493 if (cvt_type == NULL_TREE)
5494 goto unsupported;
5495
5496 if (GET_MODE_SIZE (mode: rhs_mode) == fltsz)
5497 {
5498 tc1 = ERROR_MARK;
5499 gcc_assert (code.is_tree_code ());
5500 if (!supportable_convert_operation ((tree_code) code, vectype_out,
5501 cvt_type, &tc1))
5502 goto unsupported;
5503 codecvt1 = tc1;
5504 }
5505 else if (!supportable_widening_operation (vinfo, code,
5506 stmt_info, vectype_out,
5507 cvt_type, &codecvt1,
5508 &codecvt2, &multi_step_cvt,
5509 &interm_types))
5510 continue;
5511 else
5512 gcc_assert (multi_step_cvt == 0);
5513
5514 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5515 cvt_type,
5516 vectype_in, &code1,
5517 &code2, &multi_step_cvt,
5518 &interm_types))
5519 {
5520 found_mode = true;
5521 break;
5522 }
5523 }
5524
5525 if (!found_mode)
5526 goto unsupported;
5527
5528 if (GET_MODE_SIZE (mode: rhs_mode) == fltsz)
5529 codecvt2 = ERROR_MARK;
5530 else
5531 {
5532 multi_step_cvt++;
5533 interm_types.safe_push (obj: cvt_type);
5534 cvt_type = NULL_TREE;
5535 }
5536 break;
5537
5538 case NARROW_DST:
5539 gcc_assert (op_type == unary_op);
5540 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5541 &code1, &multi_step_cvt,
5542 &interm_types))
5543 break;
5544
5545 if (GET_MODE_SIZE (mode: lhs_mode) >= GET_MODE_SIZE (mode: rhs_mode))
5546 goto unsupported;
5547
5548 if (code == FIX_TRUNC_EXPR)
5549 {
5550 cvt_type
5551 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode: rhs_mode), 0);
5552 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5553 if (cvt_type == NULL_TREE)
5554 goto unsupported;
5555 if (supportable_convert_operation ((tree_code) code, cvt_type, vectype_in,
5556 &tc1))
5557 codecvt1 = tc1;
5558 else
5559 goto unsupported;
5560 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5561 &code1, &multi_step_cvt,
5562 &interm_types))
5563 break;
5564 }
5565 /* If op0 can be represented in a lower-precision integer type,
5566 truncate it to cvt_type and then do the FLOAT_EXPR. */
5567 else if (code == FLOAT_EXPR)
5568 {
5569 wide_int op_min_value, op_max_value;
5570 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5571 goto unsupported;
5572
5573 cvt_type
5574 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode: lhs_mode), 0);
5575 if (cvt_type == NULL_TREE
5576 || (wi::min_precision (x: op_max_value, sgn: SIGNED)
5577 > TYPE_PRECISION (cvt_type))
5578 || (wi::min_precision (x: op_min_value, sgn: SIGNED)
5579 > TYPE_PRECISION (cvt_type)))
5580 goto unsupported;
5581
5582 cvt_type = get_same_sized_vectype (cvt_type, vectype_out);
5583 if (cvt_type == NULL_TREE)
5584 goto unsupported;
5585 if (!supportable_narrowing_operation (NOP_EXPR, cvt_type, vectype_in,
5586 &code1, &multi_step_cvt,
5587 &interm_types))
5588 goto unsupported;
5589 if (supportable_convert_operation ((tree_code) code, vectype_out,
5590 cvt_type, &tc1))
5591 {
5592 codecvt1 = tc1;
5593 modifier = NARROW_SRC;
5594 break;
5595 }
5596 }
5597
5598 goto unsupported;
5599
5600 default:
5601 gcc_unreachable ();
5602 }
5603
5604 if (!vec_stmt) /* transformation not required. */
5605 {
5606 if (slp_node
5607 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5608 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5609 {
5610 if (dump_enabled_p ())
5611 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5612 "incompatible vector types for invariants\n");
5613 return false;
5614 }
5615 DUMP_VECT_SCOPE ("vectorizable_conversion");
5616 if (modifier == NONE)
5617 {
5618 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5619 vect_model_simple_cost (vinfo, stmt_info,
5620 ncopies: ncopies * (1 + multi_step_cvt),
5621 dt, ndts, node: slp_node, cost_vec);
5622 }
5623 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5624 {
5625 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5626 /* The final packing step produces one vector result per copy. */
5627 unsigned int nvectors
5628 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5629 vect_model_promotion_demotion_cost (stmt_info, dt, ncopies: nvectors,
5630 pwr: multi_step_cvt, cost_vec,
5631 widen_arith);
5632 }
5633 else
5634 {
5635 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5636 /* The initial unpacking step produces two vector results
5637 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5638 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5639 unsigned int nvectors
5640 = (slp_node
5641 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5642 : ncopies * 2);
5643 vect_model_promotion_demotion_cost (stmt_info, dt, ncopies: nvectors,
5644 pwr: multi_step_cvt, cost_vec,
5645 widen_arith);
5646 }
5647 interm_types.release ();
5648 return true;
5649 }
5650
5651 /* Transform. */
5652 if (dump_enabled_p ())
5653 dump_printf_loc (MSG_NOTE, vect_location,
5654 "transform conversion. ncopies = %d.\n", ncopies);
5655
5656 if (op_type == binary_op)
5657 {
5658 if (CONSTANT_CLASS_P (op0))
5659 op0 = fold_convert (TREE_TYPE (op1), op0);
5660 else if (CONSTANT_CLASS_P (op1))
5661 op1 = fold_convert (TREE_TYPE (op0), op1);
5662 }
5663
5664 /* In case of multi-step conversion, we first generate conversion operations
5665 to the intermediate types, and then from those types to the final one.
5666 We create vector destinations for the intermediate types (INTERM_TYPES)
5667 received from supportable_*_operation, and store them in the correct order
5668 for future use in vect_create_vectorized_*_stmts (). */
5669 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5670 bool widen_or_narrow_float_p
5671 = cvt_type && (modifier == WIDEN || modifier == NARROW_SRC);
5672 vec_dest = vect_create_destination_var (scalar_dest,
5673 widen_or_narrow_float_p
5674 ? cvt_type : vectype_out);
5675 vec_dsts.quick_push (obj: vec_dest);
5676
5677 if (multi_step_cvt)
5678 {
5679 for (i = interm_types.length () - 1;
5680 interm_types.iterate (ix: i, ptr: &intermediate_type); i--)
5681 {
5682 vec_dest = vect_create_destination_var (scalar_dest,
5683 intermediate_type);
5684 vec_dsts.quick_push (obj: vec_dest);
5685 }
5686 }
5687
5688 if (cvt_type)
5689 vec_dest = vect_create_destination_var (scalar_dest,
5690 widen_or_narrow_float_p
5691 ? vectype_out : cvt_type);
5692
5693 int ninputs = 1;
5694 if (!slp_node)
5695 {
5696 if (modifier == WIDEN)
5697 ;
5698 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5699 {
5700 if (multi_step_cvt)
5701 ninputs = vect_pow2 (x: multi_step_cvt);
5702 ninputs *= 2;
5703 }
5704 }
5705
5706 switch (modifier)
5707 {
5708 case NONE:
5709 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5710 op0, vectype0: vectype_in, vec_oprnds0: &vec_oprnds0);
5711 /* With multi_step_cvt, vec_dest is the intermediate-type destination. */
5712 if (multi_step_cvt)
5713 {
5714 cvt_op = vec_dest;
5715 vec_dest = vec_dsts[0];
5716 }
5717
5718 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5719 {
5720 /* Arguments are ready, create the new vector stmt. */
5721 gimple* new_stmt;
5722 if (multi_step_cvt)
5723 {
5724 gcc_assert (multi_step_cvt == 1);
5725 new_stmt = vect_gimple_build (cvt_op, codecvt1, vop0);
5726 new_temp = make_ssa_name (var: cvt_op, stmt: new_stmt);
5727 gimple_assign_set_lhs (gs: new_stmt, lhs: new_temp);
5728 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
5729 vop0 = new_temp;
5730 }
5731 new_stmt = vect_gimple_build (vec_dest, code1, vop0);
5732 new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt);
5733 gimple_set_lhs (new_stmt, new_temp);
5734 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
5735
5736 if (slp_node)
5737 slp_node->push_vec_def (def: new_stmt);
5738 else
5739 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
5740 }
5741 break;
5742
5743 case WIDEN:
5744 /* In case the vectorization factor (VF) is bigger than the number
5745 of elements that we can fit in a vectype (nunits), we have to
5746 generate more than one vector stmt - i.e - we need to "unroll"
5747 the vector stmt by a factor VF/nunits. */
5748 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies: ncopies * ninputs,
5749 op0, vectype0: vectype_in, vec_oprnds0: &vec_oprnds0,
5750 op1: code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5751 vectype1: vectype_in, vec_oprnds1: &vec_oprnds1);
5752 if (code == WIDEN_LSHIFT_EXPR)
5753 {
5754 int oprnds_size = vec_oprnds0.length ();
5755 vec_oprnds1.create (nelems: oprnds_size);
5756 for (i = 0; i < oprnds_size; ++i)
5757 vec_oprnds1.quick_push (obj: op1);
5758 }
5759 /* Arguments are ready. Create the new vector stmts. */
5760 for (i = multi_step_cvt; i >= 0; i--)
5761 {
5762 tree this_dest = vec_dsts[i];
5763 code_helper c1 = code1, c2 = code2;
5764 if (i == 0 && codecvt2 != ERROR_MARK)
5765 {
5766 c1 = codecvt1;
5767 c2 = codecvt2;
5768 }
5769 if (known_eq (nunits_out, nunits_in))
5770 vect_create_half_widening_stmts (vinfo, vec_oprnds0: &vec_oprnds0, vec_oprnds1: &vec_oprnds1,
5771 stmt_info, vec_dest: this_dest, gsi, code1: c1,
5772 op_type);
5773 else
5774 vect_create_vectorized_promotion_stmts (vinfo, vec_oprnds0: &vec_oprnds0,
5775 vec_oprnds1: &vec_oprnds1, stmt_info,
5776 vec_dest: this_dest, gsi,
5777 ch1: c1, ch2: c2, op_type);
5778 }
5779
5780 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5781 {
5782 gimple *new_stmt;
5783 if (cvt_type)
5784 {
5785 new_temp = make_ssa_name (var: vec_dest);
5786 new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5787 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
5788 }
5789 else
5790 new_stmt = SSA_NAME_DEF_STMT (vop0);
5791
5792 if (slp_node)
5793 slp_node->push_vec_def (def: new_stmt);
5794 else
5795 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
5796 }
5797 break;
5798
5799 case NARROW_SRC:
5800 case NARROW_DST:
5801 /* In case the vectorization factor (VF) is bigger than the number
5802 of elements that we can fit in a vectype (nunits), we have to
5803 generate more than one vector stmt - i.e - we need to "unroll"
5804 the vector stmt by a factor VF/nunits. */
5805 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies: ncopies * ninputs,
5806 op0, vectype0: vectype_in, vec_oprnds0: &vec_oprnds0);
5807 /* Arguments are ready. Create the new vector stmts. */
5808 if (cvt_type && modifier == NARROW_DST)
5809 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5810 {
5811 new_temp = make_ssa_name (var: vec_dest);
5812 gimple *new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5813 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
5814 vec_oprnds0[i] = new_temp;
5815 }
5816
5817 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds: &vec_oprnds0,
5818 multi_step_cvt,
5819 stmt_info, vec_dsts, gsi,
5820 slp_node, code: code1,
5821 narrow_src_p: modifier == NARROW_SRC);
5822 /* After demoting op0 to cvt_type, convert it to dest. */
5823 if (cvt_type && code == FLOAT_EXPR)
5824 {
5825 for (unsigned int i = 0; i != vec_oprnds0.length() / 2; i++)
5826 {
5827 /* Arguments are ready, create the new vector stmt. */
5828 gcc_assert (TREE_CODE_LENGTH ((tree_code) codecvt1) == unary_op);
5829 gimple *new_stmt
5830 = vect_gimple_build (vec_dest, codecvt1, vec_oprnds0[i]);
5831 new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt);
5832 gimple_set_lhs (new_stmt, new_temp);
5833 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
5834
5835 /* This is the last step of the conversion sequence. Store the
5836 vectors in SLP_NODE or in vector info of the scalar statement
5837 (or in STMT_VINFO_RELATED_STMT chain). */
5838 if (slp_node)
5839 slp_node->push_vec_def (def: new_stmt);
5840 else
5841 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
5842 }
5843 }
5844 break;
5845 }
5846 if (!slp_node)
5847 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5848
5849 vec_oprnds0.release ();
5850 vec_oprnds1.release ();
5851 interm_types.release ();
5852
5853 return true;
5854}
5855
5856/* Return true if we can assume from the scalar form of STMT_INFO that
5857 neither the scalar nor the vector forms will generate code. STMT_INFO
5858 is known not to involve a data reference. */
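/* For example (a sketch), a sign-change cast such as

     unsigned int u = (unsigned int) i;   int -> unsigned int

   is a no-op at the bit level, so neither the scalar stmt nor its
   vectorized counterpart needs any instructions.  */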
5859
5860bool
5861vect_nop_conversion_p (stmt_vec_info stmt_info)
5862{
5863 gassign *stmt = dyn_cast <gassign *> (p: stmt_info->stmt);
5864 if (!stmt)
5865 return false;
5866
5867 tree lhs = gimple_assign_lhs (gs: stmt);
5868 tree_code code = gimple_assign_rhs_code (gs: stmt);
5869 tree rhs = gimple_assign_rhs1 (gs: stmt);
5870
5871 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5872 return true;
5873
5874 if (CONVERT_EXPR_CODE_P (code))
5875 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5876
5877 return false;
5878}
5879
5880/* Function vectorizable_assignment.
5881
5882 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5883 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5884 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5885 Return true if STMT_INFO is vectorizable in this way. */
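/* For example (a sketch), a plain copy or a mode-preserving cast such as

     int i = (int) u;   same-width unsigned -> signed

   is vectorized as a single vector copy per vector stmt, possibly wrapped
   in a VIEW_CONVERT_EXPR to reinterpret the lanes without changing bits.  */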
5886
5887static bool
5888vectorizable_assignment (vec_info *vinfo,
5889 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5890 gimple **vec_stmt, slp_tree slp_node,
5891 stmt_vector_for_cost *cost_vec)
5892{
5893 tree vec_dest;
5894 tree scalar_dest;
5895 tree op;
5896 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
5897 tree new_temp;
5898 enum vect_def_type dt[1] = {vect_unknown_def_type};
5899 int ndts = 1;
5900 int ncopies;
5901 int i;
5902 vec<tree> vec_oprnds = vNULL;
5903 tree vop;
5904 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo);
5905 enum tree_code code;
5906 tree vectype_in;
5907
5908 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5909 return false;
5910
5911 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5912 && ! vec_stmt)
5913 return false;
5914
5915 /* Is vectorizable assignment? */
5916 gassign *stmt = dyn_cast <gassign *> (p: stmt_info->stmt);
5917 if (!stmt)
5918 return false;
5919
5920 scalar_dest = gimple_assign_lhs (gs: stmt);
5921 if (TREE_CODE (scalar_dest) != SSA_NAME)
5922 return false;
5923
5924 if (STMT_VINFO_DATA_REF (stmt_info))
5925 return false;
5926
5927 code = gimple_assign_rhs_code (gs: stmt);
5928 if (!(gimple_assign_single_p (gs: stmt)
5929 || code == PAREN_EXPR
5930 || CONVERT_EXPR_CODE_P (code)))
5931 return false;
5932
5933 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5934 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (node: vectype);
5935
5936 /* Multiple types in SLP are handled by creating the appropriate number of
5937 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5938 case of SLP. */
5939 if (slp_node)
5940 ncopies = 1;
5941 else
5942 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5943
5944 gcc_assert (ncopies >= 1);
5945
5946 slp_tree slp_op;
5947 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5948 &dt[0], &vectype_in))
5949 {
5950 if (dump_enabled_p ())
5951 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5952 "use not simple.\n");
5953 return false;
5954 }
5955 if (!vectype_in)
5956 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5957
5958 /* We can handle NOP_EXPR conversions that do not change the number
5959 of elements or the vector size. */
5960 if ((CONVERT_EXPR_CODE_P (code)
5961 || code == VIEW_CONVERT_EXPR)
5962 && (!vectype_in
5963 || maybe_ne (a: TYPE_VECTOR_SUBPARTS (node: vectype_in), b: nunits)
5964 || maybe_ne (a: GET_MODE_SIZE (TYPE_MODE (vectype)),
5965 b: GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5966 return false;
5967
5968 if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
5969 {
5970 if (dump_enabled_p ())
5971 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5972 "can't convert between boolean and non "
5973 "boolean vectors %T\n", TREE_TYPE (op));
5974
5975 return false;
5976 }
5977
5978 /* We do not handle bit-precision changes. */
5979 if ((CONVERT_EXPR_CODE_P (code)
5980 || code == VIEW_CONVERT_EXPR)
5981 && ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5982 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5983 || (INTEGRAL_TYPE_P (TREE_TYPE (op))
5984 && !type_has_mode_precision_p (TREE_TYPE (op))))
5985 /* But a conversion that does not change the bit-pattern is ok. */
5986 && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5987 && INTEGRAL_TYPE_P (TREE_TYPE (op))
5988 && (((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5989 > TYPE_PRECISION (TREE_TYPE (op)))
5990 && TYPE_UNSIGNED (TREE_TYPE (op)))
5991 || (TYPE_PRECISION (TREE_TYPE (scalar_dest))
5992 == TYPE_PRECISION (TREE_TYPE (op))))))
5993 {
5994 if (dump_enabled_p ())
5995 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5996 "type conversion to/from bit-precision "
5997 "unsupported.\n");
5998 return false;
5999 }
6000
6001 if (!vec_stmt) /* transformation not required. */
6002 {
6003 if (slp_node
6004 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
6005 {
6006 if (dump_enabled_p ())
6007 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6008 "incompatible vector types for invariants\n");
6009 return false;
6010 }
6011 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
6012 DUMP_VECT_SCOPE ("vectorizable_assignment");
6013 if (!vect_nop_conversion_p (stmt_info))
6014 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, node: slp_node,
6015 cost_vec);
6016 return true;
6017 }
6018
6019 /* Transform. */
6020 if (dump_enabled_p ())
6021 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
6022
6023 /* Handle def. */
6024 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6025
6026 /* Handle use. */
6027 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op0: op, vec_oprnds0: &vec_oprnds);
6028
6029 /* Arguments are ready. Create the new vector stmt. */
6030 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
6031 {
6032 if (CONVERT_EXPR_CODE_P (code)
6033 || code == VIEW_CONVERT_EXPR)
6034 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
6035 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
6036 new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt);
6037 gimple_assign_set_lhs (gs: new_stmt, lhs: new_temp);
6038 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
6039 if (slp_node)
6040 slp_node->push_vec_def (def: new_stmt);
6041 else
6042 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
6043 }
6044 if (!slp_node)
6045 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6046
6047 vec_oprnds.release ();
6048 return true;
6049}
6050
6051
6052/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
6053 either as shift by a scalar or by a vector. */
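/* For example, a pattern-recognition caller might check for support before
   committing to emit a shift (a usage sketch, assuming VINFO and TYPE are
   already available):

     if (vect_supportable_shift (vinfo, RSHIFT_EXPR, type))
       ... emit the pattern using a right shift ...  */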
6054
6055bool
6056vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
6057{
6058
6059 machine_mode vec_mode;
6060 optab optab;
6061 int icode;
6062 tree vectype;
6063
6064 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
6065 if (!vectype)
6066 return false;
6067
6068 optab = optab_for_tree_code (code, vectype, optab_scalar);
6069 if (!optab
6070 || optab_handler (op: optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
6071 {
6072 optab = optab_for_tree_code (code, vectype, optab_vector);
6073 if (!optab
6074 || (optab_handler (op: optab, TYPE_MODE (vectype))
6075 == CODE_FOR_nothing))
6076 return false;
6077 }
6078
6079 vec_mode = TYPE_MODE (vectype);
6080 icode = (int) optab_handler (op: optab, mode: vec_mode);
6081 if (icode == CODE_FOR_nothing)
6082 return false;
6083
6084 return true;
6085}
6086
6087
6088/* Function vectorizable_shift.
6089
6090 Check if STMT_INFO performs a shift operation that can be vectorized.
6091 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
6092 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6093 Return true if STMT_INFO is vectorizable in this way. */
6094
6095static bool
6096vectorizable_shift (vec_info *vinfo,
6097 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6098 gimple **vec_stmt, slp_tree slp_node,
6099 stmt_vector_for_cost *cost_vec)
6100{
6101 tree vec_dest;
6102 tree scalar_dest;
6103 tree op0, op1 = NULL;
6104 tree vec_oprnd1 = NULL_TREE;
6105 tree vectype;
6106 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
6107 enum tree_code code;
6108 machine_mode vec_mode;
6109 tree new_temp;
6110 optab optab;
6111 int icode;
6112 machine_mode optab_op2_mode;
6113 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
6114 int ndts = 2;
6115 poly_uint64 nunits_in;
6116 poly_uint64 nunits_out;
6117 tree vectype_out;
6118 tree op1_vectype;
6119 int ncopies;
6120 int i;
6121 vec<tree> vec_oprnds0 = vNULL;
6122 vec<tree> vec_oprnds1 = vNULL;
6123 tree vop0, vop1;
6124 unsigned int k;
6125 bool scalar_shift_arg = true;
6126 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo);
6127 bool incompatible_op1_vectype_p = false;
6128
6129 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6130 return false;
6131
6132 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6133 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
6134 && ! vec_stmt)
6135 return false;
6136
6137 /* Is STMT a vectorizable binary/unary operation? */
6138 gassign *stmt = dyn_cast <gassign *> (p: stmt_info->stmt);
6139 if (!stmt)
6140 return false;
6141
6142 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6143 return false;
6144
6145 code = gimple_assign_rhs_code (gs: stmt);
6146
6147 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
6148 || code == RROTATE_EXPR))
6149 return false;
6150
6151 scalar_dest = gimple_assign_lhs (gs: stmt);
6152 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6153 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
6154 {
6155 if (dump_enabled_p ())
6156 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6157 "bit-precision shifts not supported.\n");
6158 return false;
6159 }
6160
6161 slp_tree slp_op0;
6162 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6163 0, &op0, &slp_op0, &dt[0], &vectype))
6164 {
6165 if (dump_enabled_p ())
6166 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6167 "use not simple.\n");
6168 return false;
6169 }
6170 /* If op0 is an external or constant def, infer the vector type
6171 from the scalar type. */
6172 if (!vectype)
6173 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
6174 if (vec_stmt)
6175 gcc_assert (vectype);
6176 if (!vectype)
6177 {
6178 if (dump_enabled_p ())
6179 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6180 "no vectype for scalar type\n");
6181 return false;
6182 }
6183
6184 nunits_out = TYPE_VECTOR_SUBPARTS (node: vectype_out);
6185 nunits_in = TYPE_VECTOR_SUBPARTS (node: vectype);
6186 if (maybe_ne (a: nunits_out, b: nunits_in))
6187 return false;
6188
6189 stmt_vec_info op1_def_stmt_info;
6190 slp_tree slp_op1;
6191 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
6192 &dt[1], &op1_vectype, &op1_def_stmt_info))
6193 {
6194 if (dump_enabled_p ())
6195 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6196 "use not simple.\n");
6197 return false;
6198 }
6199
6200 /* Multiple types in SLP are handled by creating the appropriate number of
6201 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6202 case of SLP. */
6203 if (slp_node)
6204 ncopies = 1;
6205 else
6206 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6207
6208 gcc_assert (ncopies >= 1);
6209
6210 /* Determine whether the shift amount is a vector or a scalar. If the
6211 shift/rotate amount is a vector, use the vector/vector shift optabs. */
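 /* For example (a sketch):

      a[i] = b[i] << 3;      shift amount is loop-invariant -> scalar
      a[i] = b[i] << c[i];   shift amount varies per lane   -> vector  */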
6212
6213 if ((dt[1] == vect_internal_def
6214 || dt[1] == vect_induction_def
6215 || dt[1] == vect_nested_cycle)
6216 && !slp_node)
6217 scalar_shift_arg = false;
6218 else if (dt[1] == vect_constant_def
6219 || dt[1] == vect_external_def
6220 || dt[1] == vect_internal_def)
6221 {
6222 /* For SLP we need to check whether the shift count is the same
6223 in all the scalar stmts; in loops, if it is a constant or
6224 invariant, it is always a scalar shift. */
6225 if (slp_node)
6226 {
6227 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
6228 stmt_vec_info slpstmt_info;
6229
6230 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
6231 {
6232 gassign *slpstmt = as_a <gassign *> (p: slpstmt_info->stmt);
6233 if (!operand_equal_p (gimple_assign_rhs2 (gs: slpstmt), op1, flags: 0))
6234 scalar_shift_arg = false;
6235 }
6236
6237 /* For internal SLP defs we have to make sure we see scalar stmts
6238 for all vector elements.
6239 ??? For different vectors we could resort to a different
6240 scalar shift operand but code-generation below simply always
6241 takes the first. */
6242 if (dt[1] == vect_internal_def
6243 && maybe_ne (a: nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
6244 b: stmts.length ()))
6245 scalar_shift_arg = false;
6246 }
6247
6248 /* If the shift amount is computed by a pattern stmt we cannot
6249 use the scalar amount directly thus give up and use a vector
6250 shift. */
6251 if (op1_def_stmt_info && is_pattern_stmt_p (stmt_info: op1_def_stmt_info))
6252 scalar_shift_arg = false;
6253 }
6254 else
6255 {
6256 if (dump_enabled_p ())
6257 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6258 "operand mode requires invariant argument.\n");
6259 return false;
6260 }
6261
6262 /* Vector shifted by vector. */
6263 bool was_scalar_shift_arg = scalar_shift_arg;
6264 if (!scalar_shift_arg)
6265 {
6266 optab = optab_for_tree_code (code, vectype, optab_vector);
6267 if (dump_enabled_p ())
6268 dump_printf_loc (MSG_NOTE, vect_location,
6269 "vector/vector shift/rotate found.\n");
6270
6271 if (!op1_vectype)
6272 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
6273 slp_op1);
6274 incompatible_op1_vectype_p
6275 = (op1_vectype == NULL_TREE
6276 || maybe_ne (a: TYPE_VECTOR_SUBPARTS (node: op1_vectype),
6277 b: TYPE_VECTOR_SUBPARTS (node: vectype))
6278 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
6279 if (incompatible_op1_vectype_p
6280 && (!slp_node
6281 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
6282 || slp_op1->refcnt != 1))
6283 {
6284 if (dump_enabled_p ())
6285 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6286 "unusable type for last operand in"
6287 " vector/vector shift/rotate.\n");
6288 return false;
6289 }
6290 }
6291 /* See if the machine has a vector shifted by scalar insn and if not
6292 then see if it has a vector shifted by vector insn. */
6293 else
6294 {
6295 optab = optab_for_tree_code (code, vectype, optab_scalar);
6296 if (optab
6297 && optab_handler (op: optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
6298 {
6299 if (dump_enabled_p ())
6300 dump_printf_loc (MSG_NOTE, vect_location,
6301 "vector/scalar shift/rotate found.\n");
6302 }
6303 else
6304 {
6305 optab = optab_for_tree_code (code, vectype, optab_vector);
6306 if (optab
6307 && (optab_handler (op: optab, TYPE_MODE (vectype))
6308 != CODE_FOR_nothing))
6309 {
6310 scalar_shift_arg = false;
6311
6312 if (dump_enabled_p ())
6313 dump_printf_loc (MSG_NOTE, vect_location,
6314 "vector/vector shift/rotate found.\n");
6315
6316 if (!op1_vectype)
6317 op1_vectype = get_vectype_for_scalar_type (vinfo,
6318 TREE_TYPE (op1),
6319 slp_op1);
6320
6321 /* Unlike the other binary operators, shifts/rotates have
6322 the rhs being int, instead of the same type as the lhs,
6323 so make sure the scalar is the right type if we are
6324 dealing with vectors of long long/long/short/char. */
6325 incompatible_op1_vectype_p
6326 = (!op1_vectype
6327 || !tree_nop_conversion_p (TREE_TYPE (vectype),
6328 TREE_TYPE (op1)));
6329 if (incompatible_op1_vectype_p
6330 && dt[1] == vect_internal_def)
6331 {
6332 if (dump_enabled_p ())
6333 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6334 "unusable type for last operand in"
6335 " vector/vector shift/rotate.\n");
6336 return false;
6337 }
6338 }
6339 }
6340 }
6341
6342 /* Supportable by target? */
6343 if (!optab)
6344 {
6345 if (dump_enabled_p ())
6346 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6347 "no optab.\n");
6348 return false;
6349 }
6350 vec_mode = TYPE_MODE (vectype);
6351 icode = (int) optab_handler (op: optab, mode: vec_mode);
6352 if (icode == CODE_FOR_nothing)
6353 {
6354 if (dump_enabled_p ())
6355 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6356 "op not supported by target.\n");
6357 return false;
6358 }
6359 /* Vector lowering cannot optimize vector shifts using word arithmetic. */
6360 if (vect_emulated_vector_p (vectype))
6361 return false;
6362
6363 if (!vec_stmt) /* transformation not required. */
6364 {
6365 if (slp_node
6366 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6367 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
6368 && (!incompatible_op1_vectype_p
6369 || dt[1] == vect_constant_def)
6370 && !vect_maybe_update_slp_op_vectype
6371 (slp_op1,
6372 incompatible_op1_vectype_p ? vectype : op1_vectype))))
6373 {
6374 if (dump_enabled_p ())
6375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6376 "incompatible vector types for invariants\n");
6377 return false;
6378 }
6379 /* Now adjust the constant shift amount in place. */
6380 if (slp_node
6381 && incompatible_op1_vectype_p
6382 && dt[1] == vect_constant_def)
6383 {
6384 for (unsigned i = 0;
6385 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
6386 {
6387 SLP_TREE_SCALAR_OPS (slp_op1)[i]
6388 = fold_convert (TREE_TYPE (vectype),
6389 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
6390 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
6391 == INTEGER_CST));
6392 }
6393 }
6394 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
6395 DUMP_VECT_SCOPE ("vectorizable_shift");
6396 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
6397 ndts: scalar_shift_arg ? 1 : ndts, node: slp_node, cost_vec);
6398 return true;
6399 }
6400
6401 /* Transform. */
6402
6403 if (dump_enabled_p ())
6404 dump_printf_loc (MSG_NOTE, vect_location,
6405 "transform binary/unary operation.\n");
6406
6407 if (incompatible_op1_vectype_p && !slp_node)
6408 {
6409 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
6410 op1 = fold_convert (TREE_TYPE (vectype), op1);
6411 if (dt[1] != vect_constant_def)
6412 op1 = vect_init_vector (vinfo, stmt_info, val: op1,
6413 TREE_TYPE (vectype), NULL);
6414 }
6415
6416 /* Handle def. */
6417 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6418
6419 if (scalar_shift_arg && dt[1] != vect_internal_def)
6420 {
6421 /* Vector shl and shr insn patterns can be defined with scalar
6422 operand 2 (shift operand). In this case, use constant or loop
6423 invariant op1 directly, without extending it to vector mode
6424 first. */
6425 optab_op2_mode = insn_data[icode].operand[2].mode;
6426 if (!VECTOR_MODE_P (optab_op2_mode))
6427 {
6428 if (dump_enabled_p ())
6429 dump_printf_loc (MSG_NOTE, vect_location,
6430 "operand 1 using scalar mode.\n");
6431 vec_oprnd1 = op1;
6432 vec_oprnds1.create (nelems: slp_node ? slp_node->vec_stmts_size : ncopies);
6433 vec_oprnds1.quick_push (obj: vec_oprnd1);
6434 /* Store vec_oprnd1 for every vector stmt to be created.
6435 We check during the analysis that all the shift arguments
6436 are the same.
6437 TODO: Allow different constants for different vector
6438 stmts generated for an SLP instance. */
6439 for (k = 0;
6440 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
6441 vec_oprnds1.quick_push (obj: vec_oprnd1);
6442 }
6443 }
6444 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
6445 {
6446 if (was_scalar_shift_arg)
6447 {
6448 /* If the argument was the same in all lanes create
6449 the correctly typed vector shift amount directly. */
6450 op1 = fold_convert (TREE_TYPE (vectype), op1);
6451 op1 = vect_init_vector (vinfo, stmt_info, val: op1, TREE_TYPE (vectype),
6452 gsi: !loop_vinfo ? gsi : NULL);
6453 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, val: op1, type: vectype,
6454 gsi: !loop_vinfo ? gsi : NULL);
6455 vec_oprnds1.create (nelems: slp_node->vec_stmts_size);
6456 for (k = 0; k < slp_node->vec_stmts_size; k++)
6457 vec_oprnds1.quick_push (obj: vec_oprnd1);
6458 }
6459 else if (dt[1] == vect_constant_def)
6460 /* The constant shift amount has been adjusted in place. */
6461 ;
6462 else
6463 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
6464 }
6465
6466 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
6467 (a special case for certain kind of vector shifts); otherwise,
6468 operand 1 should be of a vector type (the usual case). */
6469 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6470 op0, vec_oprnds0: &vec_oprnds0,
6471 op1: vec_oprnd1 ? NULL_TREE : op1, vec_oprnds1: &vec_oprnds1);
6472
6473 /* Arguments are ready. Create the new vector stmt. */
6474 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6475 {
6476 /* For internal defs where we need to use a scalar shift arg
6477 extract the first lane. */
6478 if (scalar_shift_arg && dt[1] == vect_internal_def)
6479 {
6480 vop1 = vec_oprnds1[0];
6481 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
6482 gassign *new_stmt
6483 = gimple_build_assign (new_temp,
6484 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
6485 vop1,
6486 TYPE_SIZE (TREE_TYPE (new_temp)),
6487 bitsize_zero_node));
6488 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
6489 vop1 = new_temp;
6490 }
6491 else
6492 vop1 = vec_oprnds1[i];
6493 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
6494 new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt);
6495 gimple_assign_set_lhs (gs: new_stmt, lhs: new_temp);
6496 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
6497 if (slp_node)
6498 slp_node->push_vec_def (def: new_stmt);
6499 else
6500 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
6501 }
6502
6503 if (!slp_node)
6504 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6505
6506 vec_oprnds0.release ();
6507 vec_oprnds1.release ();
6508
6509 return true;
6510}
6511
6512/* Function vectorizable_operation.
6513
6514 Check if STMT_INFO performs a binary, unary or ternary operation that can
6515 be vectorized.
6516 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6517 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6518 Return true if STMT_INFO is vectorizable in this way. */
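/* For example (a sketch), the scalar stmt

     a[i] = b[i] + c[i];

   is analyzed here by checking that the target supports addition on the
   chosen vector type, and at transform time one vector stmt

     vect_a = vect_b + vect_c;

   is emitted per copy (or per SLP vector def).  */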
6519
6520static bool
6521vectorizable_operation (vec_info *vinfo,
6522 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6523 gimple **vec_stmt, slp_tree slp_node,
6524 stmt_vector_for_cost *cost_vec)
6525{
6526 tree vec_dest;
6527 tree scalar_dest;
6528 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6529 tree vectype;
6530 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
6531 enum tree_code code, orig_code;
6532 machine_mode vec_mode;
6533 tree new_temp;
6534 int op_type;
6535 optab optab;
6536 bool target_support_p;
6537 enum vect_def_type dt[3]
6538 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6539 int ndts = 3;
6540 poly_uint64 nunits_in;
6541 poly_uint64 nunits_out;
6542 tree vectype_out;
6543 int ncopies, vec_num;
6544 int i;
6545 vec<tree> vec_oprnds0 = vNULL;
6546 vec<tree> vec_oprnds1 = vNULL;
6547 vec<tree> vec_oprnds2 = vNULL;
6548 tree vop0, vop1, vop2;
6549 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo);
6550
6551 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6552 return false;
6553
6554 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6555 && ! vec_stmt)
6556 return false;
6557
6558 /* Is STMT a vectorizable binary/unary operation? */
6559 gassign *stmt = dyn_cast <gassign *> (p: stmt_info->stmt);
6560 if (!stmt)
6561 return false;
6562
6563 /* Loads and stores are handled in vectorizable_{load,store}. */
6564 if (STMT_VINFO_DATA_REF (stmt_info))
6565 return false;
6566
6567 orig_code = code = gimple_assign_rhs_code (gs: stmt);
6568
6569 /* Shifts are handled in vectorizable_shift. */
6570 if (code == LSHIFT_EXPR
6571 || code == RSHIFT_EXPR
6572 || code == LROTATE_EXPR
6573 || code == RROTATE_EXPR)
6574 return false;
6575
6576 /* Comparisons are handled in vectorizable_comparison. */
6577 if (TREE_CODE_CLASS (code) == tcc_comparison)
6578 return false;
6579
6580 /* Conditions are handled in vectorizable_condition. */
6581 if (code == COND_EXPR)
6582 return false;
6583
6584 /* For pointer addition and subtraction, we should use the normal
6585 plus and minus for the vector operation. */
6586 if (code == POINTER_PLUS_EXPR)
6587 code = PLUS_EXPR;
6588 if (code == POINTER_DIFF_EXPR)
6589 code = MINUS_EXPR;
6590
6591 /* Support only unary, binary or ternary operations. */
6592 op_type = TREE_CODE_LENGTH (code);
6593 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6594 {
6595 if (dump_enabled_p ())
6596 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6597 "num. args = %d (not unary/binary/ternary op).\n",
6598 op_type);
6599 return false;
6600 }
6601
6602 scalar_dest = gimple_assign_lhs (gs: stmt);
6603 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6604
6605 /* Most operations cannot handle bit-precision types without extra
6606 truncations. */
6607 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6608 if (!mask_op_p
6609 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6610 /* Exception are bitwise binary operations. */
6611 && code != BIT_IOR_EXPR
6612 && code != BIT_XOR_EXPR
6613 && code != BIT_AND_EXPR)
6614 {
6615 if (dump_enabled_p ())
6616 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6617 "bit-precision arithmetic not supported.\n");
6618 return false;
6619 }
6620
6621 slp_tree slp_op0;
6622 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6623 0, &op0, &slp_op0, &dt[0], &vectype))
6624 {
6625 if (dump_enabled_p ())
6626 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6627 "use not simple.\n");
6628 return false;
6629 }
6630 bool is_invariant = (dt[0] == vect_external_def
6631 || dt[0] == vect_constant_def);
6632 /* If op0 is an external or constant def, infer the vector type
6633 from the scalar type. */
6634 if (!vectype)
6635 {
6636 /* For a boolean type we cannot determine the vectype from
6637 an invariant value (we don't know whether it is a vector
6638 of booleans or a vector of integers). Use the output
6639 vectype instead, since operations on booleans don't
6640 change the type. */
6641 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6642 {
6643 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6644 {
6645 if (dump_enabled_p ())
6646 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6647 "not supported operation on bool value.\n");
6648 return false;
6649 }
6650 vectype = vectype_out;
6651 }
6652 else
6653 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6654 slp_node);
6655 }
6656 if (vec_stmt)
6657 gcc_assert (vectype);
6658 if (!vectype)
6659 {
6660 if (dump_enabled_p ())
6661 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6662 "no vectype for scalar type %T\n",
6663 TREE_TYPE (op0));
6664
6665 return false;
6666 }
6667
6668 nunits_out = TYPE_VECTOR_SUBPARTS (node: vectype_out);
6669 nunits_in = TYPE_VECTOR_SUBPARTS (node: vectype);
6670 if (maybe_ne (a: nunits_out, b: nunits_in)
6671 || !tree_nop_conversion_p (TREE_TYPE (vectype_out), TREE_TYPE (vectype)))
6672 return false;
6673
6674 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6675 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6676 if (op_type == binary_op || op_type == ternary_op)
6677 {
6678 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6679 1, &op1, &slp_op1, &dt[1], &vectype2))
6680 {
6681 if (dump_enabled_p ())
6682 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6683 "use not simple.\n");
6684 return false;
6685 }
6686 is_invariant &= (dt[1] == vect_external_def
6687 || dt[1] == vect_constant_def);
6688 if (vectype2
6689 && (maybe_ne (a: nunits_out, b: TYPE_VECTOR_SUBPARTS (node: vectype2))
6690 || !tree_nop_conversion_p (TREE_TYPE (vectype_out),
6691 TREE_TYPE (vectype2))))
6692 return false;
6693 }
6694 if (op_type == ternary_op)
6695 {
6696 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6697 2, &op2, &slp_op2, &dt[2], &vectype3))
6698 {
6699 if (dump_enabled_p ())
6700 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6701 "use not simple.\n");
6702 return false;
6703 }
6704 is_invariant &= (dt[2] == vect_external_def
6705 || dt[2] == vect_constant_def);
6706 if (vectype3
6707 && (maybe_ne (a: nunits_out, b: TYPE_VECTOR_SUBPARTS (node: vectype3))
6708 || !tree_nop_conversion_p (TREE_TYPE (vectype_out),
6709 TREE_TYPE (vectype3))))
6710 return false;
6711 }
6712
6713 /* Multiple types in SLP are handled by creating the appropriate number of
6714 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6715 case of SLP. */
6716 if (slp_node)
6717 {
6718 ncopies = 1;
6719 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6720 }
6721 else
6722 {
6723 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6724 vec_num = 1;
6725 }
6726
6727 gcc_assert (ncopies >= 1);
6728
6729 /* Reject attempts to combine mask types with nonmask types, e.g. if
6730 we have an AND between a (nonmask) boolean loaded from memory and
6731 a (mask) boolean result of a comparison.
6732
6733 TODO: We could easily fix these cases up using pattern statements. */
6734 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6735 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6736 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6737 {
6738 if (dump_enabled_p ())
6739 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6740 "mixed mask and nonmask vector types\n");
6741 return false;
6742 }
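  /* Illustrative example (an assumption about typical source, not from the
     original comment): such a rejected mixture can come from

       void f (_Bool *restrict b, int *restrict x, int *restrict y,
               _Bool *restrict out, int n)
       {
         for (int i = 0; i < n; ++i)
           out[i] = b[i] & (x[i] < y[i]);
       }

     where b[i] is a nonmask boolean loaded from memory while x[i] < y[i]
     produces a mask.  */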
6743
6744 /* Supportable by target? */
6745
6746 vec_mode = TYPE_MODE (vectype);
6747 if (code == MULT_HIGHPART_EXPR)
6748 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6749 else
6750 {
6751 optab = optab_for_tree_code (code, vectype, optab_default);
6752 if (!optab)
6753 {
6754 if (dump_enabled_p ())
6755 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6756 "no optab.\n");
6757 return false;
6758 }
6759 target_support_p = (optab_handler (op: optab, mode: vec_mode) != CODE_FOR_nothing
6760 || optab_libfunc (optab, vec_mode));
6761 }
6762
6763 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6764 if (!target_support_p || using_emulated_vectors_p)
6765 {
6766 if (dump_enabled_p ())
6767 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6768 "op not supported by target.\n");
6769 /* When vec_mode is not a vector mode and we verified that ops we
6770 do not have to lower, like AND, are natively supported, let
6771 those through even when the mode isn't word_mode. For ops we
6772 do have to lower, the lowering code below assumes we are
6773 dealing with word_mode. */
6774 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype))
6775 || (((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
6776 || !target_support_p)
6777 && maybe_ne (a: GET_MODE_SIZE (mode: vec_mode), UNITS_PER_WORD))
6778 /* Check only during analysis. */
6779 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6780 {
6781 if (dump_enabled_p ())
6782 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6783 return false;
6784 }
6785 if (dump_enabled_p ())
6786 dump_printf_loc (MSG_NOTE, vect_location,
6787 "proceeding using word mode.\n");
6788 using_emulated_vectors_p = true;
6789 }
6790
6791 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6792 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6793 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
6794 internal_fn cond_fn = get_conditional_internal_fn (code);
6795 internal_fn cond_len_fn = get_conditional_len_internal_fn (code);
6796
6797 /* If operating on inactive elements could generate spurious traps,
6798 we need to restrict the operation to active lanes. Note that this
6799 specifically doesn't apply to unhoisted invariants, since they
6800 operate on the same value for every lane.
6801
6802 Similarly, if this operation is part of a reduction, a fully-masked
6803 loop should only change the active lanes of the reduction chain,
6804 keeping the inactive lanes as-is. */
6805 bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
6806 || reduc_idx >= 0);
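  /* For instance (illustrative only), when fully masking

       for (int i = 0; i < n; ++i)
         q[i] = a[i] / b[i];

     the vector division has to be restricted to the active lanes, since an
     inactive lane whose b[i] is zero could otherwise trap.  A statement all
     of whose operands are invariant computes the same value in every lane,
     so masking it cannot avoid a trap that would happen anyway.  */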
6807
6808 if (!vec_stmt) /* transformation not required. */
6809 {
6810 if (loop_vinfo
6811 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6812 && mask_out_inactive)
6813 {
6814 if (cond_len_fn != IFN_LAST
6815 && direct_internal_fn_supported_p (cond_len_fn, vectype,
6816 OPTIMIZE_FOR_SPEED))
6817 vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num, vectype,
6818 1);
6819 else if (cond_fn != IFN_LAST
6820 && direct_internal_fn_supported_p (cond_fn, vectype,
6821 OPTIMIZE_FOR_SPEED))
6822 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6823 vectype, NULL);
6824 else
6825 {
6826 if (dump_enabled_p ())
6827 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6828 "can't use a fully-masked loop because no"
6829 " conditional operation is available.\n");
6830 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6831 }
6832 }
6833
6834 /* Put types on constant and invariant SLP children. */
6835 if (slp_node
6836 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6837 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6838 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6839 {
6840 if (dump_enabled_p ())
6841 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6842 "incompatible vector types for invariants\n");
6843 return false;
6844 }
6845
6846 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6847 DUMP_VECT_SCOPE ("vectorizable_operation");
6848 vect_model_simple_cost (vinfo, stmt_info,
6849 ncopies, dt, ndts, node: slp_node, cost_vec);
6850 if (using_emulated_vectors_p)
6851 {
6852 /* The above vect_model_simple_cost call handles constants
6853 in the prologue and (mis-)costs one of the stmts as a
6854 vector stmt. See below for the actual lowering that will
6855 be applied. */
6856 unsigned n
6857 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6858 switch (code)
6859 {
6860 case PLUS_EXPR:
6861 n *= 5;
6862 break;
6863 case MINUS_EXPR:
6864 n *= 6;
6865 break;
6866 case NEGATE_EXPR:
6867 n *= 4;
6868 break;
6869 default:
6870 /* Bit operations do not have extra cost and are accounted
6871 as vector stmt by vect_model_simple_cost. */
6872 n = 0;
6873 break;
6874 }
6875 if (n != 0)
6876 {
6877 /* We also need to materialize two large constants. */
6878 record_stmt_cost (body_cost_vec: cost_vec, count: 2, kind: scalar_stmt, stmt_info,
6879 misalign: 0, where: vect_prologue);
6880 record_stmt_cost (body_cost_vec: cost_vec, count: n, kind: scalar_stmt, stmt_info,
6881 misalign: 0, where: vect_body);
6882 }
6883 }
6884 return true;
6885 }
6886
6887 /* Transform. */
6888
6889 if (dump_enabled_p ())
6890 dump_printf_loc (MSG_NOTE, vect_location,
6891 "transform binary/unary operation.\n");
6892
6893 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6894 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
6895
6896 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6897 vectors with unsigned elements, but the result is signed. So, we
6898 need to compute the MINUS_EXPR into vectype temporary and
6899 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6900 tree vec_cvt_dest = NULL_TREE;
6901 if (orig_code == POINTER_DIFF_EXPR)
6902 {
6903 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6904 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6905 }
6906 /* Handle def. */
6907 else
6908 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
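  /* Sketch of the POINTER_DIFF_EXPR case (illustrative; the array names are
     hypothetical): for scalar code like

       char *p[N], *q[N]; ptrdiff_t d[N];
       for (int i = 0; i < n; ++i)
         d[i] = p[i] - q[i];

     the subtraction is carried out on the unsigned element type of VECTYPE
     and the result is then converted:

       vect_tmp = MINUS_EXPR <vect_p, vect_q>;
       vect_res = VIEW_CONVERT_EXPR<vectype_out> (vect_tmp);  */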
6909
6910 /* In case the vectorization factor (VF) is bigger than the number
6911 of elements that we can fit in a vectype (nunits), we have to generate
6912 more than one vector stmt, i.e., we need to "unroll" the
6913 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6914 from one copy of the vector stmt to the next, in the field
6915 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6916 stages to find the correct vector defs to be used when vectorizing
6917 stmts that use the defs of the current stmt. The example below
6918 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6919 we need to create 4 vectorized stmts):
6920
6921 before vectorization:
6922 RELATED_STMT VEC_STMT
6923 S1: x = memref - -
6924 S2: z = x + 1 - -
6925
6926 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6927 there):
6928 RELATED_STMT VEC_STMT
6929 VS1_0: vx0 = memref0 VS1_1 -
6930 VS1_1: vx1 = memref1 VS1_2 -
6931 VS1_2: vx2 = memref2 VS1_3 -
6932 VS1_3: vx3 = memref3 - -
6933 S1: x = load - VS1_0
6934 S2: z = x + 1 - -
6935
6936 step2: vectorize stmt S2 (done here):
6937 To vectorize stmt S2 we first need to find the relevant vector
6938 def for the first operand 'x'. This is, as usual, obtained from
6939 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6940 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6941 relevant vector def 'vx0'. Having found 'vx0' we can generate
6942 the vector stmt VS2_0, and as usual, record it in the
6943 STMT_VINFO_VEC_STMT of stmt S2.
6944 When creating the second copy (VS2_1), we obtain the relevant vector
6945 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6946 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6947 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6948 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6949 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6950 chain of stmts and pointers:
6951 RELATED_STMT VEC_STMT
6952 VS1_0: vx0 = memref0 VS1_1 -
6953 VS1_1: vx1 = memref1 VS1_2 -
6954 VS1_2: vx2 = memref2 VS1_3 -
6955 VS1_3: vx3 = memref3 - -
6956 S1: x = load - VS1_0
6957 VS2_0: vz0 = vx0 + v1 VS2_1 -
6958 VS2_1: vz1 = vx1 + v1 VS2_2 -
6959 VS2_2: vz2 = vx2 + v1 VS2_3 -
6960 VS2_3: vz3 = vx3 + v1 - -
6961 S2: z = x + 1 - VS2_0 */
6962
6963 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6964 op0, vec_oprnds0: &vec_oprnds0, op1, vec_oprnds1: &vec_oprnds1, op2, vec_oprnds2: &vec_oprnds2);
6965 /* Arguments are ready. Create the new vector stmt. */
6966 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6967 {
6968 gimple *new_stmt = NULL;
6969 vop1 = ((op_type == binary_op || op_type == ternary_op)
6970 ? vec_oprnds1[i] : NULL_TREE);
6971 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6972 if (using_emulated_vectors_p
6973 && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR))
6974 {
6975 /* Lower the operation. This follows vector lowering. */
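          /* A minimal scalar sketch of the same bit trick, assuming two
             8-bit lanes packed into one 16-bit word (illustrative only; the
             IL generated below works on whole word_mode "vectors"):

               uint16_t low  = 0x7f7f;   // every bit except each lane's MSB
               uint16_t high = 0x8080;   // each lane's MSB only
               uint16_t sum  = ((a & low) + (b & low))  // no inter-lane carry
                               ^ ((a ^ b) & high);      // patch each lane's MSB

             MINUS_EXPR and NEGATE_EXPR are lowered with analogous
             identities below.  */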
6976 unsigned int width = vector_element_bits (vectype);
6977 tree inner_type = TREE_TYPE (vectype);
6978 tree word_type
6979 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode: word_mode), 1);
6980 HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type));
6981 tree low_bits = build_replicated_int_cst (word_type, width, max >> 1);
6982 tree high_bits
6983 = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
6984 tree wvop0 = make_ssa_name (var: word_type);
6985 new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR,
6986 build1 (VIEW_CONVERT_EXPR,
6987 word_type, vop0));
6988 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
6989 tree result_low, signs;
6990 if (code == PLUS_EXPR || code == MINUS_EXPR)
6991 {
6992 tree wvop1 = make_ssa_name (var: word_type);
6993 new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR,
6994 build1 (VIEW_CONVERT_EXPR,
6995 word_type, vop1));
6996 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
6997 signs = make_ssa_name (var: word_type);
6998 new_stmt = gimple_build_assign (signs,
6999 BIT_XOR_EXPR, wvop0, wvop1);
7000 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
7001 tree b_low = make_ssa_name (var: word_type);
7002 new_stmt = gimple_build_assign (b_low,
7003 BIT_AND_EXPR, wvop1, low_bits);
7004 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
7005 tree a_low = make_ssa_name (var: word_type);
7006 if (code == PLUS_EXPR)
7007 new_stmt = gimple_build_assign (a_low,
7008 BIT_AND_EXPR, wvop0, low_bits);
7009 else
7010 new_stmt = gimple_build_assign (a_low,
7011 BIT_IOR_EXPR, wvop0, high_bits);
7012 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
7013 if (code == MINUS_EXPR)
7014 {
7015 new_stmt = gimple_build_assign (NULL_TREE,
7016 BIT_NOT_EXPR, signs);
7017 signs = make_ssa_name (var: word_type);
7018 gimple_assign_set_lhs (gs: new_stmt, lhs: signs);
7019 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
7020 }
7021 new_stmt = gimple_build_assign (NULL_TREE,
7022 BIT_AND_EXPR, signs, high_bits);
7023 signs = make_ssa_name (var: word_type);
7024 gimple_assign_set_lhs (gs: new_stmt, lhs: signs);
7025 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
7026 result_low = make_ssa_name (var: word_type);
7027 new_stmt = gimple_build_assign (result_low, code, a_low, b_low);
7028 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
7029 }
7030 else
7031 {
7032 tree a_low = make_ssa_name (var: word_type);
7033 new_stmt = gimple_build_assign (a_low,
7034 BIT_AND_EXPR, wvop0, low_bits);
7035 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
7036 signs = make_ssa_name (var: word_type);
7037 new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
7038 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
7039 new_stmt = gimple_build_assign (NULL_TREE,
7040 BIT_AND_EXPR, signs, high_bits);
7041 signs = make_ssa_name (var: word_type);
7042 gimple_assign_set_lhs (gs: new_stmt, lhs: signs);
7043 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
7044 result_low = make_ssa_name (var: word_type);
7045 new_stmt = gimple_build_assign (result_low,
7046 MINUS_EXPR, high_bits, a_low);
7047 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
7048 }
7049 new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR, result_low,
7050 signs);
7051 result_low = make_ssa_name (var: word_type);
7052 gimple_assign_set_lhs (gs: new_stmt, lhs: result_low);
7053 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
7054 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR,
7055 build1 (VIEW_CONVERT_EXPR,
7056 vectype, result_low));
7057 new_temp = make_ssa_name (var: vectype);
7058 gimple_assign_set_lhs (gs: new_stmt, lhs: new_temp);
7059 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
7060 }
7061 else if ((masked_loop_p || len_loop_p) && mask_out_inactive)
7062 {
7063 tree mask;
7064 if (masked_loop_p)
7065 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7066 vec_num * ncopies, vectype, i);
7067 else
7068 /* Dummy mask. */
7069 mask = build_minus_one_cst (truth_type_for (vectype));
7070 auto_vec<tree> vops (6);
7071 vops.quick_push (obj: mask);
7072 vops.quick_push (obj: vop0);
7073 if (vop1)
7074 vops.quick_push (obj: vop1);
7075 if (vop2)
7076 vops.quick_push (obj: vop2);
7077 if (reduc_idx >= 0)
7078 {
7079 /* Perform the operation on active elements only and take
7080 inactive elements from the reduction chain input. */
7081 gcc_assert (!vop2);
7082 vops.quick_push (obj: reduc_idx == 1 ? vop1 : vop0);
7083 }
7084 else
7085 {
7086 auto else_value = targetm.preferred_else_value
7087 (cond_fn, vectype, vops.length () - 1, &vops[1]);
7088 vops.quick_push (obj: else_value);
7089 }
7090 if (len_loop_p)
7091 {
7092 tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
7093 vec_num * ncopies, vectype, i, 1);
7094 signed char biasval
7095 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
7096 tree bias = build_int_cst (intQI_type_node, biasval);
7097 vops.quick_push (obj: len);
7098 vops.quick_push (obj: bias);
7099 }
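          /* Sketch of the call built below, assuming a masked binary
             PLUS_EXPR (illustrative; the exact ifn depends on CODE):

               new_temp = .COND_ADD (loop_mask, vop0, vop1, else_value);

             or, for a length-controlled loop,

               new_temp = .COND_LEN_ADD (all_ones_mask, vop0, vop1,
                                         else_value, len, bias);  */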
7100 gcall *call
7101 = gimple_build_call_internal_vec (masked_loop_p ? cond_fn
7102 : cond_len_fn,
7103 vops);
7104 new_temp = make_ssa_name (var: vec_dest, stmt: call);
7105 gimple_call_set_lhs (gs: call, lhs: new_temp);
7106 gimple_call_set_nothrow (s: call, nothrow_p: true);
7107 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi);
7108 new_stmt = call;
7109 }
7110 else
7111 {
7112 tree mask = NULL_TREE;
7113 /* When combining two masks, check whether either of them is combined
7114 with a loop mask elsewhere. If so, apply the loop mask to that operand
7115 here and record that the combined mask needs no further loop masking. */
7116 if (masked_loop_p
7117 && code == BIT_AND_EXPR
7118 && VECTOR_BOOLEAN_TYPE_P (vectype))
7119 {
7120 if (loop_vinfo->scalar_cond_masked_set.contains (k: { op0,
7121 ncopies}))
7122 {
7123 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7124 vec_num * ncopies, vectype, i);
7125
7126 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), loop_mask: mask,
7127 vec_mask: vop0, gsi);
7128 }
7129
7130 if (loop_vinfo->scalar_cond_masked_set.contains (k: { op1,
7131 ncopies }))
7132 {
7133 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7134 vec_num * ncopies, vectype, i);
7135
7136 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), loop_mask: mask,
7137 vec_mask: vop1, gsi);
7138 }
7139 }
7140
7141 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
7142 new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt);
7143 gimple_assign_set_lhs (gs: new_stmt, lhs: new_temp);
7144 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
7145 if (using_emulated_vectors_p)
7146 suppress_warning (new_stmt, OPT_Wvector_operation_performance);
7147
7148 /* Enter the combined value into the vector cond hash so we don't
7149 AND it with a loop mask again. */
7150 if (mask)
7151 loop_vinfo->vec_cond_masked_set.add (k: { new_temp, mask });
7152 }
7153
7154 if (vec_cvt_dest)
7155 {
7156 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
7157 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
7158 new_temp);
7159 new_temp = make_ssa_name (var: vec_cvt_dest, stmt: new_stmt);
7160 gimple_assign_set_lhs (gs: new_stmt, lhs: new_temp);
7161 vect_finish_stmt_generation (vinfo, stmt_info,
7162 vec_stmt: new_stmt, gsi);
7163 }
7164
7165 if (slp_node)
7166 slp_node->push_vec_def (def: new_stmt);
7167 else
7168 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
7169 }
7170
7171 if (!slp_node)
7172 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7173
7174 vec_oprnds0.release ();
7175 vec_oprnds1.release ();
7176 vec_oprnds2.release ();
7177
7178 return true;
7179}
7180
7181/* A helper function to ensure data reference DR_INFO's base alignment. */
7182
7183static void
7184ensure_base_align (dr_vec_info *dr_info)
7185{
7186 /* Alignment is only analyzed for the first element of a DR group;
7187 use that element to determine the base alignment we need to enforce. */
7188 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
7189 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
7190
7191 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
7192
7193 if (dr_info->base_misaligned)
7194 {
7195 tree base_decl = dr_info->base_decl;
7196
7197 // We should only be able to increase the alignment of a base object if
7198 // we know what its new alignment should be at compile time.
7199 unsigned HOST_WIDE_INT align_base_to =
7200 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
7201
7202 if (decl_in_symtab_p (decl: base_decl))
7203 symtab_node::get (decl: base_decl)->increase_alignment (align: align_base_to);
7204 else if (DECL_ALIGN (base_decl) < align_base_to)
7205 {
7206 SET_DECL_ALIGN (base_decl, align_base_to);
7207 DECL_USER_ALIGN (base_decl) = 1;
7208 }
7209 dr_info->base_misaligned = false;
7210 }
7211}
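/* For example (illustrative, assuming a target whose preferred vector
   alignment is 16 bytes): a file-scope

     static int a[256];

   whose current DECL_ALIGN is below that gets it raised to 128 bits here,
   so that later code can use aligned vector loads and stores of A.  */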
7212
7213
7214/* Function get_group_alias_ptr_type.
7215
7216 Return the alias type for the group starting at FIRST_STMT_INFO. */
7217
7218static tree
7219get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
7220{
7221 struct data_reference *first_dr, *next_dr;
7222
7223 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
7224 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
7225 while (next_stmt_info)
7226 {
7227 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
7228 if (get_alias_set (DR_REF (first_dr))
7229 != get_alias_set (DR_REF (next_dr)))
7230 {
7231 if (dump_enabled_p ())
7232 dump_printf_loc (MSG_NOTE, vect_location,
7233 "conflicting alias set types.\n");
7234 return ptr_type_node;
7235 }
7236 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7237 }
7238 return reference_alias_ptr_type (DR_REF (first_dr));
7239}
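/* For instance (illustrative), for an interleaved store group writing both
   fields of

     struct { int i; float f; } *p;

   the int and float references have different alias sets, so the group is
   accessed through ptr_type_node (alias set 0) instead of a more precise
   alias pointer type.  */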
7240
7241
7242/* Function scan_operand_equal_p.
7243
7244 Helper function for check_scan_store. Compare two references
7245 with .GOMP_SIMD_LANE bases. */
7246
7247static bool
7248scan_operand_equal_p (tree ref1, tree ref2)
7249{
7250 tree ref[2] = { ref1, ref2 };
7251 poly_int64 bitsize[2], bitpos[2];
7252 tree offset[2], base[2];
7253 for (int i = 0; i < 2; ++i)
7254 {
7255 machine_mode mode;
7256 int unsignedp, reversep, volatilep = 0;
7257 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
7258 &offset[i], &mode, &unsignedp,
7259 &reversep, &volatilep);
7260 if (reversep || volatilep || maybe_ne (a: bitpos[i], b: 0))
7261 return false;
7262 if (TREE_CODE (base[i]) == MEM_REF
7263 && offset[i] == NULL_TREE
7264 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
7265 {
7266 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
7267 if (is_gimple_assign (gs: def_stmt)
7268 && gimple_assign_rhs_code (gs: def_stmt) == POINTER_PLUS_EXPR
7269 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
7270 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
7271 {
7272 if (maybe_ne (a: mem_ref_offset (base[i]), b: 0))
7273 return false;
7274 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
7275 offset[i] = gimple_assign_rhs2 (gs: def_stmt);
7276 }
7277 }
7278 }
7279
7280 if (!operand_equal_p (base[0], base[1], flags: 0))
7281 return false;
7282 if (maybe_ne (a: bitsize[0], b: bitsize[1]))
7283 return false;
7284 if (offset[0] != offset[1])
7285 {
7286 if (!offset[0] || !offset[1])
7287 return false;
7288 if (!operand_equal_p (offset[0], offset[1], flags: 0))
7289 {
7290 tree step[2];
7291 for (int i = 0; i < 2; ++i)
7292 {
7293 step[i] = integer_one_node;
7294 if (TREE_CODE (offset[i]) == SSA_NAME)
7295 {
7296 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7297 if (is_gimple_assign (gs: def_stmt)
7298 && gimple_assign_rhs_code (gs: def_stmt) == MULT_EXPR
7299 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
7300 == INTEGER_CST))
7301 {
7302 step[i] = gimple_assign_rhs2 (gs: def_stmt);
7303 offset[i] = gimple_assign_rhs1 (gs: def_stmt);
7304 }
7305 }
7306 else if (TREE_CODE (offset[i]) == MULT_EXPR)
7307 {
7308 step[i] = TREE_OPERAND (offset[i], 1);
7309 offset[i] = TREE_OPERAND (offset[i], 0);
7310 }
7311 tree rhs1 = NULL_TREE;
7312 if (TREE_CODE (offset[i]) == SSA_NAME)
7313 {
7314 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7315 if (gimple_assign_cast_p (s: def_stmt))
7316 rhs1 = gimple_assign_rhs1 (gs: def_stmt);
7317 }
7318 else if (CONVERT_EXPR_P (offset[i]))
7319 rhs1 = TREE_OPERAND (offset[i], 0);
7320 if (rhs1
7321 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
7322 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
7323 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
7324 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
7325 offset[i] = rhs1;
7326 }
7327 if (!operand_equal_p (offset[0], offset[1], flags: 0)
7328 || !operand_equal_p (step[0], step[1], flags: 0))
7329 return false;
7330 }
7331 }
7332 return true;
7333}
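/* As a rough illustration (an assumption about the typical IL, not taken
   from the sources): the two "omp simd array" accesses

     D.2042[_25]
   and
     MEM[(int *)&D.2042 + _25 * 4]

   are treated as equal; both decompose to base D.2042 with variable part
   _25, after the code above strips the constant MULT_EXPR step and any
   widening conversion from the offsets before comparing them.  */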
7334
7335
7336enum scan_store_kind {
7337 /* Normal permutation. */
7338 scan_store_kind_perm,
7339
7340 /* Whole vector left shift permutation with zero init. */
7341 scan_store_kind_lshift_zero,
7342
7343 /* Whole vector left shift permutation and VEC_COND_EXPR. */
7344 scan_store_kind_lshift_cond
7345};
7346
7347 /* Function scan_store_can_perm_p.
7348
7349 Verify that we can perform the needed permutations or whole vector shifts.
7350 Return -1 on failure, otherwise the exact log2 of VECTYPE's nunits.
7351 USE_WHOLE_VECTOR, if nonnull, records the scan_store_kind to use at each
7352 step; it is left empty if plain permutations suffice throughout. */
7353
7354static int
7355scan_store_can_perm_p (tree vectype, tree init,
7356 vec<enum scan_store_kind> *use_whole_vector = NULL)
7357{
7358 enum machine_mode vec_mode = TYPE_MODE (vectype);
7359 unsigned HOST_WIDE_INT nunits;
7360 if (!TYPE_VECTOR_SUBPARTS (node: vectype).is_constant (const_value: &nunits))
7361 return -1;
7362 int units_log2 = exact_log2 (x: nunits);
7363 if (units_log2 <= 0)
7364 return -1;
7365
7366 int i;
7367 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
7368 for (i = 0; i <= units_log2; ++i)
7369 {
7370 unsigned HOST_WIDE_INT j, k;
7371 enum scan_store_kind kind = scan_store_kind_perm;
7372 vec_perm_builder sel (nunits, nunits, 1);
7373 sel.quick_grow (len: nunits);
7374 if (i == units_log2)
7375 {
7376 for (j = 0; j < nunits; ++j)
7377 sel[j] = nunits - 1;
7378 }
7379 else
7380 {
7381 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7382 sel[j] = j;
7383 for (k = 0; j < nunits; ++j, ++k)
7384 sel[j] = nunits + k;
7385 }
7386 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7387 if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
7388 {
7389 if (i == units_log2)
7390 return -1;
7391
7392 if (whole_vector_shift_kind == scan_store_kind_perm)
7393 {
7394 if (optab_handler (op: vec_shl_optab, mode: vec_mode) == CODE_FOR_nothing)
7395 return -1;
7396 whole_vector_shift_kind = scan_store_kind_lshift_zero;
7397 /* Whole vector shifts shift in zeros, so if INIT is an all-zeros
7398 constant, there is no need to do anything further. */
7399 if ((TREE_CODE (init) != INTEGER_CST
7400 && TREE_CODE (init) != REAL_CST)
7401 || !initializer_zerop (init))
7402 {
7403 tree masktype = truth_type_for (vectype);
7404 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
7405 return -1;
7406 whole_vector_shift_kind = scan_store_kind_lshift_cond;
7407 }
7408 }
7409 kind = whole_vector_shift_kind;
7410 }
7411 if (use_whole_vector)
7412 {
7413 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
7414 use_whole_vector->safe_grow_cleared (len: i, exact: true);
7415 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
7416 use_whole_vector->safe_push (obj: kind);
7417 }
7418 }
7419
7420 return units_log2;
7421}
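/* As an illustration (not part of the original sources), for a 4-lane
   vector this returns 2 and checks the following selectors, where lanes
   0..3 come from the first VEC_PERM_EXPR operand (the init/zero vector)
   and 4..7 from the second (the running scan value):

     step 0:  { 0, 4, 5, 6 }    shift the running value up by one lane
     step 1:  { 0, 1, 4, 5 }    shift it up by two lanes
     final:   { 3, 3, 3, 3 }    broadcast the last lane

   matching the inclusive-scan expansion shown in the big comment inside
   check_scan_store.  */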
7422
7423
7424/* Function check_scan_store.
7425
7426 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
7427
7428static bool
7429check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
7430 enum vect_def_type rhs_dt, bool slp, tree mask,
7431 vect_memory_access_type memory_access_type)
7432{
7433 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
7434 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7435 tree ref_type;
7436
7437 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
7438 if (slp
7439 || mask
7440 || memory_access_type != VMAT_CONTIGUOUS
7441 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
7442 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
7443 || loop_vinfo == NULL
7444 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7445 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
7446 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
7447 || !integer_zerop (DR_INIT (dr_info->dr))
7448 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
7449 || !alias_sets_conflict_p (get_alias_set (vectype),
7450 get_alias_set (TREE_TYPE (ref_type))))
7451 {
7452 if (dump_enabled_p ())
7453 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7454 "unsupported OpenMP scan store.\n");
7455 return false;
7456 }
7457
7458 /* We need to pattern match the code built by OpenMP lowering and then
7459 simplified by subsequent optimizations into something we can handle.
7460 #pragma omp simd reduction(inscan,+:r)
7461 for (...)
7462 {
7463 r += something ();
7464 #pragma omp scan inclusive (r)
7465 use (r);
7466 }
7467 shall have body with:
7468 // Initialization for input phase, store the reduction initializer:
7469 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7470 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7471 D.2042[_21] = 0;
7472 // Actual input phase:
7473 ...
7474 r.0_5 = D.2042[_20];
7475 _6 = _4 + r.0_5;
7476 D.2042[_20] = _6;
7477 // Initialization for scan phase:
7478 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
7479 _26 = D.2043[_25];
7480 _27 = D.2042[_25];
7481 _28 = _26 + _27;
7482 D.2043[_25] = _28;
7483 D.2042[_25] = _28;
7484 // Actual scan phase:
7485 ...
7486 r.1_8 = D.2042[_20];
7487 ...
7488 The "omp simd array" variable D.2042 holds the privatized copy used
7489 inside the loop and D.2043 is another one that holds copies of
7490 the current original list item. The separate GOMP_SIMD_LANE ifn
7491 kinds are there to allow the initializer store and combiner
7492 sequence to be optimized, e.g. if it is originally some C++-ish
7493 user-defined reduction, while still letting the vectorizer pattern
7494 recognize it and turn it into the appropriate vectorized scan.
7495
7496 For exclusive scan, this is slightly different:
7497 #pragma omp simd reduction(inscan,+:r)
7498 for (...)
7499 {
7500 use (r);
7501 #pragma omp scan exclusive (r)
7502 r += something ();
7503 }
7504 shall have body with:
7505 // Initialization for input phase, store the reduction initializer:
7506 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7507 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7508 D.2042[_21] = 0;
7509 // Actual input phase:
7510 ...
7511 r.0_5 = D.2042[_20];
7512 _6 = _4 + r.0_5;
7513 D.2042[_20] = _6;
7514 // Initialization for scan phase:
7515 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7516 _26 = D.2043[_25];
7517 D.2044[_25] = _26;
7518 _27 = D.2042[_25];
7519 _28 = _26 + _27;
7520 D.2043[_25] = _28;
7521 // Actual scan phase:
7522 ...
7523 r.1_8 = D.2044[_20];
7524 ... */
7525
7526 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
7527 {
7528 /* Match the D.2042[_21] = 0; store above. Just require that
7529 it is a constant or external definition store. */
7530 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
7531 {
7532 fail_init:
7533 if (dump_enabled_p ())
7534 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7535 "unsupported OpenMP scan initializer store.\n");
7536 return false;
7537 }
7538
7539 if (! loop_vinfo->scan_map)
7540 loop_vinfo->scan_map = new hash_map<tree, tree>;
7541 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7542 tree &cached = loop_vinfo->scan_map->get_or_insert (k: var);
7543 if (cached)
7544 goto fail_init;
7545 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
7546
7547 /* These stores can be vectorized normally. */
7548 return true;
7549 }
7550
7551 if (rhs_dt != vect_internal_def)
7552 {
7553 fail:
7554 if (dump_enabled_p ())
7555 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7556 "unsupported OpenMP scan combiner pattern.\n");
7557 return false;
7558 }
7559
7560 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7561 tree rhs = gimple_assign_rhs1 (gs: stmt);
7562 if (TREE_CODE (rhs) != SSA_NAME)
7563 goto fail;
7564
7565 gimple *other_store_stmt = NULL;
7566 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7567 bool inscan_var_store
7568 = lookup_attribute (attr_name: "omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7569
7570 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7571 {
7572 if (!inscan_var_store)
7573 {
7574 use_operand_p use_p;
7575 imm_use_iterator iter;
7576 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7577 {
7578 gimple *use_stmt = USE_STMT (use_p);
7579 if (use_stmt == stmt || is_gimple_debug (gs: use_stmt))
7580 continue;
7581 if (gimple_bb (g: use_stmt) != gimple_bb (g: stmt)
7582 || !is_gimple_assign (gs: use_stmt)
7583 || gimple_assign_rhs_class (gs: use_stmt) != GIMPLE_BINARY_RHS
7584 || other_store_stmt
7585 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
7586 goto fail;
7587 other_store_stmt = use_stmt;
7588 }
7589 if (other_store_stmt == NULL)
7590 goto fail;
7591 rhs = gimple_assign_lhs (gs: other_store_stmt);
7592 if (!single_imm_use (var: rhs, use_p: &use_p, stmt: &other_store_stmt))
7593 goto fail;
7594 }
7595 }
7596 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
7597 {
7598 use_operand_p use_p;
7599 imm_use_iterator iter;
7600 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7601 {
7602 gimple *use_stmt = USE_STMT (use_p);
7603 if (use_stmt == stmt || is_gimple_debug (gs: use_stmt))
7604 continue;
7605 if (other_store_stmt)
7606 goto fail;
7607 other_store_stmt = use_stmt;
7608 }
7609 }
7610 else
7611 goto fail;
7612
7613 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7614 if (gimple_bb (g: def_stmt) != gimple_bb (g: stmt)
7615 || !is_gimple_assign (gs: def_stmt)
7616 || gimple_assign_rhs_class (gs: def_stmt) != GIMPLE_BINARY_RHS)
7617 goto fail;
7618
7619 enum tree_code code = gimple_assign_rhs_code (gs: def_stmt);
7620 /* For pointer addition, we should use the normal plus for the vector
7621 operation. */
7622 switch (code)
7623 {
7624 case POINTER_PLUS_EXPR:
7625 code = PLUS_EXPR;
7626 break;
7627 case MULT_HIGHPART_EXPR:
7628 goto fail;
7629 default:
7630 break;
7631 }
7632 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
7633 goto fail;
7634
7635 tree rhs1 = gimple_assign_rhs1 (gs: def_stmt);
7636 tree rhs2 = gimple_assign_rhs2 (gs: def_stmt);
7637 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
7638 goto fail;
7639
7640 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7641 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7642 if (gimple_bb (g: load1_stmt) != gimple_bb (g: stmt)
7643 || !gimple_assign_load_p (load1_stmt)
7644 || gimple_bb (g: load2_stmt) != gimple_bb (g: stmt)
7645 || !gimple_assign_load_p (load2_stmt))
7646 goto fail;
7647
7648 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7649 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7650 if (load1_stmt_info == NULL
7651 || load2_stmt_info == NULL
7652 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
7653 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
7654 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
7655 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7656 goto fail;
7657
7658 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
7659 {
7660 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7661 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
7662 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
7663 goto fail;
7664 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7665 tree lrhs;
7666 if (lookup_attribute (attr_name: "omp simd inscan", DECL_ATTRIBUTES (var1)))
7667 lrhs = rhs1;
7668 else
7669 lrhs = rhs2;
7670 use_operand_p use_p;
7671 imm_use_iterator iter;
7672 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7673 {
7674 gimple *use_stmt = USE_STMT (use_p);
7675 if (use_stmt == def_stmt || is_gimple_debug (gs: use_stmt))
7676 continue;
7677 if (other_store_stmt)
7678 goto fail;
7679 other_store_stmt = use_stmt;
7680 }
7681 }
7682
7683 if (other_store_stmt == NULL)
7684 goto fail;
7685 if (gimple_bb (g: other_store_stmt) != gimple_bb (g: stmt)
7686 || !gimple_store_p (gs: other_store_stmt))
7687 goto fail;
7688
7689 stmt_vec_info other_store_stmt_info
7690 = loop_vinfo->lookup_stmt (other_store_stmt);
7691 if (other_store_stmt_info == NULL
7692 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7693 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7694 goto fail;
7695
7696 gimple *stmt1 = stmt;
7697 gimple *stmt2 = other_store_stmt;
7698 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7699 std::swap (a&: stmt1, b&: stmt2);
7700 if (scan_operand_equal_p (ref1: gimple_assign_lhs (gs: stmt1),
7701 ref2: gimple_assign_rhs1 (gs: load2_stmt)))
7702 {
7703 std::swap (a&: rhs1, b&: rhs2);
7704 std::swap (a&: load1_stmt, b&: load2_stmt);
7705 std::swap (a&: load1_stmt_info, b&: load2_stmt_info);
7706 }
7707 if (!scan_operand_equal_p (ref1: gimple_assign_lhs (gs: stmt1),
7708 ref2: gimple_assign_rhs1 (gs: load1_stmt)))
7709 goto fail;
7710
7711 tree var3 = NULL_TREE;
7712 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7713 && !scan_operand_equal_p (ref1: gimple_assign_lhs (gs: stmt2),
7714 ref2: gimple_assign_rhs1 (gs: load2_stmt)))
7715 goto fail;
7716 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7717 {
7718 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7719 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7720 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7721 goto fail;
7722 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7723 if (!lookup_attribute (attr_name: "omp simd array", DECL_ATTRIBUTES (var3))
7724 || lookup_attribute (attr_name: "omp simd inscan", DECL_ATTRIBUTES (var3))
7725 || lookup_attribute (attr_name: "omp simd inscan exclusive",
7726 DECL_ATTRIBUTES (var3)))
7727 goto fail;
7728 }
7729
7730 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7731 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7732 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7733 goto fail;
7734
7735 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7736 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7737 if (!lookup_attribute (attr_name: "omp simd array", DECL_ATTRIBUTES (var1))
7738 || !lookup_attribute (attr_name: "omp simd array", DECL_ATTRIBUTES (var2))
7739 || (!lookup_attribute (attr_name: "omp simd inscan", DECL_ATTRIBUTES (var1)))
7740 == (!lookup_attribute (attr_name: "omp simd inscan", DECL_ATTRIBUTES (var2))))
7741 goto fail;
7742
7743 if (lookup_attribute (attr_name: "omp simd inscan", DECL_ATTRIBUTES (var1)))
7744 std::swap (a&: var1, b&: var2);
7745
7746 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7747 {
7748 if (!lookup_attribute (attr_name: "omp simd inscan exclusive",
7749 DECL_ATTRIBUTES (var1)))
7750 goto fail;
7751 var1 = var3;
7752 }
7753
7754 if (loop_vinfo->scan_map == NULL)
7755 goto fail;
7756 tree *init = loop_vinfo->scan_map->get (k: var1);
7757 if (init == NULL)
7758 goto fail;
7759
7760 /* The IL is as expected, now check if we can actually vectorize it.
7761 Inclusive scan:
7762 _26 = D.2043[_25];
7763 _27 = D.2042[_25];
7764 _28 = _26 + _27;
7765 D.2043[_25] = _28;
7766 D.2042[_25] = _28;
7767 should be vectorized as (where _40 is the vectorized rhs
7768 from the D.2042[_21] = 0; store):
7769 _30 = MEM <vector(8) int> [(int *)&D.2043];
7770 _31 = MEM <vector(8) int> [(int *)&D.2042];
7771 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7772 _33 = _31 + _32;
7773 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7774 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7775 _35 = _33 + _34;
7776 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7777 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7778 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7779 _37 = _35 + _36;
7780 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7781 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7782 _38 = _30 + _37;
7783 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7784 MEM <vector(8) int> [(int *)&D.2043] = _39;
7785 MEM <vector(8) int> [(int *)&D.2042] = _38;
7786 Exclusive scan:
7787 _26 = D.2043[_25];
7788 D.2044[_25] = _26;
7789 _27 = D.2042[_25];
7790 _28 = _26 + _27;
7791 D.2043[_25] = _28;
7792 should be vectorized as (where _40 is the vectorized rhs
7793 from the D.2042[_21] = 0; store):
7794 _30 = MEM <vector(8) int> [(int *)&D.2043];
7795 _31 = MEM <vector(8) int> [(int *)&D.2042];
7796 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7797 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7798 _34 = _32 + _33;
7799 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7800 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7801 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7802 _36 = _34 + _35;
7803 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7804 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7805 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7806 _38 = _36 + _37;
7807 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7808 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7809 _39 = _30 + _38;
7810 _50 = _31 + _39;
7811 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7812 MEM <vector(8) int> [(int *)&D.2044] = _39;
7813 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7814 enum machine_mode vec_mode = TYPE_MODE (vectype);
7815 optab optab = optab_for_tree_code (code, vectype, optab_default);
7816 if (!optab || optab_handler (op: optab, mode: vec_mode) == CODE_FOR_nothing)
7817 goto fail;
7818
7819 int units_log2 = scan_store_can_perm_p (vectype, init: *init);
7820 if (units_log2 == -1)
7821 goto fail;
7822
7823 return true;
7824}
7825
7826
7827/* Function vectorizable_scan_store.
7828
7829 Helper of vectorizable_store; arguments are as for vectorizable_store.
7830 Handles only the transformation; the checking is done in check_scan_store. */
7831
7832static bool
7833vectorizable_scan_store (vec_info *vinfo,
7834 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7835 gimple **vec_stmt, int ncopies)
7836{
7837 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
7838 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7839 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7840 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7841
7842 if (dump_enabled_p ())
7843 dump_printf_loc (MSG_NOTE, vect_location,
7844 "transform scan store. ncopies = %d\n", ncopies);
7845
7846 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7847 tree rhs = gimple_assign_rhs1 (gs: stmt);
7848 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7849
7850 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7851 bool inscan_var_store
7852 = lookup_attribute (attr_name: "omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7853
7854 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7855 {
7856 use_operand_p use_p;
7857 imm_use_iterator iter;
7858 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7859 {
7860 gimple *use_stmt = USE_STMT (use_p);
7861 if (use_stmt == stmt || is_gimple_debug (gs: use_stmt))
7862 continue;
7863 rhs = gimple_assign_lhs (gs: use_stmt);
7864 break;
7865 }
7866 }
7867
7868 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7869 enum tree_code code = gimple_assign_rhs_code (gs: def_stmt);
7870 if (code == POINTER_PLUS_EXPR)
7871 code = PLUS_EXPR;
7872 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7873 && commutative_tree_code (code));
7874 tree rhs1 = gimple_assign_rhs1 (gs: def_stmt);
7875 tree rhs2 = gimple_assign_rhs2 (gs: def_stmt);
7876 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7877 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7878 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7879 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7880 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7881 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7882 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7883 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7884 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7885
7886 if (lookup_attribute (attr_name: "omp simd inscan", DECL_ATTRIBUTES (var1)))
7887 {
7888 std::swap (a&: rhs1, b&: rhs2);
7889 std::swap (a&: var1, b&: var2);
7890 std::swap (a&: load1_dr_info, b&: load2_dr_info);
7891 }
7892
7893 tree *init = loop_vinfo->scan_map->get (k: var1);
7894 gcc_assert (init);
7895
7896 unsigned HOST_WIDE_INT nunits;
7897 if (!TYPE_VECTOR_SUBPARTS (node: vectype).is_constant (const_value: &nunits))
7898 gcc_unreachable ();
7899 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7900 int units_log2 = scan_store_can_perm_p (vectype, init: *init, use_whole_vector: &use_whole_vector);
7901 gcc_assert (units_log2 > 0);
7902 auto_vec<tree, 16> perms;
7903 perms.quick_grow (len: units_log2 + 1);
7904 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7905 for (int i = 0; i <= units_log2; ++i)
7906 {
7907 unsigned HOST_WIDE_INT j, k;
7908 vec_perm_builder sel (nunits, nunits, 1);
7909 sel.quick_grow (len: nunits);
7910 if (i == units_log2)
7911 for (j = 0; j < nunits; ++j)
7912 sel[j] = nunits - 1;
7913 else
7914 {
7915 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7916 sel[j] = j;
7917 for (k = 0; j < nunits; ++j, ++k)
7918 sel[j] = nunits + k;
7919 }
7920 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7921 if (!use_whole_vector.is_empty ()
7922 && use_whole_vector[i] != scan_store_kind_perm)
7923 {
7924 if (zero_vec == NULL_TREE)
7925 zero_vec = build_zero_cst (vectype);
7926 if (masktype == NULL_TREE
7927 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7928 masktype = truth_type_for (vectype);
7929 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7930 }
7931 else
7932 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7933 }
7934
7935 tree vec_oprnd1 = NULL_TREE;
7936 tree vec_oprnd2 = NULL_TREE;
7937 tree vec_oprnd3 = NULL_TREE;
7938 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7939 tree dataref_offset = build_int_cst (ref_type, 0);
7940 tree bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info,
7941 aggr_type: vectype, memory_access_type: VMAT_CONTIGUOUS);
7942 tree ldataref_ptr = NULL_TREE;
7943 tree orig = NULL_TREE;
7944 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7945 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7946 auto_vec<tree> vec_oprnds1;
7947 auto_vec<tree> vec_oprnds2;
7948 auto_vec<tree> vec_oprnds3;
7949 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7950 op0: *init, vec_oprnds0: &vec_oprnds1,
7951 op1: ldataref_ptr == NULL ? rhs1 : NULL, vec_oprnds1: &vec_oprnds2,
7952 op2: rhs2, vec_oprnds2: &vec_oprnds3);
7953 for (int j = 0; j < ncopies; j++)
7954 {
7955 vec_oprnd1 = vec_oprnds1[j];
7956 if (ldataref_ptr == NULL)
7957 vec_oprnd2 = vec_oprnds2[j];
7958 vec_oprnd3 = vec_oprnds3[j];
7959 if (j == 0)
7960 orig = vec_oprnd3;
7961 else if (!inscan_var_store)
7962 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7963
7964 if (ldataref_ptr)
7965 {
7966 vec_oprnd2 = make_ssa_name (var: vectype);
7967 tree data_ref = fold_build2 (MEM_REF, vectype,
7968 unshare_expr (ldataref_ptr),
7969 dataref_offset);
7970 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7971 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7972 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: g, gsi);
7973 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: g);
7974 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7975 }
7976
7977 tree v = vec_oprnd2;
7978 for (int i = 0; i < units_log2; ++i)
7979 {
7980 tree new_temp = make_ssa_name (var: vectype);
7981 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7982 (zero_vec
7983 && (use_whole_vector[i]
7984 != scan_store_kind_perm))
7985 ? zero_vec : vec_oprnd1, v,
7986 perms[i]);
7987 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: g, gsi);
7988 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: g);
7989 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7990
7991 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7992 {
7993 /* The whole-vector shift shifted in zeros, but if *init
7994 is not initializer_zerop we need to replace those elements
7995 with elements from vec_oprnd1. */
7996 tree_vector_builder vb (masktype, nunits, 1);
7997 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7998 vb.quick_push (obj: k < (HOST_WIDE_INT_1U << i)
7999 ? boolean_false_node : boolean_true_node);
8000
8001 tree new_temp2 = make_ssa_name (var: vectype);
8002 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
8003 new_temp, vec_oprnd1);
8004 vect_finish_stmt_generation (vinfo, stmt_info,
8005 vec_stmt: g, gsi);
8006 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: g);
8007 new_temp = new_temp2;
8008 }
8009
8010 /* For exclusive scan, perform the perms[i] permutation once
8011 more. */
8012 if (i == 0
8013 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
8014 && v == vec_oprnd2)
8015 {
8016 v = new_temp;
8017 --i;
8018 continue;
8019 }
8020
8021 tree new_temp2 = make_ssa_name (var: vectype);
8022 g = gimple_build_assign (new_temp2, code, v, new_temp);
8023 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: g, gsi);
8024 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: g);
8025
8026 v = new_temp2;
8027 }
8028
8029 tree new_temp = make_ssa_name (var: vectype);
8030 gimple *g = gimple_build_assign (new_temp, code, orig, v);
8031 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: g, gsi);
8032 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: g);
8033
8034 tree last_perm_arg = new_temp;
8035 /* For exclusive scan, new_temp computed above is the exclusive scan
8036 prefix sum. Turn it into inclusive prefix sum for the broadcast
8037 of the last element into orig. */
8038 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
8039 {
8040 last_perm_arg = make_ssa_name (var: vectype);
8041 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
8042 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: g, gsi);
8043 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: g);
8044 }
8045
8046 orig = make_ssa_name (var: vectype);
8047 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
8048 last_perm_arg, perms[units_log2]);
8049 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: g, gsi);
8050 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: g);
8051
8052 if (!inscan_var_store)
8053 {
8054 tree data_ref = fold_build2 (MEM_REF, vectype,
8055 unshare_expr (dataref_ptr),
8056 dataref_offset);
8057 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8058 g = gimple_build_assign (data_ref, new_temp);
8059 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: g, gsi);
8060 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: g);
8061 }
8062 }
8063
8064 if (inscan_var_store)
8065 for (int j = 0; j < ncopies; j++)
8066 {
8067 if (j != 0)
8068 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8069
8070 tree data_ref = fold_build2 (MEM_REF, vectype,
8071 unshare_expr (dataref_ptr),
8072 dataref_offset);
8073 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8074 gimple *g = gimple_build_assign (data_ref, orig);
8075 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: g, gsi);
8076 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: g);
8077 }
8078 return true;
8079}
8080
8081
8082/* Function vectorizable_store.
8083
8084 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
8085 that can be vectorized.
8086 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8087 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8088 Return true if STMT_INFO is vectorizable in this way. */
8089
8090static bool
8091vectorizable_store (vec_info *vinfo,
8092 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8093 gimple **vec_stmt, slp_tree slp_node,
8094 stmt_vector_for_cost *cost_vec)
8095{
8096 tree data_ref;
8097 tree vec_oprnd = NULL_TREE;
8098 tree elem_type;
8099 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
8100 class loop *loop = NULL;
8101 machine_mode vec_mode;
8102 tree dummy;
8103 enum vect_def_type rhs_dt = vect_unknown_def_type;
8104 enum vect_def_type mask_dt = vect_unknown_def_type;
8105 tree dataref_ptr = NULL_TREE;
8106 tree dataref_offset = NULL_TREE;
8107 gimple *ptr_incr = NULL;
8108 int ncopies;
8109 int j;
8110 stmt_vec_info first_stmt_info;
8111 bool grouped_store;
8112 unsigned int group_size, i;
8113 bool slp = (slp_node != NULL);
8114 unsigned int vec_num;
8115 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo);
8116 tree aggr_type;
8117 gather_scatter_info gs_info;
8118 poly_uint64 vf;
8119 vec_load_store_type vls_type;
8120 tree ref_type;
8121
8122 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8123 return false;
8124
8125 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8126 && ! vec_stmt)
8127 return false;
8128
8129 /* Is vectorizable store? */
8130
8131 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8132 slp_tree mask_node = NULL;
8133 if (gassign *assign = dyn_cast <gassign *> (p: stmt_info->stmt))
8134 {
8135 tree scalar_dest = gimple_assign_lhs (gs: assign);
8136 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
8137 && is_pattern_stmt_p (stmt_info))
8138 scalar_dest = TREE_OPERAND (scalar_dest, 0);
8139 if (TREE_CODE (scalar_dest) != ARRAY_REF
8140 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
8141 && TREE_CODE (scalar_dest) != INDIRECT_REF
8142 && TREE_CODE (scalar_dest) != COMPONENT_REF
8143 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
8144 && TREE_CODE (scalar_dest) != REALPART_EXPR
8145 && TREE_CODE (scalar_dest) != MEM_REF)
8146 return false;
8147 }
8148 else
8149 {
8150 gcall *call = dyn_cast <gcall *> (p: stmt_info->stmt);
8151 if (!call || !gimple_call_internal_p (gs: call))
8152 return false;
8153
8154 internal_fn ifn = gimple_call_internal_fn (gs: call);
8155 if (!internal_store_fn_p (ifn))
8156 return false;
8157
8158 int mask_index = internal_fn_mask_index (ifn);
8159 if (mask_index >= 0 && slp_node)
8160 mask_index = vect_slp_child_index_for_operand
8161 (call, op: mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8162 if (mask_index >= 0
8163 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
8164 mask: &mask, mask_node: &mask_node, mask_dt_out: &mask_dt,
8165 mask_vectype_out: &mask_vectype))
8166 return false;
8167 }
8168
8169 /* Cannot have hybrid store SLP -- that would mean storing to the
8170 same location twice. */
8171 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
8172
8173 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
8174 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (node: vectype);
8175
8176 if (loop_vinfo)
8177 {
8178 loop = LOOP_VINFO_LOOP (loop_vinfo);
8179 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8180 }
8181 else
8182 vf = 1;
8183
8184 /* Multiple types in SLP are handled by creating the appropriate number of
8185 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8186 case of SLP. */
8187 if (slp)
8188 ncopies = 1;
8189 else
8190 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8191
8192 gcc_assert (ncopies >= 1);
8193
8194 /* FORNOW. This restriction should be relaxed. */
8195 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
8196 {
8197 if (dump_enabled_p ())
8198 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8199 "multiple types in nested loop.\n");
8200 return false;
8201 }
8202
8203 tree op;
8204 slp_tree op_node;
8205 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
8206 rhs: &op, rhs_node: &op_node, rhs_dt_out: &rhs_dt, rhs_vectype_out: &rhs_vectype, vls_type_out: &vls_type))
8207 return false;
8208
8209 elem_type = TREE_TYPE (vectype);
8210 vec_mode = TYPE_MODE (vectype);
8211
8212 if (!STMT_VINFO_DATA_REF (stmt_info))
8213 return false;
8214
8215 vect_memory_access_type memory_access_type;
8216 enum dr_alignment_support alignment_support_scheme;
8217 int misalignment;
8218 poly_int64 poffset;
8219 internal_fn lanes_ifn;
8220 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, masked_p: mask, vls_type,
8221 ncopies, memory_access_type: &memory_access_type, poffset: &poffset,
8222 alignment_support_scheme: &alignment_support_scheme, misalignment: &misalignment, gs_info: &gs_info,
8223 lanes_ifn: &lanes_ifn))
8224 return false;
8225
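  /* For a masked store, verify that the target supports the chosen access
     method.  */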
8226 if (mask)
8227 {
8228 if (memory_access_type == VMAT_CONTIGUOUS)
8229 {
8230 if (!VECTOR_MODE_P (vec_mode)
8231 || !can_vec_mask_load_store_p (vec_mode,
8232 TYPE_MODE (mask_vectype), false))
8233 return false;
8234 }
8235 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8236 && (memory_access_type != VMAT_GATHER_SCATTER
8237 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
8238 {
8239 if (dump_enabled_p ())
8240 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8241 "unsupported access type for masked store.\n");
8242 return false;
8243 }
8244 else if (memory_access_type == VMAT_GATHER_SCATTER
8245 && gs_info.ifn == IFN_LAST
8246 && !gs_info.decl)
8247 {
8248 if (dump_enabled_p ())
8249 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8250 "unsupported masked emulated scatter.\n");
8251 return false;
8252 }
8253 }
8254 else
8255 {
8256       /* FORNOW. In some cases we can vectorize even if the data type is not
8257 	 supported (e.g. array initialization with 0).  */
8258 if (optab_handler (op: mov_optab, mode: vec_mode) == CODE_FOR_nothing)
8259 return false;
8260 }
8261
8262 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8263 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
8264 && memory_access_type != VMAT_GATHER_SCATTER
8265 && (slp || memory_access_type != VMAT_CONTIGUOUS));
8266 if (grouped_store)
8267 {
8268 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8269 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8270 group_size = DR_GROUP_SIZE (first_stmt_info);
8271 }
8272 else
8273 {
8274 first_stmt_info = stmt_info;
8275 first_dr_info = dr_info;
8276 group_size = vec_num = 1;
8277 }
8278
8279 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
8280 {
8281 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
8282 memory_access_type))
8283 return false;
8284 }
8285
8286 bool costing_p = !vec_stmt;
8287 if (costing_p) /* transformation not required. */
8288 {
8289 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8290
8291 if (loop_vinfo
8292 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8293 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
8294 vls_type, group_size,
8295 memory_access_type, gs_info: &gs_info,
8296 scalar_mask: mask);
8297
8298 if (slp_node
8299 && (!vect_maybe_update_slp_op_vectype (op_node, vectype)
8300 || (mask
8301 && !vect_maybe_update_slp_op_vectype (mask_node,
8302 mask_vectype))))
8303 {
8304 if (dump_enabled_p ())
8305 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8306 "incompatible vector types for invariants\n");
8307 return false;
8308 }
8309
8310 if (dump_enabled_p ()
8311 && memory_access_type != VMAT_ELEMENTWISE
8312 && memory_access_type != VMAT_GATHER_SCATTER
8313 && alignment_support_scheme != dr_aligned)
8314 dump_printf_loc (MSG_NOTE, vect_location,
8315 "Vectorizing an unaligned access.\n");
8316
8317 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
8318
8319      /* As function vect_transform_stmt shows, for interleaving stores
8320 	 the whole chain is vectorized when the last store in the chain
8321 	 is reached; the other stores in the group are skipped.  So we
8322 	 would like to cost only the last one here, but since it is not
8323 	 trivial to get hold of the last one and costing the first one
8324 	 is equivalent, cost the first one instead.  */
8325 if (grouped_store
8326 && !slp
8327 && first_stmt_info != stmt_info)
8328 return true;
8329 }
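  /* The access type computed above must agree with the one recorded during
     analysis.  */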
8330 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8331
8332 /* Transform. */
8333
8334 ensure_base_align (dr_info);
8335
8336 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
8337 {
8338 gcc_assert (memory_access_type == VMAT_CONTIGUOUS);
8339 gcc_assert (!slp);
8340 if (costing_p)
8341 {
8342 unsigned int inside_cost = 0, prologue_cost = 0;
8343 if (vls_type == VLS_STORE_INVARIANT)
8344 prologue_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: scalar_to_vec,
8345 stmt_info, misalign: 0, where: vect_prologue);
8346 vect_get_store_cost (vinfo, stmt_info, ncopies,
8347 alignment_support_scheme, misalignment,
8348 inside_cost: &inside_cost, body_cost_vec: cost_vec);
8349
8350 if (dump_enabled_p ())
8351 dump_printf_loc (MSG_NOTE, vect_location,
8352 "vect_model_store_cost: inside_cost = %d, "
8353 "prologue_cost = %d .\n",
8354 inside_cost, prologue_cost);
8355
8356 return true;
8357 }
8358 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
8359 }
8360
8361 if (grouped_store)
8362 {
8363 /* FORNOW */
8364 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
8365
8366 if (slp)
8367 {
8368 grouped_store = false;
8369 /* VEC_NUM is the number of vect stmts to be created for this
8370 group. */
8371 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8372 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8373 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
8374 == first_stmt_info);
8375 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8376 op = vect_get_store_rhs (stmt_info: first_stmt_info);
8377 }
8378 else
8379 /* VEC_NUM is the number of vect stmts to be created for this
8380 group. */
8381 vec_num = group_size;
8382
8383 ref_type = get_group_alias_ptr_type (first_stmt_info);
8384 }
8385 else
8386 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8387
8388 if (!costing_p && dump_enabled_p ())
8389 dump_printf_loc (MSG_NOTE, vect_location, "transform store. ncopies = %d\n",
8390 ncopies);
8391
8392   /* Check whether we need to update the prologue cost for an invariant
8393      stored value, and update it accordingly if so.  For a
8394      non-interleaving store we can simply check vls_type; but for an
8395      interleaving store we need to check the def_type of the stored
8396      value, since the current vls_type only describes
8397      first_stmt_info.  */
8398 auto update_prologue_cost = [&](unsigned *prologue_cost, tree store_rhs)
8399 {
8400 gcc_assert (costing_p);
8401 if (slp)
8402 return;
8403 if (grouped_store)
8404 {
8405 gcc_assert (store_rhs);
8406 enum vect_def_type cdt;
8407 gcc_assert (vect_is_simple_use (store_rhs, vinfo, &cdt));
8408 if (cdt != vect_constant_def && cdt != vect_external_def)
8409 return;
8410 }
8411 else if (vls_type != VLS_STORE_INVARIANT)
8412 return;
8413 *prologue_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: scalar_to_vec, stmt_info,
8414 misalign: 0, where: vect_prologue);
8415 };
8416
8417 if (memory_access_type == VMAT_ELEMENTWISE
8418 || memory_access_type == VMAT_STRIDED_SLP)
8419 {
8420 unsigned inside_cost = 0, prologue_cost = 0;
8421 gimple_stmt_iterator incr_gsi;
8422 bool insert_after;
8423 gimple *incr;
8424 tree offvar;
8425 tree ivstep;
8426 tree running_off;
8427 tree stride_base, stride_step, alias_off;
8428 tree vec_oprnd = NULL_TREE;
8429 tree dr_offset;
8430 unsigned int g;
8431 /* Checked by get_load_store_type. */
8432 unsigned int const_nunits = nunits.to_constant ();
8433
8434 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8435 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
8436
8437 dr_offset = get_dr_vinfo_offset (vinfo, dr_info: first_dr_info);
8438 stride_base
8439 = fold_build_pointer_plus
8440 (DR_BASE_ADDRESS (first_dr_info->dr),
8441 size_binop (PLUS_EXPR,
8442 convert_to_ptrofftype (dr_offset),
8443 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8444 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8445
8446      /* For a store with a loop-invariant (but non-power-of-2)
8447 	 stride (i.e. not a grouped access) like so:
8448
8449 for (i = 0; i < n; i += stride)
8450 array[i] = ...;
8451
8452 we generate a new induction variable and new stores from
8453 the components of the (vectorized) rhs:
8454
8455 for (j = 0; ; j += VF*stride)
8456 vectemp = ...;
8457 tmp1 = vectemp[0];
8458 array[j] = tmp1;
8459 tmp2 = vectemp[1];
8460 array[j + stride] = tmp2;
8461 ...
8462 */
8463
8464 unsigned nstores = const_nunits;
8465 unsigned lnel = 1;
8466 tree ltype = elem_type;
8467 tree lvectype = vectype;
8468 if (slp)
8469 {
8470 if (group_size < const_nunits
8471 && const_nunits % group_size == 0)
8472 {
8473 nstores = const_nunits / group_size;
8474 lnel = group_size;
8475 ltype = build_vector_type (elem_type, group_size);
8476 lvectype = vectype;
8477
8478 	      /* First check whether the vec_extract optab can extract such
8479 		 GROUP_SIZE-element sub-vectors directly; if not, try the integer fallback below.  */
8480 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
8481 machine_mode vmode;
8482 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8483 || !related_vector_mode (TYPE_MODE (vectype), elmode,
8484 group_size).exists (mode: &vmode)
8485 || (convert_optab_handler (op: vec_extract_optab,
8486 TYPE_MODE (vectype), from_mode: vmode)
8487 == CODE_FOR_nothing))
8488 {
8489 /* Try to avoid emitting an extract of vector elements
8490 by performing the extracts using an integer type of the
8491 same size, extracting from a vector of those and then
8492 re-interpreting it as the original vector type if
8493 supported. */
8494 unsigned lsize
8495 = group_size * GET_MODE_BITSIZE (mode: elmode);
8496 unsigned int lnunits = const_nunits / group_size;
8497 /* If we can't construct such a vector fall back to
8498 element extracts from the original vector type and
8499 element size stores. */
8500 if (int_mode_for_size (size: lsize, limit: 0).exists (mode: &elmode)
8501 && VECTOR_MODE_P (TYPE_MODE (vectype))
8502 && related_vector_mode (TYPE_MODE (vectype), elmode,
8503 lnunits).exists (mode: &vmode)
8504 && (convert_optab_handler (op: vec_extract_optab,
8505 to_mode: vmode, from_mode: elmode)
8506 != CODE_FOR_nothing))
8507 {
8508 nstores = lnunits;
8509 lnel = group_size;
8510 ltype = build_nonstandard_integer_type (lsize, 1);
8511 lvectype = build_vector_type (ltype, nstores);
8512 }
8513 /* Else fall back to vector extraction anyway.
8514 Fewer stores are more important than avoiding spilling
8515 of the vector we extract from. Compared to the
8516 construction case in vectorizable_load no store-forwarding
8517 issue exists here for reasonable archs. */
8518 }
8519 }
8520 else if (group_size >= const_nunits
8521 && group_size % const_nunits == 0)
8522 {
8523 int mis_align = dr_misalignment (dr_info: first_dr_info, vectype);
8524 dr_alignment_support dr_align
8525 = vect_supportable_dr_alignment (vinfo, dr_info, vectype,
8526 mis_align);
8527 if (dr_align == dr_aligned
8528 || dr_align == dr_unaligned_supported)
8529 {
8530 nstores = 1;
8531 lnel = const_nunits;
8532 ltype = vectype;
8533 lvectype = vectype;
8534 alignment_support_scheme = dr_align;
8535 misalignment = mis_align;
8536 }
8537 }
8538 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
8539 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8540 }
8541
8542 if (!costing_p)
8543 {
8544 ivstep = stride_step;
8545 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8546 build_int_cst (TREE_TYPE (ivstep), vf));
8547
8548 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8549
8550 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8551 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8552 create_iv (stride_base, PLUS_EXPR, ivstep, NULL, loop, &incr_gsi,
8553 insert_after, &offvar, NULL);
8554 incr = gsi_stmt (i: incr_gsi);
8555
8556 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8557 }
8558
8559 alias_off = build_int_cst (ref_type, 0);
8560 stmt_vec_info next_stmt_info = first_stmt_info;
8561 auto_vec<tree> vec_oprnds;
8562     /* For costing some adjacent vector stores, we'd like to cost them once
8563        with their total number instead of costing each one individually.  */
8564 unsigned int n_adjacent_stores = 0;
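      /* Generate (or cost) the stores group member by group member; with SLP
	 a single iteration covers the whole group.  */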
8565 for (g = 0; g < group_size; g++)
8566 {
8567 running_off = offvar;
8568 if (!costing_p)
8569 {
8570 if (g)
8571 {
8572 tree size = TYPE_SIZE_UNIT (ltype);
8573 tree pos
8574 = fold_build2 (MULT_EXPR, sizetype, size_int (g), size);
8575 tree newoff = copy_ssa_name (var: running_off, NULL);
8576 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8577 running_off, pos);
8578 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: incr, gsi);
8579 running_off = newoff;
8580 }
8581 }
8582 if (!slp)
8583 op = vect_get_store_rhs (stmt_info: next_stmt_info);
8584 if (!costing_p)
8585 vect_get_vec_defs (vinfo, stmt_info: next_stmt_info, slp_node, ncopies, op0: op,
8586 vec_oprnds0: &vec_oprnds);
8587 else
8588 update_prologue_cost (&prologue_cost, op);
8589 unsigned int group_el = 0;
8590 unsigned HOST_WIDE_INT
8591 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8592 for (j = 0; j < ncopies; j++)
8593 {
8594 if (!costing_p)
8595 {
8596 vec_oprnd = vec_oprnds[j];
8597 /* Pun the vector to extract from if necessary. */
8598 if (lvectype != vectype)
8599 {
8600 tree tem = make_ssa_name (var: lvectype);
8601 tree cvt
8602 = build1 (VIEW_CONVERT_EXPR, lvectype, vec_oprnd);
8603 gimple *pun = gimple_build_assign (tem, cvt);
8604 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: pun, gsi);
8605 vec_oprnd = tem;
8606 }
8607 }
8608 for (i = 0; i < nstores; i++)
8609 {
8610 if (costing_p)
8611 {
8612 		  /* Vector extraction is only needed when there is more
8613 		     than one store.  */
8614 if (nstores > 1)
8615 inside_cost
8616 += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: vec_to_scalar,
8617 stmt_info, misalign: 0, where: vect_body);
8618 		  /* Treat a single-lane vector type store as a scalar
8619 		     store to avoid ICEs like PR110776.  */
8620 if (VECTOR_TYPE_P (ltype)
8621 && known_ne (TYPE_VECTOR_SUBPARTS (ltype), 1U))
8622 n_adjacent_stores++;
8623 else
8624 inside_cost
8625 += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: scalar_store,
8626 stmt_info, misalign: 0, where: vect_body);
8627 continue;
8628 }
8629 tree newref, newoff;
8630 gimple *incr, *assign;
8631 tree size = TYPE_SIZE (ltype);
8632 /* Extract the i'th component. */
8633 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8634 bitsize_int (i), size);
8635 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8636 size, pos);
8637
8638 elem = force_gimple_operand_gsi (gsi, elem, true,
8639 NULL_TREE, true,
8640 GSI_SAME_STMT);
8641
8642 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8643 group_el * elsz);
8644 newref = build2 (MEM_REF, ltype,
8645 running_off, this_off);
8646 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8647
8648 /* And store it to *running_off. */
8649 assign = gimple_build_assign (newref, elem);
8650 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: assign, gsi);
8651
8652 group_el += lnel;
8653 if (! slp
8654 || group_el == group_size)
8655 {
8656 newoff = copy_ssa_name (var: running_off, NULL);
8657 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8658 running_off, stride_step);
8659 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: incr, gsi);
8660
8661 running_off = newoff;
8662 group_el = 0;
8663 }
8664 if (g == group_size - 1
8665 && !slp)
8666 {
8667 if (j == 0 && i == 0)
8668 *vec_stmt = assign;
8669 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: assign);
8670 }
8671 }
8672 }
8673 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8674 vec_oprnds.truncate(size: 0);
8675 if (slp)
8676 break;
8677 }
8678
8679 if (costing_p)
8680 {
8681 if (n_adjacent_stores > 0)
8682 vect_get_store_cost (vinfo, stmt_info, ncopies: n_adjacent_stores,
8683 alignment_support_scheme, misalignment,
8684 inside_cost: &inside_cost, body_cost_vec: cost_vec);
8685 if (dump_enabled_p ())
8686 dump_printf_loc (MSG_NOTE, vect_location,
8687 "vect_model_store_cost: inside_cost = %d, "
8688 "prologue_cost = %d .\n",
8689 inside_cost, prologue_cost);
8690 }
8691
8692 return true;
8693 }
8694
8695 gcc_assert (alignment_support_scheme);
8696 vec_loop_masks *loop_masks
8697 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8698 ? &LOOP_VINFO_MASKS (loop_vinfo)
8699 : NULL);
8700 vec_loop_lens *loop_lens
8701 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8702 ? &LOOP_VINFO_LENS (loop_vinfo)
8703 : NULL);
8704
8705   /* Both vect_analyze_stmt and vect_transform_stmt reach this point, with
8706      one difference: during analysis both lens and masks may be enabled,
8707      whereas during transform at most one of them can be.  We should not
8708      use the length-based approach if the loop is fully masked.  */
8709   if (cost_vec == NULL)
8710     /* The cost_vec is NULL during transform.  */
8711 gcc_assert ((!loop_lens || !loop_masks));
8712
8713 /* Targets with store-lane instructions must not require explicit
8714 realignment. vect_supportable_dr_alignment always returns either
8715 dr_aligned or dr_unaligned_supported for masked operations. */
8716 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8717 && !mask
8718 && !loop_masks)
8719 || alignment_support_scheme == dr_aligned
8720 || alignment_support_scheme == dr_unaligned_supported);
8721
8722 tree offset = NULL_TREE;
8723 if (!known_eq (poffset, 0))
8724 offset = size_int (poffset);
8725
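  /* Determine the type of a single vector access (AGGR_TYPE) and the step
     (BUMP) by which the data-ref pointer is advanced between accesses.
     For a gather/scatter data-ref the addresses are computed from the
     offset vector instead.  */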
8726 tree bump;
8727 tree vec_offset = NULL_TREE;
8728 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8729 {
8730 aggr_type = NULL_TREE;
8731 bump = NULL_TREE;
8732 }
8733 else if (memory_access_type == VMAT_GATHER_SCATTER)
8734 {
8735 aggr_type = elem_type;
8736 if (!costing_p)
8737 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, gs_info: &gs_info,
8738 dataref_bump: &bump, vec_offset: &vec_offset, loop_lens);
8739 }
8740 else
8741 {
8742 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8743 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8744 else
8745 aggr_type = vectype;
8746 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
8747 memory_access_type, loop_lens);
8748 }
8749
8750 if (mask && !costing_p)
8751 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8752
8753   /* In case the vectorization factor (VF) is bigger than the number
8754      of elements that we can fit in a vectype (nunits), we have to generate
8755      more than one vector stmt, i.e. we need to "unroll" the
8756      vector stmt by a factor of VF/nunits.  */
8757
8758 /* In case of interleaving (non-unit grouped access):
8759
8760 S1: &base + 2 = x2
8761 S2: &base = x0
8762 S3: &base + 1 = x1
8763 S4: &base + 3 = x3
8764
8765 We create vectorized stores starting from base address (the access of the
8766 first stmt in the chain (S2 in the above example), when the last store stmt
8767 of the chain (S4) is reached:
8768
8769 VS1: &base = vx2
8770 VS2: &base + vec_size*1 = vx0
8771 VS3: &base + vec_size*2 = vx1
8772 VS4: &base + vec_size*3 = vx3
8773
8774 Then permutation statements are generated:
8775
8776 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8777 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8778 ...
8779
8780 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8781 (the order of the data-refs in the output of vect_permute_store_chain
8782 corresponds to the order of scalar stmts in the interleaving chain - see
8783 the documentation of vect_permute_store_chain()).
8784
8785 In case of both multiple types and interleaving, above vector stores and
8786 permutation stmts are created for every copy. The result vector stmts are
8787 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8788 STMT_VINFO_RELATED_STMT for the next copies.
8789 */
8790
8791 auto_vec<tree> dr_chain (group_size);
8792 auto_vec<tree> vec_masks;
8793 tree vec_mask = NULL;
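  /* GVEC_OPRNDS[I] collects the vectorized defs (for all copies) of the
     value stored by the I-th member of the store group.  */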
8794 auto_delete_vec<auto_vec<tree>> gvec_oprnds (group_size);
8795 for (i = 0; i < group_size; i++)
8796 gvec_oprnds.quick_push (obj: new auto_vec<tree> ());
8797
8798 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8799 {
8800 gcc_assert (!slp && grouped_store);
8801 unsigned inside_cost = 0, prologue_cost = 0;
8802       /* For costing some adjacent vector stores, we'd like to cost them once
8803 	 with their total number instead of costing each one individually.  */
8804 unsigned int n_adjacent_stores = 0;
8805 for (j = 0; j < ncopies; j++)
8806 {
8807 gimple *new_stmt;
8808 if (j == 0)
8809 {
8810 /* For interleaved stores we collect vectorized defs for all
8811 the stores in the group in DR_CHAIN. DR_CHAIN is then used
8812 as an input to vect_permute_store_chain(). */
8813 stmt_vec_info next_stmt_info = first_stmt_info;
8814 for (i = 0; i < group_size; i++)
8815 {
8816 /* Since gaps are not supported for interleaved stores,
8817 DR_GROUP_SIZE is the exact number of stmts in the
8818 chain. Therefore, NEXT_STMT_INFO can't be NULL_TREE. */
8819 op = vect_get_store_rhs (stmt_info: next_stmt_info);
8820 if (costing_p)
8821 update_prologue_cost (&prologue_cost, op);
8822 else
8823 {
8824 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: next_stmt_info,
8825 ncopies, op,
8826 vec_oprnds: gvec_oprnds[i]);
8827 vec_oprnd = (*gvec_oprnds[i])[0];
8828 dr_chain.quick_push (obj: vec_oprnd);
8829 }
8830 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8831 }
8832
8833 if (!costing_p)
8834 {
8835 if (mask)
8836 {
8837 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info, ncopies,
8838 op: mask, vec_oprnds: &vec_masks,
8839 vectype: mask_vectype);
8840 vec_mask = vec_masks[0];
8841 }
8842
8843 		  /* We should have caught mismatched types earlier.  */
8844 gcc_assert (
8845 useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd)));
8846 dataref_ptr
8847 = vect_create_data_ref_ptr (vinfo, first_stmt_info,
8848 aggr_type, NULL, offset, &dummy,
8849 gsi, &ptr_incr, false, bump);
8850 }
8851 }
8852 else if (!costing_p)
8853 {
8854 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
8855 /* DR_CHAIN is then used as an input to
8856 vect_permute_store_chain(). */
8857 for (i = 0; i < group_size; i++)
8858 {
8859 vec_oprnd = (*gvec_oprnds[i])[j];
8860 dr_chain[i] = vec_oprnd;
8861 }
8862 if (mask)
8863 vec_mask = vec_masks[j];
8864 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8865 stmt_info, bump);
8866 }
8867
8868 if (costing_p)
8869 {
8870 n_adjacent_stores += vec_num;
8871 continue;
8872 }
8873
8874 /* Get an array into which we can store the individual vectors. */
8875 tree vec_array = create_vector_array (elem_type: vectype, nelems: vec_num);
8876
8877 /* Invalidate the current contents of VEC_ARRAY. This should
8878 become an RTL clobber too, which prevents the vector registers
8879 from being upward-exposed. */
8880 vect_clobber_variable (vinfo, stmt_info, gsi, var: vec_array);
8881
8882 /* Store the individual vectors into the array. */
8883 for (i = 0; i < vec_num; i++)
8884 {
8885 vec_oprnd = dr_chain[i];
8886 write_vector_array (vinfo, stmt_info, gsi, vect: vec_oprnd, array: vec_array,
8887 n: i);
8888 }
8889
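	  /* Compute the mask and/or length controlling this copy of the
	     store-lanes access, if any.  */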
8890 tree final_mask = NULL;
8891 tree final_len = NULL;
8892 tree bias = NULL;
8893 if (loop_masks)
8894 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
8895 ncopies, vectype, j);
8896 if (vec_mask)
8897 final_mask = prepare_vec_mask (loop_vinfo, mask_type: mask_vectype, loop_mask: final_mask,
8898 vec_mask, gsi);
8899
8900 if (lanes_ifn == IFN_MASK_LEN_STORE_LANES)
8901 {
8902 if (loop_lens)
8903 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
8904 ncopies, vectype, j, 1);
8905 else
8906 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8907 signed char biasval
8908 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
8909 bias = build_int_cst (intQI_type_node, biasval);
8910 if (!final_mask)
8911 {
8912 mask_vectype = truth_type_for (vectype);
8913 final_mask = build_minus_one_cst (mask_vectype);
8914 }
8915 }
8916
8917 gcall *call;
8918 if (final_len && final_mask)
8919 {
8920 /* Emit:
8921 MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8922 LEN, BIAS, VEC_ARRAY). */
8923 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8924 tree alias_ptr = build_int_cst (ref_type, align);
8925 call = gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES, 6,
8926 dataref_ptr, alias_ptr,
8927 final_mask, final_len, bias,
8928 vec_array);
8929 }
8930 else if (final_mask)
8931 {
8932 /* Emit:
8933 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8934 VEC_ARRAY). */
8935 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8936 tree alias_ptr = build_int_cst (ref_type, align);
8937 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8938 dataref_ptr, alias_ptr,
8939 final_mask, vec_array);
8940 }
8941 else
8942 {
8943 /* Emit:
8944 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8945 data_ref = create_array_ref (type: aggr_type, ptr: dataref_ptr, alias_ptr_type: ref_type);
8946 call = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
8947 gimple_call_set_lhs (gs: call, lhs: data_ref);
8948 }
8949 gimple_call_set_nothrow (s: call, nothrow_p: true);
8950 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi);
8951 new_stmt = call;
8952
8953 /* Record that VEC_ARRAY is now dead. */
8954 vect_clobber_variable (vinfo, stmt_info, gsi, var: vec_array);
8955 if (j == 0)
8956 *vec_stmt = new_stmt;
8957 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
8958 }
8959
8960 if (costing_p)
8961 {
8962 if (n_adjacent_stores > 0)
8963 vect_get_store_cost (vinfo, stmt_info, ncopies: n_adjacent_stores,
8964 alignment_support_scheme, misalignment,
8965 inside_cost: &inside_cost, body_cost_vec: cost_vec);
8966 if (dump_enabled_p ())
8967 dump_printf_loc (MSG_NOTE, vect_location,
8968 "vect_model_store_cost: inside_cost = %d, "
8969 "prologue_cost = %d .\n",
8970 inside_cost, prologue_cost);
8971 }
8972
8973 return true;
8974 }
8975
8976 if (memory_access_type == VMAT_GATHER_SCATTER)
8977 {
8978 gcc_assert (!grouped_store);
8979 auto_vec<tree> vec_offsets;
8980 unsigned int inside_cost = 0, prologue_cost = 0;
8981 for (j = 0; j < ncopies; j++)
8982 {
8983 gimple *new_stmt;
8984 if (j == 0)
8985 {
8986 if (costing_p && vls_type == VLS_STORE_INVARIANT)
8987 prologue_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: scalar_to_vec,
8988 stmt_info, misalign: 0, where: vect_prologue);
8989 else if (!costing_p)
8990 {
8991 /* Since the store is not grouped, DR_GROUP_SIZE is 1, and
8992 DR_CHAIN is of size 1. */
8993 gcc_assert (group_size == 1);
8994 if (slp_node)
8995 vect_get_slp_defs (op_node, gvec_oprnds[0]);
8996 else
8997 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: first_stmt_info,
8998 ncopies, op, vec_oprnds: gvec_oprnds[0]);
8999 if (mask)
9000 {
9001 if (slp_node)
9002 vect_get_slp_defs (mask_node, &vec_masks);
9003 else
9004 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info,
9005 ncopies,
9006 op: mask, vec_oprnds: &vec_masks,
9007 vectype: mask_vectype);
9008 }
9009
9010 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9011 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
9012 slp_node, gs_info: &gs_info,
9013 dataref_ptr: &dataref_ptr, vec_offset: &vec_offsets);
9014 else
9015 dataref_ptr
9016 = vect_create_data_ref_ptr (vinfo, first_stmt_info,
9017 aggr_type, NULL, offset,
9018 &dummy, gsi, &ptr_incr, false,
9019 bump);
9020 }
9021 }
9022 else if (!costing_p)
9023 {
9024 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
9025 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9026 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9027 gsi, stmt_info, bump);
9028 }
9029
9030 new_stmt = NULL;
9031 for (i = 0; i < vec_num; ++i)
9032 {
9033 if (!costing_p)
9034 {
9035 vec_oprnd = (*gvec_oprnds[0])[vec_num * j + i];
9036 if (mask)
9037 vec_mask = vec_masks[vec_num * j + i];
9038 	      /* We should have caught mismatched types earlier.  */
9039 gcc_assert (useless_type_conversion_p (vectype,
9040 TREE_TYPE (vec_oprnd)));
9041 }
9042 unsigned HOST_WIDE_INT align;
9043 tree final_mask = NULL_TREE;
9044 tree final_len = NULL_TREE;
9045 tree bias = NULL_TREE;
9046 if (!costing_p)
9047 {
9048 if (loop_masks)
9049 final_mask = vect_get_loop_mask (loop_vinfo, gsi,
9050 loop_masks, ncopies,
9051 vectype, j);
9052 if (vec_mask)
9053 final_mask = prepare_vec_mask (loop_vinfo, mask_type: mask_vectype,
9054 loop_mask: final_mask, vec_mask, gsi);
9055 }
9056
9057 if (gs_info.ifn != IFN_LAST)
9058 {
9059 if (costing_p)
9060 {
9061 unsigned int cnunits = vect_nunits_for_cost (vec_type: vectype);
9062 inside_cost
9063 += record_stmt_cost (body_cost_vec: cost_vec, count: cnunits, kind: scalar_store,
9064 stmt_info, misalign: 0, where: vect_body);
9065 continue;
9066 }
9067
9068 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9069 vec_offset = vec_offsets[vec_num * j + i];
9070 tree scale = size_int (gs_info.scale);
9071
9072 if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
9073 {
9074 if (loop_lens)
9075 final_len = vect_get_loop_len (loop_vinfo, gsi,
9076 loop_lens, ncopies,
9077 vectype, j, 1);
9078 else
9079 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9080 signed char biasval
9081 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9082 bias = build_int_cst (intQI_type_node, biasval);
9083 if (!final_mask)
9084 {
9085 mask_vectype = truth_type_for (vectype);
9086 final_mask = build_minus_one_cst (mask_vectype);
9087 }
9088 }
9089
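		  /* Emit one of
		       MASK_LEN_SCATTER_STORE (PTR, OFFSET, SCALE, RHS, MASK,
						LEN, BIAS)
		       MASK_SCATTER_STORE (PTR, OFFSET, SCALE, RHS, MASK)
		       SCATTER_STORE (PTR, OFFSET, SCALE, RHS)
		     depending on which controls are required.  */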
9090 gcall *call;
9091 if (final_len && final_mask)
9092 call = gimple_build_call_internal
9093 (IFN_MASK_LEN_SCATTER_STORE, 7, dataref_ptr,
9094 vec_offset, scale, vec_oprnd, final_mask,
9095 final_len, bias);
9096 else if (final_mask)
9097 call = gimple_build_call_internal
9098 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr,
9099 vec_offset, scale, vec_oprnd, final_mask);
9100 else
9101 call = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
9102 dataref_ptr, vec_offset,
9103 scale, vec_oprnd);
9104 gimple_call_set_nothrow (s: call, nothrow_p: true);
9105 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi);
9106 new_stmt = call;
9107 }
9108 else if (gs_info.decl)
9109 {
9110 /* The builtin decls path for scatter is legacy, x86 only. */
9111 gcc_assert (nunits.is_constant ()
9112 && (!final_mask
9113 || SCALAR_INT_MODE_P
9114 (TYPE_MODE (TREE_TYPE (final_mask)))));
9115 if (costing_p)
9116 {
9117 unsigned int cnunits = vect_nunits_for_cost (vec_type: vectype);
9118 inside_cost
9119 += record_stmt_cost (body_cost_vec: cost_vec, count: cnunits, kind: scalar_store,
9120 stmt_info, misalign: 0, where: vect_body);
9121 continue;
9122 }
9123 poly_uint64 offset_nunits
9124 = TYPE_VECTOR_SUBPARTS (node: gs_info.offset_vectype);
9125 if (known_eq (nunits, offset_nunits))
9126 {
9127 new_stmt = vect_build_one_scatter_store_call
9128 (vinfo, stmt_info, gsi, gs_info: &gs_info,
9129 ptr: dataref_ptr, offset: vec_offsets[vec_num * j + i],
9130 oprnd: vec_oprnd, mask: final_mask);
9131 vect_finish_stmt_generation (vinfo, stmt_info,
9132 vec_stmt: new_stmt, gsi);
9133 }
9134 else if (known_eq (nunits, offset_nunits * 2))
9135 {
9136 		  /* We have an offset vector with half the number of
9137 lanes but the builtins will store full vectype
9138 data from the lower lanes. */
9139 new_stmt = vect_build_one_scatter_store_call
9140 (vinfo, stmt_info, gsi, gs_info: &gs_info,
9141 ptr: dataref_ptr,
9142 offset: vec_offsets[2 * vec_num * j + 2 * i],
9143 oprnd: vec_oprnd, mask: final_mask);
9144 vect_finish_stmt_generation (vinfo, stmt_info,
9145 vec_stmt: new_stmt, gsi);
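		      /* Shift the upper half of the data vector into the
			 lower lanes for the second builtin call; e.g. with
			 8 data lanes the selector built below is
			 { 4, 5, 6, 7, 4, 5, 6, 7 }.  */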
9146 int count = nunits.to_constant ();
9147 vec_perm_builder sel (count, count, 1);
9148 sel.quick_grow (len: count);
9149 for (int i = 0; i < count; ++i)
9150 sel[i] = i | (count / 2);
9151 vec_perm_indices indices (sel, 2, count);
9152 tree perm_mask
9153 = vect_gen_perm_mask_checked (vectype, indices);
9154 new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR,
9155 vec_oprnd, vec_oprnd,
9156 perm_mask);
9157 vec_oprnd = make_ssa_name (var: vectype);
9158 gimple_set_lhs (new_stmt, vec_oprnd);
9159 vect_finish_stmt_generation (vinfo, stmt_info,
9160 vec_stmt: new_stmt, gsi);
9161 if (final_mask)
9162 {
9163 new_stmt = gimple_build_assign (NULL_TREE,
9164 VEC_UNPACK_HI_EXPR,
9165 final_mask);
9166 final_mask = make_ssa_name
9167 (var: truth_type_for (gs_info.offset_vectype));
9168 gimple_set_lhs (new_stmt, final_mask);
9169 vect_finish_stmt_generation (vinfo, stmt_info,
9170 vec_stmt: new_stmt, gsi);
9171 }
9172 new_stmt = vect_build_one_scatter_store_call
9173 (vinfo, stmt_info, gsi, gs_info: &gs_info,
9174 ptr: dataref_ptr,
9175 offset: vec_offsets[2 * vec_num * j + 2 * i + 1],
9176 oprnd: vec_oprnd, mask: final_mask);
9177 vect_finish_stmt_generation (vinfo, stmt_info,
9178 vec_stmt: new_stmt, gsi);
9179 }
9180 else if (known_eq (nunits * 2, offset_nunits))
9181 {
9182 		  /* We have an offset vector with double the number of
9183 lanes. Select the low/high part accordingly. */
9184 vec_offset = vec_offsets[(vec_num * j + i) / 2];
9185 if ((vec_num * j + i) & 1)
9186 {
9187 int count = offset_nunits.to_constant ();
9188 vec_perm_builder sel (count, count, 1);
9189 sel.quick_grow (len: count);
9190 for (int i = 0; i < count; ++i)
9191 sel[i] = i | (count / 2);
9192 vec_perm_indices indices (sel, 2, count);
9193 tree perm_mask = vect_gen_perm_mask_checked
9194 (TREE_TYPE (vec_offset), indices);
9195 new_stmt = gimple_build_assign (NULL_TREE,
9196 VEC_PERM_EXPR,
9197 vec_offset,
9198 vec_offset,
9199 perm_mask);
9200 vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
9201 gimple_set_lhs (new_stmt, vec_offset);
9202 vect_finish_stmt_generation (vinfo, stmt_info,
9203 vec_stmt: new_stmt, gsi);
9204 }
9205 new_stmt = vect_build_one_scatter_store_call
9206 (vinfo, stmt_info, gsi, gs_info: &gs_info,
9207 ptr: dataref_ptr, offset: vec_offset,
9208 oprnd: vec_oprnd, mask: final_mask);
9209 vect_finish_stmt_generation (vinfo, stmt_info,
9210 vec_stmt: new_stmt, gsi);
9211 }
9212 else
9213 gcc_unreachable ();
9214 }
9215 else
9216 {
9217 /* Emulated scatter. */
9218 gcc_assert (!final_mask);
9219 if (costing_p)
9220 {
9221 unsigned int cnunits = vect_nunits_for_cost (vec_type: vectype);
9222 /* For emulated scatter N offset vector element extracts
9223 (we assume the scalar scaling and ptr + offset add is
9224 		     consumed by the store).  */
9225 inside_cost
9226 += record_stmt_cost (body_cost_vec: cost_vec, count: cnunits, kind: vec_to_scalar,
9227 stmt_info, misalign: 0, where: vect_body);
9228 /* N scalar stores plus extracting the elements. */
9229 inside_cost
9230 += record_stmt_cost (body_cost_vec: cost_vec, count: cnunits, kind: vec_to_scalar,
9231 stmt_info, misalign: 0, where: vect_body);
9232 inside_cost
9233 += record_stmt_cost (body_cost_vec: cost_vec, count: cnunits, kind: scalar_store,
9234 stmt_info, misalign: 0, where: vect_body);
9235 continue;
9236 }
9237
9238 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
9239 unsigned HOST_WIDE_INT const_offset_nunits
9240 = TYPE_VECTOR_SUBPARTS (node: gs_info.offset_vectype).to_constant ();
9241 vec<constructor_elt, va_gc> *ctor_elts;
9242 vec_alloc (v&: ctor_elts, nelems: const_nunits);
9243 gimple_seq stmts = NULL;
9244 tree elt_type = TREE_TYPE (vectype);
9245 unsigned HOST_WIDE_INT elt_size
9246 = tree_to_uhwi (TYPE_SIZE (elt_type));
9247 /* We support offset vectors with more elements
9248 than the data vector for now. */
9249 unsigned HOST_WIDE_INT factor
9250 = const_offset_nunits / const_nunits;
9251 vec_offset = vec_offsets[(vec_num * j + i) / factor];
9252 unsigned elt_offset
9253 = ((vec_num * j + i) % factor) * const_nunits;
9254 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9255 tree scale = size_int (gs_info.scale);
9256 align = get_object_alignment (DR_REF (first_dr_info->dr));
9257 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
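		  /* For each lane K emit a scalar store
		       *(DATAREF_PTR + OFFSET[ELT_OFFSET + K] * SCALE) = RHS[K].  */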
9258 for (unsigned k = 0; k < const_nunits; ++k)
9259 {
9260 /* Compute the offsetted pointer. */
9261 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
9262 bitsize_int (k + elt_offset));
9263 tree idx
9264 = gimple_build (seq: &stmts, code: BIT_FIELD_REF, type: idx_type,
9265 ops: vec_offset, TYPE_SIZE (idx_type), ops: boff);
9266 idx = gimple_convert (seq: &stmts, sizetype, op: idx);
9267 idx = gimple_build (seq: &stmts, code: MULT_EXPR, sizetype,
9268 ops: idx, ops: scale);
9269 tree ptr
9270 = gimple_build (seq: &stmts, code: PLUS_EXPR,
9271 TREE_TYPE (dataref_ptr),
9272 ops: dataref_ptr, ops: idx);
9273 ptr = gimple_convert (seq: &stmts, ptr_type_node, op: ptr);
9274 /* Extract the element to be stored. */
9275 tree elt
9276 = gimple_build (seq: &stmts, code: BIT_FIELD_REF,
9277 TREE_TYPE (vectype),
9278 ops: vec_oprnd, TYPE_SIZE (elt_type),
9279 bitsize_int (k * elt_size));
9280 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9281 stmts = NULL;
9282 tree ref
9283 = build2 (MEM_REF, ltype, ptr,
9284 build_int_cst (ref_type, 0));
9285 new_stmt = gimple_build_assign (ref, elt);
9286 vect_finish_stmt_generation (vinfo, stmt_info,
9287 vec_stmt: new_stmt, gsi);
9288 }
9289 if (slp)
9290 slp_node->push_vec_def (def: new_stmt);
9291 }
9292 }
9293 if (!slp && !costing_p)
9294 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
9295 }
9296
9297 if (!slp && !costing_p)
9298 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9299
9300 if (costing_p && dump_enabled_p ())
9301 dump_printf_loc (MSG_NOTE, vect_location,
9302 "vect_model_store_cost: inside_cost = %d, "
9303 "prologue_cost = %d .\n",
9304 inside_cost, prologue_cost);
9305
9306 return true;
9307 }
9308
9309 gcc_assert (memory_access_type == VMAT_CONTIGUOUS
9310 || memory_access_type == VMAT_CONTIGUOUS_DOWN
9311 || memory_access_type == VMAT_CONTIGUOUS_PERMUTE
9312 || memory_access_type == VMAT_CONTIGUOUS_REVERSE);
9313
9314 unsigned inside_cost = 0, prologue_cost = 0;
9315   /* For costing some adjacent vector stores, we'd like to cost them once
9316      with their total number instead of costing each one individually.  */
9317 unsigned int n_adjacent_stores = 0;
9318 auto_vec<tree> result_chain (group_size);
9319 auto_vec<tree, 1> vec_oprnds;
9320 for (j = 0; j < ncopies; j++)
9321 {
9322 gimple *new_stmt;
9323 if (j == 0)
9324 {
9325 if (slp && !costing_p)
9326 {
9327 /* Get vectorized arguments for SLP_NODE. */
9328 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies: 1, op0: op,
9329 vec_oprnds0: &vec_oprnds, op1: mask, vec_oprnds1: &vec_masks);
9330 vec_oprnd = vec_oprnds[0];
9331 if (mask)
9332 vec_mask = vec_masks[0];
9333 }
9334 else
9335 {
9336 /* For interleaved stores we collect vectorized defs for all the
9337 stores in the group in DR_CHAIN. DR_CHAIN is then used as an
9338 input to vect_permute_store_chain().
9339
9340 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
9341 is of size 1. */
9342 stmt_vec_info next_stmt_info = first_stmt_info;
9343 for (i = 0; i < group_size; i++)
9344 {
9345 /* Since gaps are not supported for interleaved stores,
9346 DR_GROUP_SIZE is the exact number of stmts in the chain.
9347 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
9348 that there is no interleaving, DR_GROUP_SIZE is 1,
9349 and only one iteration of the loop will be executed. */
9350 op = vect_get_store_rhs (stmt_info: next_stmt_info);
9351 if (costing_p)
9352 update_prologue_cost (&prologue_cost, op);
9353 else
9354 {
9355 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: next_stmt_info,
9356 ncopies, op,
9357 vec_oprnds: gvec_oprnds[i]);
9358 vec_oprnd = (*gvec_oprnds[i])[0];
9359 dr_chain.quick_push (obj: vec_oprnd);
9360 }
9361 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9362 }
9363 if (mask && !costing_p)
9364 {
9365 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info, ncopies,
9366 op: mask, vec_oprnds: &vec_masks,
9367 vectype: mask_vectype);
9368 vec_mask = vec_masks[0];
9369 }
9370 }
9371
9372 	  /* We should have caught mismatched types earlier.  */
9373 gcc_assert (costing_p
9374 || useless_type_conversion_p (vectype,
9375 TREE_TYPE (vec_oprnd)));
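	  /* For a SIMD lane access to a simple local array we can address the
	     array directly and advance by constant offsets instead of
	     creating a pointer IV.  */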
9376 bool simd_lane_access_p
9377 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9378 if (!costing_p
9379 && simd_lane_access_p
9380 && !loop_masks
9381 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9382 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9383 && integer_zerop (get_dr_vinfo_offset (vinfo, dr_info: first_dr_info))
9384 && integer_zerop (DR_INIT (first_dr_info->dr))
9385 && alias_sets_conflict_p (get_alias_set (aggr_type),
9386 get_alias_set (TREE_TYPE (ref_type))))
9387 {
9388 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9389 dataref_offset = build_int_cst (ref_type, 0);
9390 }
9391 else if (!costing_p)
9392 dataref_ptr
9393 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9394 simd_lane_access_p ? loop : NULL,
9395 offset, &dummy, gsi, &ptr_incr,
9396 simd_lane_access_p, bump);
9397 }
9398 else if (!costing_p)
9399 {
9400 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
9401 /* DR_CHAIN is then used as an input to vect_permute_store_chain().
9402 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN is
9403 of size 1. */
9404 for (i = 0; i < group_size; i++)
9405 {
9406 vec_oprnd = (*gvec_oprnds[i])[j];
9407 dr_chain[i] = vec_oprnd;
9408 }
9409 if (mask)
9410 vec_mask = vec_masks[j];
9411 if (dataref_offset)
9412 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
9413 else
9414 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9415 stmt_info, bump);
9416 }
9417
9418 new_stmt = NULL;
9419 if (grouped_store)
9420 {
9421 /* Permute. */
9422 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
9423 if (costing_p)
9424 {
9425 int group_size = DR_GROUP_SIZE (first_stmt_info);
9426 int nstmts = ceil_log2 (x: group_size) * group_size;
9427 inside_cost += record_stmt_cost (body_cost_vec: cost_vec, count: nstmts, kind: vec_perm,
9428 stmt_info, misalign: 0, where: vect_body);
9429 if (dump_enabled_p ())
9430 dump_printf_loc (MSG_NOTE, vect_location,
9431 "vect_model_store_cost: "
9432 "strided group_size = %d .\n",
9433 group_size);
9434 }
9435 else
9436 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
9437 gsi, &result_chain);
9438 }
9439
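      /* Emit (or just cost) the VEC_NUM vector stores of this copy.  */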
9440 stmt_vec_info next_stmt_info = first_stmt_info;
9441 for (i = 0; i < vec_num; i++)
9442 {
9443 if (!costing_p)
9444 {
9445 if (slp)
9446 vec_oprnd = vec_oprnds[i];
9447 else if (grouped_store)
9448 /* For grouped stores vectorized defs are interleaved in
9449 vect_permute_store_chain(). */
9450 vec_oprnd = result_chain[i];
9451 }
9452
9453 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9454 {
9455 if (costing_p)
9456 inside_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: vec_perm,
9457 stmt_info, misalign: 0, where: vect_body);
9458 else
9459 {
9460 tree perm_mask = perm_mask_for_reverse (vectype);
9461 tree perm_dest = vect_create_destination_var (
9462 vect_get_store_rhs (stmt_info), vectype);
9463 tree new_temp = make_ssa_name (var: perm_dest);
9464
9465 /* Generate the permute statement. */
9466 gimple *perm_stmt
9467 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
9468 vec_oprnd, perm_mask);
9469 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: perm_stmt,
9470 gsi);
9471
9472 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
9473 vec_oprnd = new_temp;
9474 }
9475 }
9476
9477 if (costing_p)
9478 {
9479 n_adjacent_stores++;
9480
9481 if (!slp)
9482 {
9483 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9484 if (!next_stmt_info)
9485 break;
9486 }
9487
9488 continue;
9489 }
9490
9491 tree final_mask = NULL_TREE;
9492 tree final_len = NULL_TREE;
9493 tree bias = NULL_TREE;
9494 if (loop_masks)
9495 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
9496 vec_num * ncopies, vectype,
9497 vec_num * j + i);
9498 if (slp && vec_mask)
9499 vec_mask = vec_masks[i];
9500 if (vec_mask)
9501 final_mask = prepare_vec_mask (loop_vinfo, mask_type: mask_vectype, loop_mask: final_mask,
9502 vec_mask, gsi);
9503
9504 if (i > 0)
9505 /* Bump the vector pointer. */
9506 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9507 stmt_info, bump);
9508
9509 unsigned misalign;
9510 unsigned HOST_WIDE_INT align;
9511 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9512 if (alignment_support_scheme == dr_aligned)
9513 misalign = 0;
9514 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9515 {
9516 align = dr_alignment (vect_dr_behavior (vinfo, dr_info: first_dr_info));
9517 misalign = 0;
9518 }
9519 else
9520 misalign = misalignment;
9521 if (dataref_offset == NULL_TREE
9522 && TREE_CODE (dataref_ptr) == SSA_NAME)
9523 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
9524 misalign);
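	  /* The guaranteed alignment of the access is the largest power of
	     two that divides both the known alignment and the misalignment.  */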
9525 align = least_bit_hwi (x: misalign | align);
9526
9527 	  /* Compute the partial-store IFN when LOOP_LENS or FINAL_MASK is valid.  */
9528 machine_mode vmode = TYPE_MODE (vectype);
9529 machine_mode new_vmode = vmode;
9530 internal_fn partial_ifn = IFN_LAST;
9531 if (loop_lens)
9532 {
9533 opt_machine_mode new_ovmode
9534 = get_len_load_store_mode (vmode, false, &partial_ifn);
9535 new_vmode = new_ovmode.require ();
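	      /* If the target performs the length-controlled store in a
		 byte-element (VnQI) mode, the 'len' operand counts bytes, so
		 the loop length must be scaled by the original element size.  */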
9536 unsigned factor
9537 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
9538 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
9539 vec_num * ncopies, vectype,
9540 vec_num * j + i, factor);
9541 }
9542 else if (final_mask)
9543 {
9544 if (!can_vec_mask_load_store_p (
9545 vmode, TYPE_MODE (TREE_TYPE (final_mask)), false,
9546 &partial_ifn))
9547 gcc_unreachable ();
9548 }
9549
9550 if (partial_ifn == IFN_MASK_LEN_STORE)
9551 {
9552 if (!final_len)
9553 {
9554 		  /* Pass the number of vector lanes as the 'len' argument of
9555 		     MASK_LEN_STORE if LOOP_LENS is invalid.  */
9556 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9557 }
9558 if (!final_mask)
9559 {
9560 /* Pass all ones value to 'mask' argument of
9561 MASK_LEN_STORE if final_mask is invalid. */
9562 mask_vectype = truth_type_for (vectype);
9563 final_mask = build_minus_one_cst (mask_vectype);
9564 }
9565 }
9566 if (final_len)
9567 {
9568 signed char biasval
9569 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9570
9571 bias = build_int_cst (intQI_type_node, biasval);
9572 }
9573
9574 /* Arguments are ready. Create the new vector stmt. */
9575 if (final_len)
9576 {
9577 gcall *call;
9578 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9579 /* Need conversion if it's wrapped with VnQI. */
9580 if (vmode != new_vmode)
9581 {
9582 tree new_vtype
9583 = build_vector_type_for_mode (unsigned_intQI_type_node,
9584 new_vmode);
9585 tree var = vect_get_new_ssa_name (new_vtype, vect_simple_var);
9586 vec_oprnd = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
9587 gassign *new_stmt
9588 = gimple_build_assign (var, VIEW_CONVERT_EXPR, vec_oprnd);
9589 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
9590 vec_oprnd = var;
9591 }
9592
9593 if (partial_ifn == IFN_MASK_LEN_STORE)
9594 call = gimple_build_call_internal (IFN_MASK_LEN_STORE, 6,
9595 dataref_ptr, ptr, final_mask,
9596 final_len, bias, vec_oprnd);
9597 else
9598 call = gimple_build_call_internal (IFN_LEN_STORE, 5,
9599 dataref_ptr, ptr, final_len,
9600 bias, vec_oprnd);
9601 gimple_call_set_nothrow (s: call, nothrow_p: true);
9602 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi);
9603 new_stmt = call;
9604 }
9605 else if (final_mask)
9606 {
9607 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9608 gcall *call
9609 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
9610 ptr, final_mask, vec_oprnd);
9611 gimple_call_set_nothrow (s: call, nothrow_p: true);
9612 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi);
9613 new_stmt = call;
9614 }
9615 else
9616 {
9617 data_ref
9618 = fold_build2 (MEM_REF, vectype, dataref_ptr,
9619 dataref_offset ? dataref_offset
9620 : build_int_cst (ref_type, 0));
9621 if (alignment_support_scheme == dr_aligned)
9622 ;
9623 else
9624 TREE_TYPE (data_ref)
9625 = build_aligned_type (TREE_TYPE (data_ref),
9626 align * BITS_PER_UNIT);
9627 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9628 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
9629 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
9630 }
9631
9632 if (slp)
9633 continue;
9634
9635 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9636 if (!next_stmt_info)
9637 break;
9638 }
9639 if (!slp && !costing_p)
9640 {
9641 if (j == 0)
9642 *vec_stmt = new_stmt;
9643 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
9644 }
9645 }
9646
9647 if (costing_p)
9648 {
9649 if (n_adjacent_stores > 0)
9650 vect_get_store_cost (vinfo, stmt_info, ncopies: n_adjacent_stores,
9651 alignment_support_scheme, misalignment,
9652 inside_cost: &inside_cost, body_cost_vec: cost_vec);
9653
9654       /* When vectorizing a store into the function result, assign
9655 	 a penalty if the function returns in a multi-register location.
9656 	 In this case we assume we'll end up having to spill the
9657 	 vector result and do piecewise loads, as a conservative estimate.  */
9658 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
9659 if (base
9660 && (TREE_CODE (base) == RESULT_DECL
9661 || (DECL_P (base) && cfun_returns (decl: base)))
9662 && !aggregate_value_p (base, cfun->decl))
9663 {
9664 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
9665 /* ??? Handle PARALLEL in some way. */
9666 if (REG_P (reg))
9667 {
9668 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
9669 	      /* Assume that a single reg-reg move is possible and cheap, and
9670 		 do not account for vector to gp register move cost.  */
9671 if (nregs > 1)
9672 {
9673 /* Spill. */
9674 prologue_cost
9675 += record_stmt_cost (body_cost_vec: cost_vec, count: ncopies, kind: vector_store,
9676 stmt_info, misalign: 0, where: vect_epilogue);
9677 /* Loads. */
9678 prologue_cost
9679 += record_stmt_cost (body_cost_vec: cost_vec, count: ncopies * nregs, kind: scalar_load,
9680 stmt_info, misalign: 0, where: vect_epilogue);
9681 }
9682 }
9683 }
9684 if (dump_enabled_p ())
9685 dump_printf_loc (MSG_NOTE, vect_location,
9686 "vect_model_store_cost: inside_cost = %d, "
9687 "prologue_cost = %d .\n",
9688 inside_cost, prologue_cost);
9689 }
9690
9691 return true;
9692}
9693
9694/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
9695 VECTOR_CST mask. No checks are made that the target platform supports the
9696 mask, so callers may wish to test can_vec_perm_const_p separately, or use
9697 vect_gen_perm_mask_checked. */
9698
9699tree
9700vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
9701{
9702 tree mask_type;
9703
9704 poly_uint64 nunits = sel.length ();
9705 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
9706
9707 mask_type = build_vector_type (ssizetype, nunits);
9708 return vec_perm_indices_to_tree (mask_type, sel);
9709}
9710
9711/* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
9712 i.e. that the target supports the pattern _for arbitrary input vectors_. */
9713
9714tree
9715vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
9716{
9717 machine_mode vmode = TYPE_MODE (vectype);
9718 gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
9719 return vect_gen_perm_mask_any (vectype, sel);
9720}
9721
9722 /* Given vector variables X and Y that were generated for the scalar
9723    STMT_INFO, generate instructions to permute the vector elements of X and Y
9724    using the permutation mask MASK_VEC, insert them at *GSI and return the
9725 permuted vector variable. */
9726
9727static tree
9728permute_vec_elements (vec_info *vinfo,
9729 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
9730 gimple_stmt_iterator *gsi)
9731{
9732 tree vectype = TREE_TYPE (x);
9733 tree perm_dest, data_ref;
9734 gimple *perm_stmt;
9735
9736 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
9737 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
9738 perm_dest = vect_create_destination_var (scalar_dest, vectype);
9739 else
9740 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
9741 data_ref = make_ssa_name (var: perm_dest);
9742
9743 /* Generate the permute statement. */
9744 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
9745 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: perm_stmt, gsi);
9746
9747 return data_ref;
9748}
9749
9750/* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
9751    inserting them on the loop's preheader edge.  Returns true if we
9752    were successful in doing so (and thus STMT_INFO can then be moved),
9753    otherwise returns false.  HOIST_P indicates whether we actually want to
9754    hoist the definitions of all SSA uses; it is false when we are only costing.  */
9755
9756static bool
9757hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop, bool hoist_p)
9758{
9759 ssa_op_iter i;
9760 tree op;
9761 bool any = false;
9762
9763 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9764 {
9765 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9766 if (!gimple_nop_p (g: def_stmt)
9767 && flow_bb_inside_loop_p (loop, gimple_bb (g: def_stmt)))
9768 {
9769 /* Make sure we don't need to recurse. While we could do
9770 	     so in simple cases, when there are more complex use webs
9771 we don't have an easy way to preserve stmt order to fulfil
9772 dependencies within them. */
9773 tree op2;
9774 ssa_op_iter i2;
9775 if (gimple_code (g: def_stmt) == GIMPLE_PHI)
9776 return false;
9777 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
9778 {
9779 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
9780 if (!gimple_nop_p (g: def_stmt2)
9781 && flow_bb_inside_loop_p (loop, gimple_bb (g: def_stmt2)))
9782 return false;
9783 }
9784 any = true;
9785 }
9786 }
9787
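  /* If no use is defined inside LOOP there is nothing to hoist.  */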
9788 if (!any)
9789 return true;
9790
9791 if (!hoist_p)
9792 return true;
9793
9794 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9795 {
9796 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9797 if (!gimple_nop_p (g: def_stmt)
9798 && flow_bb_inside_loop_p (loop, gimple_bb (g: def_stmt)))
9799 {
9800 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
9801 gsi_remove (&gsi, false);
9802 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
9803 }
9804 }
9805
9806 return true;
9807}
9808
9809/* vectorizable_load.
9810
9811    Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
9812 that can be vectorized.
9813 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9814 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
9815 Return true if STMT_INFO is vectorizable in this way. */
9816
9817static bool
9818vectorizable_load (vec_info *vinfo,
9819 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9820 gimple **vec_stmt, slp_tree slp_node,
9821 stmt_vector_for_cost *cost_vec)
9822{
9823 tree scalar_dest;
9824 tree vec_dest = NULL;
9825 tree data_ref = NULL;
9826 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
9827 class loop *loop = NULL;
9828 class loop *containing_loop = gimple_bb (g: stmt_info->stmt)->loop_father;
9829 bool nested_in_vect_loop = false;
9830 tree elem_type;
9831 /* Avoid false positive uninitialized warning, see PR110652. */
9832 tree new_temp = NULL_TREE;
9833 machine_mode mode;
9834 tree dummy;
9835 tree dataref_ptr = NULL_TREE;
9836 tree dataref_offset = NULL_TREE;
9837 gimple *ptr_incr = NULL;
9838 int ncopies;
9839 int i, j;
9840 unsigned int group_size;
9841 poly_uint64 group_gap_adj;
9842 tree msq = NULL_TREE, lsq;
9843 tree realignment_token = NULL_TREE;
9844 gphi *phi = NULL;
9845 vec<tree> dr_chain = vNULL;
9846 bool grouped_load = false;
9847 stmt_vec_info first_stmt_info;
9848 stmt_vec_info first_stmt_info_for_drptr = NULL;
9849 bool compute_in_loop = false;
9850 class loop *at_loop;
9851 int vec_num;
9852 bool slp = (slp_node != NULL);
9853 bool slp_perm = false;
9854 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo);
9855 poly_uint64 vf;
9856 tree aggr_type;
9857 gather_scatter_info gs_info;
9858 tree ref_type;
9859 enum vect_def_type mask_dt = vect_unknown_def_type;
9860
9861 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9862 return false;
9863
9864 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9865 && ! vec_stmt)
9866 return false;
9867
9868 if (!STMT_VINFO_DATA_REF (stmt_info))
9869 return false;
9870
9871 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
9872 int mask_index = -1;
9873 slp_tree slp_op = NULL;
9874 if (gassign *assign = dyn_cast <gassign *> (p: stmt_info->stmt))
9875 {
9876 scalar_dest = gimple_assign_lhs (gs: assign);
9877 if (TREE_CODE (scalar_dest) != SSA_NAME)
9878 return false;
9879
9880 tree_code code = gimple_assign_rhs_code (gs: assign);
9881 if (code != ARRAY_REF
9882 && code != BIT_FIELD_REF
9883 && code != INDIRECT_REF
9884 && code != COMPONENT_REF
9885 && code != IMAGPART_EXPR
9886 && code != REALPART_EXPR
9887 && code != MEM_REF
9888 && TREE_CODE_CLASS (code) != tcc_declaration)
9889 return false;
9890 }
9891 else
9892 {
9893 gcall *call = dyn_cast <gcall *> (p: stmt_info->stmt);
9894 if (!call || !gimple_call_internal_p (gs: call))
9895 return false;
9896
9897 internal_fn ifn = gimple_call_internal_fn (gs: call);
9898 if (!internal_load_fn_p (ifn))
9899 return false;
9900
9901 scalar_dest = gimple_call_lhs (gs: call);
9902 if (!scalar_dest)
9903 return false;
9904
9905 mask_index = internal_fn_mask_index (ifn);
9906 if (mask_index >= 0 && slp_node)
9907 mask_index = vect_slp_child_index_for_operand
9908 (call, op: mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9909 if (mask_index >= 0
9910 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
9911 mask: &mask, mask_node: &slp_op, mask_dt_out: &mask_dt, mask_vectype_out: &mask_vectype))
9912 return false;
9913 }
9914
9915 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9916 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (node: vectype);
9917
9918 if (loop_vinfo)
9919 {
9920 loop = LOOP_VINFO_LOOP (loop_vinfo);
9921 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
9922 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
9923 }
9924 else
9925 vf = 1;
9926
9927 /* Multiple types in SLP are handled by creating the appropriate number of
9928 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
9929 case of SLP. */
9930 if (slp)
9931 ncopies = 1;
9932 else
9933 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9934
9935 gcc_assert (ncopies >= 1);
9936
9937 /* FORNOW. This restriction should be relaxed. */
9938 if (nested_in_vect_loop && ncopies > 1)
9939 {
9940 if (dump_enabled_p ())
9941 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9942 "multiple types in nested loop.\n");
9943 return false;
9944 }
9945
9946 /* Invalidate assumptions made by dependence analysis when vectorization
9947 on the unrolled body effectively re-orders stmts. */
9948 if (ncopies > 1
9949 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9950 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9951 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9952 {
9953 if (dump_enabled_p ())
9954 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9955 "cannot perform implicit CSE when unrolling "
9956 "with negative dependence distance\n");
9957 return false;
9958 }
9959
9960 elem_type = TREE_TYPE (vectype);
9961 mode = TYPE_MODE (vectype);
9962
9963 /* FORNOW. In some cases we can vectorize even if the data type is not
9964 supported (e.g. data copies). */
9965 if (optab_handler (op: mov_optab, mode) == CODE_FOR_nothing)
9966 {
9967 if (dump_enabled_p ())
9968 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9969 "Aligned load, but unsupported type.\n");
9970 return false;
9971 }
9972
9973 /* Check if the load is a part of an interleaving chain. */
9974 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
9975 {
9976 grouped_load = true;
9977 /* FORNOW */
9978 gcc_assert (!nested_in_vect_loop);
9979 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9980
9981 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9982 group_size = DR_GROUP_SIZE (first_stmt_info);
9983
9984 /* Refuse non-SLP vectorization of SLP-only groups. */
9985 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
9986 {
9987 if (dump_enabled_p ())
9988 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9989 "cannot vectorize load in non-SLP mode.\n");
9990 return false;
9991 }
9992
9993 /* Invalidate assumptions made by dependence analysis when vectorization
9994 on the unrolled body effectively re-orders stmts. */
9995 if (!PURE_SLP_STMT (stmt_info)
9996 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9997 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9998 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9999 {
10000 if (dump_enabled_p ())
10001 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10002 "cannot perform implicit CSE when performing "
10003 "group loads with negative dependence distance\n");
10004 return false;
10005 }
10006 }
10007 else
10008 group_size = 1;
10009
10010 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
10011 {
10012 slp_perm = true;
10013
10014 if (!loop_vinfo)
10015 {
10016 /* In BB vectorization we may not actually use a loaded vector
10017 accessing elements in excess of DR_GROUP_SIZE. */
10018 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
10019 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
10020 unsigned HOST_WIDE_INT nunits;
10021 unsigned j, k, maxk = 0;
10022 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
10023 if (k > maxk)
10024 maxk = k;
10025 tree vectype = SLP_TREE_VECTYPE (slp_node);
10026 if (!TYPE_VECTOR_SUBPARTS (node: vectype).is_constant (const_value: &nunits)
10027 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
10028 {
10029 if (dump_enabled_p ())
10030 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10031 "BB vectorization with gaps at the end of "
10032 "a load is not supported\n");
10033 return false;
10034 }
10035 }
10036
10037 auto_vec<tree> tem;
10038 unsigned n_perms;
10039 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
10040 true, &n_perms))
10041 {
10042 if (dump_enabled_p ())
10043 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
10044 vect_location,
10045 "unsupported load permutation\n");
10046 return false;
10047 }
10048 }
10049
10050 vect_memory_access_type memory_access_type;
10051 enum dr_alignment_support alignment_support_scheme;
10052 int misalignment;
10053 poly_int64 poffset;
10054 internal_fn lanes_ifn;
10055 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, masked_p: mask, vls_type: VLS_LOAD,
10056 ncopies, memory_access_type: &memory_access_type, poffset: &poffset,
10057 alignment_support_scheme: &alignment_support_scheme, misalignment: &misalignment, gs_info: &gs_info,
10058 lanes_ifn: &lanes_ifn))
10059 return false;
10060
10061 if (mask)
10062 {
10063 if (memory_access_type == VMAT_CONTIGUOUS)
10064 {
10065 machine_mode vec_mode = TYPE_MODE (vectype);
10066 if (!VECTOR_MODE_P (vec_mode)
10067 || !can_vec_mask_load_store_p (vec_mode,
10068 TYPE_MODE (mask_vectype), true))
10069 return false;
10070 }
10071 else if (memory_access_type != VMAT_LOAD_STORE_LANES
10072 && memory_access_type != VMAT_GATHER_SCATTER)
10073 {
10074 if (dump_enabled_p ())
10075 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10076 "unsupported access type for masked load.\n");
10077 return false;
10078 }
10079 else if (memory_access_type == VMAT_GATHER_SCATTER
10080 && gs_info.ifn == IFN_LAST
10081 && !gs_info.decl)
10082 {
10083 if (dump_enabled_p ())
10084 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10085 "unsupported masked emulated gather.\n");
10086 return false;
10087 }
10088 else if (memory_access_type == VMAT_ELEMENTWISE
10089 || memory_access_type == VMAT_STRIDED_SLP)
10090 {
10091 if (dump_enabled_p ())
10092 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10093 "unsupported masked strided access.\n");
10094 return false;
10095 }
10096 }
10097
10098 bool costing_p = !vec_stmt;
10099
10100 if (costing_p) /* transformation not required. */
10101 {
10102 if (slp_node
10103 && mask
10104 && !vect_maybe_update_slp_op_vectype (slp_op,
10105 mask_vectype))
10106 {
10107 if (dump_enabled_p ())
10108 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10109 "incompatible vector types for invariants\n");
10110 return false;
10111 }
10112
10113 if (!slp)
10114 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
10115
10116 if (loop_vinfo
10117 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10118 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
10119 vls_type: VLS_LOAD, group_size,
10120 memory_access_type, gs_info: &gs_info,
10121 scalar_mask: mask);
10122
10123 if (dump_enabled_p ()
10124 && memory_access_type != VMAT_ELEMENTWISE
10125 && memory_access_type != VMAT_GATHER_SCATTER
10126 && alignment_support_scheme != dr_aligned)
10127 dump_printf_loc (MSG_NOTE, vect_location,
10128 "Vectorizing an unaligned access.\n");
10129
10130 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10131 vinfo->any_known_not_updated_vssa = true;
10132
10133 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
10134 }
10135
10136 if (!slp)
10137 gcc_assert (memory_access_type
10138 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
10139
10140 if (dump_enabled_p () && !costing_p)
10141 dump_printf_loc (MSG_NOTE, vect_location,
10142 "transform load. ncopies = %d\n", ncopies);
10143
10144 /* Transform. */
10145
10146 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
10147 ensure_base_align (dr_info);
10148
10149 if (memory_access_type == VMAT_INVARIANT)
10150 {
10151 gcc_assert (!grouped_load && !mask && !bb_vinfo);
10152 /* If we have versioned for aliasing or the loop doesn't
10153 have any data dependencies that would preclude this,
10154 then we are sure this is a loop invariant load and
10155 thus we can insert it on the preheader edge.
10156 TODO: hoist_defs_of_uses should ideally be computed
10157 once at analysis time, remembered and then used at
10158 transform time. */
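	 /* Schematically (an illustrative sketch only) the transform
	    emits on the preheader edge
	      x' = *invariant_addr;      <- hoisted scalar load
	      vx = { x', x', ... };      <- splat via vect_init_vector
	    and every vector copy of the statement then reuses vx.  */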
10159 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
10160 && !nested_in_vect_loop
10161 && hoist_defs_of_uses (stmt_info, loop, hoist_p: !costing_p));
10162 if (costing_p)
10163 {
10164 enum vect_cost_model_location cost_loc
10165 = hoist_p ? vect_prologue : vect_body;
10166 unsigned int cost = record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: scalar_load,
10167 stmt_info, misalign: 0, where: cost_loc);
10168 cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: scalar_to_vec, stmt_info, misalign: 0,
10169 where: cost_loc);
10170 unsigned int prologue_cost = hoist_p ? cost : 0;
10171 unsigned int inside_cost = hoist_p ? 0 : cost;
10172 if (dump_enabled_p ())
10173 dump_printf_loc (MSG_NOTE, vect_location,
10174 "vect_model_load_cost: inside_cost = %d, "
10175 "prologue_cost = %d .\n",
10176 inside_cost, prologue_cost);
10177 return true;
10178 }
10179 if (hoist_p)
10180 {
10181 gassign *stmt = as_a <gassign *> (p: stmt_info->stmt);
10182 if (dump_enabled_p ())
10183 dump_printf_loc (MSG_NOTE, vect_location,
10184 "hoisting out of the vectorized loop: %G",
10185 (gimple *) stmt);
10186 scalar_dest = copy_ssa_name (var: scalar_dest);
10187 tree rhs = unshare_expr (gimple_assign_rhs1 (gs: stmt));
10188 edge pe = loop_preheader_edge (loop);
10189 gphi *vphi = get_virtual_phi (loop->header);
10190 tree vuse;
10191 if (vphi)
10192 vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
10193 else
10194 vuse = gimple_vuse (g: gsi_stmt (i: *gsi));
10195 gimple *new_stmt = gimple_build_assign (scalar_dest, rhs);
10196 gimple_set_vuse (g: new_stmt, vuse);
10197 gsi_insert_on_edge_immediate (pe, new_stmt);
10198 }
10199 /* These copies are all equivalent. */
10200 if (hoist_p)
10201 new_temp = vect_init_vector (vinfo, stmt_info, val: scalar_dest,
10202 type: vectype, NULL);
10203 else
10204 {
10205 gimple_stmt_iterator gsi2 = *gsi;
10206 gsi_next (i: &gsi2);
10207 new_temp = vect_init_vector (vinfo, stmt_info, val: scalar_dest,
10208 type: vectype, gsi: &gsi2);
10209 }
10210 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
10211 if (slp)
10212 for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j)
10213 slp_node->push_vec_def (def: new_stmt);
10214 else
10215 {
10216 for (j = 0; j < ncopies; ++j)
10217 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
10218 *vec_stmt = new_stmt;
10219 }
10220 return true;
10221 }
10222
10223 if (memory_access_type == VMAT_ELEMENTWISE
10224 || memory_access_type == VMAT_STRIDED_SLP)
10225 {
10226 gimple_stmt_iterator incr_gsi;
10227 bool insert_after;
10228 tree offvar;
10229 tree ivstep;
10230 tree running_off;
10231 vec<constructor_elt, va_gc> *v = NULL;
10232 tree stride_base, stride_step, alias_off;
10233 /* Checked by get_load_store_type. */
10234 unsigned int const_nunits = nunits.to_constant ();
10235 unsigned HOST_WIDE_INT cst_offset = 0;
10236 tree dr_offset;
10237 unsigned int inside_cost = 0;
10238
10239 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
10240 gcc_assert (!nested_in_vect_loop);
10241
10242 if (grouped_load)
10243 {
10244 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10245 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10246 }
10247 else
10248 {
10249 first_stmt_info = stmt_info;
10250 first_dr_info = dr_info;
10251 }
10252
10253 if (slp && grouped_load)
10254 {
10255 group_size = DR_GROUP_SIZE (first_stmt_info);
10256 ref_type = get_group_alias_ptr_type (first_stmt_info);
10257 }
10258 else
10259 {
10260 if (grouped_load)
10261 cst_offset
10262 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
10263 * vect_get_place_in_interleaving_chain (stmt_info,
10264 first_stmt_info));
10265 group_size = 1;
10266 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
10267 }
10268
10269 if (!costing_p)
10270 {
10271 dr_offset = get_dr_vinfo_offset (vinfo, dr_info: first_dr_info);
10272 stride_base = fold_build_pointer_plus (
10273 DR_BASE_ADDRESS (first_dr_info->dr),
10274 size_binop (PLUS_EXPR, convert_to_ptrofftype (dr_offset),
10275 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
10276 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
10277
10278 /* For a load with loop-invariant (but other than power-of-2)
10279 stride (i.e. not a grouped access) like so:
10280
10281 for (i = 0; i < n; i += stride)
10282 ... = array[i];
10283
10284 we generate a new induction variable and new accesses to
10285 form a new vector (or vectors, depending on ncopies):
10286
10287 for (j = 0; ; j += VF*stride)
10288 tmp1 = array[j];
10289 tmp2 = array[j + stride];
10290 ...
10291 vectemp = {tmp1, tmp2, ...}
10292 */
10293
10294 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
10295 build_int_cst (TREE_TYPE (stride_step), vf));
10296
10297 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
10298
10299 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
10300 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
10301 create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
10302 loop, &incr_gsi, insert_after,
10303 &offvar, NULL);
10304
10305 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
10306 }
10307
10308 running_off = offvar;
10309 alias_off = build_int_cst (ref_type, 0);
10310 int nloads = const_nunits;
10311 int lnel = 1;
10312 tree ltype = TREE_TYPE (vectype);
10313 tree lvectype = vectype;
10314 auto_vec<tree> dr_chain;
10315 if (memory_access_type == VMAT_STRIDED_SLP)
10316 {
10317 if (group_size < const_nunits)
10318 {
10319 /* First check if vec_init optab supports construction from vector
10320 elts directly. Otherwise avoid emitting a constructor of
10321 vector elements by performing the loads using an integer type
10322 of the same size, constructing a vector of those and then
10323 re-interpreting it as the original vector type. This avoids a
10324 huge runtime penalty due to the general inability to perform
10325 store forwarding from smaller stores to a larger load. */
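	      /* For example (sizes illustrative): with a V8HI vectype and
		 group_size 2 each piece covers two HImode elements, so we
		 either load four V2HI pieces or, failing that, four 32-bit
		 integers, build a vector constructor from the pieces and,
		 in the integer case, VIEW_CONVERT the V4SI result back to
		 V8HI.  */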
10326 tree ptype;
10327 tree vtype
10328 = vector_vector_composition_type (vtype: vectype,
10329 nelts: const_nunits / group_size,
10330 ptype: &ptype);
10331 if (vtype != NULL_TREE)
10332 {
10333 nloads = const_nunits / group_size;
10334 lnel = group_size;
10335 lvectype = vtype;
10336 ltype = ptype;
10337 }
10338 }
10339 else
10340 {
10341 nloads = 1;
10342 lnel = const_nunits;
10343 ltype = vectype;
10344 }
10345 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
10346 }
10347 /* Load vector(1) scalar_type if the vectype has just a single element. */
10348 else if (nloads == 1)
10349 ltype = vectype;
10350
10351 if (slp)
10352 {
10353 /* For SLP permutation support we need to load the whole group,
10354 not only the number of vector stmts the permutation result
10355 fits in. */
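	  /* E.g. (numbers illustrative) with group_size 3, VF 4 and
	     const_nunits 4 we need CEIL (3 * 4, 4) = 3 vector loads to
	     cover the whole group, even if the permutation result would
	     fit in fewer vectors.  */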
10356 if (slp_perm)
10357 {
10358 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
10359 variable VF. */
10360 unsigned int const_vf = vf.to_constant ();
10361 ncopies = CEIL (group_size * const_vf, const_nunits);
10362 dr_chain.create (nelems: ncopies);
10363 }
10364 else
10365 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10366 }
10367 unsigned int group_el = 0;
10368 unsigned HOST_WIDE_INT
10369 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
10370 unsigned int n_groups = 0;
10371 /* For costing some adjacent vector loads, we'd like to cost them
10372 once with their total number instead of costing each one by one. */
10373 unsigned int n_adjacent_loads = 0;
10374 for (j = 0; j < ncopies; j++)
10375 {
10376 if (nloads > 1 && !costing_p)
10377 vec_alloc (v, nelems: nloads);
10378 gimple *new_stmt = NULL;
10379 for (i = 0; i < nloads; i++)
10380 {
10381 if (costing_p)
10382 {
10383 /* For VMAT_ELEMENTWISE, just cost it as scalar_load to
10384 avoid ICE, see PR110776. */
10385 if (VECTOR_TYPE_P (ltype)
10386 && memory_access_type != VMAT_ELEMENTWISE)
10387 n_adjacent_loads++;
10388 else
10389 inside_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: scalar_load,
10390 stmt_info, misalign: 0, where: vect_body);
10391 continue;
10392 }
10393 tree this_off = build_int_cst (TREE_TYPE (alias_off),
10394 group_el * elsz + cst_offset);
10395 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
10396 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10397 new_stmt = gimple_build_assign (make_ssa_name (var: ltype), data_ref);
10398 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
10399 if (nloads > 1)
10400 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
10401 gimple_assign_lhs (new_stmt));
10402
10403 group_el += lnel;
10404 if (! slp
10405 || group_el == group_size)
10406 {
10407 n_groups++;
10408 /* When doing SLP make sure not to load elements from
10409 the next vector iteration; those will not be accessed,
10410 so just use the last element again. See PR107451. */
10411 if (!slp || known_lt (n_groups, vf))
10412 {
10413 tree newoff = copy_ssa_name (var: running_off);
10414 gimple *incr
10415 = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
10416 running_off, stride_step);
10417 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: incr, gsi);
10418 running_off = newoff;
10419 }
10420 group_el = 0;
10421 }
10422 }
10423
10424 if (nloads > 1)
10425 {
10426 if (costing_p)
10427 inside_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: vec_construct,
10428 stmt_info, misalign: 0, where: vect_body);
10429 else
10430 {
10431 tree vec_inv = build_constructor (lvectype, v);
10432 new_temp = vect_init_vector (vinfo, stmt_info, val: vec_inv,
10433 type: lvectype, gsi);
10434 new_stmt = SSA_NAME_DEF_STMT (new_temp);
10435 if (lvectype != vectype)
10436 {
10437 new_stmt
10438 = gimple_build_assign (make_ssa_name (var: vectype),
10439 VIEW_CONVERT_EXPR,
10440 build1 (VIEW_CONVERT_EXPR,
10441 vectype, new_temp));
10442 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt,
10443 gsi);
10444 }
10445 }
10446 }
10447
10448 if (!costing_p)
10449 {
10450 if (slp)
10451 {
10452 if (slp_perm)
10453 dr_chain.quick_push (obj: gimple_assign_lhs (gs: new_stmt));
10454 else
10455 slp_node->push_vec_def (def: new_stmt);
10456 }
10457 else
10458 {
10459 if (j == 0)
10460 *vec_stmt = new_stmt;
10461 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
10462 }
10463 }
10464 }
10465 if (slp_perm)
10466 {
10467 unsigned n_perms;
10468 if (costing_p)
10469 {
10470 unsigned n_loads;
10471 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL, vf,
10472 true, &n_perms, &n_loads);
10473 inside_cost += record_stmt_cost (body_cost_vec: cost_vec, count: n_perms, kind: vec_perm,
10474 stmt_info: first_stmt_info, misalign: 0, where: vect_body);
10475 }
10476 else
10477 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
10478 false, &n_perms);
10479 }
10480
10481 if (costing_p)
10482 {
10483 if (n_adjacent_loads > 0)
10484 vect_get_load_cost (vinfo, stmt_info, ncopies: n_adjacent_loads,
10485 alignment_support_scheme, misalignment, add_realign_cost: false,
10486 inside_cost: &inside_cost, prologue_cost: nullptr, prologue_cost_vec: cost_vec, body_cost_vec: cost_vec,
10487 record_prologue_costs: true);
10488 if (dump_enabled_p ())
10489 dump_printf_loc (MSG_NOTE, vect_location,
10490 "vect_model_load_cost: inside_cost = %u, "
10491 "prologue_cost = 0 .\n",
10492 inside_cost);
10493 }
10494
10495 return true;
10496 }
10497
10498 if (memory_access_type == VMAT_GATHER_SCATTER
10499 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
10500 grouped_load = false;
10501
10502 if (grouped_load
10503 || (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()))
10504 {
10505 if (grouped_load)
10506 {
10507 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10508 group_size = DR_GROUP_SIZE (first_stmt_info);
10509 }
10510 else
10511 {
10512 first_stmt_info = stmt_info;
10513 group_size = 1;
10514 }
10515 /* For SLP vectorization we directly vectorize a subchain
10516 without permutation. */
10517 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
10518 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
10519 /* For BB vectorization always use the first stmt to base
10520 the data ref pointer on. */
10521 if (bb_vinfo)
10522 first_stmt_info_for_drptr
10523 = vect_find_first_scalar_stmt_in_slp (slp_node);
10524
10525 /* Check if the chain of loads is already vectorized. */
10526 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
10527 /* For SLP we would need to copy over SLP_TREE_VEC_DEFS.
10528 ??? But we can only do so if there is exactly one
10529 as we have no way to get at the rest. Leave the CSE
10530 opportunity alone.
10531 ??? With the group load eventually participating
10532 in multiple different permutations (having multiple
10533 slp nodes which refer to the same group) the CSE
10534 would even produce wrong code. See PR56270. */
10535 && !slp)
10536 {
10537 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10538 return true;
10539 }
10540 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10541 group_gap_adj = 0;
10542
10543 /* VEC_NUM is the number of vect stmts to be created for this group. */
10544 if (slp)
10545 {
10546 grouped_load = false;
10547 /* If an SLP permutation is from N elements to N elements,
10548 and if one vector holds a whole number of N, we can load
10549 the inputs to the permutation in the same way as an
10550 unpermuted sequence. In other cases we need to load the
10551 whole group, not only the number of vector stmts the
10552 permutation result fits in. */
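	 /* For example (numbers illustrative): with group_size 4,
	    SLP_TREE_LANES 4 and nunits 8 one vector holds two whole
	    groups, so the permutation inputs are loaded as in the
	    unpermuted case.  With group_size 3 and nunits 4 we instead
	    need CEIL (3 * VF, 4) vectors for the whole group.  */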
10553 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
10554 if (slp_perm
10555 && (group_size != scalar_lanes
10556 || !multiple_p (a: nunits, b: group_size)))
10557 {
10558 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
10559 variable VF; see vect_transform_slp_perm_load. */
10560 unsigned int const_vf = vf.to_constant ();
10561 unsigned int const_nunits = nunits.to_constant ();
10562 vec_num = CEIL (group_size * const_vf, const_nunits);
10563 group_gap_adj = vf * group_size - nunits * vec_num;
10564 }
10565 else
10566 {
10567 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10568 group_gap_adj
10569 = group_size - scalar_lanes;
10570 }
10571 }
10572 else
10573 vec_num = group_size;
10574
10575 ref_type = get_group_alias_ptr_type (first_stmt_info);
10576 }
10577 else
10578 {
10579 first_stmt_info = stmt_info;
10580 first_dr_info = dr_info;
10581 group_size = vec_num = 1;
10582 group_gap_adj = 0;
10583 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
10584 if (slp)
10585 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10586 }
10587
10588 gcc_assert (alignment_support_scheme);
10589 vec_loop_masks *loop_masks
10590 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
10591 ? &LOOP_VINFO_MASKS (loop_vinfo)
10592 : NULL);
10593 vec_loop_lens *loop_lens
10594 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
10595 ? &LOOP_VINFO_LENS (loop_vinfo)
10596 : NULL);
10597
10598 /* Both vect_analyze_stmt and vect_transform_stmt reach this point, but
10599 with a difference: we cannot enable both the lens and the masks
10600 during transform, while during analysis both may be recorded.
10601 We shouldn't go with the length-based approach if fully masked. */
10602 if (cost_vec == NULL)
10603 /* The cost_vec is NULL during transform. */
10604 gcc_assert ((!loop_lens || !loop_masks));
10605
10606 /* Targets with load-lane instructions must not require explicit
10607 realignment. vect_supportable_dr_alignment always returns either
10608 dr_aligned or dr_unaligned_supported for masked operations. */
10609 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
10610 && !mask
10611 && !loop_masks)
10612 || alignment_support_scheme == dr_aligned
10613 || alignment_support_scheme == dr_unaligned_supported);
10614
10615 /* In case the vectorization factor (VF) is bigger than the number
10616 of elements that we can fit in a vectype (nunits), we have to generate
10617 more than one vector stmt - i.e. we need to "unroll" the
10618 vector stmt by a factor VF/nunits. In doing so, we record a pointer
10619 from one copy of the vector stmt to the next, in the field
10620 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
10621 stages to find the correct vector defs to be used when vectorizing
10622 stmts that use the defs of the current stmt. The example below
10623 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
10624 need to create 4 vectorized stmts):
10625
10626 before vectorization:
10627 RELATED_STMT VEC_STMT
10628 S1: x = memref - -
10629 S2: z = x + 1 - -
10630
10631 step 1: vectorize stmt S1:
10632 We first create the vector stmt VS1_0, and, as usual, record a
10633 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
10634 Next, we create the vector stmt VS1_1, and record a pointer to
10635 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
10636 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
10637 stmts and pointers:
10638 RELATED_STMT VEC_STMT
10639 VS1_0: vx0 = memref0 VS1_1 -
10640 VS1_1: vx1 = memref1 VS1_2 -
10641 VS1_2: vx2 = memref2 VS1_3 -
10642 VS1_3: vx3 = memref3 - -
10643 S1: x = load - VS1_0
10644 S2: z = x + 1 - -
10645 */
10646
10647 /* In case of interleaving (non-unit grouped access):
10648
10649 S1: x2 = &base + 2
10650 S2: x0 = &base
10651 S3: x1 = &base + 1
10652 S4: x3 = &base + 3
10653
10654 Vectorized loads are created in the order of memory accesses
10655 starting from the access of the first stmt of the chain:
10656
10657 VS1: vx0 = &base
10658 VS2: vx1 = &base + vec_size*1
10659 VS3: vx3 = &base + vec_size*2
10660 VS4: vx4 = &base + vec_size*3
10661
10662 Then permutation statements are generated:
10663
10664 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
10665 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
10666 ...
10667
10668 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
10669 (the order of the data-refs in the output of vect_permute_load_chain
10670 corresponds to the order of scalar stmts in the interleaving chain - see
10671 the documentation of vect_permute_load_chain()).
10672 The generation of permutation stmts and recording them in
10673 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
10674
10675 In case of both multiple types and interleaving, the vector loads and
10676 permutation stmts above are created for every copy. The result vector
10677 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
10678 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
10679
10680 /* If the data reference is aligned (dr_aligned) or potentially unaligned
10681 on a target that supports unaligned accesses (dr_unaligned_supported)
10682 we generate the following code:
10683 p = initial_addr;
10684 indx = 0;
10685 loop {
10686 p = p + indx * vectype_size;
10687 vec_dest = *(p);
10688 indx = indx + 1;
10689 }
10690
10691 Otherwise, the data reference is potentially unaligned on a target that
10692 does not support unaligned accesses (dr_explicit_realign_optimized) -
10693 then generate the following code, in which the data in each iteration is
10694 obtained by two vector loads, one from the previous iteration, and one
10695 from the current iteration:
10696 p1 = initial_addr;
10697 msq_init = *(floor(p1))
10698 p2 = initial_addr + VS - 1;
10699 realignment_token = call target_builtin;
10700 indx = 0;
10701 loop {
10702 p2 = p2 + indx * vectype_size
10703 lsq = *(floor(p2))
10704 vec_dest = realign_load (msq, lsq, realignment_token)
10705 indx = indx + 1;
10706 msq = lsq;
10707 } */
10708
10709 /* If the misalignment remains the same throughout the execution of the
10710 loop, we can create the init_addr and permutation mask at the loop
10711 preheader. Otherwise, it needs to be created inside the loop.
10712 This can only occur when vectorizing memory accesses in the inner-loop
10713 nested within an outer-loop that is being vectorized. */
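  /* An illustrative sketch: when outer-loop vectorizing

       for (i = 0; i < n; i++)       <- outer loop, vectorized
	 for (j = 0; j < m; j++)     <- inner loop
	   ... = a[i + j * stride];

     the start address of the inner-loop access changes with every outer
     iteration, so its misalignment can differ per iteration and the
     realignment data has to be computed inside the loop.  */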
10714
10715 if (nested_in_vect_loop
10716 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
10717 b: GET_MODE_SIZE (TYPE_MODE (vectype))))
10718 {
10719 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
10720 compute_in_loop = true;
10721 }
10722
10723 bool diff_first_stmt_info
10724 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
10725
10726 tree offset = NULL_TREE;
10727 if ((alignment_support_scheme == dr_explicit_realign_optimized
10728 || alignment_support_scheme == dr_explicit_realign)
10729 && !compute_in_loop)
10730 {
10731 /* If we have a different first_stmt_info, we can't set up realignment
10732 here, since we can't guarantee first_stmt_info's DR has been
10733 initialized yet; instead use first_stmt_info_for_drptr's DR, bumped
10734 by the distance from first_stmt_info's DR, as done below. */
10735 if (!costing_p)
10736 {
10737 if (!diff_first_stmt_info)
10738 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
10739 &realignment_token,
10740 alignment_support_scheme, NULL_TREE,
10741 &at_loop);
10742 if (alignment_support_scheme == dr_explicit_realign_optimized)
10743 {
10744 phi = as_a<gphi *> (SSA_NAME_DEF_STMT (msq));
10745 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
10746 size_one_node);
10747 gcc_assert (!first_stmt_info_for_drptr);
10748 }
10749 }
10750 }
10751 else
10752 at_loop = loop;
10753
10754 if (!known_eq (poffset, 0))
10755 offset = (offset
10756 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
10757 : size_int (poffset));
10758
10759 tree bump;
10760 tree vec_offset = NULL_TREE;
10761 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10762 {
10763 aggr_type = NULL_TREE;
10764 bump = NULL_TREE;
10765 }
10766 else if (memory_access_type == VMAT_GATHER_SCATTER)
10767 {
10768 aggr_type = elem_type;
10769 if (!costing_p)
10770 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, gs_info: &gs_info,
10771 dataref_bump: &bump, vec_offset: &vec_offset, loop_lens);
10772 }
10773 else
10774 {
10775 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10776 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
10777 else
10778 aggr_type = vectype;
10779 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
10780 memory_access_type, loop_lens);
10781 }
10782
10783 auto_vec<tree> vec_offsets;
10784 auto_vec<tree> vec_masks;
10785 if (mask && !costing_p)
10786 {
10787 if (slp_node)
10788 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
10789 &vec_masks);
10790 else
10791 vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info, ncopies, op: mask,
10792 vec_oprnds: &vec_masks, vectype: mask_vectype);
10793 }
10794
10795 tree vec_mask = NULL_TREE;
10796 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10797 {
10798 gcc_assert (alignment_support_scheme == dr_aligned
10799 || alignment_support_scheme == dr_unaligned_supported);
10800 gcc_assert (grouped_load && !slp);
10801
10802 unsigned int inside_cost = 0, prologue_cost = 0;
10803 /* For costing some adjacent vector loads, we'd like to cost them
10804 once with their total number instead of costing each one by one. */
10805 unsigned int n_adjacent_loads = 0;
10806 for (j = 0; j < ncopies; j++)
10807 {
10808 if (costing_p)
10809 {
10810 /* An IFN_LOAD_LANES will load all its vector results,
10811 regardless of which ones we actually need. Account
10812 for the cost of unused results. */
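	      /* E.g. (illustrative) with DR_GROUP_SIZE 4 but only two
		 statements left in the group, LOAD_LANES still fills
		 four vectors, so two unused vectors are costed as
		 additional loads below.  */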
10813 if (first_stmt_info == stmt_info)
10814 {
10815 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
10816 stmt_vec_info next_stmt_info = first_stmt_info;
10817 do
10818 {
10819 gaps -= 1;
10820 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
10821 }
10822 while (next_stmt_info);
10823 if (gaps)
10824 {
10825 if (dump_enabled_p ())
10826 dump_printf_loc (MSG_NOTE, vect_location,
10827 "vect_model_load_cost: %d "
10828 "unused vectors.\n",
10829 gaps);
10830 vect_get_load_cost (vinfo, stmt_info, ncopies: gaps,
10831 alignment_support_scheme,
10832 misalignment, add_realign_cost: false, inside_cost: &inside_cost,
10833 prologue_cost: &prologue_cost, prologue_cost_vec: cost_vec, body_cost_vec: cost_vec,
10834 record_prologue_costs: true);
10835 }
10836 }
10837 n_adjacent_loads++;
10838 continue;
10839 }
10840
10841 /* 1. Create the vector or array pointer update chain. */
10842 if (j == 0)
10843 dataref_ptr
10844 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10845 at_loop, offset, &dummy, gsi,
10846 &ptr_incr, false, bump);
10847 else
10848 {
10849 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10850 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10851 stmt_info, bump);
10852 }
10853 if (mask)
10854 vec_mask = vec_masks[j];
10855
10856 tree vec_array = create_vector_array (elem_type: vectype, nelems: vec_num);
10857
10858 tree final_mask = NULL_TREE;
10859 tree final_len = NULL_TREE;
10860 tree bias = NULL_TREE;
10861 if (loop_masks)
10862 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10863 ncopies, vectype, j);
10864 if (vec_mask)
10865 final_mask = prepare_vec_mask (loop_vinfo, mask_type: mask_vectype, loop_mask: final_mask,
10866 vec_mask, gsi);
10867
10868 if (lanes_ifn == IFN_MASK_LEN_LOAD_LANES)
10869 {
10870 if (loop_lens)
10871 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10872 ncopies, vectype, j, 1);
10873 else
10874 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
10875 signed char biasval
10876 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10877 bias = build_int_cst (intQI_type_node, biasval);
10878 if (!final_mask)
10879 {
10880 mask_vectype = truth_type_for (vectype);
10881 final_mask = build_minus_one_cst (mask_vectype);
10882 }
10883 }
10884
10885 gcall *call;
10886 if (final_len && final_mask)
10887 {
10888 /* Emit:
10889 VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10890 VEC_MASK, LEN, BIAS). */
10891 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10892 tree alias_ptr = build_int_cst (ref_type, align);
10893 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5,
10894 dataref_ptr, alias_ptr,
10895 final_mask, final_len, bias);
10896 }
10897 else if (final_mask)
10898 {
10899 /* Emit:
10900 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10901 VEC_MASK). */
10902 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10903 tree alias_ptr = build_int_cst (ref_type, align);
10904 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
10905 dataref_ptr, alias_ptr,
10906 final_mask);
10907 }
10908 else
10909 {
10910 /* Emit:
10911 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
10912 data_ref = create_array_ref (type: aggr_type, ptr: dataref_ptr, alias_ptr_type: ref_type);
10913 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
10914 }
10915 gimple_call_set_lhs (gs: call, lhs: vec_array);
10916 gimple_call_set_nothrow (s: call, nothrow_p: true);
10917 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi);
10918
10919 dr_chain.create (nelems: vec_num);
10920 /* Extract each vector into an SSA_NAME. */
10921 for (i = 0; i < vec_num; i++)
10922 {
10923 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
10924 array: vec_array, n: i);
10925 dr_chain.quick_push (obj: new_temp);
10926 }
10927
10928 /* Record the mapping between SSA_NAMEs and statements. */
10929 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
10930
10931 /* Record that VEC_ARRAY is now dead. */
10932 vect_clobber_variable (vinfo, stmt_info, gsi, var: vec_array);
10933
10934 dr_chain.release ();
10935
10936 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10937 }
10938
10939 if (costing_p)
10940 {
10941 if (n_adjacent_loads > 0)
10942 vect_get_load_cost (vinfo, stmt_info, ncopies: n_adjacent_loads,
10943 alignment_support_scheme, misalignment, add_realign_cost: false,
10944 inside_cost: &inside_cost, prologue_cost: &prologue_cost, prologue_cost_vec: cost_vec,
10945 body_cost_vec: cost_vec, record_prologue_costs: true);
10946 if (dump_enabled_p ())
10947 dump_printf_loc (MSG_NOTE, vect_location,
10948 "vect_model_load_cost: inside_cost = %u, "
10949 "prologue_cost = %u .\n",
10950 inside_cost, prologue_cost);
10951 }
10952
10953 return true;
10954 }
10955
10956 if (memory_access_type == VMAT_GATHER_SCATTER)
10957 {
10958 gcc_assert (alignment_support_scheme == dr_aligned
10959 || alignment_support_scheme == dr_unaligned_supported);
10960 gcc_assert (!grouped_load && !slp_perm);
10961
10962 unsigned int inside_cost = 0, prologue_cost = 0;
10963 for (j = 0; j < ncopies; j++)
10964 {
10965 /* 1. Create the vector or array pointer update chain. */
10966 if (j == 0 && !costing_p)
10967 {
10968 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10969 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
10970 slp_node, gs_info: &gs_info, dataref_ptr: &dataref_ptr,
10971 vec_offset: &vec_offsets);
10972 else
10973 dataref_ptr
10974 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10975 at_loop, offset, &dummy, gsi,
10976 &ptr_incr, false, bump);
10977 }
10978 else if (!costing_p)
10979 {
10980 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10981 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10982 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10983 gsi, stmt_info, bump);
10984 }
10985
10986 gimple *new_stmt = NULL;
10987 for (i = 0; i < vec_num; i++)
10988 {
10989 tree final_mask = NULL_TREE;
10990 tree final_len = NULL_TREE;
10991 tree bias = NULL_TREE;
10992 if (!costing_p)
10993 {
10994 if (mask)
10995 vec_mask = vec_masks[vec_num * j + i];
10996 if (loop_masks)
10997 final_mask
10998 = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10999 vec_num * ncopies, vectype,
11000 vec_num * j + i);
11001 if (vec_mask)
11002 final_mask = prepare_vec_mask (loop_vinfo, mask_type: mask_vectype,
11003 loop_mask: final_mask, vec_mask, gsi);
11004
11005 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
11006 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
11007 gsi, stmt_info, bump);
11008 }
11009
11010 /* 2. Create the vector-load in the loop. */
11011 unsigned HOST_WIDE_INT align;
11012 if (gs_info.ifn != IFN_LAST)
11013 {
11014 if (costing_p)
11015 {
11016 unsigned int cnunits = vect_nunits_for_cost (vec_type: vectype);
11017 inside_cost
11018 = record_stmt_cost (body_cost_vec: cost_vec, count: cnunits, kind: scalar_load,
11019 stmt_info, misalign: 0, where: vect_body);
11020 continue;
11021 }
11022 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
11023 vec_offset = vec_offsets[vec_num * j + i];
11024 tree zero = build_zero_cst (vectype);
11025 tree scale = size_int (gs_info.scale);
11026
11027 if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
11028 {
11029 if (loop_lens)
11030 final_len
11031 = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
11032 vec_num * ncopies, vectype,
11033 vec_num * j + i, 1);
11034 else
11035 final_len
11036 = build_int_cst (sizetype,
11037 TYPE_VECTOR_SUBPARTS (node: vectype));
11038 signed char biasval
11039 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
11040 bias = build_int_cst (intQI_type_node, biasval);
11041 if (!final_mask)
11042 {
11043 mask_vectype = truth_type_for (vectype);
11044 final_mask = build_minus_one_cst (mask_vectype);
11045 }
11046 }
11047
11048 gcall *call;
11049 if (final_len && final_mask)
11050 call
11051 = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, 7,
11052 dataref_ptr, vec_offset,
11053 scale, zero, final_mask,
11054 final_len, bias);
11055 else if (final_mask)
11056 call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5,
11057 dataref_ptr, vec_offset,
11058 scale, zero, final_mask);
11059 else
11060 call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
11061 dataref_ptr, vec_offset,
11062 scale, zero);
11063 gimple_call_set_nothrow (s: call, nothrow_p: true);
11064 new_stmt = call;
11065 data_ref = NULL_TREE;
11066 }
11067 else if (gs_info.decl)
11068 {
11069 /* The builtin decls path for gather is legacy, x86 only. */
11070 gcc_assert (!final_len && nunits.is_constant ());
11071 if (costing_p)
11072 {
11073 unsigned int cnunits = vect_nunits_for_cost (vec_type: vectype);
11074 inside_cost
11075 = record_stmt_cost (body_cost_vec: cost_vec, count: cnunits, kind: scalar_load,
11076 stmt_info, misalign: 0, where: vect_body);
11077 continue;
11078 }
11079 poly_uint64 offset_nunits
11080 = TYPE_VECTOR_SUBPARTS (node: gs_info.offset_vectype);
11081 if (known_eq (nunits, offset_nunits))
11082 {
11083 new_stmt = vect_build_one_gather_load_call
11084 (vinfo, stmt_info, gsi, gs_info: &gs_info,
11085 ptr: dataref_ptr, offset: vec_offsets[vec_num * j + i],
11086 mask: final_mask);
11087 data_ref = NULL_TREE;
11088 }
11089 else if (known_eq (nunits, offset_nunits * 2))
11090 {
11091 /* We have an offset vector with half the number of
11092 lanes but the builtins will produce full vectype
11093 data with just the lower lanes filled. */
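		      /* Roughly (an illustrative sketch only):
			   low  = gather (ptr, offsets_lo, mask_lo);
			   high = gather (ptr, offsets_hi, mask_hi);
			   res  = VEC_PERM <low, high,
					    { 0 .. n/2-1, n .. n+n/2-1 }>;
			 where each gather call fills only the lower
			 half of its result vector.  */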
11094 new_stmt = vect_build_one_gather_load_call
11095 (vinfo, stmt_info, gsi, gs_info: &gs_info,
11096 ptr: dataref_ptr, offset: vec_offsets[2 * vec_num * j + 2 * i],
11097 mask: final_mask);
11098 tree low = make_ssa_name (var: vectype);
11099 gimple_set_lhs (new_stmt, low);
11100 vect_finish_stmt_generation (vinfo, stmt_info,
11101 vec_stmt: new_stmt, gsi);
11102
11103 /* Now move the upper half of FINAL_MASK into its lower half. */
11104 if (final_mask
11105 && !SCALAR_INT_MODE_P
11106 (TYPE_MODE (TREE_TYPE (final_mask))))
11107 {
11108 int count = nunits.to_constant ();
11109 vec_perm_builder sel (count, count, 1);
11110 sel.quick_grow (len: count);
11111 for (int i = 0; i < count; ++i)
11112 sel[i] = i | (count / 2);
11113 vec_perm_indices indices (sel, 2, count);
11114 tree perm_mask = vect_gen_perm_mask_checked
11115 (TREE_TYPE (final_mask), sel: indices);
11116 new_stmt = gimple_build_assign (NULL_TREE,
11117 VEC_PERM_EXPR,
11118 final_mask,
11119 final_mask,
11120 perm_mask);
11121 final_mask = make_ssa_name (TREE_TYPE (final_mask));
11122 gimple_set_lhs (new_stmt, final_mask);
11123 vect_finish_stmt_generation (vinfo, stmt_info,
11124 vec_stmt: new_stmt, gsi);
11125 }
11126 else if (final_mask)
11127 {
11128 new_stmt = gimple_build_assign (NULL_TREE,
11129 VEC_UNPACK_HI_EXPR,
11130 final_mask);
11131 final_mask = make_ssa_name
11132 (var: truth_type_for (gs_info.offset_vectype));
11133 gimple_set_lhs (new_stmt, final_mask);
11134 vect_finish_stmt_generation (vinfo, stmt_info,
11135 vec_stmt: new_stmt, gsi);
11136 }
11137
11138 new_stmt = vect_build_one_gather_load_call
11139 (vinfo, stmt_info, gsi, gs_info: &gs_info,
11140 ptr: dataref_ptr,
11141 offset: vec_offsets[2 * vec_num * j + 2 * i + 1],
11142 mask: final_mask);
11143 tree high = make_ssa_name (var: vectype);
11144 gimple_set_lhs (new_stmt, high);
11145 vect_finish_stmt_generation (vinfo, stmt_info,
11146 vec_stmt: new_stmt, gsi);
11147
11148 /* Compose the LOW and HIGH partial results into the full vector. */
11149 int count = nunits.to_constant ();
11150 vec_perm_builder sel (count, count, 1);
11151 sel.quick_grow (len: count);
11152 for (int i = 0; i < count; ++i)
11153 sel[i] = i < count / 2 ? i : i + count / 2;
11154 vec_perm_indices indices (sel, 2, count);
11155 tree perm_mask
11156 = vect_gen_perm_mask_checked (vectype, sel: indices);
11157 new_stmt = gimple_build_assign (NULL_TREE,
11158 VEC_PERM_EXPR,
11159 low, high, perm_mask);
11160 data_ref = NULL_TREE;
11161 }
11162 else if (known_eq (nunits * 2, offset_nunits))
11163 {
11164 /* We have an offset vector with double the number of
11165 lanes. Select the low/high part accordingly. */
11166 vec_offset = vec_offsets[(vec_num * j + i) / 2];
11167 if ((vec_num * j + i) & 1)
11168 {
11169 int count = offset_nunits.to_constant ();
11170 vec_perm_builder sel (count, count, 1);
11171 sel.quick_grow (len: count);
11172 for (int i = 0; i < count; ++i)
11173 sel[i] = i | (count / 2);
11174 vec_perm_indices indices (sel, 2, count);
11175 tree perm_mask = vect_gen_perm_mask_checked
11176 (TREE_TYPE (vec_offset), sel: indices);
11177 new_stmt = gimple_build_assign (NULL_TREE,
11178 VEC_PERM_EXPR,
11179 vec_offset,
11180 vec_offset,
11181 perm_mask);
11182 vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
11183 gimple_set_lhs (new_stmt, vec_offset);
11184 vect_finish_stmt_generation (vinfo, stmt_info,
11185 vec_stmt: new_stmt, gsi);
11186 }
11187 new_stmt = vect_build_one_gather_load_call
11188 (vinfo, stmt_info, gsi, gs_info: &gs_info,
11189 ptr: dataref_ptr, offset: vec_offset, mask: final_mask);
11190 data_ref = NULL_TREE;
11191 }
11192 else
11193 gcc_unreachable ();
11194 }
11195 else
11196 {
11197 /* Emulated gather-scatter. */
11198 gcc_assert (!final_mask);
11199 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
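		  /* The expansion is roughly (an illustrative
		     GIMPLE-like sketch), for each of the const_nunits
		     lanes:
		       idx_k = BIT_FIELD_REF <vec_offset, ...>;
		       ptr_k = dataref_ptr + (sizetype) idx_k * scale;
		       elt_k = MEM_REF <ptr_k>;
		     followed by building { elt_0, ..., elt_N-1 } with a
		     vector constructor.  */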
11200 if (costing_p)
11201 {
11202 /* For emulated gathers N offset vector element extracts (we assume
11203 the scalar scaling and ptr + offset add is consumed by the load). */
11204 inside_cost = record_stmt_cost (body_cost_vec: cost_vec, count: const_nunits,
11205 kind: vec_to_scalar, stmt_info,
11206 misalign: 0, where: vect_body);
11207 /* N scalar loads plus gathering them into a
11208 vector. */
11209 inside_cost
11210 = record_stmt_cost (body_cost_vec: cost_vec, count: const_nunits, kind: scalar_load,
11211 stmt_info, misalign: 0, where: vect_body);
11212 inside_cost
11213 = record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: vec_construct,
11214 stmt_info, misalign: 0, where: vect_body);
11215 continue;
11216 }
11217 unsigned HOST_WIDE_INT const_offset_nunits
11218 = TYPE_VECTOR_SUBPARTS (node: gs_info.offset_vectype)
11219 .to_constant ();
11220 vec<constructor_elt, va_gc> *ctor_elts;
11221 vec_alloc (v&: ctor_elts, nelems: const_nunits);
11222 gimple_seq stmts = NULL;
11223 /* We support offset vectors with more elements
11224 than the data vector for now. */
11225 unsigned HOST_WIDE_INT factor
11226 = const_offset_nunits / const_nunits;
11227 vec_offset = vec_offsets[(vec_num * j + i) / factor];
11228 unsigned elt_offset
11229 = ((vec_num * j + i) % factor) * const_nunits;
11230 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
11231 tree scale = size_int (gs_info.scale);
11232 align = get_object_alignment (DR_REF (first_dr_info->dr));
11233 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
11234 for (unsigned k = 0; k < const_nunits; ++k)
11235 {
11236 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
11237 bitsize_int (k + elt_offset));
11238 tree idx
11239 = gimple_build (seq: &stmts, code: BIT_FIELD_REF, type: idx_type,
11240 ops: vec_offset, TYPE_SIZE (idx_type), ops: boff);
11241 idx = gimple_convert (seq: &stmts, sizetype, op: idx);
11242 idx = gimple_build (seq: &stmts, code: MULT_EXPR, sizetype, ops: idx,
11243 ops: scale);
11244 tree ptr = gimple_build (seq: &stmts, code: PLUS_EXPR,
11245 TREE_TYPE (dataref_ptr),
11246 ops: dataref_ptr, ops: idx);
11247 ptr = gimple_convert (seq: &stmts, ptr_type_node, op: ptr);
11248 tree elt = make_ssa_name (TREE_TYPE (vectype));
11249 tree ref = build2 (MEM_REF, ltype, ptr,
11250 build_int_cst (ref_type, 0));
11251 new_stmt = gimple_build_assign (elt, ref);
11252 gimple_set_vuse (g: new_stmt, vuse: gimple_vuse (g: gsi_stmt (i: *gsi)));
11253 gimple_seq_add_stmt (&stmts, new_stmt);
11254 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
11255 }
11256 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
11257 new_stmt = gimple_build_assign (
11258 NULL_TREE, build_constructor (vectype, ctor_elts));
11259 data_ref = NULL_TREE;
11260 }
11261
11262 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11263 /* DATA_REF is null if we've already built the statement. */
11264 if (data_ref)
11265 {
11266 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11267 new_stmt = gimple_build_assign (vec_dest, data_ref);
11268 }
11269 new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt);
11270 gimple_set_lhs (new_stmt, new_temp);
11271 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
11272
11273 /* Store vector loads in the corresponding SLP_NODE. */
11274 if (slp)
11275 slp_node->push_vec_def (def: new_stmt);
11276 }
11277
11278 if (!slp && !costing_p)
11279 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
11280 }
11281
11282 if (!slp && !costing_p)
11283 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11284
11285 if (costing_p && dump_enabled_p ())
11286 dump_printf_loc (MSG_NOTE, vect_location,
11287 "vect_model_load_cost: inside_cost = %u, "
11288 "prologue_cost = %u .\n",
11289 inside_cost, prologue_cost);
11290 return true;
11291 }
11292
11293 poly_uint64 group_elt = 0;
11294 unsigned int inside_cost = 0, prologue_cost = 0;
11295 /* For costing some adjacent vector loads, we'd like to cost them
11296 once with their total number instead of costing each one by one. */
11297 unsigned int n_adjacent_loads = 0;
11298 for (j = 0; j < ncopies; j++)
11299 {
11300 /* 1. Create the vector or array pointer update chain. */
11301 if (j == 0 && !costing_p)
11302 {
11303 bool simd_lane_access_p
11304 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
11305 if (simd_lane_access_p
11306 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
11307 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
11308 && integer_zerop (get_dr_vinfo_offset (vinfo, dr_info: first_dr_info))
11309 && integer_zerop (DR_INIT (first_dr_info->dr))
11310 && alias_sets_conflict_p (get_alias_set (aggr_type),
11311 get_alias_set (TREE_TYPE (ref_type)))
11312 && (alignment_support_scheme == dr_aligned
11313 || alignment_support_scheme == dr_unaligned_supported))
11314 {
11315 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
11316 dataref_offset = build_int_cst (ref_type, 0);
11317 }
11318 else if (diff_first_stmt_info)
11319 {
11320 dataref_ptr
11321 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
11322 aggr_type, at_loop, offset, &dummy,
11323 gsi, &ptr_incr, simd_lane_access_p,
11324 bump);
11325 /* Adjust the pointer by the difference to first_stmt. */
11326 data_reference_p ptrdr
11327 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
11328 tree diff
11329 = fold_convert (sizetype,
11330 size_binop (MINUS_EXPR,
11331 DR_INIT (first_dr_info->dr),
11332 DR_INIT (ptrdr)));
11333 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11334 stmt_info, diff);
11335 if (alignment_support_scheme == dr_explicit_realign)
11336 {
11337 msq = vect_setup_realignment (vinfo,
11338 first_stmt_info_for_drptr, gsi,
11339 &realignment_token,
11340 alignment_support_scheme,
11341 dataref_ptr, &at_loop);
11342 gcc_assert (!compute_in_loop);
11343 }
11344 }
11345 else
11346 dataref_ptr
11347 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
11348 at_loop,
11349 offset, &dummy, gsi, &ptr_incr,
11350 simd_lane_access_p, bump);
11351 }
11352 else if (!costing_p)
11353 {
11354 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
11355 if (dataref_offset)
11356 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
11357 bump);
11358 else
11359 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11360 stmt_info, bump);
11361 }
11362
11363 if (grouped_load || slp_perm)
11364 dr_chain.create (nelems: vec_num);
11365
11366 gimple *new_stmt = NULL;
11367 for (i = 0; i < vec_num; i++)
11368 {
11369 tree final_mask = NULL_TREE;
11370 tree final_len = NULL_TREE;
11371 tree bias = NULL_TREE;
11372 if (!costing_p)
11373 {
11374 if (mask)
11375 vec_mask = vec_masks[vec_num * j + i];
11376 if (loop_masks)
11377 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
11378 vec_num * ncopies, vectype,
11379 vec_num * j + i);
11380 if (vec_mask)
11381 final_mask = prepare_vec_mask (loop_vinfo, mask_type: mask_vectype,
11382 loop_mask: final_mask, vec_mask, gsi);
11383
11384 if (i > 0)
11385 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
11386 gsi, stmt_info, bump);
11387 }
11388
11389 /* 2. Create the vector-load in the loop. */
11390 switch (alignment_support_scheme)
11391 {
11392 case dr_aligned:
11393 case dr_unaligned_supported:
11394 {
11395 if (costing_p)
11396 break;
11397
11398 unsigned int misalign;
11399 unsigned HOST_WIDE_INT align;
11400 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
11401 if (alignment_support_scheme == dr_aligned)
11402 misalign = 0;
11403 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
11404 {
11405 align
11406 = dr_alignment (vect_dr_behavior (vinfo, dr_info: first_dr_info));
11407 misalign = 0;
11408 }
11409 else
11410 misalign = misalignment;
11411 if (dataref_offset == NULL_TREE
11412 && TREE_CODE (dataref_ptr) == SSA_NAME)
11413 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
11414 misalign);
11415 align = least_bit_hwi (x: misalign | align);
11416
11417 /* Compute IFN when LOOP_LENS or final_mask valid. */
11418 machine_mode vmode = TYPE_MODE (vectype);
11419 machine_mode new_vmode = vmode;
11420 internal_fn partial_ifn = IFN_LAST;
11421 if (loop_lens)
11422 {
11423 opt_machine_mode new_ovmode
11424 = get_len_load_store_mode (vmode, true, &partial_ifn);
11425 new_vmode = new_ovmode.require ();
11426 unsigned factor
11427 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
11428 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
11429 vec_num * ncopies, vectype,
11430 vec_num * j + i, factor);
11431 }
11432 else if (final_mask)
11433 {
11434 if (!can_vec_mask_load_store_p (
11435 vmode, TYPE_MODE (TREE_TYPE (final_mask)), true,
11436 &partial_ifn))
11437 gcc_unreachable ();
11438 }
11439
11440 if (partial_ifn == IFN_MASK_LEN_LOAD)
11441 {
11442 if (!final_len)
11443 {
11444 /* Pass VF value to 'len' argument of
11445 MASK_LEN_LOAD if LOOP_LENS is invalid. */
11446 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11447 }
11448 if (!final_mask)
11449 {
11450 /* Pass all ones value to 'mask' argument of
11451 MASK_LEN_LOAD if final_mask is invalid. */
11452 mask_vectype = truth_type_for (vectype);
11453 final_mask = build_minus_one_cst (mask_vectype);
11454 }
11455 }
11456 if (final_len)
11457 {
11458 signed char biasval
11459 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
11460
11461 bias = build_int_cst (intQI_type_node, biasval);
11462 }
11463
11464 if (final_len)
11465 {
11466 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11467 gcall *call;
11468 if (partial_ifn == IFN_MASK_LEN_LOAD)
11469 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, 5,
11470 dataref_ptr, ptr,
11471 final_mask, final_len,
11472 bias);
11473 else
11474 call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
11475 dataref_ptr, ptr,
11476 final_len, bias);
11477 gimple_call_set_nothrow (s: call, nothrow_p: true);
11478 new_stmt = call;
11479 data_ref = NULL_TREE;
11480
11481 /* Need conversion if it's wrapped with VnQI. */
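			  /* E.g. (illustrative) the target may only
			     provide LEN_LOAD in a byte mode such as
			     V16QI; the loaded value is then
			     VIEW_CONVERTed back to the original vectype
			     such as V4SI.  */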
11482 if (vmode != new_vmode)
11483 {
11484 tree new_vtype = build_vector_type_for_mode (
11485 unsigned_intQI_type_node, new_vmode);
11486 tree var
11487 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
11488 gimple_set_lhs (call, var);
11489 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call,
11490 gsi);
11491 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
11492 new_stmt = gimple_build_assign (vec_dest,
11493 VIEW_CONVERT_EXPR, op);
11494 }
11495 }
11496 else if (final_mask)
11497 {
11498 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11499 gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
11500 dataref_ptr, ptr,
11501 final_mask);
11502 gimple_call_set_nothrow (s: call, nothrow_p: true);
11503 new_stmt = call;
11504 data_ref = NULL_TREE;
11505 }
11506 else
11507 {
11508 tree ltype = vectype;
11509 tree new_vtype = NULL_TREE;
11510 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
11511 unsigned int vect_align
11512 = vect_known_alignment_in_bytes (dr_info: first_dr_info, vectype);
11513 unsigned int scalar_dr_size
11514 = vect_get_scalar_dr_size (dr_info: first_dr_info);
11515 /* If there's no peeling for gaps but we have a gap
11516 with SLP loads then load only the lower half of the
11517 vector. See get_group_load_store_type for when we
11518 apply this optimization. */
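      /* A hedged illustration (element counts made up for this sketch):
	 with a V4SI vector type, DR_GROUP_SIZE == 4 and a trailing gap
	 of 2, only the first two elements of each group are live, so a
	 V2SI load of the lower half avoids reading into the gap.  */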
11519 if (slp
11520 && loop_vinfo
11521 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && gap != 0
11522 && known_eq (nunits, (group_size - gap) * 2)
11523 && known_eq (nunits, group_size)
11524 && gap >= (vect_align / scalar_dr_size))
11525 {
11526 tree half_vtype;
11527 new_vtype
11528 = vector_vector_composition_type (vtype: vectype, nelts: 2,
11529 ptype: &half_vtype);
11530 if (new_vtype != NULL_TREE)
11531 ltype = half_vtype;
11532 }
11533 /* Try to use a single smaller load when we are about
11534 to load excess elements compared to the unrolled
11535 scalar loop.
11536 ??? This should cover the above case as well. */
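      /* A hedged illustration (numbers made up for this sketch): with
	 V8HI vectors, DR_GROUP_SIZE == 8, a gap of 6 and VF == 1 only
	 group_size * vf - gap == 2 elements are needed, so 'remain' is
	 2, a V2HI load is used and the result is widened back to V8HI
	 below via a zero-padded CONSTRUCTOR.  */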
11537 else if (known_gt ((vec_num * j + i + 1) * nunits,
11538 (group_size * vf - gap)))
11539 {
11540 if (known_ge ((vec_num * j + i + 1) * nunits
11541 - (group_size * vf - gap), nunits))
11542 /* DR will be unused. */
11543 ltype = NULL_TREE;
11544 else if (known_ge (vect_align,
11545 tree_to_poly_uint64
11546 (TYPE_SIZE_UNIT (vectype))))
11547 /* Aligned access to excess elements is OK if
11548 at least one element is accessed in the
11549 scalar loop. */
11550 ;
11551 else
11552 {
11553 auto remain
11554 = ((group_size * vf - gap)
11555 - (vec_num * j + i) * nunits);
11556 /* remain should now be > 0 and < nunits. */
11557 unsigned num;
11558 if (constant_multiple_p (a: nunits, b: remain, multiple: &num))
11559 {
11560 tree ptype;
11561 new_vtype
11562 = vector_vector_composition_type (vtype: vectype,
11563 nelts: num,
11564 ptype: &ptype);
11565 if (new_vtype)
11566 ltype = ptype;
11567 }
11568 /* Else use multiple loads or a masked load? */
11569 }
11570 }
11571 tree offset
11572 = (dataref_offset ? dataref_offset
11573 : build_int_cst (ref_type, 0));
11574 if (!ltype)
11575 ;
11576 else if (ltype != vectype
11577 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11578 {
11579 poly_uint64 gap_offset
11580 = (tree_to_poly_uint64 (TYPE_SIZE_UNIT (vectype))
11581 - tree_to_poly_uint64 (TYPE_SIZE_UNIT (ltype)));
11582 tree gapcst = build_int_cstu (type: ref_type, gap_offset);
11583 offset = size_binop (PLUS_EXPR, offset, gapcst);
11584 }
11585 if (ltype)
11586 {
11587 data_ref
11588 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
11589 if (alignment_support_scheme == dr_aligned)
11590 ;
11591 else
11592 TREE_TYPE (data_ref)
11593 = build_aligned_type (TREE_TYPE (data_ref),
11594 align * BITS_PER_UNIT);
11595 }
11596 if (!ltype)
11597 data_ref = build_constructor (vectype, NULL);
11598 else if (ltype != vectype)
11599 {
11600 vect_copy_ref_info (data_ref,
11601 DR_REF (first_dr_info->dr));
11602 tree tem = make_ssa_name (var: ltype);
11603 new_stmt = gimple_build_assign (tem, data_ref);
11604 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt,
11605 gsi);
11606 data_ref = NULL;
11607 vec<constructor_elt, va_gc> *v;
11608 /* Above, 'num' was computed to be statically two
11609 or derived via constant_multiple_p. */
11610 unsigned num
11611 = (exact_div (a: tree_to_poly_uint64
11612 (TYPE_SIZE_UNIT (vectype)),
11613 b: tree_to_poly_uint64
11614 (TYPE_SIZE_UNIT (ltype)))
11615 .to_constant ());
11616 vec_alloc (v, nelems: num);
11617 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11618 {
11619 while (--num)
11620 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11621 build_zero_cst (ltype));
11622 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11623 }
11624 else
11625 {
11626 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11627 while (--num)
11628 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11629 build_zero_cst (ltype));
11630 }
11631 gcc_assert (new_vtype != NULL_TREE);
11632 if (new_vtype == vectype)
11633 new_stmt = gimple_build_assign (
11634 vec_dest, build_constructor (vectype, v));
11635 else
11636 {
11637 tree new_vname = make_ssa_name (var: new_vtype);
11638 new_stmt = gimple_build_assign (
11639 new_vname, build_constructor (new_vtype, v));
11640 vect_finish_stmt_generation (vinfo, stmt_info,
11641 vec_stmt: new_stmt, gsi);
11642 new_stmt = gimple_build_assign (
11643 vec_dest,
11644 build1 (VIEW_CONVERT_EXPR, vectype, new_vname));
11645 }
11646 }
11647 }
11648 break;
11649 }
11650 case dr_explicit_realign:
11651 {
11652 if (costing_p)
11653 break;
11654 tree ptr, bump;
11655
11656 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11657
11658 if (compute_in_loop)
11659 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
11660 &realignment_token,
11661 dr_explicit_realign,
11662 dataref_ptr, NULL);
11663
11664 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11665 ptr = copy_ssa_name (var: dataref_ptr);
11666 else
11667 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
11668 // For explicit realign the target alignment should be
11669 // known at compile time.
11670 unsigned HOST_WIDE_INT align
11671 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11672 new_stmt = gimple_build_assign (
11673 ptr, BIT_AND_EXPR, dataref_ptr,
11674 build_int_cst (TREE_TYPE (dataref_ptr),
11675 -(HOST_WIDE_INT) align));
11676 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
11677 data_ref
11678 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11679 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11680 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11681 new_stmt = gimple_build_assign (vec_dest, data_ref);
11682 new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt);
11683 gimple_assign_set_lhs (gs: new_stmt, lhs: new_temp);
11684 gimple_move_vops (new_stmt, stmt_info->stmt);
11685 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
11686 msq = new_temp;
11687
11688 bump = size_binop (MULT_EXPR, vs, TYPE_SIZE_UNIT (elem_type));
11689 bump = size_binop (MINUS_EXPR, bump, size_one_node);
11690 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi, stmt_info,
11691 bump);
11692 new_stmt = gimple_build_assign (
11693 NULL_TREE, BIT_AND_EXPR, ptr,
11694 build_int_cst (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
11695 if (TREE_CODE (ptr) == SSA_NAME)
11696 ptr = copy_ssa_name (var: ptr, stmt: new_stmt);
11697 else
11698 ptr = make_ssa_name (TREE_TYPE (ptr), stmt: new_stmt);
11699 gimple_assign_set_lhs (gs: new_stmt, lhs: ptr);
11700 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
11701 data_ref
11702 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11703 break;
11704 }
11705 case dr_explicit_realign_optimized:
11706 {
11707 if (costing_p)
11708 break;
11709 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11710 new_temp = copy_ssa_name (var: dataref_ptr);
11711 else
11712 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
11713 // We should only be doing this if we know the target
11714 // alignment at compile time.
11715 unsigned HOST_WIDE_INT align
11716 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11717 new_stmt = gimple_build_assign (
11718 new_temp, BIT_AND_EXPR, dataref_ptr,
11719 build_int_cst (TREE_TYPE (dataref_ptr),
11720 -(HOST_WIDE_INT) align));
11721 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
11722 data_ref = build2 (MEM_REF, vectype, new_temp,
11723 build_int_cst (ref_type, 0));
11724 break;
11725 }
11726 default:
11727 gcc_unreachable ();
11728 }
11729
11730 /* Cost the vector load generated above in one common place
11731 for the different alignment support schemes. */
11732 if (costing_p)
11733 {
11734 /* For VMAT_CONTIGUOUS_PERMUTE with a grouped load we only
11735 need to take care of the first stmt, whose stmt_info is
11736 first_stmt_info; iterating vec_num times on it covers the
11737 cost of the remaining stmts, consistent with the transform.
11738 The prologue cost for realignment only needs to be counted
11739 once for the whole group. */
11740 bool first_stmt_info_p = first_stmt_info == stmt_info;
11741 bool add_realign_cost = first_stmt_info_p && i == 0;
11742 if (memory_access_type == VMAT_CONTIGUOUS
11743 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
11744 || (memory_access_type == VMAT_CONTIGUOUS_PERMUTE
11745 && (!grouped_load || first_stmt_info_p)))
11746 {
11747 /* Leave realign cases alone to keep them simple. */
11748 if (alignment_support_scheme == dr_explicit_realign_optimized
11749 || alignment_support_scheme == dr_explicit_realign)
11750 vect_get_load_cost (vinfo, stmt_info, ncopies: 1,
11751 alignment_support_scheme, misalignment,
11752 add_realign_cost, inside_cost: &inside_cost,
11753 prologue_cost: &prologue_cost, prologue_cost_vec: cost_vec, body_cost_vec: cost_vec,
11754 record_prologue_costs: true);
11755 else
11756 n_adjacent_loads++;
11757 }
11758 }
11759 else
11760 {
11761 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11762 /* DATA_REF is null if we've already built the statement. */
11763 if (data_ref)
11764 {
11765 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11766 new_stmt = gimple_build_assign (vec_dest, data_ref);
11767 }
11768 new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt);
11769 gimple_set_lhs (new_stmt, new_temp);
11770 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
11771 }
11772
11773 /* 3. Handle explicit realignment if necessary/supported.
11774 Create in loop:
11775 vec_dest = realign_load (msq, lsq, realignment_token) */
11776 if (!costing_p
11777 && (alignment_support_scheme == dr_explicit_realign_optimized
11778 || alignment_support_scheme == dr_explicit_realign))
11779 {
11780 lsq = gimple_assign_lhs (gs: new_stmt);
11781 if (!realignment_token)
11782 realignment_token = dataref_ptr;
11783 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11784 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, msq,
11785 lsq, realignment_token);
11786 new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt);
11787 gimple_assign_set_lhs (gs: new_stmt, lhs: new_temp);
11788 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
11789
11790 if (alignment_support_scheme == dr_explicit_realign_optimized)
11791 {
11792 gcc_assert (phi);
11793 if (i == vec_num - 1 && j == ncopies - 1)
11794 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
11795 UNKNOWN_LOCATION);
11796 msq = lsq;
11797 }
11798 }
11799
11800 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11801 {
11802 if (costing_p)
11803 inside_cost = record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: vec_perm,
11804 stmt_info, misalign: 0, where: vect_body);
11805 else
11806 {
11807 tree perm_mask = perm_mask_for_reverse (vectype);
11808 new_temp = permute_vec_elements (vinfo, x: new_temp, y: new_temp,
11809 mask_vec: perm_mask, stmt_info, gsi);
11810 new_stmt = SSA_NAME_DEF_STMT (new_temp);
11811 }
11812 }
11813
11814 /* Collect vector loads and later create their permutation in
11815 vect_transform_grouped_load (). */
11816 if (!costing_p && (grouped_load || slp_perm))
11817 dr_chain.quick_push (obj: new_temp);
11818
11819 /* Store vector loads in the corresponding SLP_NODE. */
11820 if (!costing_p && slp && !slp_perm)
11821 slp_node->push_vec_def (def: new_stmt);
11822
11823 /* With an SLP permutation we load the gaps as well; without
11824 one we need to skip the gaps once we have fully loaded
11825 all elements. group_gap_adj is DR_GROUP_SIZE here. */
11826 group_elt += nunits;
11827 if (!costing_p
11828 && maybe_ne (a: group_gap_adj, b: 0U)
11829 && !slp_perm
11830 && known_eq (group_elt, group_size - group_gap_adj))
11831 {
11832 poly_wide_int bump_val
11833 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11834 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step)
11835 == -1)
11836 bump_val = -bump_val;
11837 tree bump = wide_int_to_tree (sizetype, cst: bump_val);
11838 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11839 stmt_info, bump);
11840 group_elt = 0;
11841 }
11842 }
11843 /* Bump the vector pointer to account for a gap or for excess
11844 elements loaded for a permuted SLP load. */
11845 if (!costing_p
11846 && maybe_ne (a: group_gap_adj, b: 0U)
11847 && slp_perm)
11848 {
11849 poly_wide_int bump_val
11850 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11851 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) == -1)
11852 bump_val = -bump_val;
11853 tree bump = wide_int_to_tree (sizetype, cst: bump_val);
11854 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11855 stmt_info, bump);
11856 }
11857
11858 if (slp && !slp_perm)
11859 continue;
11860
11861 if (slp_perm)
11862 {
11863 unsigned n_perms;
11864 /* For SLP we know we've seen all possible uses of dr_chain so
11865 direct vect_transform_slp_perm_load to DCE the unused parts.
11866 ??? This is a hack to prevent compile-time issues as seen
11867 in PR101120 and friends. */
11868 if (costing_p)
11869 {
11870 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf,
11871 true, &n_perms, nullptr);
11872 inside_cost = record_stmt_cost (body_cost_vec: cost_vec, count: n_perms, kind: vec_perm,
11873 stmt_info, misalign: 0, where: vect_body);
11874 }
11875 else
11876 {
11877 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
11878 gsi, vf, false, &n_perms,
11879 nullptr, true);
11880 gcc_assert (ok);
11881 }
11882 }
11883 else
11884 {
11885 if (grouped_load)
11886 {
11887 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
11888 /* We assume that the cost of a single load-lanes instruction
11889 is equivalent to the cost of DR_GROUP_SIZE separate loads.
11890 If a grouped access is instead being provided by a
11891 load-and-permute operation, include the cost of the
11892 permutes. */
11893 if (costing_p && first_stmt_info == stmt_info)
11894 {
11895 /* Uses even/odd extract operations or shuffle operations
11896 for each needed permute. */
11897 int group_size = DR_GROUP_SIZE (first_stmt_info);
11898 int nstmts = ceil_log2 (x: group_size) * group_size;
11899 inside_cost += record_stmt_cost (body_cost_vec: cost_vec, count: nstmts, kind: vec_perm,
11900 stmt_info, misalign: 0, where: vect_body);
11901
11902 if (dump_enabled_p ())
11903 dump_printf_loc (MSG_NOTE, vect_location,
11904 "vect_model_load_cost:"
11905 "strided group_size = %d .\n",
11906 group_size);
11907 }
11908 else if (!costing_p)
11909 {
11910 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
11911 group_size, gsi);
11912 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11913 }
11914 }
11915 else if (!costing_p)
11916 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
11917 }
11918 dr_chain.release ();
11919 }
11920 if (!slp && !costing_p)
11921 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11922
11923 if (costing_p)
11924 {
11925 gcc_assert (memory_access_type == VMAT_CONTIGUOUS
11926 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
11927 || memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
11928 if (n_adjacent_loads > 0)
11929 vect_get_load_cost (vinfo, stmt_info, ncopies: n_adjacent_loads,
11930 alignment_support_scheme, misalignment, add_realign_cost: false,
11931 inside_cost: &inside_cost, prologue_cost: &prologue_cost, prologue_cost_vec: cost_vec, body_cost_vec: cost_vec,
11932 record_prologue_costs: true);
11933 if (dump_enabled_p ())
11934 dump_printf_loc (MSG_NOTE, vect_location,
11935 "vect_model_load_cost: inside_cost = %u, "
11936 "prologue_cost = %u .\n",
11937 inside_cost, prologue_cost);
11938 }
11939
11940 return true;
11941}
11942
11943/* Function vect_is_simple_cond.
11944
11945 Input:
11946 VINFO - the vectorization info for the loop or block being vectorized.
11947 COND - the condition that is checked for simple use.
11948
11949 Output:
11950 *COMP_VECTYPE - the vector type for the comparison.
11951 *DTS - The def types for the arguments of the comparison.
11952
11953 Returns whether COND can be vectorized. Checks whether the
11954 condition operands are supportable using vect_is_simple_use. */
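/* For example (hypothetical SSA names, for illustration only): for an
   if-converted statement
     x_5 = a_1 < b_2 ? c_3 : d_4;
   COND is the comparison tree 'a_1 < b_2', whereas for a masked form
     x_5 = _6 ? c_3 : d_4;
   COND is the boolean SSA name _6 and is handled by the mask case at
   the top of the function.  */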
11955
11956static bool
11957vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
11958 slp_tree slp_node, tree *comp_vectype,
11959 enum vect_def_type *dts, tree vectype)
11960{
11961 tree lhs, rhs;
11962 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11963 slp_tree slp_op;
11964
11965 /* Mask case. */
11966 if (TREE_CODE (cond) == SSA_NAME
11967 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
11968 {
11969 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
11970 &slp_op, &dts[0], comp_vectype)
11971 || !*comp_vectype
11972 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
11973 return false;
11974 return true;
11975 }
11976
11977 if (!COMPARISON_CLASS_P (cond))
11978 return false;
11979
11980 lhs = TREE_OPERAND (cond, 0);
11981 rhs = TREE_OPERAND (cond, 1);
11982
11983 if (TREE_CODE (lhs) == SSA_NAME)
11984 {
11985 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
11986 &lhs, &slp_op, &dts[0], &vectype1))
11987 return false;
11988 }
11989 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
11990 || TREE_CODE (lhs) == FIXED_CST)
11991 dts[0] = vect_constant_def;
11992 else
11993 return false;
11994
11995 if (TREE_CODE (rhs) == SSA_NAME)
11996 {
11997 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
11998 &rhs, &slp_op, &dts[1], &vectype2))
11999 return false;
12000 }
12001 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
12002 || TREE_CODE (rhs) == FIXED_CST)
12003 dts[1] = vect_constant_def;
12004 else
12005 return false;
12006
12007 if (vectype1 && vectype2
12008 && maybe_ne (a: TYPE_VECTOR_SUBPARTS (node: vectype1),
12009 b: TYPE_VECTOR_SUBPARTS (node: vectype2)))
12010 return false;
12011
12012 *comp_vectype = vectype1 ? vectype1 : vectype2;
12013 /* Invariant comparison. */
12014 if (! *comp_vectype)
12015 {
12016 tree scalar_type = TREE_TYPE (lhs);
12017 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
12018 *comp_vectype = truth_type_for (vectype);
12019 else
12020 {
12021 /* If we can widen the comparison to match vectype do so. */
12022 if (INTEGRAL_TYPE_P (scalar_type)
12023 && !slp_node
12024 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
12025 TYPE_SIZE (TREE_TYPE (vectype))))
12026 scalar_type = build_nonstandard_integer_type
12027 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
12028 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12029 slp_node);
12030 }
12031 }
12032
12033 return true;
12034}
12035
12036/* vectorizable_condition.
12037
12038 Check if STMT_INFO is a conditional modify expression that can be vectorized.
12039 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12040 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
12041 at GSI.
12042
12043 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
12044
12045 Return true if STMT_INFO is vectorizable in this way. */
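/* A sketch of the kind of if-converted statement this handles (the
   loop and names below are illustrative only, not from a testcase):

     for (i = 0; i < n; i++)
       r[i] = a[i] < b[i] ? c[i] : d[i];

   The scalar COND_EXPR is replaced by a vector comparison feeding a
   VEC_COND_EXPR that selects between whole vectors of C and D.  */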
12046
12047static bool
12048vectorizable_condition (vec_info *vinfo,
12049 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12050 gimple **vec_stmt,
12051 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12052{
12053 tree scalar_dest = NULL_TREE;
12054 tree vec_dest = NULL_TREE;
12055 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
12056 tree then_clause, else_clause;
12057 tree comp_vectype = NULL_TREE;
12058 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
12059 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
12060 tree vec_compare;
12061 tree new_temp;
12062 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
12063 enum vect_def_type dts[4]
12064 = {vect_unknown_def_type, vect_unknown_def_type,
12065 vect_unknown_def_type, vect_unknown_def_type};
12066 int ndts = 4;
12067 int ncopies;
12068 int vec_num;
12069 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
12070 int i;
12071 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo);
12072 vec<tree> vec_oprnds0 = vNULL;
12073 vec<tree> vec_oprnds1 = vNULL;
12074 vec<tree> vec_oprnds2 = vNULL;
12075 vec<tree> vec_oprnds3 = vNULL;
12076 tree vec_cmp_type;
12077 bool masked = false;
12078
12079 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12080 return false;
12081
12082 /* Is this a vectorizable conditional operation? */
12083 gassign *stmt = dyn_cast <gassign *> (p: stmt_info->stmt);
12084 if (!stmt)
12085 return false;
12086
12087 code = gimple_assign_rhs_code (gs: stmt);
12088 if (code != COND_EXPR)
12089 return false;
12090
12091 stmt_vec_info reduc_info = NULL;
12092 int reduc_index = -1;
12093 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
12094 bool for_reduction
12095 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
12096 if (for_reduction)
12097 {
12098 if (slp_node)
12099 return false;
12100 reduc_info = info_for_reduction (vinfo, stmt_info);
12101 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
12102 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
12103 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
12104 || reduc_index != -1);
12105 }
12106 else
12107 {
12108 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
12109 return false;
12110 }
12111
12112 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
12113 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
12114
12115 if (slp_node)
12116 {
12117 ncopies = 1;
12118 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
12119 }
12120 else
12121 {
12122 ncopies = vect_get_num_copies (loop_vinfo, vectype);
12123 vec_num = 1;
12124 }
12125
12126 gcc_assert (ncopies >= 1);
12127 if (for_reduction && ncopies > 1)
12128 return false; /* FORNOW */
12129
12130 cond_expr = gimple_assign_rhs1 (gs: stmt);
12131
12132 if (!vect_is_simple_cond (cond: cond_expr, vinfo, stmt_info, slp_node,
12133 comp_vectype: &comp_vectype, dts: &dts[0], vectype)
12134 || !comp_vectype)
12135 return false;
12136
12137 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
12138 slp_tree then_slp_node, else_slp_node;
12139 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
12140 &then_clause, &then_slp_node, &dts[2], &vectype1))
12141 return false;
12142 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
12143 &else_clause, &else_slp_node, &dts[3], &vectype2))
12144 return false;
12145
12146 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
12147 return false;
12148
12149 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
12150 return false;
12151
12152 masked = !COMPARISON_CLASS_P (cond_expr);
12153 vec_cmp_type = truth_type_for (comp_vectype);
12154
12155 if (vec_cmp_type == NULL_TREE)
12156 return false;
12157
12158 cond_code = TREE_CODE (cond_expr);
12159 if (!masked)
12160 {
12161 cond_expr0 = TREE_OPERAND (cond_expr, 0);
12162 cond_expr1 = TREE_OPERAND (cond_expr, 1);
12163 }
12164
12165 /* For conditional reductions, the "then" value needs to be the candidate
12166 value calculated by this iteration while the "else" value needs to be
12167 the result carried over from previous iterations. If the COND_EXPR
12168 is the other way around, we need to swap it. */
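  /* For instance (hypothetical GIMPLE, shown only for illustration), in
       last_3 = _6 ? last_2 : i_5;
     where last_2 is the value carried over from the previous iteration,
     the carried value sits in the "then" arm, so below the comparison is
     inverted (or its result negated) and the two arms are swapped.  */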
12169 bool must_invert_cmp_result = false;
12170 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
12171 {
12172 if (masked)
12173 must_invert_cmp_result = true;
12174 else
12175 {
12176 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
12177 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
12178 if (new_code == ERROR_MARK)
12179 must_invert_cmp_result = true;
12180 else
12181 {
12182 cond_code = new_code;
12183 /* Make sure we don't accidentally use the old condition. */
12184 cond_expr = NULL_TREE;
12185 }
12186 }
12187 std::swap (a&: then_clause, b&: else_clause);
12188 }
12189
12190 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
12191 {
12192 /* Boolean values may have another representation in vectors
12193 and therefore we prefer bit operations over comparison for
12194 them (which also works for scalar masks). We store opcodes
12195 to use in bitop1 and bitop2. Statement is vectorized as
12196 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
12197 depending on bitop1 and bitop2 arity. */
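      /* Concretely, for boolean operands (shown for illustration):
	 a > b becomes a & ~b, a >= b becomes a | ~b, a < b becomes
	 b & ~a, a <= b becomes b | ~a, a == b becomes ~(a ^ b) and
	 a != b becomes a ^ b.  */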
12198 switch (cond_code)
12199 {
12200 case GT_EXPR:
12201 bitop1 = BIT_NOT_EXPR;
12202 bitop2 = BIT_AND_EXPR;
12203 break;
12204 case GE_EXPR:
12205 bitop1 = BIT_NOT_EXPR;
12206 bitop2 = BIT_IOR_EXPR;
12207 break;
12208 case LT_EXPR:
12209 bitop1 = BIT_NOT_EXPR;
12210 bitop2 = BIT_AND_EXPR;
12211 std::swap (a&: cond_expr0, b&: cond_expr1);
12212 break;
12213 case LE_EXPR:
12214 bitop1 = BIT_NOT_EXPR;
12215 bitop2 = BIT_IOR_EXPR;
12216 std::swap (a&: cond_expr0, b&: cond_expr1);
12217 break;
12218 case NE_EXPR:
12219 bitop1 = BIT_XOR_EXPR;
12220 break;
12221 case EQ_EXPR:
12222 bitop1 = BIT_XOR_EXPR;
12223 bitop2 = BIT_NOT_EXPR;
12224 break;
12225 default:
12226 return false;
12227 }
12228 cond_code = SSA_NAME;
12229 }
12230
12231 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
12232 && reduction_type == EXTRACT_LAST_REDUCTION
12233 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
12234 {
12235 if (dump_enabled_p ())
12236 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12237 "reduction comparison operation not supported.\n");
12238 return false;
12239 }
12240
12241 if (!vec_stmt)
12242 {
12243 if (bitop1 != NOP_EXPR)
12244 {
12245 machine_mode mode = TYPE_MODE (comp_vectype);
12246 optab optab;
12247
12248 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
12249 if (!optab || optab_handler (op: optab, mode) == CODE_FOR_nothing)
12250 return false;
12251
12252 if (bitop2 != NOP_EXPR)
12253 {
12254 optab = optab_for_tree_code (bitop2, comp_vectype,
12255 optab_default);
12256 if (!optab || optab_handler (op: optab, mode) == CODE_FOR_nothing)
12257 return false;
12258 }
12259 }
12260
12261 vect_cost_for_stmt kind = vector_stmt;
12262 if (reduction_type == EXTRACT_LAST_REDUCTION)
12263 /* Count one reduction-like operation per vector. */
12264 kind = vec_to_scalar;
12265 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code)
12266 && (masked
12267 || (!expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type,
12268 cond_code)
12269 || !expand_vec_cond_expr_p (vectype, vec_cmp_type,
12270 ERROR_MARK))))
12271 return false;
12272
12273 if (slp_node
12274 && (!vect_maybe_update_slp_op_vectype
12275 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
12276 || (op_adjust == 1
12277 && !vect_maybe_update_slp_op_vectype
12278 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
12279 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
12280 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
12281 {
12282 if (dump_enabled_p ())
12283 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12284 "incompatible vector types for invariants\n");
12285 return false;
12286 }
12287
12288 if (loop_vinfo && for_reduction
12289 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
12290 {
12291 if (reduction_type == EXTRACT_LAST_REDUCTION)
12292 {
12293 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12294 vectype, OPTIMIZE_FOR_SPEED))
12295 vect_record_loop_len (loop_vinfo,
12296 &LOOP_VINFO_LENS (loop_vinfo),
12297 ncopies * vec_num, vectype, 1);
12298 else
12299 vect_record_loop_mask (loop_vinfo,
12300 &LOOP_VINFO_MASKS (loop_vinfo),
12301 ncopies * vec_num, vectype, NULL);
12302 }
12303 /* Extra inactive lanes should be safe for vect_nested_cycle. */
12304 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
12305 {
12306 if (dump_enabled_p ())
12307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12308 "conditional reduction prevents the use"
12309 " of partial vectors.\n");
12310 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
12311 }
12312 }
12313
12314 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
12315 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt: dts, ndts, node: slp_node,
12316 cost_vec, kind);
12317 return true;
12318 }
12319
12320 /* Transform. */
12321
12322 /* Handle def. */
12323 scalar_dest = gimple_assign_lhs (gs: stmt);
12324 if (reduction_type != EXTRACT_LAST_REDUCTION)
12325 vec_dest = vect_create_destination_var (scalar_dest, vectype);
12326
12327 bool swap_cond_operands = false;
12328
12329 /* See whether another part of the vectorized code applies a loop
12330 mask to the condition, or to its inverse. */
12331
12332 vec_loop_masks *masks = NULL;
12333 vec_loop_lens *lens = NULL;
12334 if (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
12335 {
12336 if (reduction_type == EXTRACT_LAST_REDUCTION)
12337 lens = &LOOP_VINFO_LENS (loop_vinfo);
12338 }
12339 else if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
12340 {
12341 if (reduction_type == EXTRACT_LAST_REDUCTION)
12342 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12343 else
12344 {
12345 scalar_cond_masked_key cond (cond_expr, ncopies);
12346 if (loop_vinfo->scalar_cond_masked_set.contains (k: cond))
12347 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12348 else
12349 {
12350 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
12351 tree_code orig_code = cond.code;
12352 cond.code = invert_tree_comparison (cond.code, honor_nans);
12353 if (!masked && loop_vinfo->scalar_cond_masked_set.contains (k: cond))
12354 {
12355 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12356 cond_code = cond.code;
12357 swap_cond_operands = true;
12358 }
12359 else
12360 {
12361 /* Try the inverse of the current mask. We check if the
12362 inverse mask is live and if so we generate a negation of
12363 the current mask so that we still honor NaNs. */
12364 cond.inverted_p = true;
12365 cond.code = orig_code;
12366 if (loop_vinfo->scalar_cond_masked_set.contains (k: cond))
12367 {
12368 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12369 cond_code = cond.code;
12370 swap_cond_operands = true;
12371 must_invert_cmp_result = true;
12372 }
12373 }
12374 }
12375 }
12376 }
12377
12378 /* Handle cond expr. */
12379 if (masked)
12380 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12381 op0: cond_expr, vectype0: comp_vectype, vec_oprnds0: &vec_oprnds0,
12382 op1: then_clause, vectype1: vectype, vec_oprnds1: &vec_oprnds2,
12383 op2: reduction_type != EXTRACT_LAST_REDUCTION
12384 ? else_clause : NULL, vectype2: vectype, vec_oprnds2: &vec_oprnds3);
12385 else
12386 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12387 op0: cond_expr0, vectype0: comp_vectype, vec_oprnds0: &vec_oprnds0,
12388 op1: cond_expr1, vectype1: comp_vectype, vec_oprnds1: &vec_oprnds1,
12389 op2: then_clause, vectype2: vectype, vec_oprnds2: &vec_oprnds2,
12390 op3: reduction_type != EXTRACT_LAST_REDUCTION
12391 ? else_clause : NULL, vectype3: vectype, vec_oprnds3: &vec_oprnds3);
12392
12393 /* Arguments are ready. Create the new vector stmt. */
12394 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
12395 {
12396 vec_then_clause = vec_oprnds2[i];
12397 if (reduction_type != EXTRACT_LAST_REDUCTION)
12398 vec_else_clause = vec_oprnds3[i];
12399
12400 if (swap_cond_operands)
12401 std::swap (a&: vec_then_clause, b&: vec_else_clause);
12402
12403 if (masked)
12404 vec_compare = vec_cond_lhs;
12405 else
12406 {
12407 vec_cond_rhs = vec_oprnds1[i];
12408 if (bitop1 == NOP_EXPR)
12409 {
12410 gimple_seq stmts = NULL;
12411 vec_compare = gimple_build (seq: &stmts, code: cond_code, type: vec_cmp_type,
12412 ops: vec_cond_lhs, ops: vec_cond_rhs);
12413 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
12414 }
12415 else
12416 {
12417 new_temp = make_ssa_name (var: vec_cmp_type);
12418 gassign *new_stmt;
12419 if (bitop1 == BIT_NOT_EXPR)
12420 new_stmt = gimple_build_assign (new_temp, bitop1,
12421 vec_cond_rhs);
12422 else
12423 new_stmt
12424 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
12425 vec_cond_rhs);
12426 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
12427 if (bitop2 == NOP_EXPR)
12428 vec_compare = new_temp;
12429 else if (bitop2 == BIT_NOT_EXPR
12430 && reduction_type != EXTRACT_LAST_REDUCTION)
12431 {
12432 /* Instead of doing ~x ? y : z do x ? z : y. */
12433 vec_compare = new_temp;
12434 std::swap (a&: vec_then_clause, b&: vec_else_clause);
12435 }
12436 else
12437 {
12438 vec_compare = make_ssa_name (var: vec_cmp_type);
12439 if (bitop2 == BIT_NOT_EXPR)
12440 new_stmt
12441 = gimple_build_assign (vec_compare, bitop2, new_temp);
12442 else
12443 new_stmt
12444 = gimple_build_assign (vec_compare, bitop2,
12445 vec_cond_lhs, new_temp);
12446 vect_finish_stmt_generation (vinfo, stmt_info,
12447 vec_stmt: new_stmt, gsi);
12448 }
12449 }
12450 }
12451
12452 /* If we decided to apply a loop mask to the result of the vector
12453 comparison, AND the comparison with the mask now. Later passes
12454 should then be able to reuse the AND results between multiple
12455 vector statements.
12456
12457 For example:
12458 for (int i = 0; i < 100; ++i)
12459 x[i] = y[i] ? z[i] : 10;
12460
12461 results in following optimized GIMPLE:
12462
12463 mask__35.8_43 = vect__4.7_41 != { 0, ... };
12464 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
12465 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
12466 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
12467 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
12468 vect_iftmp.11_47, { 10, ... }>;
12469
12470 instead of using a masked and unmasked forms of
12471 vec != { 0, ... } (masked in the MASK_LOAD,
12472 unmasked in the VEC_COND_EXPR). */
12473
12474 /* Force vec_compare to be an SSA_NAME rather than a comparison,
12475 in cases where that's necessary. */
12476
12477 tree len = NULL_TREE, bias = NULL_TREE;
12478 if (masks || lens || reduction_type == EXTRACT_LAST_REDUCTION)
12479 {
12480 if (!is_gimple_val (vec_compare))
12481 {
12482 tree vec_compare_name = make_ssa_name (var: vec_cmp_type);
12483 gassign *new_stmt = gimple_build_assign (vec_compare_name,
12484 vec_compare);
12485 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
12486 vec_compare = vec_compare_name;
12487 }
12488
12489 if (must_invert_cmp_result)
12490 {
12491 tree vec_compare_name = make_ssa_name (var: vec_cmp_type);
12492 gassign *new_stmt = gimple_build_assign (vec_compare_name,
12493 BIT_NOT_EXPR,
12494 vec_compare);
12495 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
12496 vec_compare = vec_compare_name;
12497 }
12498
12499 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12500 vectype, OPTIMIZE_FOR_SPEED))
12501 {
12502 if (lens)
12503 {
12504 len = vect_get_loop_len (loop_vinfo, gsi, lens,
12505 vec_num * ncopies, vectype, i, 1);
12506 signed char biasval
12507 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
12508 bias = build_int_cst (intQI_type_node, biasval);
12509 }
12510 else
12511 {
12512 len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
12513 bias = build_int_cst (intQI_type_node, 0);
12514 }
12515 }
12516 if (masks)
12517 {
12518 tree loop_mask
12519 = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num * ncopies,
12520 vectype, i);
12521 tree tmp2 = make_ssa_name (var: vec_cmp_type);
12522 gassign *g
12523 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
12524 loop_mask);
12525 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: g, gsi);
12526 vec_compare = tmp2;
12527 }
12528 }
12529
12530 gimple *new_stmt;
12531 if (reduction_type == EXTRACT_LAST_REDUCTION)
12532 {
12533 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
12534 tree lhs = gimple_get_lhs (old_stmt);
12535 if (len)
12536 new_stmt = gimple_build_call_internal
12537 (IFN_LEN_FOLD_EXTRACT_LAST, 5, else_clause, vec_compare,
12538 vec_then_clause, len, bias);
12539 else
12540 new_stmt = gimple_build_call_internal
12541 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
12542 vec_then_clause);
12543 gimple_call_set_lhs (gs: new_stmt, lhs);
12544 SSA_NAME_DEF_STMT (lhs) = new_stmt;
12545 if (old_stmt == gsi_stmt (i: *gsi))
12546 vect_finish_replace_stmt (vinfo, stmt_info, vec_stmt: new_stmt);
12547 else
12548 {
12549 /* In this case we're moving the definition to later in the
12550 block. That doesn't matter because the only uses of the
12551 lhs are in phi statements. */
12552 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
12553 gsi_remove (&old_gsi, true);
12554 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
12555 }
12556 }
12557 else
12558 {
12559 new_temp = make_ssa_name (var: vec_dest);
12560 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
12561 vec_then_clause, vec_else_clause);
12562 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
12563 }
12564 if (slp_node)
12565 slp_node->push_vec_def (def: new_stmt);
12566 else
12567 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
12568 }
12569
12570 if (!slp_node)
12571 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12572
12573 vec_oprnds0.release ();
12574 vec_oprnds1.release ();
12575 vec_oprnds2.release ();
12576 vec_oprnds3.release ();
12577
12578 return true;
12579}
12580
12581/* Helper of vectorizable_comparison.
12582
12583 Check if STMT_INFO is a comparison expression CODE that can be vectorized.
12584 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12585 comparison, put it in VEC_STMT, and insert it at GSI.
12586
12587 Return true if STMT_INFO is vectorizable in this way. */
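/* For example (hypothetical SSA names, for illustration only), a scalar
   comparison such as
     mask_4 = a_2 < b_3;
   is vectorized into a vector comparison whose result has the vector
   boolean (mask) type, possibly rewritten into the bit operations
   selected below when the operands are themselves masks.  */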
12588
12589static bool
12590vectorizable_comparison_1 (vec_info *vinfo, tree vectype,
12591 stmt_vec_info stmt_info, tree_code code,
12592 gimple_stmt_iterator *gsi, gimple **vec_stmt,
12593 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12594{
12595 tree lhs, rhs1, rhs2;
12596 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
12597 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
12598 tree new_temp;
12599 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
12600 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
12601 int ndts = 2;
12602 poly_uint64 nunits;
12603 int ncopies;
12604 enum tree_code bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
12605 int i;
12606 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo);
12607 vec<tree> vec_oprnds0 = vNULL;
12608 vec<tree> vec_oprnds1 = vNULL;
12609 tree mask_type;
12610 tree mask = NULL_TREE;
12611
12612 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12613 return false;
12614
12615 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
12616 return false;
12617
12618 mask_type = vectype;
12619 nunits = TYPE_VECTOR_SUBPARTS (node: vectype);
12620
12621 if (slp_node)
12622 ncopies = 1;
12623 else
12624 ncopies = vect_get_num_copies (loop_vinfo, vectype);
12625
12626 gcc_assert (ncopies >= 1);
12627
12628 if (TREE_CODE_CLASS (code) != tcc_comparison)
12629 return false;
12630
12631 slp_tree slp_rhs1, slp_rhs2;
12632 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
12633 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
12634 return false;
12635
12636 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
12637 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
12638 return false;
12639
12640 if (vectype1 && vectype2
12641 && maybe_ne (a: TYPE_VECTOR_SUBPARTS (node: vectype1),
12642 b: TYPE_VECTOR_SUBPARTS (node: vectype2)))
12643 return false;
12644
12645 vectype = vectype1 ? vectype1 : vectype2;
12646
12647 /* Invariant comparison. */
12648 if (!vectype)
12649 {
12650 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
12651 vectype = mask_type;
12652 else
12653 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
12654 slp_node);
12655 if (!vectype || maybe_ne (a: TYPE_VECTOR_SUBPARTS (node: vectype), b: nunits))
12656 return false;
12657 }
12658 else if (maybe_ne (a: nunits, b: TYPE_VECTOR_SUBPARTS (node: vectype)))
12659 return false;
12660
12661 /* Can't compare mask and non-mask types. */
12662 if (vectype1 && vectype2
12663 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
12664 return false;
12665
12666 /* Boolean values may have another representation in vectors
12667 and therefore we prefer bit operations over comparison for
12668 them (which also works for scalar masks). We store opcodes
12669 to use in bitop1 and bitop2. Statement is vectorized as
12670 BITOP2 (rhs1 BITOP1 rhs2) or
12671 rhs1 BITOP2 (BITOP1 rhs2)
12672 depending on bitop1 and bitop2 arity. */
12673 bool swap_p = false;
12674 if (VECTOR_BOOLEAN_TYPE_P (vectype))
12675 {
12676 if (code == GT_EXPR)
12677 {
12678 bitop1 = BIT_NOT_EXPR;
12679 bitop2 = BIT_AND_EXPR;
12680 }
12681 else if (code == GE_EXPR)
12682 {
12683 bitop1 = BIT_NOT_EXPR;
12684 bitop2 = BIT_IOR_EXPR;
12685 }
12686 else if (code == LT_EXPR)
12687 {
12688 bitop1 = BIT_NOT_EXPR;
12689 bitop2 = BIT_AND_EXPR;
12690 swap_p = true;
12691 }
12692 else if (code == LE_EXPR)
12693 {
12694 bitop1 = BIT_NOT_EXPR;
12695 bitop2 = BIT_IOR_EXPR;
12696 swap_p = true;
12697 }
12698 else
12699 {
12700 bitop1 = BIT_XOR_EXPR;
12701 if (code == EQ_EXPR)
12702 bitop2 = BIT_NOT_EXPR;
12703 }
12704 }
12705
12706 if (!vec_stmt)
12707 {
12708 if (bitop1 == NOP_EXPR)
12709 {
12710 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
12711 return false;
12712 }
12713 else
12714 {
12715 machine_mode mode = TYPE_MODE (vectype);
12716 optab optab;
12717
12718 optab = optab_for_tree_code (bitop1, vectype, optab_default);
12719 if (!optab || optab_handler (op: optab, mode) == CODE_FOR_nothing)
12720 return false;
12721
12722 if (bitop2 != NOP_EXPR)
12723 {
12724 optab = optab_for_tree_code (bitop2, vectype, optab_default);
12725 if (!optab || optab_handler (op: optab, mode) == CODE_FOR_nothing)
12726 return false;
12727 }
12728 }
12729
12730 /* Put types on constant and invariant SLP children. */
12731 if (slp_node
12732 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
12733 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
12734 {
12735 if (dump_enabled_p ())
12736 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12737 "incompatible vector types for invariants\n");
12738 return false;
12739 }
12740
12741 vect_model_simple_cost (vinfo, stmt_info,
12742 ncopies: ncopies * (1 + (bitop2 != NOP_EXPR)),
12743 dt: dts, ndts, node: slp_node, cost_vec);
12744 return true;
12745 }
12746
12747 /* Transform. */
12748
12749 /* Handle def. */
12750 lhs = gimple_get_lhs (STMT_VINFO_STMT (stmt_info));
12751 if (lhs)
12752 mask = vect_create_destination_var (lhs, mask_type);
12753
12754 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12755 op0: rhs1, vectype0: vectype, vec_oprnds0: &vec_oprnds0,
12756 op1: rhs2, vectype1: vectype, vec_oprnds1: &vec_oprnds1);
12757 if (swap_p)
12758 std::swap (a&: vec_oprnds0, b&: vec_oprnds1);
12759
12760 /* Arguments are ready. Create the new vector stmt. */
12761 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
12762 {
12763 gimple *new_stmt;
12764 vec_rhs2 = vec_oprnds1[i];
12765
12766 if (lhs)
12767 new_temp = make_ssa_name (var: mask);
12768 else
12769 new_temp = make_temp_ssa_name (type: mask_type, NULL, name: "cmp");
12770 if (bitop1 == NOP_EXPR)
12771 {
12772 new_stmt = gimple_build_assign (new_temp, code,
12773 vec_rhs1, vec_rhs2);
12774 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
12775 }
12776 else
12777 {
12778 if (bitop1 == BIT_NOT_EXPR)
12779 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
12780 else
12781 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
12782 vec_rhs2);
12783 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
12784 if (bitop2 != NOP_EXPR)
12785 {
12786 tree res = make_ssa_name (var: mask);
12787 if (bitop2 == BIT_NOT_EXPR)
12788 new_stmt = gimple_build_assign (res, bitop2, new_temp);
12789 else
12790 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
12791 new_temp);
12792 vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi);
12793 }
12794 }
12795 if (slp_node)
12796 slp_node->push_vec_def (def: new_stmt);
12797 else
12798 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
12799 }
12800
12801 if (!slp_node)
12802 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12803
12804 vec_oprnds0.release ();
12805 vec_oprnds1.release ();
12806
12807 return true;
12808}
12809
12810/* vectorizable_comparison.
12811
12812 Check if STMT_INFO is a comparison expression that can be vectorized.
12813 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12814 comparison, put it in VEC_STMT, and insert it at GSI.
12815
12816 Return true if STMT_INFO is vectorizable in this way. */
12817
12818static bool
12819vectorizable_comparison (vec_info *vinfo,
12820 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12821 gimple **vec_stmt,
12822 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12823{
12824 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo);
12825
12826 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12827 return false;
12828
12829 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
12830 return false;
12831
12832 gassign *stmt = dyn_cast <gassign *> (p: stmt_info->stmt);
12833 if (!stmt)
12834 return false;
12835
12836 enum tree_code code = gimple_assign_rhs_code (gs: stmt);
12837 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
12838 if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
12839 vec_stmt, slp_node, cost_vec))
12840 return false;
12841
12842 if (!vec_stmt)
12843 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
12844
12845 return true;
12846}
12847
12848/* Check to see if the current early break given in STMT_INFO is valid for
12849 vectorization. */
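/* For example (an illustrative loop, not taken from a testcase):

     for (i = 0; i < n; i++)
       if (a[i] == 42)
	 break;

   The exit test is vectorized by comparing a whole vector of elements
   at once, reducing the resulting masks to a single value and branching
   out of the loop based on that combined result.  */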
12850
12851static bool
12852vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
12853 gimple_stmt_iterator *gsi, gimple **vec_stmt,
12854 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12855{
12856 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
12857 if (!loop_vinfo
12858 || !is_a <gcond *> (STMT_VINFO_STMT (stmt_info)))
12859 return false;
12860
12861 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_condition_def)
12862 return false;
12863
12864 if (!STMT_VINFO_RELEVANT_P (stmt_info))
12865 return false;
12866
12867 DUMP_VECT_SCOPE ("vectorizable_early_exit");
12868
12869 auto code = gimple_cond_code (STMT_VINFO_STMT (stmt_info));
12870
12871 tree vectype = NULL_TREE;
12872 slp_tree slp_op0;
12873 tree op0;
12874 enum vect_def_type dt0;
12875 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op0, &slp_op0, &dt0,
12876 &vectype))
12877 {
12878 if (dump_enabled_p ())
12879 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12880 "use not simple.\n");
12881 return false;
12882 }
12883
12884 if (!vectype)
12885 return false;
12886
12887 machine_mode mode = TYPE_MODE (vectype);
12888 int ncopies;
12889
12890 if (slp_node)
12891 ncopies = 1;
12892 else
12893 ncopies = vect_get_num_copies (loop_vinfo, vectype);
12894
12895 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
12896 bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
12897
12898 /* Now build the new conditional. Pattern gimple_conds get dropped during
12899 codegen so we must replace the original statement. */
12900 gimple *orig_stmt = STMT_VINFO_STMT (vect_orig_stmt (stmt_info));
12901 gcond *cond_stmt = as_a <gcond *>(p: orig_stmt);
12902 /* When vectorizing we assume that taking the branch edge means we are
12903 exiting the loop. That is not always the case, however, as the compiler
12904 rewrites conditions to always be a comparison against 0, and to do so it
12905 sometimes flips the edges. This is fine for scalar code, but for vector
12906 code we then have to flip the test, since we still assume that taking the
12907 branch edge means we found the exit condition, i.e. we need to know
12908 whether we are generating a `forall` or an `exist` condition. */
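  /* Concretely: with the natural edge layout we generate an `exist'
     test, reducing the masks with BIT_IOR_EXPR and branching when the
     result is not all zeros (NE against { 0, ... }); with flipped edges
     we generate a `forall' test, reducing with BIT_AND_EXPR and
     branching when the result is all ones (EQ against { -1, ... }).  */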
12909 auto new_code = NE_EXPR;
12910 auto reduc_optab = ior_optab;
12911 auto reduc_op = BIT_IOR_EXPR;
12912 tree cst = build_zero_cst (vectype);
12913 edge exit_true_edge = EDGE_SUCC (gimple_bb (cond_stmt), 0);
12914 if (exit_true_edge->flags & EDGE_FALSE_VALUE)
12915 exit_true_edge = EDGE_SUCC (gimple_bb (cond_stmt), 1);
12916 gcc_assert (exit_true_edge->flags & EDGE_TRUE_VALUE);
12917 if (flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
12918 exit_true_edge->dest))
12919 {
12920 new_code = EQ_EXPR;
12921 reduc_optab = and_optab;
12922 reduc_op = BIT_AND_EXPR;
12923 cst = build_minus_one_cst (vectype);
12924 }
12925
12926 /* Analyze only. */
12927 if (!vec_stmt)
12928 {
12929 if (direct_optab_handler (op: cbranch_optab, mode) == CODE_FOR_nothing)
12930 {
12931 if (dump_enabled_p ())
12932 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12933 "can't vectorize early exit because the "
12934 "target doesn't support flag setting vector "
12935 "comparisons.\n");
12936 return false;
12937 }
12938
12939 if (ncopies > 1
12940 && direct_optab_handler (op: reduc_optab, mode) == CODE_FOR_nothing)
12941 {
12942 if (dump_enabled_p ())
12943 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12944 "can't vectorize early exit because the "
12945 "target does not support boolean vector %s "
12946 "for type %T.\n",
12947 reduc_optab == ior_optab ? "OR" : "AND",
12948 vectype);
12949 return false;
12950 }
12951
12952 if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
12953 vec_stmt, slp_node, cost_vec))
12954 return false;
12955
12956 if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
12957 {
12958 if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype,
12959 OPTIMIZE_FOR_SPEED))
12960 return false;
12961 else
12962 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, NULL);
12963 }
12964
12965
12966 return true;
12967 }
12968
12969 /* Transform. */
12970
12971 tree new_temp = NULL_TREE;
12972 gimple *new_stmt = NULL;
12973
12974 if (dump_enabled_p ())
12975 dump_printf_loc (MSG_NOTE, vect_location, "transform early-exit.\n");
12976
12977 if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
12978 vec_stmt, slp_node, cost_vec))
12979 gcc_unreachable ();
12980
12981 gimple *stmt = STMT_VINFO_STMT (stmt_info);
12982 basic_block cond_bb = gimple_bb (g: stmt);
12983 gimple_stmt_iterator cond_gsi = gsi_last_bb (bb: cond_bb);
12984
12985 auto_vec<tree> stmts;
12986
12987 if (slp_node)
12988 stmts.safe_splice (SLP_TREE_VEC_DEFS (slp_node));
12989 else
12990 {
12991 auto vec_stmts = STMT_VINFO_VEC_STMTS (stmt_info);
12992 stmts.reserve_exact (nelems: vec_stmts.length ());
12993 for (auto stmt : vec_stmts)
12994 stmts.quick_push (obj: gimple_assign_lhs (gs: stmt));
12995 }
12996
12997 /* Determine if we need to reduce the final value. */
12998 if (stmts.length () > 1)
12999 {
13000 /* We build the reductions in a way to maintain as much parallelism as
13001 possible. */
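      /* E.g. (a sketch): given four vector masks m0..m3 this emits
	   t0 = m3 OP m2;  t1 = m1 OP m0;  result = t0 OP t1;
	 where OP is the IOR/AND reduction operation chosen above,
	 rather than a serial chain of three dependent operations.  */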
13002 auto_vec<tree> workset (stmts.length ());
13003
13004 /* Mask the statements as we queue them up. Normally we loop over
13005 vec_num, but since we inspect the exact results of vectorization
13006 we don't need to, and can instead just use the stmts themselves. */
13007 if (masked_loop_p)
13008 for (unsigned i = 0; i < stmts.length (); i++)
13009 {
13010 tree stmt_mask
13011 = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies, vectype,
13012 i);
13013 stmt_mask
13014 = prepare_vec_mask (loop_vinfo, TREE_TYPE (stmt_mask), loop_mask: stmt_mask,
13015 vec_mask: stmts[i], gsi: &cond_gsi);
13016 workset.quick_push (obj: stmt_mask);
13017 }
13018 else
13019 workset.splice (src: stmts);
13020
13021 while (workset.length () > 1)
13022 {
13023 new_temp = make_temp_ssa_name (type: vectype, NULL, name: "vexit_reduc");
13024 tree arg0 = workset.pop ();
13025 tree arg1 = workset.pop ();
13026 new_stmt = gimple_build_assign (new_temp, reduc_op, arg0, arg1);
13027 vect_finish_stmt_generation (vinfo: loop_vinfo, stmt_info, vec_stmt: new_stmt,
13028 gsi: &cond_gsi);
13029 workset.quick_insert (ix: 0, obj: new_temp);
13030 }
13031 }
13032 else
13033 {
13034 new_temp = stmts[0];
13035 if (masked_loop_p)
13036 {
13037 tree mask
13038 = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies, vectype, 0);
13039 new_temp = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), loop_mask: mask,
13040 vec_mask: new_temp, gsi: &cond_gsi);
13041 }
13042 }
13043
13044 gcc_assert (new_temp);
13045
13046 gimple_cond_set_condition (stmt: cond_stmt, code: new_code, lhs: new_temp, rhs: cst);
13047 update_stmt (s: orig_stmt);
13048
13049 if (slp_node)
13050 SLP_TREE_VEC_DEFS (slp_node).truncate (size: 0);
13051 else
13052 STMT_VINFO_VEC_STMTS (stmt_info).truncate (size: 0);
13053
13054 if (!slp_node)
13055 *vec_stmt = orig_stmt;
13056
13057 return true;
13058}
13059
13060/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
13061 can handle all live statements in the node. Otherwise return true
13062 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
13063 VEC_STMT_P is as for vectorizable_live_operation. */
13064
13065static bool
13066can_vectorize_live_stmts (vec_info *vinfo, stmt_vec_info stmt_info,
13067 slp_tree slp_node, slp_instance slp_node_instance,
13068 bool vec_stmt_p,
13069 stmt_vector_for_cost *cost_vec)
13070{
13071 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
13072 if (slp_node)
13073 {
13074 stmt_vec_info slp_stmt_info;
13075 unsigned int i;
13076 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
13077 {
13078 if ((STMT_VINFO_LIVE_P (slp_stmt_info)
13079 || (loop_vinfo
13080 && LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
13081 && STMT_VINFO_DEF_TYPE (slp_stmt_info)
13082 == vect_induction_def))
13083 && !vectorizable_live_operation (vinfo, slp_stmt_info, slp_node,
13084 slp_node_instance, i,
13085 vec_stmt_p, cost_vec))
13086 return false;
13087 }
13088 }
13089 else if ((STMT_VINFO_LIVE_P (stmt_info)
13090 || (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
13091 && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def))
13092 && !vectorizable_live_operation (vinfo, stmt_info,
13093 slp_node, slp_node_instance, -1,
13094 vec_stmt_p, cost_vec))
13095 return false;
13096
13097 return true;
13098}
13099
13100/* Make sure the statement is vectorizable. */
13101
13102opt_result
13103vect_analyze_stmt (vec_info *vinfo,
13104 stmt_vec_info stmt_info, bool *need_to_vectorize,
13105 slp_tree node, slp_instance node_instance,
13106 stmt_vector_for_cost *cost_vec)
13107{
13108 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo);
13109 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
13110 bool ok;
13111 gimple_seq pattern_def_seq;
13112
13113 if (dump_enabled_p ())
13114 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
13115 stmt_info->stmt);
13116
13117 if (gimple_has_volatile_ops (stmt: stmt_info->stmt))
13118 return opt_result::failure_at (loc: stmt_info->stmt,
13119 fmt: "not vectorized:"
13120 " stmt has volatile operands: %G\n",
13121 stmt_info->stmt);
13122
13123 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
13124 && node == NULL
13125 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
13126 {
13127 gimple_stmt_iterator si;
13128
13129 for (si = gsi_start (seq&: pattern_def_seq); !gsi_end_p (i: si); gsi_next (i: &si))
13130 {
13131 stmt_vec_info pattern_def_stmt_info
13132 = vinfo->lookup_stmt (gsi_stmt (i: si));
13133 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
13134 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
13135 {
13136 /* Analyze def stmt of STMT if it's a pattern stmt. */
13137 if (dump_enabled_p ())
13138 dump_printf_loc (MSG_NOTE, vect_location,
13139 "==> examining pattern def statement: %G",
13140 pattern_def_stmt_info->stmt);
13141
13142 opt_result res
13143 = vect_analyze_stmt (vinfo, stmt_info: pattern_def_stmt_info,
13144 need_to_vectorize, node, node_instance,
13145 cost_vec);
13146 if (!res)
13147 return res;
13148 }
13149 }
13150 }
13151
13152 /* Skip stmts that do not need to be vectorized. In loops this is expected
13153 to include:
13154 - the COND_EXPR which is the loop exit condition
13155 - any LABEL_EXPRs in the loop
13156 - computations that are used only for array indexing or loop control.
13157 In basic blocks we only analyze statements that are a part of some SLP
13158 instance, therefore, all the statements are relevant.
13159
13160 A pattern statement needs to be analyzed instead of the original statement
13161 if the original statement is not relevant. Otherwise, we analyze both
13162 statements. In basic blocks we are called from some SLP instance
13163 traversal; we don't analyze pattern stmts instead, as the pattern stmts
13164 are already part of the SLP instance. */
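/* As an example of the pattern case, for a widening multiplication

     t1 = (int) a_short;
     t2 = (int) b_short;
     prod = t1 * t2;

   the pattern recognizer typically replaces the sequence with a single
   WIDEN_MULT_EXPR pattern statement; when the original statements are
   not relevant on their own, it is that pattern statement which gets
   analyzed here.  */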
13165
13166 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
13167 if (!STMT_VINFO_RELEVANT_P (stmt_info)
13168 && !STMT_VINFO_LIVE_P (stmt_info))
13169 {
13170 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
13171 && pattern_stmt_info
13172 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
13173 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
13174 {
13175 /* Analyze PATTERN_STMT instead of the original stmt. */
13176 stmt_info = pattern_stmt_info;
13177 if (dump_enabled_p ())
13178 dump_printf_loc (MSG_NOTE, vect_location,
13179 "==> examining pattern statement: %G",
13180 stmt_info->stmt);
13181 }
13182 else
13183 {
13184 if (dump_enabled_p ())
13185 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
13186
13187 return opt_result::success ();
13188 }
13189 }
13190 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
13191 && node == NULL
13192 && pattern_stmt_info
13193 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
13194 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
13195 {
13196 /* Analyze PATTERN_STMT too. */
13197 if (dump_enabled_p ())
13198 dump_printf_loc (MSG_NOTE, vect_location,
13199 "==> examining pattern statement: %G",
13200 pattern_stmt_info->stmt);
13201
13202 opt_result res
13203 = vect_analyze_stmt (vinfo, stmt_info: pattern_stmt_info, need_to_vectorize, node,
13204 node_instance, cost_vec);
13205 if (!res)
13206 return res;
13207 }
13208
13209 switch (STMT_VINFO_DEF_TYPE (stmt_info))
13210 {
13211 case vect_internal_def:
13212 case vect_condition_def:
13213 break;
13214
13215 case vect_reduction_def:
13216 case vect_nested_cycle:
13217 gcc_assert (!bb_vinfo
13218 && (relevance == vect_used_in_outer
13219 || relevance == vect_used_in_outer_by_reduction
13220 || relevance == vect_used_by_reduction
13221 || relevance == vect_unused_in_scope
13222 || relevance == vect_used_only_live));
13223 break;
13224
13225 case vect_induction_def:
13226 case vect_first_order_recurrence:
13227 gcc_assert (!bb_vinfo);
13228 break;
13229
13230 case vect_constant_def:
13231 case vect_external_def:
13232 case vect_unknown_def_type:
13233 default:
13234 gcc_unreachable ();
13235 }
13236
13237 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
13238 if (node)
13239 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
13240
13241 if (STMT_VINFO_RELEVANT_P (stmt_info))
13242 {
13243 gcall *call = dyn_cast <gcall *> (p: stmt_info->stmt);
13244 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
13245 || gimple_code (stmt_info->stmt) == GIMPLE_COND
13246 || (call && gimple_call_lhs (call) == NULL_TREE));
13247 *need_to_vectorize = true;
13248 }
13249
13250 if (PURE_SLP_STMT (stmt_info) && !node)
13251 {
13252 if (dump_enabled_p ())
13253 dump_printf_loc (MSG_NOTE, vect_location,
13254 "handled only by SLP analysis\n");
13255 return opt_result::success ();
13256 }
13257
13258 ok = true;
13259 if (!bb_vinfo
13260 && (STMT_VINFO_RELEVANT_P (stmt_info)
13261 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
13262 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
13263 -mveclibabi= takes preference over library functions with
13264 the simd attribute. */
13265 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, slp_node: node, cost_vec)
13266 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, slp_node: node,
13267 cost_vec)
13268 || vectorizable_conversion (vinfo, stmt_info,
13269 NULL, NULL, slp_node: node, cost_vec)
13270 || vectorizable_operation (vinfo, stmt_info,
13271 NULL, NULL, slp_node: node, cost_vec)
13272 || vectorizable_assignment (vinfo, stmt_info,
13273 NULL, NULL, slp_node: node, cost_vec)
13274 || vectorizable_load (vinfo, stmt_info, NULL, NULL, slp_node: node, cost_vec)
13275 || vectorizable_store (vinfo, stmt_info, NULL, NULL, slp_node: node, cost_vec)
13276 || vectorizable_reduction (as_a <loop_vec_info> (p: vinfo), stmt_info,
13277 node, node_instance, cost_vec)
13278 || vectorizable_induction (as_a <loop_vec_info> (p: vinfo), stmt_info,
13279 NULL, node, cost_vec)
13280 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, slp_node: node, cost_vec)
13281 || vectorizable_condition (vinfo, stmt_info,
13282 NULL, NULL, slp_node: node, cost_vec)
13283 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, slp_node: node,
13284 cost_vec)
13285 || vectorizable_lc_phi (as_a <loop_vec_info> (p: vinfo),
13286 stmt_info, NULL, node)
13287 || vectorizable_recurr (as_a <loop_vec_info> (p: vinfo),
13288 stmt_info, NULL, node, cost_vec)
13289 || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, slp_node: node,
13290 cost_vec));
13291 else
13292 {
13293 if (bb_vinfo)
13294 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, slp_node: node, cost_vec)
13295 || vectorizable_simd_clone_call (vinfo, stmt_info,
13296 NULL, NULL, slp_node: node, cost_vec)
13297 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, slp_node: node,
13298 cost_vec)
13299 || vectorizable_shift (vinfo, stmt_info,
13300 NULL, NULL, slp_node: node, cost_vec)
13301 || vectorizable_operation (vinfo, stmt_info,
13302 NULL, NULL, slp_node: node, cost_vec)
13303 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, slp_node: node,
13304 cost_vec)
13305 || vectorizable_load (vinfo, stmt_info,
13306 NULL, NULL, slp_node: node, cost_vec)
13307 || vectorizable_store (vinfo, stmt_info,
13308 NULL, NULL, slp_node: node, cost_vec)
13309 || vectorizable_condition (vinfo, stmt_info,
13310 NULL, NULL, slp_node: node, cost_vec)
13311 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, slp_node: node,
13312 cost_vec)
13313 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec)
13314 || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, slp_node: node,
13315 cost_vec));
13316
13317 }
13318
13319 if (node)
13320 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
13321
13322 if (!ok)
13323 return opt_result::failure_at (loc: stmt_info->stmt,
13324 fmt: "not vectorized:"
13325 " relevant stmt not supported: %G",
13326 stmt_info->stmt);
13327
13328 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
13329 need extra handling, except for vectorizable reductions. */
13330 if (!bb_vinfo
13331 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
13332 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
13333 && !can_vectorize_live_stmts (vinfo: as_a <loop_vec_info> (p: vinfo),
13334 stmt_info, slp_node: node, slp_node_instance: node_instance,
13335 vec_stmt_p: false, cost_vec))
13336 return opt_result::failure_at (loc: stmt_info->stmt,
13337 fmt: "not vectorized:"
13338 " live stmt not supported: %G",
13339 stmt_info->stmt);
13340
13341 return opt_result::success ();
13342}
13343
13344
13345/* Function vect_transform_stmt.
13346
13347 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
13348
13349bool
13350vect_transform_stmt (vec_info *vinfo,
13351 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
13352 slp_tree slp_node, slp_instance slp_node_instance)
13353{
13354 bool is_store = false;
13355 gimple *vec_stmt = NULL;
13356 bool done;
13357
13358 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
13359
13360 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
13361 if (slp_node)
13362 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
13363
13364 switch (STMT_VINFO_TYPE (stmt_info))
13365 {
13366 case type_demotion_vec_info_type:
13367 case type_promotion_vec_info_type:
13368 case type_conversion_vec_info_type:
13369 done = vectorizable_conversion (vinfo, stmt_info,
13370 gsi, vec_stmt: &vec_stmt, slp_node, NULL);
13371 gcc_assert (done);
13372 break;
13373
13374 case induc_vec_info_type:
13375 done = vectorizable_induction (as_a <loop_vec_info> (p: vinfo),
13376 stmt_info, &vec_stmt, slp_node,
13377 NULL);
13378 gcc_assert (done);
13379 break;
13380
13381 case shift_vec_info_type:
13382 done = vectorizable_shift (vinfo, stmt_info,
13383 gsi, vec_stmt: &vec_stmt, slp_node, NULL);
13384 gcc_assert (done);
13385 break;
13386
13387 case op_vec_info_type:
13388 done = vectorizable_operation (vinfo, stmt_info, gsi, vec_stmt: &vec_stmt, slp_node,
13389 NULL);
13390 gcc_assert (done);
13391 break;
13392
13393 case assignment_vec_info_type:
13394 done = vectorizable_assignment (vinfo, stmt_info,
13395 gsi, vec_stmt: &vec_stmt, slp_node, NULL);
13396 gcc_assert (done);
13397 break;
13398
13399 case load_vec_info_type:
13400 done = vectorizable_load (vinfo, stmt_info, gsi, vec_stmt: &vec_stmt, slp_node,
13401 NULL);
13402 gcc_assert (done);
13403 break;
13404
13405 case store_vec_info_type:
13406 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
13407 && !slp_node
13408 && (++DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))
13409 < DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info))))
13410 /* In case of interleaving, the whole chain is vectorized when the
13411 last store in the chain is reached. Store stmts before the last
13412 one are skipped, and their vec_stmt_info shouldn't be freed
13413 meanwhile. */
13414 ;
13415 else
13416 {
13417 done = vectorizable_store (vinfo, stmt_info,
13418 gsi, vec_stmt: &vec_stmt, slp_node, NULL);
13419 gcc_assert (done);
13420 is_store = true;
13421 }
13422 break;
13423
13424 case condition_vec_info_type:
13425 done = vectorizable_condition (vinfo, stmt_info,
13426 gsi, vec_stmt: &vec_stmt, slp_node, NULL);
13427 gcc_assert (done);
13428 break;
13429
13430 case comparison_vec_info_type:
13431 done = vectorizable_comparison (vinfo, stmt_info, gsi, vec_stmt: &vec_stmt,
13432 slp_node, NULL);
13433 gcc_assert (done);
13434 break;
13435
13436 case call_vec_info_type:
13437 done = vectorizable_call (vinfo, stmt_info,
13438 gsi, vec_stmt: &vec_stmt, slp_node, NULL);
13439 break;
13440
13441 case call_simd_clone_vec_info_type:
13442 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, vec_stmt: &vec_stmt,
13443 slp_node, NULL);
13444 break;
13445
13446 case reduc_vec_info_type:
13447 done = vect_transform_reduction (as_a <loop_vec_info> (p: vinfo), stmt_info,
13448 gsi, &vec_stmt, slp_node);
13449 gcc_assert (done);
13450 break;
13451
13452 case cycle_phi_info_type:
13453 done = vect_transform_cycle_phi (as_a <loop_vec_info> (p: vinfo), stmt_info,
13454 &vec_stmt, slp_node, slp_node_instance);
13455 gcc_assert (done);
13456 break;
13457
13458 case lc_phi_info_type:
13459 done = vectorizable_lc_phi (as_a <loop_vec_info> (p: vinfo),
13460 stmt_info, &vec_stmt, slp_node);
13461 gcc_assert (done);
13462 break;
13463
13464 case recurr_info_type:
13465 done = vectorizable_recurr (as_a <loop_vec_info> (p: vinfo),
13466 stmt_info, &vec_stmt, slp_node, NULL);
13467 gcc_assert (done);
13468 break;
13469
13470 case phi_info_type:
13471 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
13472 gcc_assert (done);
13473 break;
13474
13475 case loop_exit_ctrl_vec_info_type:
13476 done = vectorizable_early_exit (vinfo, stmt_info, gsi, vec_stmt: &vec_stmt,
13477 slp_node, NULL);
13478 gcc_assert (done);
13479 break;
13480
13481 default:
13482 if (!STMT_VINFO_LIVE_P (stmt_info))
13483 {
13484 if (dump_enabled_p ())
13485 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13486 "stmt not supported.\n");
13487 gcc_unreachable ();
13488 }
13489 done = true;
13490 }
13491
13492 if (!slp_node && vec_stmt)
13493 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
13494
13495 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
13496 {
13497 /* Handle stmts whose DEF is used outside the loop-nest that is
13498 being vectorized. */
13499 done = can_vectorize_live_stmts (vinfo, stmt_info, slp_node,
13500 slp_node_instance, vec_stmt_p: true, NULL);
13501 gcc_assert (done);
13502 }
13503
13504 if (slp_node)
13505 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
13506
13507 return is_store;
13508}
13509
13510
13511/* Remove a group of stores (for SLP or interleaving), free their
13512 stmt_vec_info. */
13513
13514void
13515vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
13516{
13517 stmt_vec_info next_stmt_info = first_stmt_info;
13518
13519 while (next_stmt_info)
13520 {
13521 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
13522 next_stmt_info = vect_orig_stmt (stmt_info: next_stmt_info);
13523 /* Free the attached stmt_vec_info and remove the stmt. */
13524 vinfo->remove_stmt (next_stmt_info);
13525 next_stmt_info = tmp;
13526 }
13527}
13528
13529/* If NUNITS is nonzero, return a vector type that contains NUNITS
13530 elements of type SCALAR_TYPE, or null if the target doesn't support
13531 such a type.
13532
13533 If NUNITS is zero, return a vector type that contains elements of
13534 type SCALAR_TYPE, choosing whichever vector size the target prefers.
13535
13536 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
13537 for this vectorization region and want to "autodetect" the best choice.
13538 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
13539 and we want the new type to be interoperable with it. PREVAILING_MODE
13540 in this case can be a scalar integer mode or a vector mode; when it
13541 is a vector mode, the function acts like a tree-level version of
13542 related_vector_mode. */
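/* For example, if PREVAILING_MODE is a 128-bit vector mode, then
   SCALAR_TYPE == int with NUNITS == 4 would normally yield a V4SI-like
   vector type, whereas PREVAILING_MODE == VOIDmode (with NUNITS == 0)
   leaves the number of elements up to the target's preferred vector
   size.  */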
13543
13544tree
13545get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
13546 tree scalar_type, poly_uint64 nunits)
13547{
13548 tree orig_scalar_type = scalar_type;
13549 scalar_mode inner_mode;
13550 machine_mode simd_mode;
13551 tree vectype;
13552
13553 if ((!INTEGRAL_TYPE_P (scalar_type)
13554 && !POINTER_TYPE_P (scalar_type)
13555 && !SCALAR_FLOAT_TYPE_P (scalar_type))
13556 || (!is_int_mode (TYPE_MODE (scalar_type), int_mode: &inner_mode)
13557 && !is_float_mode (TYPE_MODE (scalar_type), float_mode: &inner_mode)))
13558 return NULL_TREE;
13559
13560 unsigned int nbytes = GET_MODE_SIZE (mode: inner_mode);
13561
13562 /* Interoperability between modes requires one to be a constant multiple
13563 of the other, so that the number of vectors required for each operation
13564 is a compile-time constant. */
13565 if (prevailing_mode != VOIDmode
13566 && !constant_multiple_p (a: nunits * nbytes,
13567 b: GET_MODE_SIZE (mode: prevailing_mode))
13568 && !constant_multiple_p (a: GET_MODE_SIZE (mode: prevailing_mode),
13569 b: nunits * nbytes))
13570 return NULL_TREE;
13571
13572 /* For vector types of elements whose mode precision doesn't
13573 match their type's precision, we use an element type of mode
13574 precision. The vectorization routines will have to make sure
13575 they support the proper result truncation/extension.
13576 We also make sure to build vector types with INTEGER_TYPE
13577 component type only. */
13578 if (INTEGRAL_TYPE_P (scalar_type)
13579 && (GET_MODE_BITSIZE (mode: inner_mode) != TYPE_PRECISION (scalar_type)
13580 || TREE_CODE (scalar_type) != INTEGER_TYPE))
13581 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode: inner_mode),
13582 TYPE_UNSIGNED (scalar_type));
13583
13584 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
13585 When the component mode passes the above test simply use a type
13586 corresponding to that mode. The theory is that any use that
13587 would cause problems with this will disable vectorization anyway. */
13588 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
13589 && !INTEGRAL_TYPE_P (scalar_type))
13590 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
13591
13592 /* We can't build a vector type of elements with alignment bigger than
13593 their size. */
13594 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
13595 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
13596 TYPE_UNSIGNED (scalar_type));
13597
13598 /* If we fell back to using the mode, fail if there was
13599 no scalar type for it. */
13600 if (scalar_type == NULL_TREE)
13601 return NULL_TREE;
13602
13603 /* If no prevailing mode was supplied, use the mode the target prefers.
13604 Otherwise lookup a vector mode based on the prevailing mode. */
13605 if (prevailing_mode == VOIDmode)
13606 {
13607 gcc_assert (known_eq (nunits, 0U));
13608 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
13609 if (SCALAR_INT_MODE_P (simd_mode))
13610 {
13611 /* Traditional behavior is not to take the integer mode
13612 literally, but simply to use it as a way of determining
13613 the vector size. It is up to mode_for_vector to decide
13614 what the TYPE_MODE should be.
13615
13616 Note that nunits == 1 is allowed in order to support single
13617 element vector types. */
13618 if (!multiple_p (a: GET_MODE_SIZE (mode: simd_mode), b: nbytes, multiple: &nunits)
13619 || !mode_for_vector (inner_mode, nunits).exists (mode: &simd_mode))
13620 return NULL_TREE;
13621 }
13622 }
13623 else if (SCALAR_INT_MODE_P (prevailing_mode)
13624 || !related_vector_mode (prevailing_mode,
13625 inner_mode, nunits).exists (mode: &simd_mode))
13626 {
13627 /* Fall back to using mode_for_vector, mostly in the hope of being
13628 able to use an integer mode. */
13629 if (known_eq (nunits, 0U)
13630 && !multiple_p (a: GET_MODE_SIZE (mode: prevailing_mode), b: nbytes, multiple: &nunits))
13631 return NULL_TREE;
13632
13633 if (!mode_for_vector (inner_mode, nunits).exists (mode: &simd_mode))
13634 return NULL_TREE;
13635 }
13636
13637 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
13638
13639 /* In cases where the mode was chosen by mode_for_vector, check that
13640 the target actually supports the chosen mode, or that it at least
13641 allows the vector mode to be replaced by a like-sized integer. */
13642 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
13643 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
13644 return NULL_TREE;
13645
13646 /* Re-attach the address-space qualifier if we canonicalized the scalar
13647 type. */
13648 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
13649 return build_qualified_type
13650 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
13651
13652 return vectype;
13653}
13654
13655/* Function get_vectype_for_scalar_type.
13656
13657 Returns the vector type corresponding to SCALAR_TYPE as supported
13658 by the target. If GROUP_SIZE is nonzero and we're performing BB
13659 vectorization, make sure that the number of elements in the vector
13660 is no bigger than GROUP_SIZE. */
13661
13662tree
13663get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
13664 unsigned int group_size)
13665{
13666 /* For BB vectorization, we should always have a group size once we've
13667 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
13668 are tentative requests during things like early data reference
13669 analysis and pattern recognition. */
13670 if (is_a <bb_vec_info> (p: vinfo))
13671 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
13672 else
13673 group_size = 0;
13674
13675 tree vectype = get_related_vectype_for_scalar_type (prevailing_mode: vinfo->vector_mode,
13676 scalar_type);
13677 if (vectype && vinfo->vector_mode == VOIDmode)
13678 vinfo->vector_mode = TYPE_MODE (vectype);
13679
13680 /* Register the natural choice of vector type, before the group size
13681 has been applied. */
13682 if (vectype)
13683 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
13684
13685 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
13686 try again with an explicit number of elements. */
13687 if (vectype
13688 && group_size
13689 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
13690 {
13691 /* Start with the biggest number of units that fits within
13692 GROUP_SIZE and halve it until we find a valid vector type.
13693 Usually either the first attempt will succeed or all will
13694 fail (in the latter case because GROUP_SIZE is too small
13695 for the target), but it's possible that a target could have
13696 a hole between supported vector types.
13697
13698 If GROUP_SIZE is not a power of 2, this has the effect of
13699 trying the largest power of 2 that fits within the group,
13700 even though the group is not a multiple of that vector size.
13701 The BB vectorizer will then try to carve up the group into
13702 smaller pieces. */
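      /* For example, with GROUP_SIZE == 6 the first attempt below uses
         1 << floor_log2 (6) == 4 units; if no 4-element vector type is
         available, the loop retries with 2 units before giving up.  */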
13703 unsigned int nunits = 1 << floor_log2 (x: group_size);
13704 do
13705 {
13706 vectype = get_related_vectype_for_scalar_type (prevailing_mode: vinfo->vector_mode,
13707 scalar_type, nunits);
13708 nunits /= 2;
13709 }
13710 while (nunits > 1 && !vectype);
13711 }
13712
13713 return vectype;
13714}
13715
13716/* Return the vector type corresponding to SCALAR_TYPE as supported
13717 by the target. NODE, if nonnull, is the SLP tree node that will
13718 use the returned vector type. */
13719
13720tree
13721get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
13722{
13723 unsigned int group_size = 0;
13724 if (node)
13725 group_size = SLP_TREE_LANES (node);
13726 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13727}
13728
13729/* Function get_mask_type_for_scalar_type.
13730
13731 Returns the mask type corresponding to a result of comparison
13732 of vectors of specified SCALAR_TYPE as supported by target.
13733 If GROUP_SIZE is nonzero and we're performing BB vectorization,
13734 make sure that the number of elements in the vector is no bigger
13735 than GROUP_SIZE. */
13736
13737tree
13738get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
13739 unsigned int group_size)
13740{
13741 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13742
13743 if (!vectype)
13744 return NULL;
13745
13746 return truth_type_for (vectype);
13747}
13748
13749/* Function get_mask_type_for_scalar_type.
13750
13751 Returns the mask type corresponding to a result of comparison
13752 of vectors of specified SCALAR_TYPE as supported by target.
13753 NODE, if nonnull, is the SLP tree node that will use the returned
13754 vector type. */
13755
13756tree
13757get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
13758 slp_tree node)
13759{
13760 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, node);
13761
13762 if (!vectype)
13763 return NULL;
13764
13765 return truth_type_for (vectype);
13766}
13767
13768/* Function get_same_sized_vectype
13769
13770 Returns a vector type corresponding to SCALAR_TYPE of size
13771 VECTOR_TYPE if supported by the target. */
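/* For example, if VECTOR_TYPE is a 16-byte vector of 32-bit ints and
   SCALAR_TYPE is a 16-bit integer type, the result is normally an
   8-element 16-byte vector: the same overall size, with more lanes.  */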
13772
13773tree
13774get_same_sized_vectype (tree scalar_type, tree vector_type)
13775{
13776 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
13777 return truth_type_for (vector_type);
13778
13779 poly_uint64 nunits;
13780 if (!multiple_p (a: GET_MODE_SIZE (TYPE_MODE (vector_type)),
13781 b: GET_MODE_SIZE (TYPE_MODE (scalar_type)), multiple: &nunits))
13782 return NULL_TREE;
13783
13784 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
13785 scalar_type, nunits);
13786}
13787
13788/* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
13789 would not change the chosen vector modes. */
13790
13791bool
13792vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
13793{
13794 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
13795 i != vinfo->used_vector_modes.end (); ++i)
13796 if (!VECTOR_MODE_P (*i)
13797 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
13798 return false;
13799 return true;
13800}
13801
13802/* Function vect_is_simple_use.
13803
13804 Input:
13805 VINFO - the vect info of the loop or basic block that is being vectorized.
13806 OPERAND - operand in the loop or bb.
13807 Output:
13808 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
13809 case OPERAND is an SSA_NAME that is defined in the vectorizable region
13810 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
13811 the definition could be anywhere in the function
13812 DT - the type of definition
13813
13814 Returns whether a stmt with OPERAND can be vectorized.
13815 For loops, supportable operands are constants, loop invariants, and operands
13816 that are defined by the current iteration of the loop. Unsupportable
13817 operands are those that are defined by a previous iteration of the loop (as
13818 is the case in reduction/induction computations).
13819 For basic blocks, supportable operands are constants and bb invariants.
13820 For now, operands defined outside the basic block are not supported. */
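/* As a rough illustration, when vectorizing

     for (i = 0; i < n; i++)
       a[i] = x * b[i] + 7;

   the constant 7 is vect_constant_def, the loop-invariant X is
   vect_external_def, and the value loaded from b[i] is
   vect_internal_def, since it is defined by a statement inside the
   vectorized region.  */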
13821
13822bool
13823vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
13824 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
13825{
13826 if (def_stmt_info_out)
13827 *def_stmt_info_out = NULL;
13828 if (def_stmt_out)
13829 *def_stmt_out = NULL;
13830 *dt = vect_unknown_def_type;
13831
13832 if (dump_enabled_p ())
13833 {
13834 dump_printf_loc (MSG_NOTE, vect_location,
13835 "vect_is_simple_use: operand ");
13836 if (TREE_CODE (operand) == SSA_NAME
13837 && !SSA_NAME_IS_DEFAULT_DEF (operand))
13838 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
13839 else
13840 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
13841 }
13842
13843 if (CONSTANT_CLASS_P (operand))
13844 *dt = vect_constant_def;
13845 else if (is_gimple_min_invariant (operand))
13846 *dt = vect_external_def;
13847 else if (TREE_CODE (operand) != SSA_NAME)
13848 *dt = vect_unknown_def_type;
13849 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
13850 *dt = vect_external_def;
13851 else
13852 {
13853 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
13854 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
13855 if (!stmt_vinfo)
13856 *dt = vect_external_def;
13857 else
13858 {
13859 stmt_vinfo = vect_stmt_to_vectorize (stmt_info: stmt_vinfo);
13860 def_stmt = stmt_vinfo->stmt;
13861 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
13862 if (def_stmt_info_out)
13863 *def_stmt_info_out = stmt_vinfo;
13864 }
13865 if (def_stmt_out)
13866 *def_stmt_out = def_stmt;
13867 }
13868
13869 if (dump_enabled_p ())
13870 {
13871 dump_printf (MSG_NOTE, ", type of def: ");
13872 switch (*dt)
13873 {
13874 case vect_uninitialized_def:
13875 dump_printf (MSG_NOTE, "uninitialized\n");
13876 break;
13877 case vect_constant_def:
13878 dump_printf (MSG_NOTE, "constant\n");
13879 break;
13880 case vect_external_def:
13881 dump_printf (MSG_NOTE, "external\n");
13882 break;
13883 case vect_internal_def:
13884 dump_printf (MSG_NOTE, "internal\n");
13885 break;
13886 case vect_induction_def:
13887 dump_printf (MSG_NOTE, "induction\n");
13888 break;
13889 case vect_reduction_def:
13890 dump_printf (MSG_NOTE, "reduction\n");
13891 break;
13892 case vect_double_reduction_def:
13893 dump_printf (MSG_NOTE, "double reduction\n");
13894 break;
13895 case vect_nested_cycle:
13896 dump_printf (MSG_NOTE, "nested cycle\n");
13897 break;
13898 case vect_first_order_recurrence:
13899 dump_printf (MSG_NOTE, "first order recurrence\n");
13900 break;
13901 case vect_condition_def:
13902 dump_printf (MSG_NOTE, "control flow\n");
13903 break;
13904 case vect_unknown_def_type:
13905 dump_printf (MSG_NOTE, "unknown\n");
13906 break;
13907 }
13908 }
13909
13910 if (*dt == vect_unknown_def_type)
13911 {
13912 if (dump_enabled_p ())
13913 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13914 "Unsupported pattern.\n");
13915 return false;
13916 }
13917
13918 return true;
13919}
13920
13921/* Function vect_is_simple_use.
13922
13923 Same as vect_is_simple_use but also determines the vector operand
13924 type of OPERAND and stores it to *VECTYPE. If the definition of
13925 OPERAND is vect_uninitialized_def, vect_constant_def or
13926 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
13927 is responsible to compute the best suited vector type for the
13928 scalar operand. */
13929
13930bool
13931vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
13932 tree *vectype, stmt_vec_info *def_stmt_info_out,
13933 gimple **def_stmt_out)
13934{
13935 stmt_vec_info def_stmt_info;
13936 gimple *def_stmt;
13937 if (!vect_is_simple_use (operand, vinfo, dt, def_stmt_info_out: &def_stmt_info, def_stmt_out: &def_stmt))
13938 return false;
13939
13940 if (def_stmt_out)
13941 *def_stmt_out = def_stmt;
13942 if (def_stmt_info_out)
13943 *def_stmt_info_out = def_stmt_info;
13944
13945 /* Now get a vector type if the def is internal, otherwise supply
13946 NULL_TREE and leave it up to the caller to figure out a proper
13947 type for the use stmt. */
13948 if (*dt == vect_internal_def
13949 || *dt == vect_induction_def
13950 || *dt == vect_reduction_def
13951 || *dt == vect_double_reduction_def
13952 || *dt == vect_nested_cycle
13953 || *dt == vect_first_order_recurrence)
13954 {
13955 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
13956 gcc_assert (*vectype != NULL_TREE);
13957 if (dump_enabled_p ())
13958 dump_printf_loc (MSG_NOTE, vect_location,
13959 "vect_is_simple_use: vectype %T\n", *vectype);
13960 }
13961 else if (*dt == vect_uninitialized_def
13962 || *dt == vect_constant_def
13963 || *dt == vect_external_def)
13964 *vectype = NULL_TREE;
13965 else
13966 gcc_unreachable ();
13967
13968 return true;
13969}
13970
13971/* Function vect_is_simple_use.
13972
13973 Same as vect_is_simple_use but determines the operand by operand
13974 position OPERAND from either STMT or SLP_NODE, filling in *OP
13975 and *SLP_DEF (when SLP_NODE is not NULL). */
13976
13977bool
13978vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
13979 unsigned operand, tree *op, slp_tree *slp_def,
13980 enum vect_def_type *dt,
13981 tree *vectype, stmt_vec_info *def_stmt_info_out)
13982{
13983 if (slp_node)
13984 {
13985 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
13986 *slp_def = child;
13987 *vectype = SLP_TREE_VECTYPE (child);
13988 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
13989 {
13990 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
13991 return vect_is_simple_use (operand: *op, vinfo, dt, def_stmt_info_out);
13992 }
13993 else
13994 {
13995 if (def_stmt_info_out)
13996 *def_stmt_info_out = NULL;
13997 *op = SLP_TREE_SCALAR_OPS (child)[0];
13998 *dt = SLP_TREE_DEF_TYPE (child);
13999 return true;
14000 }
14001 }
14002 else
14003 {
14004 *slp_def = NULL;
14005 if (gassign *ass = dyn_cast <gassign *> (p: stmt->stmt))
14006 {
14007 if (gimple_assign_rhs_code (gs: ass) == COND_EXPR
14008 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
14009 {
14010 if (operand < 2)
14011 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
14012 else
14013 *op = gimple_op (gs: ass, i: operand);
14014 }
14015 else if (gimple_assign_rhs_code (gs: ass) == VIEW_CONVERT_EXPR)
14016 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
14017 else
14018 *op = gimple_op (gs: ass, i: operand + 1);
14019 }
14020 else if (gcond *cond = dyn_cast <gcond *> (p: stmt->stmt))
14021 *op = gimple_op (gs: cond, i: operand);
14022 else if (gcall *call = dyn_cast <gcall *> (p: stmt->stmt))
14023 *op = gimple_call_arg (gs: call, index: operand);
14024 else
14025 gcc_unreachable ();
14026 return vect_is_simple_use (operand: *op, vinfo, dt, vectype, def_stmt_info_out);
14027 }
14028}
14029
14030/* If OP is not NULL and is external or constant update its vector
14031 type with VECTYPE. Returns true if successful or false if not,
14032 for example when conflicting vector types are present. */
14033
14034bool
14035vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
14036{
14037 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
14038 return true;
14039 if (SLP_TREE_VECTYPE (op))
14040 return types_compatible_p (SLP_TREE_VECTYPE (op), type2: vectype);
14041 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
14042 should be handled by patterns.  Allow vect_constant_def for now. */
14043 if (VECTOR_BOOLEAN_TYPE_P (vectype)
14044 && SLP_TREE_DEF_TYPE (op) == vect_external_def)
14045 return false;
14046 SLP_TREE_VECTYPE (op) = vectype;
14047 return true;
14048}
14049
14050/* Function supportable_widening_operation
14051
14052 Check whether an operation represented by the code CODE is a
14053 widening operation that is supported by the target platform in
14054 vector form (i.e., when operating on arguments of type VECTYPE_IN
14055 producing a result of type VECTYPE_OUT).
14056
14057 Widening operations we currently support are NOP (CONVERT), FLOAT,
14058 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
14059 are supported by the target platform either directly (via vector
14060 tree-codes), or via target builtins.
14061
14062 Output:
14063 - CODE1 and CODE2 are codes of vector operations to be used when
14064 vectorizing the operation, if available.
14065 - MULTI_STEP_CVT determines the number of required intermediate steps in
14066 case of multi-step conversion (like char->short->int - in that case
14067 MULTI_STEP_CVT will be 1).
14068 - INTERM_TYPES contains the intermediate type required to perform the
14069 widening operation (short in the above example). */
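/* For example, widening 16 chars to shorts on a typical 128-bit target
   uses the VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR pair to produce two
   8-element short vectors, with MULTI_STEP_CVT == 0.  Widening char
   all the way to int goes through short first, in which case
   MULTI_STEP_CVT is 1 and INTERM_TYPES holds the intermediate short
   vector type.  */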
14070
14071bool
14072supportable_widening_operation (vec_info *vinfo,
14073 code_helper code,
14074 stmt_vec_info stmt_info,
14075 tree vectype_out, tree vectype_in,
14076 code_helper *code1,
14077 code_helper *code2,
14078 int *multi_step_cvt,
14079 vec<tree> *interm_types)
14080{
14081 loop_vec_info loop_info = dyn_cast <loop_vec_info> (p: vinfo);
14082 class loop *vect_loop = NULL;
14083 machine_mode vec_mode;
14084 enum insn_code icode1, icode2;
14085 optab optab1 = unknown_optab, optab2 = unknown_optab;
14086 tree vectype = vectype_in;
14087 tree wide_vectype = vectype_out;
14088 tree_code c1 = MAX_TREE_CODES, c2 = MAX_TREE_CODES;
14089 int i;
14090 tree prev_type, intermediate_type;
14091 machine_mode intermediate_mode, prev_mode;
14092 optab optab3, optab4;
14093
14094 *multi_step_cvt = 0;
14095 if (loop_info)
14096 vect_loop = LOOP_VINFO_LOOP (loop_info);
14097
14098 switch (code.safe_as_tree_code ())
14099 {
14100 case MAX_TREE_CODES:
14101 /* Don't set c1 and c2 if code is not a tree_code. */
14102 break;
14103
14104 case WIDEN_MULT_EXPR:
14105 /* The result of a vectorized widening operation usually requires
14106 two vectors (because the widened results do not fit into one vector).
14107 The generated vector results would normally be expected to be
14108 generated in the same order as in the original scalar computation,
14109 i.e. if 8 results are generated in each vector iteration, they are
14110 to be organized as follows:
14111 vect1: [res1,res2,res3,res4],
14112 vect2: [res5,res6,res7,res8].
14113
14114 However, in the special case that the result of the widening
14115 operation is used in a reduction computation only, the order doesn't
14116 matter (because when vectorizing a reduction we change the order of
14117 the computation). Some targets can take advantage of this and
14118 generate more efficient code. For example, targets like Altivec,
14119 that support widen_mult using a sequence of {mult_even,mult_odd}
14120 generate the following vectors:
14121 vect1: [res1,res3,res5,res7],
14122 vect2: [res2,res4,res6,res8].
14123
14124 When vectorizing outer-loops, we execute the inner-loop sequentially
14125 (each vectorized inner-loop iteration contributes to VF outer-loop
14126 iterations in parallel).  We therefore don't allow changing the
14127 order of the computation in the inner-loop during outer-loop
14128 vectorization. */
14129 /* TODO: Another case in which order doesn't *really* matter is when we
14130 widen and then contract again, e.g. (short)((int)x * y >> 8).
14131 Normally, pack_trunc performs an even/odd permute, whereas the
14132 repack from an even/odd expansion would be an interleave, which
14133 would be significantly simpler for e.g. AVX2. */
14134 /* In any case, in order to avoid duplicating the code below, recurse
14135 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
14136 are properly set up for the caller. If we fail, we'll continue with
14137 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
14138 if (vect_loop
14139 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
14140 && !nested_in_vect_loop_p (loop: vect_loop, stmt_info)
14141 && supportable_widening_operation (vinfo, code: VEC_WIDEN_MULT_EVEN_EXPR,
14142 stmt_info, vectype_out,
14143 vectype_in, code1,
14144 code2, multi_step_cvt,
14145 interm_types))
14146 {
14147 /* Elements in a vector with vect_used_by_reduction property cannot
14148 be reordered if the use chain with this property does not have the
14149 same operation. One such an example is s += a * b, where elements
14150 in a and b cannot be reordered. Here we check if the vector defined
14151 by STMT is only directly used in the reduction statement. */
14152 tree lhs = gimple_assign_lhs (gs: stmt_info->stmt);
14153 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
14154 if (use_stmt_info
14155 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
14156 return true;
14157 }
14158 c1 = VEC_WIDEN_MULT_LO_EXPR;
14159 c2 = VEC_WIDEN_MULT_HI_EXPR;
14160 break;
14161
14162 case DOT_PROD_EXPR:
14163 c1 = DOT_PROD_EXPR;
14164 c2 = DOT_PROD_EXPR;
14165 break;
14166
14167 case SAD_EXPR:
14168 c1 = SAD_EXPR;
14169 c2 = SAD_EXPR;
14170 break;
14171
14172 case VEC_WIDEN_MULT_EVEN_EXPR:
14173 /* Support the recursion induced just above. */
14174 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
14175 c2 = VEC_WIDEN_MULT_ODD_EXPR;
14176 break;
14177
14178 case WIDEN_LSHIFT_EXPR:
14179 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
14180 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
14181 break;
14182
14183 CASE_CONVERT:
14184 c1 = VEC_UNPACK_LO_EXPR;
14185 c2 = VEC_UNPACK_HI_EXPR;
14186 break;
14187
14188 case FLOAT_EXPR:
14189 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
14190 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
14191 break;
14192
14193 case FIX_TRUNC_EXPR:
14194 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
14195 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
14196 break;
14197
14198 default:
14199 gcc_unreachable ();
14200 }
14201
14202 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
14203 std::swap (a&: c1, b&: c2);
14204
14205 if (code == FIX_TRUNC_EXPR)
14206 {
14207 /* The signedness is determined from output operand. */
14208 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
14209 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
14210 }
14211 else if (CONVERT_EXPR_CODE_P (code.safe_as_tree_code ())
14212 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
14213 && VECTOR_BOOLEAN_TYPE_P (vectype)
14214 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
14215 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
14216 {
14217 /* If the input and result modes are the same, a different optab
14218 is needed where we pass in the number of units in vectype. */
14219 optab1 = vec_unpacks_sbool_lo_optab;
14220 optab2 = vec_unpacks_sbool_hi_optab;
14221 }
14222
14223 vec_mode = TYPE_MODE (vectype);
14224 if (widening_fn_p (code))
14225 {
14226 /* If this is an internal fn then we must check whether the target
14227 supports either a low-high split or an even-odd split. */
14228 internal_fn ifn = as_internal_fn (code: (combined_fn) code);
14229
14230 internal_fn lo, hi, even, odd;
14231 lookup_hilo_internal_fn (ifn, &lo, &hi);
14232 *code1 = as_combined_fn (fn: lo);
14233 *code2 = as_combined_fn (fn: hi);
14234 optab1 = direct_internal_fn_optab (lo, {vectype, vectype});
14235 optab2 = direct_internal_fn_optab (hi, {vectype, vectype});
14236
14237 /* If we don't support low-high, then check for even-odd. */
14238 if (!optab1
14239 || (icode1 = optab_handler (op: optab1, mode: vec_mode)) == CODE_FOR_nothing
14240 || !optab2
14241 || (icode2 = optab_handler (op: optab2, mode: vec_mode)) == CODE_FOR_nothing)
14242 {
14243 lookup_evenodd_internal_fn (ifn, &even, &odd);
14244 *code1 = as_combined_fn (fn: even);
14245 *code2 = as_combined_fn (fn: odd);
14246 optab1 = direct_internal_fn_optab (even, {vectype, vectype});
14247 optab2 = direct_internal_fn_optab (odd, {vectype, vectype});
14248 }
14249 }
14250 else if (code.is_tree_code ())
14251 {
14252 if (code == FIX_TRUNC_EXPR)
14253 {
14254 /* The signedness is determined from output operand. */
14255 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
14256 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
14257 }
14258 else if (CONVERT_EXPR_CODE_P ((tree_code) code.safe_as_tree_code ())
14259 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
14260 && VECTOR_BOOLEAN_TYPE_P (vectype)
14261 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
14262 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
14263 {
14264 /* If the input and result modes are the same, a different optab
14265 is needed where we pass in the number of units in vectype. */
14266 optab1 = vec_unpacks_sbool_lo_optab;
14267 optab2 = vec_unpacks_sbool_hi_optab;
14268 }
14269 else
14270 {
14271 optab1 = optab_for_tree_code (c1, vectype, optab_default);
14272 optab2 = optab_for_tree_code (c2, vectype, optab_default);
14273 }
14274 *code1 = c1;
14275 *code2 = c2;
14276 }
14277
14278 if (!optab1 || !optab2)
14279 return false;
14280
14281 if ((icode1 = optab_handler (op: optab1, mode: vec_mode)) == CODE_FOR_nothing
14282 || (icode2 = optab_handler (op: optab2, mode: vec_mode)) == CODE_FOR_nothing)
14283 return false;
14284
14285
14286 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
14287 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
14288 {
14289 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14290 return true;
14291 /* For scalar masks we may have different boolean
14292 vector types having the same QImode.  Thus we additionally
14293 check the number of elements. */
14294 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
14295 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
14296 return true;
14297 }
14298
14299 /* Check if it's a multi-step conversion that can be done using intermediate
14300 types. */
14301
14302 prev_type = vectype;
14303 prev_mode = vec_mode;
14304
14305 if (!CONVERT_EXPR_CODE_P (code.safe_as_tree_code ()))
14306 return false;
14307
14308 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
14309 intermediate steps in the promotion sequence.  We try
14310 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
14311 not. */
14312 interm_types->create (MAX_INTERM_CVT_STEPS);
14313 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
14314 {
14315 intermediate_mode = insn_data[icode1].operand[0].mode;
14316 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
14317 intermediate_type
14318 = vect_halve_mask_nunits (prev_type, intermediate_mode);
14319 else if (VECTOR_MODE_P (intermediate_mode))
14320 {
14321 tree intermediate_element_type
14322 = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode),
14323 TYPE_UNSIGNED (prev_type));
14324 intermediate_type
14325 = build_vector_type_for_mode (intermediate_element_type,
14326 intermediate_mode);
14327 }
14328 else
14329 intermediate_type
14330 = lang_hooks.types.type_for_mode (intermediate_mode,
14331 TYPE_UNSIGNED (prev_type));
14332
14333 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
14334 && VECTOR_BOOLEAN_TYPE_P (prev_type)
14335 && intermediate_mode == prev_mode
14336 && SCALAR_INT_MODE_P (prev_mode))
14337 {
14338 /* If the input and result modes are the same, a different optab
14339 is needed where we pass in the number of units in vectype. */
14340 optab3 = vec_unpacks_sbool_lo_optab;
14341 optab4 = vec_unpacks_sbool_hi_optab;
14342 }
14343 else
14344 {
14345 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
14346 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
14347 }
14348
14349 if (!optab3 || !optab4
14350 || (icode1 = optab_handler (op: optab1, mode: prev_mode)) == CODE_FOR_nothing
14351 || insn_data[icode1].operand[0].mode != intermediate_mode
14352 || (icode2 = optab_handler (op: optab2, mode: prev_mode)) == CODE_FOR_nothing
14353 || insn_data[icode2].operand[0].mode != intermediate_mode
14354 || ((icode1 = optab_handler (op: optab3, mode: intermediate_mode))
14355 == CODE_FOR_nothing)
14356 || ((icode2 = optab_handler (op: optab4, mode: intermediate_mode))
14357 == CODE_FOR_nothing))
14358 break;
14359
14360 interm_types->quick_push (obj: intermediate_type);
14361 (*multi_step_cvt)++;
14362
14363 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
14364 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
14365 {
14366 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14367 return true;
14368 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
14369 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
14370 return true;
14371 }
14372
14373 prev_type = intermediate_type;
14374 prev_mode = intermediate_mode;
14375 }
14376
14377 interm_types->release ();
14378 return false;
14379}
14380
14381
14382/* Function supportable_narrowing_operation
14383
14384 Check whether an operation represented by the code CODE is a
14385 narrowing operation that is supported by the target platform in
14386 vector form (i.e., when operating on arguments of type VECTYPE_IN
14387 and producing a result of type VECTYPE_OUT).
14388
14389 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
14390 and FLOAT. This function checks if these operations are supported by
14391 the target platform directly via vector tree-codes.
14392
14393 Output:
14394 - CODE1 is the code of a vector operation to be used when
14395 vectorizing the operation, if available.
14396 - MULTI_STEP_CVT determines the number of required intermediate steps in
14397 case of multi-step conversion (like int->short->char - in that case
14398 MULTI_STEP_CVT will be 1).
14399 - INTERM_TYPES contains the intermediate type required to perform the
14400 narrowing operation (short in the above example). */
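/* For example, narrowing ints to chars on a typical 128-bit target
   packs two 4-element int vectors into one 8-element short vector with
   VEC_PACK_TRUNC_EXPR and then two of those into one 16-element char
   vector, so MULTI_STEP_CVT is 1 and INTERM_TYPES holds the short
   vector type.  */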
14401
14402bool
14403supportable_narrowing_operation (code_helper code,
14404 tree vectype_out, tree vectype_in,
14405 code_helper *code1, int *multi_step_cvt,
14406 vec<tree> *interm_types)
14407{
14408 machine_mode vec_mode;
14409 enum insn_code icode1;
14410 optab optab1, interm_optab;
14411 tree vectype = vectype_in;
14412 tree narrow_vectype = vectype_out;
14413 enum tree_code c1;
14414 tree intermediate_type, prev_type;
14415 machine_mode intermediate_mode, prev_mode;
14416 int i;
14417 unsigned HOST_WIDE_INT n_elts;
14418 bool uns;
14419
14420 if (!code.is_tree_code ())
14421 return false;
14422
14423 *multi_step_cvt = 0;
14424 switch ((tree_code) code)
14425 {
14426 CASE_CONVERT:
14427 c1 = VEC_PACK_TRUNC_EXPR;
14428 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
14429 && VECTOR_BOOLEAN_TYPE_P (vectype)
14430 && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
14431 && TYPE_VECTOR_SUBPARTS (node: vectype).is_constant (const_value: &n_elts)
14432 && n_elts < BITS_PER_UNIT)
14433 optab1 = vec_pack_sbool_trunc_optab;
14434 else
14435 optab1 = optab_for_tree_code (c1, vectype, optab_default);
14436 break;
14437
14438 case FIX_TRUNC_EXPR:
14439 c1 = VEC_PACK_FIX_TRUNC_EXPR;
14440 /* The signedness is determined from output operand. */
14441 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
14442 break;
14443
14444 case FLOAT_EXPR:
14445 c1 = VEC_PACK_FLOAT_EXPR;
14446 optab1 = optab_for_tree_code (c1, vectype, optab_default);
14447 break;
14448
14449 default:
14450 gcc_unreachable ();
14451 }
14452
14453 if (!optab1)
14454 return false;
14455
14456 vec_mode = TYPE_MODE (vectype);
14457 if ((icode1 = optab_handler (op: optab1, mode: vec_mode)) == CODE_FOR_nothing)
14458 return false;
14459
14460 *code1 = c1;
14461
14462 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
14463 {
14464 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14465 return true;
14466 /* For scalar masks we may have different boolean
14467 vector types having the same QImode.  Thus we additionally
14468 check the number of elements. */
14469 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
14470 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
14471 return true;
14472 }
14473
14474 if (code == FLOAT_EXPR)
14475 return false;
14476
14477 /* Check if it's a multi-step conversion that can be done using intermediate
14478 types. */
14479 prev_mode = vec_mode;
14480 prev_type = vectype;
14481 if (code == FIX_TRUNC_EXPR)
14482 uns = TYPE_UNSIGNED (vectype_out);
14483 else
14484 uns = TYPE_UNSIGNED (vectype);
14485
14486 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
14487 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
14488 costly than signed. */
14489 if (code == FIX_TRUNC_EXPR && uns)
14490 {
14491 enum insn_code icode2;
14492
14493 intermediate_type
14494 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
14495 interm_optab
14496 = optab_for_tree_code (c1, intermediate_type, optab_default);
14497 if (interm_optab != unknown_optab
14498 && (icode2 = optab_handler (op: optab1, mode: vec_mode)) != CODE_FOR_nothing
14499 && insn_data[icode1].operand[0].mode
14500 == insn_data[icode2].operand[0].mode)
14501 {
14502 uns = false;
14503 optab1 = interm_optab;
14504 icode1 = icode2;
14505 }
14506 }
14507
14508 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
14509 intermediate steps in the narrowing sequence.  We try
14510 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
14511 interm_types->create (MAX_INTERM_CVT_STEPS);
14512 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
14513 {
14514 intermediate_mode = insn_data[icode1].operand[0].mode;
14515 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
14516 intermediate_type
14517 = vect_double_mask_nunits (prev_type, intermediate_mode);
14518 else
14519 intermediate_type
14520 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
14521 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
14522 && VECTOR_BOOLEAN_TYPE_P (prev_type)
14523 && SCALAR_INT_MODE_P (prev_mode)
14524 && TYPE_VECTOR_SUBPARTS (node: intermediate_type).is_constant (const_value: &n_elts)
14525 && n_elts < BITS_PER_UNIT)
14526 interm_optab = vec_pack_sbool_trunc_optab;
14527 else
14528 interm_optab
14529 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
14530 optab_default);
14531 if (!interm_optab
14532 || ((icode1 = optab_handler (op: optab1, mode: prev_mode)) == CODE_FOR_nothing)
14533 || insn_data[icode1].operand[0].mode != intermediate_mode
14534 || ((icode1 = optab_handler (op: interm_optab, mode: intermediate_mode))
14535 == CODE_FOR_nothing))
14536 break;
14537
14538 interm_types->quick_push (obj: intermediate_type);
14539 (*multi_step_cvt)++;
14540
14541 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
14542 {
14543 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14544 return true;
14545 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
14546 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
14547 return true;
14548 }
14549
14550 prev_mode = intermediate_mode;
14551 prev_type = intermediate_type;
14552 optab1 = interm_optab;
14553 }
14554
14555 interm_types->release ();
14556 return false;
14557}
14558
14559/* Generate and return a vector mask of MASK_TYPE such that
14560 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
14561 Add the statements to SEQ. */
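/* For instance, with a 4-lane MASK_TYPE, START_INDEX == 6 and
   END_INDEX == 9, the generated IFN_WHILE_ULT call produces the mask
   {true, true, true, false}: lanes 0-2 satisfy 6 + J < 9, lane 3
   does not.  */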
14562
14563tree
14564vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
14565 tree end_index, const char *name)
14566{
14567 tree cmp_type = TREE_TYPE (start_index);
14568 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
14569 cmp_type, mask_type,
14570 OPTIMIZE_FOR_SPEED));
14571 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
14572 start_index, end_index,
14573 build_zero_cst (mask_type));
14574 tree tmp;
14575 if (name)
14576 tmp = make_temp_ssa_name (type: mask_type, NULL, name);
14577 else
14578 tmp = make_ssa_name (var: mask_type);
14579 gimple_call_set_lhs (gs: call, lhs: tmp);
14580 gimple_seq_add_stmt (seq, call);
14581 return tmp;
14582}
14583
14584/* Generate a vector mask of type MASK_TYPE for which index I is false iff
14585 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
14586
14587tree
14588vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
14589 tree end_index)
14590{
14591 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
14592 return gimple_build (seq, code: BIT_NOT_EXPR, type: mask_type, ops: tmp);
14593}
14594
14595/* Try to compute the vector types required to vectorize STMT_INFO,
14596 returning true on success and false if vectorization isn't possible.
14597 If GROUP_SIZE is nonzero and we're performing BB vectorization,
14598 make sure that the number of elements in the vectors is no bigger
14599 than GROUP_SIZE.
14600
14601 On success:
14602
14603 - Set *STMT_VECTYPE_OUT to:
14604 - NULL_TREE if the statement doesn't need to be vectorized;
14605 - the equivalent of STMT_VINFO_VECTYPE otherwise.
14606
14607 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
14608 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
14609 statement does not help to determine the overall number of units. */
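/* For example, for a widening conversion such as

     int_dest = (int) char_src;

   *STMT_VECTYPE_OUT is the int vector type, while *NUNITS_VECTYPE_OUT
   is based on the smallest scalar type involved (char) and so, on a
   128-bit target, would have 16 units rather than 4.  */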
14610
14611opt_result
14612vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
14613 tree *stmt_vectype_out,
14614 tree *nunits_vectype_out,
14615 unsigned int group_size)
14616{
14617 gimple *stmt = stmt_info->stmt;
14618
14619 /* For BB vectorization, we should always have a group size once we've
14620 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
14621 are tentative requests during things like early data reference
14622 analysis and pattern recognition. */
14623 if (is_a <bb_vec_info> (p: vinfo))
14624 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
14625 else
14626 group_size = 0;
14627
14628 *stmt_vectype_out = NULL_TREE;
14629 *nunits_vectype_out = NULL_TREE;
14630
14631 if (gimple_get_lhs (stmt) == NULL_TREE
14632 /* Allow vector conditionals through here. */
14633 && !is_a <gcond *> (p: stmt)
14634 /* MASK_STORE has no lhs, but is ok. */
14635 && !gimple_call_internal_p (gs: stmt, fn: IFN_MASK_STORE))
14636 {
14637 if (is_a <gcall *> (p: stmt))
14638 {
14639 /* Ignore calls with no lhs. These must be calls to
14640 #pragma omp simd functions, and what vectorization factor
14641 it really needs can't be determined until
14642 vectorizable_simd_clone_call. */
14643 if (dump_enabled_p ())
14644 dump_printf_loc (MSG_NOTE, vect_location,
14645 "defer to SIMD clone analysis.\n");
14646 return opt_result::success ();
14647 }
14648
14649 return opt_result::failure_at (loc: stmt,
14650 fmt: "not vectorized: irregular stmt: %G", stmt);
14651 }
14652
14653 tree vectype;
14654 tree scalar_type = NULL_TREE;
14655 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
14656 {
14657 vectype = STMT_VINFO_VECTYPE (stmt_info);
14658 if (dump_enabled_p ())
14659 dump_printf_loc (MSG_NOTE, vect_location,
14660 "precomputed vectype: %T\n", vectype);
14661 }
14662 else if (vect_use_mask_type_p (stmt_info))
14663 {
14664 unsigned int precision = stmt_info->mask_precision;
14665 scalar_type = build_nonstandard_integer_type (precision, 1);
14666 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
14667 if (!vectype)
14668 return opt_result::failure_at (loc: stmt, fmt: "not vectorized: unsupported"
14669 " data-type %T\n", scalar_type);
14670 if (dump_enabled_p ())
14671 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
14672 }
14673 else
14674 {
14675 /* If we got here with a gcond it means that the target had no available vector
14676 mode for the scalar type. We can't vectorize so abort. */
14677 if (is_a <gcond *> (p: stmt))
14678 return opt_result::failure_at (loc: stmt,
14679 fmt: "not vectorized:"
14680 " unsupported data-type for gcond %T\n",
14681 scalar_type);
14682
14683 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
14684 scalar_type = TREE_TYPE (DR_REF (dr));
14685 else if (gimple_call_internal_p (gs: stmt, fn: IFN_MASK_STORE))
14686 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
14687 else
14688 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
14689
14690 if (dump_enabled_p ())
14691 {
14692 if (group_size)
14693 dump_printf_loc (MSG_NOTE, vect_location,
14694 "get vectype for scalar type (group size %d):"
14695 " %T\n", group_size, scalar_type);
14696 else
14697 dump_printf_loc (MSG_NOTE, vect_location,
14698 "get vectype for scalar type: %T\n", scalar_type);
14699 }
14700 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
14701 if (!vectype)
14702 return opt_result::failure_at (loc: stmt,
14703 fmt: "not vectorized:"
14704 " unsupported data-type %T\n",
14705 scalar_type);
14706
14707 if (dump_enabled_p ())
14708 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
14709 }
14710
14711 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
14712 return opt_result::failure_at (loc: stmt,
14713 fmt: "not vectorized: vector stmt in loop:%G",
14714 stmt);
14715
14716 *stmt_vectype_out = vectype;
14717
14718 /* Don't try to compute scalar types if the stmt produces a boolean
14719 vector; use the existing vector type instead. */
14720 tree nunits_vectype = vectype;
14721 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14722 {
14723 /* The number of units is set according to the smallest scalar
14724 type (or the largest vector size, but we only support one
14725 vector size per vectorization). */
14726 scalar_type = vect_get_smallest_scalar_type (stmt_info,
14727 TREE_TYPE (vectype));
14728 if (scalar_type != TREE_TYPE (vectype))
14729 {
14730 if (dump_enabled_p ())
14731 dump_printf_loc (MSG_NOTE, vect_location,
14732 "get vectype for smallest scalar type: %T\n",
14733 scalar_type);
14734 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
14735 group_size);
14736 if (!nunits_vectype)
14737 return opt_result::failure_at
14738 (loc: stmt, fmt: "not vectorized: unsupported data-type %T\n",
14739 scalar_type);
14740 if (dump_enabled_p ())
14741 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
14742 nunits_vectype);
14743 }
14744 }
14745
14746 if (!multiple_p (a: TYPE_VECTOR_SUBPARTS (node: nunits_vectype),
14747 b: TYPE_VECTOR_SUBPARTS (node: *stmt_vectype_out)))
14748 return opt_result::failure_at (loc: stmt,
14749 fmt: "Not vectorized: Incompatible number "
14750 "of vector subparts between %T and %T\n",
14751 nunits_vectype, *stmt_vectype_out);
14752
14753 if (dump_enabled_p ())
14754 {
14755 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
14756 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (node: nunits_vectype));
14757 dump_printf (MSG_NOTE, "\n");
14758 }
14759
14760 *nunits_vectype_out = nunits_vectype;
14761 return opt_result::success ();
14762}
14763
14764/* Generate and return statement sequence that sets vector length LEN that is:
14765
14766 min_of_start_and_end = min (START_INDEX, END_INDEX);
14767 left_len = END_INDEX - min_of_start_and_end;
14768 rhs = min (left_len, LEN_LIMIT);
14769 LEN = rhs;
14770
14771 Note: the cost of the code generated by this function is modeled
14772 by vect_estimate_min_profitable_iters, so changes here may need
14773 corresponding changes there. */
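/* For example, with START_INDEX == 8, END_INDEX == 10 and
   LEN_LIMIT == 4 the generated sequence computes

     min_of_start_and_end = min (8, 10) = 8
     left_len = 10 - 8 = 2
     LEN = min (2, 4) = 2

   i.e. only two elements remain to be processed in this iteration.  */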
14774
14775gimple_seq
14776vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
14777{
14778 gimple_seq stmts = NULL;
14779 tree len_type = TREE_TYPE (len);
14780 gcc_assert (TREE_TYPE (start_index) == len_type);
14781
14782 tree min = gimple_build (seq: &stmts, code: MIN_EXPR, type: len_type, ops: start_index, ops: end_index);
14783 tree left_len = gimple_build (seq: &stmts, code: MINUS_EXPR, type: len_type, ops: end_index, ops: min);
14784 tree rhs = gimple_build (seq: &stmts, code: MIN_EXPR, type: len_type, ops: left_len, ops: len_limit);
14785 gimple* stmt = gimple_build_assign (len, rhs);
14786 gimple_seq_add_stmt (&stmts, stmt);
14787
14788 return stmts;
14789}
14790
14791
