1/* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "backend.h"
26#include "target.h"
27#include "rtl.h"
28#include "tree.h"
29#include "gimple.h"
30#include "tree-pass.h"
31#include "ssa.h"
32#include "optabs-tree.h"
33#include "insn-config.h"
34#include "recog.h" /* FIXME: for insn_data */
35#include "params.h"
36#include "fold-const.h"
37#include "stor-layout.h"
38#include "gimple-iterator.h"
39#include "cfgloop.h"
40#include "tree-vectorizer.h"
41#include "langhooks.h"
42#include "gimple-walk.h"
43#include "dbgcnt.h"
44#include "tree-vector-builder.h"
45
46
47/* Recursively free the memory allocated for the SLP tree rooted at NODE. */
48
49static void
50vect_free_slp_tree (slp_tree node)
51{
52 int i;
53 slp_tree child;
54
55 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
56 vect_free_slp_tree (child);
57
58 gimple *stmt;
59 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
60 /* After transform some stmts are removed and thus their vinfo is gone. */
61 if (vinfo_for_stmt (stmt))
62 {
63 gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
64 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
65 }
66
67 SLP_TREE_CHILDREN (node).release ();
68 SLP_TREE_SCALAR_STMTS (node).release ();
69 SLP_TREE_VEC_STMTS (node).release ();
70 SLP_TREE_LOAD_PERMUTATION (node).release ();
71
72 free (node);
73}
74
75
76/* Free the memory allocated for the SLP instance. */
77
78void
79vect_free_slp_instance (slp_instance instance)
80{
81 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
82 SLP_INSTANCE_LOADS (instance).release ();
83 free (instance);
84}
85
86
87/* Create an SLP node for SCALAR_STMTS. */
88
89static slp_tree
90vect_create_new_slp_node (vec<gimple *> scalar_stmts)
91{
92 slp_tree node;
93 gimple *stmt = scalar_stmts[0];
94 unsigned int nops;
95
96 if (is_gimple_call (stmt))
97 nops = gimple_call_num_args (stmt);
98 else if (is_gimple_assign (stmt))
99 {
100 nops = gimple_num_ops (stmt) - 1;
101 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
102 nops++;
103 }
104 else if (gimple_code (stmt) == GIMPLE_PHI)
105 nops = 0;
106 else
107 return NULL;
108
109 node = XNEW (struct _slp_tree);
110 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
111 SLP_TREE_VEC_STMTS (node).create (0);
112 SLP_TREE_CHILDREN (node).create (nops);
113 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
114 SLP_TREE_TWO_OPERATORS (node) = false;
115 SLP_TREE_DEF_TYPE (node) = vect_internal_def;
116
117 unsigned i;
118 FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
119 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;
120
121 return node;
122}
123
124
125/* This structure is used in creation of an SLP tree. Each instance
126 corresponds to the same operand in a group of scalar stmts in an SLP
127 node. */
128typedef struct _slp_oprnd_info
129{
130 /* Def-stmts for the operands. */
131 vec<gimple *> def_stmts;
132 /* Information about the first statement, its vector def-type, type, the
133 operand itself in case it's constant, and an indication if it's a pattern
134 stmt. */
135 tree first_op_type;
136 enum vect_def_type first_dt;
137 bool first_pattern;
138 bool second_pattern;
139} *slp_oprnd_info;
140
141
142/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
143 operand. */
144static vec<slp_oprnd_info>
145vect_create_oprnd_info (int nops, int group_size)
146{
147 int i;
148 slp_oprnd_info oprnd_info;
149 vec<slp_oprnd_info> oprnds_info;
150
151 oprnds_info.create (nops);
152 for (i = 0; i < nops; i++)
153 {
154 oprnd_info = XNEW (struct _slp_oprnd_info);
155 oprnd_info->def_stmts.create (group_size);
156 oprnd_info->first_dt = vect_uninitialized_def;
157 oprnd_info->first_op_type = NULL_TREE;
158 oprnd_info->first_pattern = false;
159 oprnd_info->second_pattern = false;
160 oprnds_info.quick_push (oprnd_info);
161 }
162
163 return oprnds_info;
164}
165
166
167/* Free operands info. */
168
169static void
170vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
171{
172 int i;
173 slp_oprnd_info oprnd_info;
174
175 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
176 {
177 oprnd_info->def_stmts.release ();
178 XDELETE (oprnd_info);
179 }
180
181 oprnds_info.release ();
182}
183
184
185/* Find the place of the data-ref in STMT in the interleaving chain that starts
186 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
187
188static int
189vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
190{
191 gimple *next_stmt = first_stmt;
192 int result = 0;
193
194 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
195 return -1;
196
197 do
198 {
199 if (next_stmt == stmt)
200 return result;
201 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
202 if (next_stmt)
203 result += GROUP_GAP (vinfo_for_stmt (next_stmt));
204 }
205 while (next_stmt);
206
207 return -1;
208}
209
210
211/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
212 they are of a valid type and that they match the defs of the first stmt of
213 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
214 by swapping operands of STMT when possible. Non-zero *SWAP indicates swap
215 is required for cond_expr stmts. Specifically, *SWAP is 1 if STMT is cond
216 and operands of comparison need to be swapped; *SWAP is 2 if STMT is cond
217 and code of comparison needs to be inverted. If there is any operand swap
218 in this function, *SWAP is set to non-zero value.
219 If there was a fatal error return -1; if the error could be corrected by
220 swapping operands of father node of this one, return 1; if everything is
221 ok return 0. */
222
223static int
224vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
225 gimple *stmt, unsigned stmt_num,
226 vec<slp_oprnd_info> *oprnds_info)
227{
228 tree oprnd;
229 unsigned int i, number_of_oprnds;
230 gimple *def_stmt;
231 enum vect_def_type dt = vect_uninitialized_def;
232 bool pattern = false;
233 slp_oprnd_info oprnd_info;
234 int first_op_idx = 1;
235 bool commutative = false;
236 bool first_op_cond = false;
237 bool first = stmt_num == 0;
238 bool second = stmt_num == 1;
239
240 if (is_gimple_call (stmt))
241 {
242 number_of_oprnds = gimple_call_num_args (stmt);
243 first_op_idx = 3;
244 }
245 else if (is_gimple_assign (stmt))
246 {
247 enum tree_code code = gimple_assign_rhs_code (stmt);
248 number_of_oprnds = gimple_num_ops (stmt) - 1;
249 /* Swap can only be done for cond_expr if asked to, otherwise we
250 could result in different comparison code to the first stmt. */
251 if (gimple_assign_rhs_code (stmt) == COND_EXPR
252 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
253 {
254 first_op_cond = true;
255 number_of_oprnds++;
256 }
257 else
258 commutative = commutative_tree_code (code);
259 }
260 else
261 return -1;
262
263 bool swapped = (*swap != 0);
264 gcc_assert (!swapped || first_op_cond);
265 for (i = 0; i < number_of_oprnds; i++)
266 {
267again:
268 if (first_op_cond)
269 {
270 /* Map indicating how operands of cond_expr should be swapped. */
271 int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
272 int *map = maps[*swap];
273
274 if (i < 2)
275 oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);
276 else
277 oprnd = gimple_op (stmt, map[i]);
278 }
279 else
280 oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));
281
282 oprnd_info = (*oprnds_info)[i];
283
284 if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
285 {
286 if (dump_enabled_p ())
287 {
288 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
289 "Build SLP failed: can't analyze def for ");
290 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
291 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
292 }
293
294 return -1;
295 }
296
297 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
298 from the pattern. Check that all the stmts of the node are in the
299 pattern. */
300 if (def_stmt && gimple_bb (def_stmt)
301 && vect_stmt_in_region_p (vinfo, def_stmt)
302 && vinfo_for_stmt (def_stmt)
303 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
304 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
305 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
306 {
307 pattern = true;
308 if (!first && !oprnd_info->first_pattern
309 /* Allow different pattern state for the defs of the
310 first stmt in reduction chains. */
311 && (oprnd_info->first_dt != vect_reduction_def
312 || (!second && !oprnd_info->second_pattern)))
313 {
314 if (i == 0
315 && !swapped
316 && commutative)
317 {
318 swapped = true;
319 goto again;
320 }
321
322 if (dump_enabled_p ())
323 {
324 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
325 "Build SLP failed: some of the stmts"
326 " are in a pattern, and others are not ");
327 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
328 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
329 }
330
331 return 1;
332 }
333
334 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
335 dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
336
337 if (dt == vect_unknown_def_type)
338 {
339 if (dump_enabled_p ())
340 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
341 "Unsupported pattern.\n");
342 return -1;
343 }
344
345 switch (gimple_code (def_stmt))
346 {
347 case GIMPLE_PHI:
348 case GIMPLE_ASSIGN:
349 break;
350
351 default:
352 if (dump_enabled_p ())
353 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
354 "unsupported defining stmt:\n");
355 return -1;
356 }
357 }
358
359 if (second)
360 oprnd_info->second_pattern = pattern;
361
362 if (first)
363 {
364 oprnd_info->first_dt = dt;
365 oprnd_info->first_pattern = pattern;
366 oprnd_info->first_op_type = TREE_TYPE (oprnd);
367 }
368 else
369 {
370 /* Not first stmt of the group, check that the def-stmt/s match
371 the def-stmt/s of the first stmt. Allow different definition
372 types for reduction chains: the first stmt must be a
373 vect_reduction_def (a phi node), and the rest
374 vect_internal_def. */
375 if (((oprnd_info->first_dt != dt
376 && !(oprnd_info->first_dt == vect_reduction_def
377 && dt == vect_internal_def)
378 && !((oprnd_info->first_dt == vect_external_def
379 || oprnd_info->first_dt == vect_constant_def)
380 && (dt == vect_external_def
381 || dt == vect_constant_def)))
382 || !types_compatible_p (oprnd_info->first_op_type,
383 TREE_TYPE (oprnd))))
384 {
385 /* Try swapping operands if we got a mismatch. */
386 if (i == 0
387 && !swapped
388 && commutative)
389 {
390 swapped = true;
391 goto again;
392 }
393
394 if (dump_enabled_p ())
395 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
396 "Build SLP failed: different types\n");
397
398 return 1;
399 }
400 }
401
402 /* Check the types of the definitions. */
403 switch (dt)
404 {
405 case vect_constant_def:
406 case vect_external_def:
407 break;
408
409 case vect_reduction_def:
410 case vect_induction_def:
411 case vect_internal_def:
412 oprnd_info->def_stmts.quick_push (def_stmt);
413 break;
414
415 default:
416 /* FORNOW: Not supported. */
417 if (dump_enabled_p ())
418 {
419 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
420 "Build SLP failed: illegal type of def ");
421 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
422 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
423 }
424
425 return -1;
426 }
427 }
428
429 /* Swap operands. */
430 if (swapped)
431 {
432 /* If there are already uses of this stmt in a SLP instance then
433 we've committed to the operand order and can't swap it. */
434 if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
435 {
436 if (dump_enabled_p ())
437 {
438 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
439 "Build SLP failed: cannot swap operands of "
440 "shared stmt ");
441 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
442 }
443 return -1;
444 }
445
446 if (first_op_cond)
447 {
448 tree cond = gimple_assign_rhs1 (stmt);
449 enum tree_code code = TREE_CODE (cond);
450
451 /* Swap. */
452 if (*swap == 1)
453 {
454 swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
455 &TREE_OPERAND (cond, 1));
456 TREE_SET_CODE (cond, swap_tree_comparison (code));
457 }
458 /* Invert. */
459 else
460 {
461 swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
462 gimple_assign_rhs3_ptr (stmt));
463 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
464 code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
465 gcc_assert (code != ERROR_MARK);
466 TREE_SET_CODE (cond, code);
467 }
468 }
469 else
470 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
471 gimple_assign_rhs2_ptr (stmt));
472 if (dump_enabled_p ())
473 {
474 dump_printf_loc (MSG_NOTE, vect_location,
475 "swapped operands to match def types in ");
476 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
477 }
478 }
479
480 *swap = swapped;
481 return 0;
482}
483
484/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
485 caller's attempt to find the vector type in STMT with the narrowest
486 element type. Return true if VECTYPE is nonnull and if it is valid
487 for VINFO. When returning true, update MAX_NUNITS to reflect the
488 number of units in VECTYPE. VINFO, GORUP_SIZE and MAX_NUNITS are
489 as for vect_build_slp_tree. */
490
491static bool
492vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size,
493 tree vectype, unsigned int *max_nunits)
494{
495 if (!vectype)
496 {
497 if (dump_enabled_p ())
498 {
499 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
500 "Build SLP failed: unsupported data-type in ");
501 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
502 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
503 }
504 /* Fatal mismatch. */
505 return false;
506 }
507
508 /* If populating the vector type requires unrolling then fail
509 before adjusting *max_nunits for basic-block vectorization. */
510 if (is_a <bb_vec_info> (vinfo)
511 && TYPE_VECTOR_SUBPARTS (vectype) > group_size)
512 {
513 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
514 "Build SLP failed: unrolling required "
515 "in basic block SLP\n");
516 /* Fatal mismatch. */
517 return false;
518 }
519
520 /* In case of multiple types we need to detect the smallest type. */
521 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
522 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
523
524 return true;
525}
526
527/* Verify if the scalar stmts STMTS are isomorphic, require data
528 permutation or are of unsupported types of operation. Return
529 true if they are, otherwise return false and indicate in *MATCHES
530 which stmts are not isomorphic to the first one. If MATCHES[0]
531 is false then this indicates the comparison could not be
532 carried out or the stmts will never be vectorized by SLP.
533
534 Note COND_EXPR is possibly ismorphic to another one after swapping its
535 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
536 the first stmt by swapping the two operands of comparison; set SWAP[i]
537 to 2 if stmt I is isormorphic to the first stmt by inverting the code
538 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
539 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
540
541static bool
542vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
543 vec<gimple *> stmts, unsigned int group_size,
544 unsigned nops, unsigned int *max_nunits,
545 bool *matches, bool *two_operators)
546{
547 unsigned int i;
548 gimple *first_stmt = stmts[0], *stmt = stmts[0];
549 enum tree_code first_stmt_code = ERROR_MARK;
550 enum tree_code alt_stmt_code = ERROR_MARK;
551 enum tree_code rhs_code = ERROR_MARK;
552 enum tree_code first_cond_code = ERROR_MARK;
553 tree lhs;
554 bool need_same_oprnds = false;
555 tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
556 optab optab;
557 int icode;
558 machine_mode optab_op2_mode;
559 machine_mode vec_mode;
560 HOST_WIDE_INT dummy;
561 gimple *first_load = NULL, *prev_first_load = NULL;
562
563 /* For every stmt in NODE find its def stmt/s. */
564 FOR_EACH_VEC_ELT (stmts, i, stmt)
565 {
566 swap[i] = 0;
567 matches[i] = false;
568
569 if (dump_enabled_p ())
570 {
571 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
572 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
573 }
574
575 /* Fail to vectorize statements marked as unvectorizable. */
576 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
577 {
578 if (dump_enabled_p ())
579 {
580 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
581 "Build SLP failed: unvectorizable statement ");
582 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
583 }
584 /* Fatal mismatch. */
585 matches[0] = false;
586 return false;
587 }
588
589 lhs = gimple_get_lhs (stmt);
590 if (lhs == NULL_TREE)
591 {
592 if (dump_enabled_p ())
593 {
594 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
595 "Build SLP failed: not GIMPLE_ASSIGN nor "
596 "GIMPLE_CALL ");
597 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
598 }
599 /* Fatal mismatch. */
600 matches[0] = false;
601 return false;
602 }
603
604 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
605 vectype = get_vectype_for_scalar_type (scalar_type);
606 if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
607 max_nunits))
608 {
609 /* Fatal mismatch. */
610 matches[0] = false;
611 return false;
612 }
613
614 if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
615 {
616 rhs_code = CALL_EXPR;
617 if (gimple_call_internal_p (call_stmt)
618 || gimple_call_tail_p (call_stmt)
619 || gimple_call_noreturn_p (call_stmt)
620 || !gimple_call_nothrow_p (call_stmt)
621 || gimple_call_chain (call_stmt))
622 {
623 if (dump_enabled_p ())
624 {
625 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
626 "Build SLP failed: unsupported call type ");
627 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
628 call_stmt, 0);
629 }
630 /* Fatal mismatch. */
631 matches[0] = false;
632 return false;
633 }
634 }
635 else
636 rhs_code = gimple_assign_rhs_code (stmt);
637
638 /* Check the operation. */
639 if (i == 0)
640 {
641 first_stmt_code = rhs_code;
642
643 /* Shift arguments should be equal in all the packed stmts for a
644 vector shift with scalar shift operand. */
645 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
646 || rhs_code == LROTATE_EXPR
647 || rhs_code == RROTATE_EXPR)
648 {
649 vec_mode = TYPE_MODE (vectype);
650
651 /* First see if we have a vector/vector shift. */
652 optab = optab_for_tree_code (rhs_code, vectype,
653 optab_vector);
654
655 if (!optab
656 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
657 {
658 /* No vector/vector shift, try for a vector/scalar shift. */
659 optab = optab_for_tree_code (rhs_code, vectype,
660 optab_scalar);
661
662 if (!optab)
663 {
664 if (dump_enabled_p ())
665 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
666 "Build SLP failed: no optab.\n");
667 /* Fatal mismatch. */
668 matches[0] = false;
669 return false;
670 }
671 icode = (int) optab_handler (optab, vec_mode);
672 if (icode == CODE_FOR_nothing)
673 {
674 if (dump_enabled_p ())
675 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
676 "Build SLP failed: "
677 "op not supported by target.\n");
678 /* Fatal mismatch. */
679 matches[0] = false;
680 return false;
681 }
682 optab_op2_mode = insn_data[icode].operand[2].mode;
683 if (!VECTOR_MODE_P (optab_op2_mode))
684 {
685 need_same_oprnds = true;
686 first_op1 = gimple_assign_rhs2 (stmt);
687 }
688 }
689 }
690 else if (rhs_code == WIDEN_LSHIFT_EXPR)
691 {
692 need_same_oprnds = true;
693 first_op1 = gimple_assign_rhs2 (stmt);
694 }
695 }
696 else
697 {
698 if (first_stmt_code != rhs_code
699 && alt_stmt_code == ERROR_MARK)
700 alt_stmt_code = rhs_code;
701 if (first_stmt_code != rhs_code
702 && (first_stmt_code != IMAGPART_EXPR
703 || rhs_code != REALPART_EXPR)
704 && (first_stmt_code != REALPART_EXPR
705 || rhs_code != IMAGPART_EXPR)
706 /* Handle mismatches in plus/minus by computing both
707 and merging the results. */
708 && !((first_stmt_code == PLUS_EXPR
709 || first_stmt_code == MINUS_EXPR)
710 && (alt_stmt_code == PLUS_EXPR
711 || alt_stmt_code == MINUS_EXPR)
712 && rhs_code == alt_stmt_code)
713 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
714 && (first_stmt_code == ARRAY_REF
715 || first_stmt_code == BIT_FIELD_REF
716 || first_stmt_code == INDIRECT_REF
717 || first_stmt_code == COMPONENT_REF
718 || first_stmt_code == MEM_REF)))
719 {
720 if (dump_enabled_p ())
721 {
722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
723 "Build SLP failed: different operation "
724 "in stmt ");
725 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
726 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
727 "original stmt ");
728 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
729 first_stmt, 0);
730 }
731 /* Mismatch. */
732 continue;
733 }
734
735 if (need_same_oprnds
736 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
737 {
738 if (dump_enabled_p ())
739 {
740 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
741 "Build SLP failed: different shift "
742 "arguments in ");
743 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
744 }
745 /* Mismatch. */
746 continue;
747 }
748
749 if (rhs_code == CALL_EXPR)
750 {
751 gimple *first_stmt = stmts[0];
752 if (gimple_call_num_args (stmt) != nops
753 || !operand_equal_p (gimple_call_fn (first_stmt),
754 gimple_call_fn (stmt), 0)
755 || gimple_call_fntype (first_stmt)
756 != gimple_call_fntype (stmt))
757 {
758 if (dump_enabled_p ())
759 {
760 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
761 "Build SLP failed: different calls in ");
762 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
763 stmt, 0);
764 }
765 /* Mismatch. */
766 continue;
767 }
768 }
769 }
770
771 /* Grouped store or load. */
772 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
773 {
774 if (REFERENCE_CLASS_P (lhs))
775 {
776 /* Store. */
777 ;
778 }
779 else
780 {
781 /* Load. */
782 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
783 if (prev_first_load)
784 {
785 /* Check that there are no loads from different interleaving
786 chains in the same node. */
787 if (prev_first_load != first_load)
788 {
789 if (dump_enabled_p ())
790 {
791 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
792 vect_location,
793 "Build SLP failed: different "
794 "interleaving chains in one node ");
795 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
796 stmt, 0);
797 }
798 /* Mismatch. */
799 continue;
800 }
801 }
802 else
803 prev_first_load = first_load;
804 }
805 } /* Grouped access. */
806 else
807 {
808 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
809 {
810 /* Not grouped load. */
811 if (dump_enabled_p ())
812 {
813 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
814 "Build SLP failed: not grouped load ");
815 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
816 }
817
818 /* FORNOW: Not grouped loads are not supported. */
819 /* Fatal mismatch. */
820 matches[0] = false;
821 return false;
822 }
823
824 /* Not memory operation. */
825 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
826 && TREE_CODE_CLASS (rhs_code) != tcc_unary
827 && TREE_CODE_CLASS (rhs_code) != tcc_expression
828 && TREE_CODE_CLASS (rhs_code) != tcc_comparison
829 && rhs_code != CALL_EXPR)
830 {
831 if (dump_enabled_p ())
832 {
833 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
834 "Build SLP failed: operation");
835 dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
836 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
837 }
838 /* Fatal mismatch. */
839 matches[0] = false;
840 return false;
841 }
842
843 if (rhs_code == COND_EXPR)
844 {
845 tree cond_expr = gimple_assign_rhs1 (stmt);
846 enum tree_code cond_code = TREE_CODE (cond_expr);
847 enum tree_code swap_code = ERROR_MARK;
848 enum tree_code invert_code = ERROR_MARK;
849
850 if (i == 0)
851 first_cond_code = TREE_CODE (cond_expr);
852 else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
853 {
854 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
855 swap_code = swap_tree_comparison (cond_code);
856 invert_code = invert_tree_comparison (cond_code, honor_nans);
857 }
858
859 if (first_cond_code == cond_code)
860 ;
861 /* Isomorphic can be achieved by swapping. */
862 else if (first_cond_code == swap_code)
863 swap[i] = 1;
864 /* Isomorphic can be achieved by inverting. */
865 else if (first_cond_code == invert_code)
866 swap[i] = 2;
867 else
868 {
869 if (dump_enabled_p ())
870 {
871 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
872 "Build SLP failed: different"
873 " operation");
874 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
875 stmt, 0);
876 }
877 /* Mismatch. */
878 continue;
879 }
880 }
881 }
882
883 matches[i] = true;
884 }
885
886 for (i = 0; i < group_size; ++i)
887 if (!matches[i])
888 return false;
889
890 /* If we allowed a two-operation SLP node verify the target can cope
891 with the permute we are going to use. */
892 if (alt_stmt_code != ERROR_MARK
893 && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
894 {
895 unsigned int count = TYPE_VECTOR_SUBPARTS (vectype);
896 auto_vec_perm_indices sel (count);
897 for (i = 0; i < count; ++i)
898 {
899 unsigned int elt = i;
900 if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
901 elt += count;
902 sel.quick_push (elt);
903 }
904 if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
905 {
906 for (i = 0; i < group_size; ++i)
907 if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
908 {
909 matches[i] = false;
910 if (dump_enabled_p ())
911 {
912 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
913 "Build SLP failed: different operation "
914 "in stmt ");
915 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
916 stmts[i], 0);
917 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
918 "original stmt ");
919 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
920 first_stmt, 0);
921 }
922 }
923 return false;
924 }
925 *two_operators = true;
926 }
927
928 return true;
929}
930
931/* Traits for the hash_set to record failed SLP builds for a stmt set.
932 Note we never remove apart from at destruction time so we do not
933 need a special value for deleted that differs from empty. */
934struct bst_traits
935{
936 typedef vec <gimple *> value_type;
937 typedef vec <gimple *> compare_type;
938 static inline hashval_t hash (value_type);
939 static inline bool equal (value_type existing, value_type candidate);
940 static inline bool is_empty (value_type x) { return !x.exists (); }
941 static inline bool is_deleted (value_type x) { return !x.exists (); }
942 static inline void mark_empty (value_type &x) { x.release (); }
943 static inline void mark_deleted (value_type &x) { x.release (); }
944 static inline void remove (value_type &x) { x.release (); }
945};
946inline hashval_t
947bst_traits::hash (value_type x)
948{
949 inchash::hash h;
950 for (unsigned i = 0; i < x.length (); ++i)
951 h.add_int (gimple_uid (x[i]));
952 return h.end ();
953}
954inline bool
955bst_traits::equal (value_type existing, value_type candidate)
956{
957 if (existing.length () != candidate.length ())
958 return false;
959 for (unsigned i = 0; i < existing.length (); ++i)
960 if (existing[i] != candidate[i])
961 return false;
962 return true;
963}
964
965typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
966static scalar_stmts_set_t *bst_fail;
967
968static slp_tree
969vect_build_slp_tree_2 (vec_info *vinfo,
970 vec<gimple *> stmts, unsigned int group_size,
971 unsigned int *max_nunits,
972 vec<slp_tree> *loads,
973 bool *matches, unsigned *npermutes, unsigned *tree_size,
974 unsigned max_tree_size);
975
976static slp_tree
977vect_build_slp_tree (vec_info *vinfo,
978 vec<gimple *> stmts, unsigned int group_size,
979 unsigned int *max_nunits,
980 vec<slp_tree> *loads,
981 bool *matches, unsigned *npermutes, unsigned *tree_size,
982 unsigned max_tree_size)
983{
984 if (bst_fail->contains (stmts))
985 return NULL;
986 slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
987 loads, matches, npermutes, tree_size,
988 max_tree_size);
989 /* When SLP build fails for stmts record this, otherwise SLP build
990 can be exponential in time when we allow to construct parts from
991 scalars, see PR81723. */
992 if (! res)
993 {
994 vec <gimple *> x;
995 x.create (stmts.length ());
996 x.splice (stmts);
997 bst_fail->add (x);
998 }
999 return res;
1000}
1001
1002/* Recursively build an SLP tree starting from NODE.
1003 Fail (and return a value not equal to zero) if def-stmts are not
1004 isomorphic, require data permutation or are of unsupported types of
1005 operation. Otherwise, return 0.
1006 The value returned is the depth in the SLP tree where a mismatch
1007 was found. */
1008
1009static slp_tree
1010vect_build_slp_tree_2 (vec_info *vinfo,
1011 vec<gimple *> stmts, unsigned int group_size,
1012 unsigned int *max_nunits,
1013 vec<slp_tree> *loads,
1014 bool *matches, unsigned *npermutes, unsigned *tree_size,
1015 unsigned max_tree_size)
1016{
1017 unsigned nops, i, this_tree_size = 0, this_max_nunits = *max_nunits;
1018 gimple *stmt;
1019 slp_tree node;
1020
1021 matches[0] = false;
1022
1023 stmt = stmts[0];
1024 if (is_gimple_call (stmt))
1025 nops = gimple_call_num_args (stmt);
1026 else if (is_gimple_assign (stmt))
1027 {
1028 nops = gimple_num_ops (stmt) - 1;
1029 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
1030 nops++;
1031 }
1032 else if (gimple_code (stmt) == GIMPLE_PHI)
1033 nops = 0;
1034 else
1035 return NULL;
1036
1037 /* If the SLP node is a PHI (induction or reduction), terminate
1038 the recursion. */
1039 if (gimple_code (stmt) == GIMPLE_PHI)
1040 {
1041 tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
1042 tree vectype = get_vectype_for_scalar_type (scalar_type);
1043 if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
1044 max_nunits))
1045 return NULL;
1046
1047 vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt));
1048 /* Induction from different IVs is not supported. */
1049 if (def_type == vect_induction_def)
1050 {
1051 FOR_EACH_VEC_ELT (stmts, i, stmt)
1052 if (stmt != stmts[0])
1053 return NULL;
1054 }
1055 else
1056 {
1057 /* Else def types have to match. */
1058 FOR_EACH_VEC_ELT (stmts, i, stmt)
1059 {
1060 /* But for reduction chains only check on the first stmt. */
1061 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
1062 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt)
1063 continue;
1064 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type)
1065 return NULL;
1066 }
1067 }
1068 node = vect_create_new_slp_node (stmts);
1069 return node;
1070 }
1071
1072
1073 bool two_operators = false;
1074 unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
1075 if (!vect_build_slp_tree_1 (vinfo, swap,
1076 stmts, group_size, nops,
1077 &this_max_nunits, matches, &two_operators))
1078 return NULL;
1079
1080 /* If the SLP node is a load, terminate the recursion. */
1081 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
1082 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
1083 {
1084 *max_nunits = this_max_nunits;
1085 node = vect_create_new_slp_node (stmts);
1086 loads->safe_push (node);
1087 return node;
1088 }
1089
1090 /* Get at the operands, verifying they are compatible. */
1091 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
1092 slp_oprnd_info oprnd_info;
1093 FOR_EACH_VEC_ELT (stmts, i, stmt)
1094 {
1095 int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
1096 stmt, i, &oprnds_info);
1097 if (res != 0)
1098 matches[(res == -1) ? 0 : i] = false;
1099 if (!matches[0])
1100 break;
1101 }
1102 for (i = 0; i < group_size; ++i)
1103 if (!matches[i])
1104 {
1105 vect_free_oprnd_info (oprnds_info);
1106 return NULL;
1107 }
1108
1109 auto_vec<slp_tree, 4> children;
1110 auto_vec<slp_tree> this_loads;
1111
1112 stmt = stmts[0];
1113
1114 if (tree_size)
1115 max_tree_size -= *tree_size;
1116
1117 /* Create SLP_TREE nodes for the definition node/s. */
1118 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
1119 {
1120 slp_tree child;
1121 unsigned old_nloads = this_loads.length ();
1122 unsigned old_tree_size = this_tree_size;
1123 unsigned int j;
1124
1125 if (oprnd_info->first_dt != vect_internal_def
1126 && oprnd_info->first_dt != vect_reduction_def
1127 && oprnd_info->first_dt != vect_induction_def)
1128 continue;
1129
1130 if (++this_tree_size > max_tree_size)
1131 {
1132 FOR_EACH_VEC_ELT (children, j, child)
1133 vect_free_slp_tree (child);
1134 vect_free_oprnd_info (oprnds_info);
1135 return NULL;
1136 }
1137
1138 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1139 group_size, &this_max_nunits,
1140 &this_loads, matches, npermutes,
1141 &this_tree_size,
1142 max_tree_size)) != NULL)
1143 {
1144 /* If we have all children of child built up from scalars then just
1145 throw that away and build it up this node from scalars. */
1146 if (!SLP_TREE_CHILDREN (child).is_empty ()
1147 /* ??? Rejecting patterns this way doesn't work. We'd have to
1148 do extra work to cancel the pattern so the uses see the
1149 scalar version. */
1150 && !is_pattern_stmt_p
1151 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
1152 {
1153 slp_tree grandchild;
1154
1155 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1156 if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
1157 break;
1158 if (!grandchild)
1159 {
1160 /* Roll back. */
1161 this_loads.truncate (old_nloads);
1162 this_tree_size = old_tree_size;
1163 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1164 vect_free_slp_tree (grandchild);
1165 SLP_TREE_CHILDREN (child).truncate (0);
1166
1167 dump_printf_loc (MSG_NOTE, vect_location,
1168 "Building parent vector operands from "
1169 "scalars instead\n");
1170 oprnd_info->def_stmts = vNULL;
1171 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1172 children.safe_push (child);
1173 continue;
1174 }
1175 }
1176
1177 oprnd_info->def_stmts = vNULL;
1178 children.safe_push (child);
1179 continue;
1180 }
1181
1182 /* If the SLP build failed fatally and we analyze a basic-block
1183 simply treat nodes we fail to build as externally defined
1184 (and thus build vectors from the scalar defs).
1185 The cost model will reject outright expensive cases.
1186 ??? This doesn't treat cases where permutation ultimatively
1187 fails (or we don't try permutation below). Ideally we'd
1188 even compute a permutation that will end up with the maximum
1189 SLP tree size... */
1190 if (is_a <bb_vec_info> (vinfo)
1191 && !matches[0]
1192 /* ??? Rejecting patterns this way doesn't work. We'd have to
1193 do extra work to cancel the pattern so the uses see the
1194 scalar version. */
1195 && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
1196 {
1197 dump_printf_loc (MSG_NOTE, vect_location,
1198 "Building vector operands from scalars\n");
1199 child = vect_create_new_slp_node (oprnd_info->def_stmts);
1200 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1201 children.safe_push (child);
1202 oprnd_info->def_stmts = vNULL;
1203 continue;
1204 }
1205
1206 /* If the SLP build for operand zero failed and operand zero
1207 and one can be commutated try that for the scalar stmts
1208 that failed the match. */
1209 if (i == 0
1210 /* A first scalar stmt mismatch signals a fatal mismatch. */
1211 && matches[0]
1212 /* ??? For COND_EXPRs we can swap the comparison operands
1213 as well as the arms under some constraints. */
1214 && nops == 2
1215 && oprnds_info[1]->first_dt == vect_internal_def
1216 && is_gimple_assign (stmt)
1217 && commutative_tree_code (gimple_assign_rhs_code (stmt))
1218 && ! two_operators
1219 /* Do so only if the number of not successful permutes was nor more
1220 than a cut-ff as re-trying the recursive match on
1221 possibly each level of the tree would expose exponential
1222 behavior. */
1223 && *npermutes < 4)
1224 {
1225 /* Verify if we can safely swap or if we committed to a specific
1226 operand order already. */
1227 for (j = 0; j < group_size; ++j)
1228 if (!matches[j]
1229 && (swap[j] != 0
1230 || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts[j]))))
1231 {
1232 if (dump_enabled_p ())
1233 {
1234 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1235 "Build SLP failed: cannot swap operands "
1236 "of shared stmt ");
1237 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1238 stmts[j], 0);
1239 }
1240 goto fail;
1241 }
1242
1243 /* Swap mismatched definition stmts. */
1244 dump_printf_loc (MSG_NOTE, vect_location,
1245 "Re-trying with swapped operands of stmts ");
1246 for (j = 0; j < group_size; ++j)
1247 if (!matches[j])
1248 {
1249 std::swap (oprnds_info[0]->def_stmts[j],
1250 oprnds_info[1]->def_stmts[j]);
1251 dump_printf (MSG_NOTE, "%d ", j);
1252 }
1253 dump_printf (MSG_NOTE, "\n");
1254 /* And try again with scratch 'matches' ... */
1255 bool *tem = XALLOCAVEC (bool, group_size);
1256 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1257 group_size, &this_max_nunits,
1258 &this_loads, tem, npermutes,
1259 &this_tree_size,
1260 max_tree_size)) != NULL)
1261 {
1262 /* ... so if successful we can apply the operand swapping
1263 to the GIMPLE IL. This is necessary because for example
1264 vect_get_slp_defs uses operand indexes and thus expects
1265 canonical operand order. This is also necessary even
1266 if we end up building the operand from scalars as
1267 we'll continue to process swapped operand two. */
1268 for (j = 0; j < group_size; ++j)
1269 {
1270 gimple *stmt = stmts[j];
1271 gimple_set_plf (stmt, GF_PLF_1, false);
1272 }
1273 for (j = 0; j < group_size; ++j)
1274 {
1275 gimple *stmt = stmts[j];
1276 if (!matches[j])
1277 {
1278 /* Avoid swapping operands twice. */
1279 if (gimple_plf (stmt, GF_PLF_1))
1280 continue;
1281 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
1282 gimple_assign_rhs2_ptr (stmt));
1283 gimple_set_plf (stmt, GF_PLF_1, true);
1284 }
1285 }
1286 /* Verify we swap all duplicates or none. */
1287 if (flag_checking)
1288 for (j = 0; j < group_size; ++j)
1289 {
1290 gimple *stmt = stmts[j];
1291 gcc_assert (gimple_plf (stmt, GF_PLF_1) == ! matches[j]);
1292 }
1293
1294 /* If we have all children of child built up from scalars then
1295 just throw that away and build it up this node from scalars. */
1296 if (!SLP_TREE_CHILDREN (child).is_empty ()
1297 /* ??? Rejecting patterns this way doesn't work. We'd have
1298 to do extra work to cancel the pattern so the uses see the
1299 scalar version. */
1300 && !is_pattern_stmt_p
1301 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
1302 {
1303 unsigned int j;
1304 slp_tree grandchild;
1305
1306 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1307 if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
1308 break;
1309 if (!grandchild)
1310 {
1311 /* Roll back. */
1312 this_loads.truncate (old_nloads);
1313 this_tree_size = old_tree_size;
1314 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1315 vect_free_slp_tree (grandchild);
1316 SLP_TREE_CHILDREN (child).truncate (0);
1317
1318 dump_printf_loc (MSG_NOTE, vect_location,
1319 "Building parent vector operands from "
1320 "scalars instead\n");
1321 oprnd_info->def_stmts = vNULL;
1322 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1323 children.safe_push (child);
1324 continue;
1325 }
1326 }
1327
1328 oprnd_info->def_stmts = vNULL;
1329 children.safe_push (child);
1330 continue;
1331 }
1332
1333 ++*npermutes;
1334 }
1335
1336fail:
1337 gcc_assert (child == NULL);
1338 FOR_EACH_VEC_ELT (children, j, child)
1339 vect_free_slp_tree (child);
1340 vect_free_oprnd_info (oprnds_info);
1341 return NULL;
1342 }
1343
1344 vect_free_oprnd_info (oprnds_info);
1345
1346 if (tree_size)
1347 *tree_size += this_tree_size;
1348 *max_nunits = this_max_nunits;
1349 loads->safe_splice (this_loads);
1350
1351 node = vect_create_new_slp_node (stmts);
1352 SLP_TREE_TWO_OPERATORS (node) = two_operators;
1353 SLP_TREE_CHILDREN (node).splice (children);
1354 return node;
1355}
1356
1357/* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1358
1359static void
1360vect_print_slp_tree (dump_flags_t dump_kind, location_t loc, slp_tree node)
1361{
1362 int i;
1363 gimple *stmt;
1364 slp_tree child;
1365
1366 dump_printf_loc (dump_kind, loc, "node%s\n",
1367 SLP_TREE_DEF_TYPE (node) != vect_internal_def
1368 ? " (external)" : "");
1369 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1370 {
1371 dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
1372 dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
1373 }
1374 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1375 vect_print_slp_tree (dump_kind, loc, child);
1376}
1377
1378
1379/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1380 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1381 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1382 stmts in NODE are to be marked. */
1383
1384static void
1385vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1386{
1387 int i;
1388 gimple *stmt;
1389 slp_tree child;
1390
1391 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1392 return;
1393
1394 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1395 if (j < 0 || i == j)
1396 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
1397
1398 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1399 vect_mark_slp_stmts (child, mark, j);
1400}
1401
1402
1403/* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1404
1405static void
1406vect_mark_slp_stmts_relevant (slp_tree node)
1407{
1408 int i;
1409 gimple *stmt;
1410 stmt_vec_info stmt_info;
1411 slp_tree child;
1412
1413 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1414 return;
1415
1416 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1417 {
1418 stmt_info = vinfo_for_stmt (stmt);
1419 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1420 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1421 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1422 }
1423
1424 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1425 vect_mark_slp_stmts_relevant (child);
1426}
1427
1428
1429/* Rearrange the statements of NODE according to PERMUTATION. */
1430
1431static void
1432vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1433 vec<unsigned> permutation)
1434{
1435 gimple *stmt;
1436 vec<gimple *> tmp_stmts;
1437 unsigned int i;
1438 slp_tree child;
1439
1440 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1441 vect_slp_rearrange_stmts (child, group_size, permutation);
1442
1443 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1444 tmp_stmts.create (group_size);
1445 tmp_stmts.quick_grow_cleared (group_size);
1446
1447 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1448 tmp_stmts[permutation[i]] = stmt;
1449
1450 SLP_TREE_SCALAR_STMTS (node).release ();
1451 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1452}
1453
1454
1455/* Attempt to reorder stmts in a reduction chain so that we don't
1456 require any load permutation. Return true if that was possible,
1457 otherwise return false. */
1458
1459static bool
1460vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1461{
1462 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1463 unsigned int i, j;
1464 unsigned int lidx;
1465 slp_tree node, load;
1466
1467 /* Compare all the permutation sequences to the first one. We know
1468 that at least one load is permuted. */
1469 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1470 if (!node->load_permutation.exists ())
1471 return false;
1472 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1473 {
1474 if (!load->load_permutation.exists ())
1475 return false;
1476 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1477 if (lidx != node->load_permutation[j])
1478 return false;
1479 }
1480
1481 /* Check that the loads in the first sequence are different and there
1482 are no gaps between them. */
1483 auto_sbitmap load_index (group_size);
1484 bitmap_clear (load_index);
1485 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1486 {
1487 if (lidx >= group_size)
1488 return false;
1489 if (bitmap_bit_p (load_index, lidx))
1490 return false;
1491
1492 bitmap_set_bit (load_index, lidx);
1493 }
1494 for (i = 0; i < group_size; i++)
1495 if (!bitmap_bit_p (load_index, i))
1496 return false;
1497
1498 /* This permutation is valid for reduction. Since the order of the
1499 statements in the nodes is not important unless they are memory
1500 accesses, we can rearrange the statements in all the nodes
1501 according to the order of the loads. */
1502 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1503 node->load_permutation);
1504
1505 /* We are done, no actual permutations need to be generated. */
1506 unsigned int unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
1507 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1508 {
1509 gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1510 first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
1511 /* But we have to keep those permutations that are required because
1512 of handling of gaps. */
1513 if (unrolling_factor == 1
1514 || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
1515 && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
1516 SLP_TREE_LOAD_PERMUTATION (node).release ();
1517 else
1518 for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
1519 SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
1520 }
1521
1522 return true;
1523}
1524
1525/* Check if the required load permutations in the SLP instance
1526 SLP_INSTN are supported. */
1527
1528static bool
1529vect_supported_load_permutation_p (slp_instance slp_instn)
1530{
1531 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1532 unsigned int i, j, k, next;
1533 slp_tree node;
1534 gimple *stmt, *load, *next_load;
1535
1536 if (dump_enabled_p ())
1537 {
1538 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
1539 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1540 if (node->load_permutation.exists ())
1541 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1542 dump_printf (MSG_NOTE, "%d ", next);
1543 else
1544 for (k = 0; k < group_size; ++k)
1545 dump_printf (MSG_NOTE, "%d ", k);
1546 dump_printf (MSG_NOTE, "\n");
1547 }
1548
1549 /* In case of reduction every load permutation is allowed, since the order
1550 of the reduction statements is not important (as opposed to the case of
1551 grouped stores). The only condition we need to check is that all the
1552 load nodes are of the same size and have the same permutation (and then
1553 rearrange all the nodes of the SLP instance according to this
1554 permutation). */
1555
1556 /* Check that all the load nodes are of the same size. */
1557 /* ??? Can't we assert this? */
1558 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1559 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1560 return false;
1561
1562 node = SLP_INSTANCE_TREE (slp_instn);
1563 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1564
1565 /* Reduction (there are no data-refs in the root).
1566 In reduction chain the order of the loads is not important. */
1567 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1568 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1569 vect_attempt_slp_rearrange_stmts (slp_instn);
1570
1571 /* In basic block vectorization we allow any subchain of an interleaving
1572 chain.
1573 FORNOW: not supported in loop SLP because of realignment compications. */
1574 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
1575 {
1576 /* Check whether the loads in an instance form a subchain and thus
1577 no permutation is necessary. */
1578 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1579 {
1580 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1581 continue;
1582 bool subchain_p = true;
1583 next_load = NULL;
1584 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
1585 {
1586 if (j != 0
1587 && (next_load != load
1588 || GROUP_GAP (vinfo_for_stmt (load)) != 1))
1589 {
1590 subchain_p = false;
1591 break;
1592 }
1593 next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1594 }
1595 if (subchain_p)
1596 SLP_TREE_LOAD_PERMUTATION (node).release ();
1597 else
1598 {
1599 stmt_vec_info group_info
1600 = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
1601 group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info));
1602 unsigned nunits
1603 = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info));
1604 unsigned k, maxk = 0;
1605 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
1606 if (k > maxk)
1607 maxk = k;
1608 /* In BB vectorization we may not actually use a loaded vector
1609 accessing elements in excess of GROUP_SIZE. */
1610 if (maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1)))
1611 {
1612 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1613 "BB vectorization with gaps at the end of "
1614 "a load is not supported\n");
1615 return false;
1616 }
1617
1618 /* Verify the permutation can be generated. */
1619 vec<tree> tem;
1620 unsigned n_perms;
1621 if (!vect_transform_slp_perm_load (node, tem, NULL,
1622 1, slp_instn, true, &n_perms))
1623 {
1624 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1625 vect_location,
1626 "unsupported load permutation\n");
1627 return false;
1628 }
1629 }
1630 }
1631 return true;
1632 }
1633
1634 /* For loop vectorization verify we can generate the permutation. Be
1635 conservative about the vectorization factor, there are permutations
1636 that will use three vector inputs only starting from a specific factor
1637 and the vectorization factor is not yet final.
1638 ??? The SLP instance unrolling factor might not be the maximum one. */
1639 unsigned n_perms;
1640 unsigned test_vf
1641 = least_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
1642 LOOP_VINFO_VECT_FACTOR
1643 (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt))));
1644 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1645 if (node->load_permutation.exists ()
1646 && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
1647 slp_instn, true, &n_perms))
1648 return false;
1649
1650 return true;
1651}
1652
1653
1654/* Find the last store in SLP INSTANCE. */
1655
1656gimple *
1657vect_find_last_scalar_stmt_in_slp (slp_tree node)
1658{
1659 gimple *last = NULL, *stmt;
1660
1661 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
1662 {
1663 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1664 if (is_pattern_stmt_p (stmt_vinfo))
1665 last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
1666 else
1667 last = get_later_stmt (stmt, last);
1668 }
1669
1670 return last;
1671}
1672
1673/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1674
1675static void
1676vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
1677 stmt_vector_for_cost *prologue_cost_vec,
1678 stmt_vector_for_cost *body_cost_vec,
1679 unsigned ncopies_for_cost,
1680 scalar_stmts_set_t* visited)
1681{
1682 unsigned i, j;
1683 slp_tree child;
1684 gimple *stmt;
1685 stmt_vec_info stmt_info;
1686 tree lhs;
1687
1688 /* If we already costed the exact same set of scalar stmts we're done.
1689 We share the generated vector stmts for those. */
1690 if (visited->contains (SLP_TREE_SCALAR_STMTS (node)))
1691 return;
1692
1693 visited->add (SLP_TREE_SCALAR_STMTS (node).copy ());
1694
1695 /* Recurse down the SLP tree. */
1696 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1697 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
1698 vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
1699 body_cost_vec, ncopies_for_cost, visited);
1700
1701 /* Look at the first scalar stmt to determine the cost. */
1702 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1703 stmt_info = vinfo_for_stmt (stmt);
1704 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1705 {
1706 vect_memory_access_type memory_access_type
1707 = (STMT_VINFO_STRIDED_P (stmt_info)
1708 ? VMAT_STRIDED_SLP
1709 : VMAT_CONTIGUOUS);
1710 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
1711 vect_model_store_cost (stmt_info, ncopies_for_cost,
1712 memory_access_type, vect_uninitialized_def,
1713 node, prologue_cost_vec, body_cost_vec);
1714 else
1715 {
1716 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
1717 if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
1718 {
1719 /* If the load is permuted then the alignment is determined by
1720 the first group element not by the first scalar stmt DR. */
1721 stmt = GROUP_FIRST_ELEMENT (stmt_info);
1722 stmt_info = vinfo_for_stmt (stmt);
1723 /* Record the cost for the permutation. */
1724 unsigned n_perms;
1725 vect_transform_slp_perm_load (node, vNULL, NULL,
1726 ncopies_for_cost, instance, true,
1727 &n_perms);
1728 record_stmt_cost (body_cost_vec, n_perms, vec_perm,
1729 stmt_info, 0, vect_body);
1730 unsigned nunits
1731 = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1732 /* And adjust the number of loads performed. This handles
1733 redundancies as well as loads that are later dead. */
1734 auto_sbitmap perm (GROUP_SIZE (stmt_info));
1735 bitmap_clear (perm);
1736 for (i = 0; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i)
1737 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (node)[i]);
1738 ncopies_for_cost = 0;
1739 bool load_seen = false;
1740 for (i = 0; i < GROUP_SIZE (stmt_info); ++i)
1741 {
1742 if (i % nunits == 0)
1743 {
1744 if (load_seen)
1745 ncopies_for_cost++;
1746 load_seen = false;
1747 }
1748 if (bitmap_bit_p (perm, i))
1749 load_seen = true;
1750 }
1751 if (load_seen)
1752 ncopies_for_cost++;
1753 gcc_assert (ncopies_for_cost
1754 <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
1755 + nunits - 1) / nunits);
1756 ncopies_for_cost *= SLP_INSTANCE_UNROLLING_FACTOR (instance);
1757 }
1758 /* Record the cost for the vector loads. */
1759 vect_model_load_cost (stmt_info, ncopies_for_cost,
1760 memory_access_type, node, prologue_cost_vec,
1761 body_cost_vec);
1762 return;
1763 }
1764 }
1765 else if (STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type)
1766 {
1767 /* ncopies_for_cost is the number of IVs we generate. */
1768 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1769 stmt_info, 0, vect_body);
1770
1771 /* Prologue cost for the initial values and step vector. */
1772 record_stmt_cost (prologue_cost_vec, ncopies_for_cost,
1773 CONSTANT_CLASS_P
1774 (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
1775 (stmt_info))
1776 ? vector_load : vec_construct,
1777 stmt_info, 0, vect_prologue);
1778 record_stmt_cost (prologue_cost_vec, 1,
1779 CONSTANT_CLASS_P
1780 (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info))
1781 ? vector_load : vec_construct,
1782 stmt_info, 0, vect_prologue);
1783
1784 /* ??? No easy way to get at the actual number of vector stmts
1785 to be geneated and thus the derived IVs. */
1786 }
1787 else
1788 {
1789 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1790 stmt_info, 0, vect_body);
1791 if (SLP_TREE_TWO_OPERATORS (node))
1792 {
1793 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1794 stmt_info, 0, vect_body);
1795 record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
1796 stmt_info, 0, vect_body);
1797 }
1798 }
1799
1800 /* Push SLP node def-type to stmts. */
1801 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1802 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
1803 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
1804 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
1805
1806 /* Scan operands and account for prologue cost of constants/externals.
1807 ??? This over-estimates cost for multiple uses and should be
1808 re-engineered. */
1809 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1810 lhs = gimple_get_lhs (stmt);
1811 for (i = 0; i < gimple_num_ops (stmt); ++i)
1812 {
1813 tree op = gimple_op (stmt, i);
1814 gimple *def_stmt;
1815 enum vect_def_type dt;
1816 if (!op || op == lhs)
1817 continue;
1818 if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt))
1819 {
1820 /* Without looking at the actual initializer a vector of
1821 constants can be implemented as load from the constant pool.
1822 ??? We need to pass down stmt_info for a vector type
1823 even if it points to the wrong stmt. */
1824 if (dt == vect_constant_def)
1825 record_stmt_cost (prologue_cost_vec, 1, vector_load,
1826 stmt_info, 0, vect_prologue);
1827 else if (dt == vect_external_def)
1828 record_stmt_cost (prologue_cost_vec, 1, vec_construct,
1829 stmt_info, 0, vect_prologue);
1830 }
1831 }
1832
1833 /* Restore stmt def-types. */
1834 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1835 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
1836 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
1837 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
1838}
1839
1840/* Compute the cost for the SLP instance INSTANCE. */
1841
1842static void
1843vect_analyze_slp_cost (slp_instance instance, void *data)
1844{
1845 stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1846 unsigned ncopies_for_cost;
1847 stmt_info_for_cost *si;
1848 unsigned i;
1849
1850 if (dump_enabled_p ())
1851 dump_printf_loc (MSG_NOTE, vect_location,
1852 "=== vect_analyze_slp_cost ===\n");
1853
1854 /* Calculate the number of vector stmts to create based on the unrolling
1855 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1856 GROUP_SIZE / NUNITS otherwise. */
1857 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1858 slp_tree node = SLP_INSTANCE_TREE (instance);
1859 stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
1860 /* Adjust the group_size by the vectorization factor which is always one
1861 for basic-block vectorization. */
1862 if (STMT_VINFO_LOOP_VINFO (stmt_info))
1863 group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
1864 unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1865 /* For reductions look at a reduction operand in case the reduction
1866 operation is widening like DOT_PROD or SAD. */
1867 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1868 {
1869 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1870 switch (gimple_assign_rhs_code (stmt))
1871 {
1872 case DOT_PROD_EXPR:
1873 case SAD_EXPR:
1874 nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
1875 (TREE_TYPE (gimple_assign_rhs1 (stmt))));
1876 break;
1877 default:;
1878 }
1879 }
1880 ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
1881
1882 prologue_cost_vec.create (10);
1883 body_cost_vec.create (10);
1884 scalar_stmts_set_t *visited = new scalar_stmts_set_t ();
1885 vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
1886 &prologue_cost_vec, &body_cost_vec,
1887 ncopies_for_cost, visited);
1888 delete visited;
1889
1890 /* Record the prologue costs, which were delayed until we were
1891 sure that SLP was successful. */
1892 FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1893 {
1894 struct _stmt_vec_info *stmt_info
1895 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1896 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1897 si->misalign, vect_prologue);
1898 }
1899
1900 /* Record the instance's instructions in the target cost model. */
1901 FOR_EACH_VEC_ELT (body_cost_vec, i, si)
1902 {
1903 struct _stmt_vec_info *stmt_info
1904 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1905 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1906 si->misalign, vect_body);
1907 }
1908
1909 prologue_cost_vec.release ();
1910 body_cost_vec.release ();
1911}
1912
1913/* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
1914 one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
1915 the first GROUP1_SIZE stmts, since stores are consecutive), the second
1916 containing the remainder.
1917 Return the first stmt in the second group. */
1918
1919static gimple *
1920vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
1921{
1922 stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
1923 gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
1924 gcc_assert (group1_size > 0);
1925 int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
1926 gcc_assert (group2_size > 0);
1927 GROUP_SIZE (first_vinfo) = group1_size;
1928
1929 gimple *stmt = first_stmt;
1930 for (unsigned i = group1_size; i > 1; i--)
1931 {
1932 stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1933 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1934 }
1935 /* STMT is now the last element of the first group. */
1936 gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1937 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;
1938
1939 GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
1940 for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
1941 {
1942 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
1943 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1944 }
1945
1946 /* For the second group, the GROUP_GAP is that before the original group,
1947 plus skipping over the first vector. */
1948 GROUP_GAP (vinfo_for_stmt (group2)) =
1949 GROUP_GAP (first_vinfo) + group1_size;
1950
1951 /* GROUP_GAP of the first group now has to skip over the second group too. */
1952 GROUP_GAP (first_vinfo) += group2_size;
1953
1954 if (dump_enabled_p ())
1955 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1956 group1_size, group2_size);
1957
1958 return group2;
1959}
1960
1961/* Analyze an SLP instance starting from a group of grouped stores. Call
1962 vect_build_slp_tree to build a tree of packed stmts if possible.
1963 Return FALSE if it's impossible to SLP any stmt in the loop. */
1964
1965static bool
1966vect_analyze_slp_instance (vec_info *vinfo,
1967 gimple *stmt, unsigned max_tree_size)
1968{
1969 slp_instance new_instance;
1970 slp_tree node;
1971 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1972 unsigned int unrolling_factor = 1, nunits;
1973 tree vectype, scalar_type = NULL_TREE;
1974 gimple *next;
1975 unsigned int i;
1976 unsigned int max_nunits = 0;
1977 vec<slp_tree> loads;
1978 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1979 vec<gimple *> scalar_stmts;
1980
1981 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1982 {
1983 if (dr)
1984 {
1985 scalar_type = TREE_TYPE (DR_REF (dr));
1986 vectype = get_vectype_for_scalar_type (scalar_type);
1987 }
1988 else
1989 {
1990 gcc_assert (is_a <loop_vec_info> (vinfo));
1991 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1992 }
1993
1994 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1995 }
1996 else
1997 {
1998 gcc_assert (is_a <loop_vec_info> (vinfo));
1999 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2000 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
2001 }
2002
2003 if (!vectype)
2004 {
2005 if (dump_enabled_p ())
2006 {
2007 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2008 "Build SLP failed: unsupported data-type ");
2009 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
2010 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2011 }
2012
2013 return false;
2014 }
2015 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2016
2017 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2018 scalar_stmts.create (group_size);
2019 next = stmt;
2020 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2021 {
2022 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2023 while (next)
2024 {
2025 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
2026 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
2027 scalar_stmts.safe_push (
2028 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
2029 else
2030 scalar_stmts.safe_push (next);
2031 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2032 }
2033 /* Mark the first element of the reduction chain as reduction to properly
2034 transform the node. In the reduction analysis phase only the last
2035 element of the chain is marked as reduction. */
2036 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
2037 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
2038 }
2039 else
2040 {
2041 /* Collect reduction statements. */
2042 vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
2043 for (i = 0; reductions.iterate (i, &next); i++)
2044 scalar_stmts.safe_push (next);
2045 }
2046
2047 loads.create (group_size);
2048
2049 /* Build the tree for the SLP instance. */
2050 bool *matches = XALLOCAVEC (bool, group_size);
2051 unsigned npermutes = 0;
2052 bst_fail = new scalar_stmts_set_t ();
2053 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
2054 &max_nunits, &loads, matches, &npermutes,
2055 NULL, max_tree_size);
2056 delete bst_fail;
2057 if (node != NULL)
2058 {
2059 /* Calculate the unrolling factor based on the smallest type. */
2060 unrolling_factor
2061 = least_common_multiple (max_nunits, group_size) / group_size;
2062
2063 if (unrolling_factor != 1
2064 && is_a <bb_vec_info> (vinfo))
2065 {
2066
2067 if (max_nunits > group_size)
2068 {
2069 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2070 "Build SLP failed: store group "
2071 "size not a multiple of the vector size "
2072 "in basic block SLP\n");
2073 vect_free_slp_tree (node);
2074 loads.release ();
2075 return false;
2076 }
2077 /* Fatal mismatch. */
2078 matches[group_size/max_nunits * max_nunits] = false;
2079 vect_free_slp_tree (node);
2080 loads.release ();
2081 }
2082 else
2083 {
2084 /* Create a new SLP instance. */
2085 new_instance = XNEW (struct _slp_instance);
2086 SLP_INSTANCE_TREE (new_instance) = node;
2087 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2088 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
2089 SLP_INSTANCE_LOADS (new_instance) = loads;
2090
2091 /* Compute the load permutation. */
2092 slp_tree load_node;
2093 bool loads_permuted = false;
2094 FOR_EACH_VEC_ELT (loads, i, load_node)
2095 {
2096 vec<unsigned> load_permutation;
2097 int j;
2098 gimple *load, *first_stmt;
2099 bool this_load_permuted = false;
2100 load_permutation.create (group_size);
2101 first_stmt = GROUP_FIRST_ELEMENT
2102 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2103 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
2104 {
2105 int load_place = vect_get_place_in_interleaving_chain
2106 (load, first_stmt);
2107 gcc_assert (load_place != -1);
2108 if (load_place != j)
2109 this_load_permuted = true;
2110 load_permutation.safe_push (load_place);
2111 }
2112 if (!this_load_permuted
2113 /* The load requires permutation when unrolling exposes
2114 a gap either because the group is larger than the SLP
2115 group-size or because there is a gap between the groups. */
2116 && (unrolling_factor == 1
2117 || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
2118 && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
2119 {
2120 load_permutation.release ();
2121 continue;
2122 }
2123 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2124 loads_permuted = true;
2125 }
2126
2127 if (loads_permuted)
2128 {
2129 if (!vect_supported_load_permutation_p (new_instance))
2130 {
2131 if (dump_enabled_p ())
2132 {
2133 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2134 "Build SLP failed: unsupported load "
2135 "permutation ");
2136 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
2137 TDF_SLIM, stmt, 0);
2138 }
2139 vect_free_slp_instance (new_instance);
2140 return false;
2141 }
2142 }
2143
2144 /* If the loads and stores can be handled with load/store-lan
2145 instructions do not generate this SLP instance. */
2146 if (is_a <loop_vec_info> (vinfo)
2147 && loads_permuted
2148 && dr && vect_store_lanes_supported (vectype, group_size))
2149 {
2150 slp_tree load_node;
2151 FOR_EACH_VEC_ELT (loads, i, load_node)
2152 {
2153 gimple *first_stmt = GROUP_FIRST_ELEMENT
2154 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2155 stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
2156 /* Use SLP for strided accesses (or if we
2157 can't load-lanes). */
2158 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2159 || ! vect_load_lanes_supported
2160 (STMT_VINFO_VECTYPE (stmt_vinfo),
2161 GROUP_SIZE (stmt_vinfo)))
2162 break;
2163 }
2164 if (i == loads.length ())
2165 {
2166 if (dump_enabled_p ())
2167 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2168 "Built SLP cancelled: can use "
2169 "load/store-lanes\n");
2170 vect_free_slp_instance (new_instance);
2171 return false;
2172 }
2173 }
2174
2175 vinfo->slp_instances.safe_push (new_instance);
2176
2177 if (dump_enabled_p ())
2178 {
2179 dump_printf_loc (MSG_NOTE, vect_location,
2180 "Final SLP tree for instance:\n");
2181 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2182 }
2183
2184 return true;
2185 }
2186 }
2187 else
2188 {
2189 /* Failed to SLP. */
2190 /* Free the allocated memory. */
2191 scalar_stmts.release ();
2192 loads.release ();
2193 }
2194
2195 /* For basic block SLP, try to break the group up into multiples of the
2196 vector size. */
2197 if (is_a <bb_vec_info> (vinfo)
2198 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2199 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
2200 {
2201 /* We consider breaking the group only on VF boundaries from the existing
2202 start. */
2203 for (i = 0; i < group_size; i++)
2204 if (!matches[i]) break;
2205
2206 if (i >= nunits && i < group_size)
2207 {
2208 /* Split into two groups at the first vector boundary before i. */
2209 gcc_assert ((nunits & (nunits - 1)) == 0);
2210 unsigned group1_size = i & ~(nunits - 1);
2211
2212 gimple *rest = vect_split_slp_store_group (stmt, group1_size);
2213 bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
2214 /* If the first non-match was in the middle of a vector,
2215 skip the rest of that vector. */
2216 if (group1_size < i)
2217 {
2218 i = group1_size + nunits;
2219 if (i < group_size)
2220 rest = vect_split_slp_store_group (rest, nunits);
2221 }
2222 if (i < group_size)
2223 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2224 return res;
2225 }
2226 /* Even though the first vector did not all match, we might be able to SLP
2227 (some) of the remainder. FORNOW ignore this possibility. */
2228 }
2229
2230 return false;
2231}
2232
2233
2234/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
2235 trees of packed scalar stmts if SLP is possible. */
2236
2237bool
2238vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2239{
2240 unsigned int i;
2241 gimple *first_element;
2242
2243 if (dump_enabled_p ())
2244 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
2245
2246 /* Find SLP sequences starting from groups of grouped stores. */
2247 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2248 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
2249
2250 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2251 {
2252 if (loop_vinfo->reduction_chains.length () > 0)
2253 {
2254 /* Find SLP sequences starting from reduction chains. */
2255 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2256 if (! vect_analyze_slp_instance (vinfo, first_element,
2257 max_tree_size))
2258 {
2259 /* Dissolve reduction chain group. */
2260 gimple *next, *stmt = first_element;
2261 while (stmt)
2262 {
2263 stmt_vec_info vinfo = vinfo_for_stmt (stmt);
2264 next = GROUP_NEXT_ELEMENT (vinfo);
2265 GROUP_FIRST_ELEMENT (vinfo) = NULL;
2266 GROUP_NEXT_ELEMENT (vinfo) = NULL;
2267 stmt = next;
2268 }
2269 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
2270 = vect_internal_def;
2271 }
2272 }
2273
2274 /* Find SLP sequences starting from groups of reductions. */
2275 if (loop_vinfo->reductions.length () > 1)
2276 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2277 max_tree_size);
2278 }
2279
2280 return true;
2281}
2282
2283
2284/* For each possible SLP instance decide whether to SLP it and calculate overall
2285 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2286 least one instance. */
2287
2288bool
2289vect_make_slp_decision (loop_vec_info loop_vinfo)
2290{
2291 unsigned int i, unrolling_factor = 1;
2292 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2293 slp_instance instance;
2294 int decided_to_slp = 0;
2295
2296 if (dump_enabled_p ())
2297 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
2298 "\n");
2299
2300 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2301 {
2302 /* FORNOW: SLP if you can. */
2303 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
2304 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
2305
2306 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2307 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2308 loop-based vectorization. Such stmts will be marked as HYBRID. */
2309 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2310 decided_to_slp++;
2311 }
2312
2313 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2314
2315 if (decided_to_slp && dump_enabled_p ())
2316 dump_printf_loc (MSG_NOTE, vect_location,
2317 "Decided to SLP %d instances. Unrolling factor %d\n",
2318 decided_to_slp, unrolling_factor);
2319
2320 return (decided_to_slp > 0);
2321}
2322
2323
2324/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2325 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2326
2327static void
2328vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2329{
2330 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
2331 imm_use_iterator imm_iter;
2332 gimple *use_stmt;
2333 stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
2334 slp_tree child;
2335 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2336 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2337 int j;
2338
2339 /* Propagate hybrid down the SLP tree. */
2340 if (stype == hybrid)
2341 ;
2342 else if (HYBRID_SLP_STMT (stmt_vinfo))
2343 stype = hybrid;
2344 else
2345 {
2346 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2347 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2348 /* If we get a pattern stmt here we have to use the LHS of the
2349 original stmt for immediate uses. */
2350 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
2351 && STMT_VINFO_RELATED_STMT (stmt_vinfo))
2352 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2353 tree def;
2354 if (gimple_code (stmt) == GIMPLE_PHI)
2355 def = gimple_phi_result (stmt);
2356 else
2357 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2358 if (def)
2359 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2360 {
2361 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2362 continue;
2363 use_vinfo = vinfo_for_stmt (use_stmt);
2364 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2365 && STMT_VINFO_RELATED_STMT (use_vinfo))
2366 use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2367 if (!STMT_SLP_TYPE (use_vinfo)
2368 && (STMT_VINFO_RELEVANT (use_vinfo)
2369 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2370 && !(gimple_code (use_stmt) == GIMPLE_PHI
2371 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2372 {
2373 if (dump_enabled_p ())
2374 {
2375 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2376 "def in non-SLP stmt: ");
2377 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2378 }
2379 stype = hybrid;
2380 }
2381 }
2382 }
2383
2384 if (stype == hybrid
2385 && !HYBRID_SLP_STMT (stmt_vinfo))
2386 {
2387 if (dump_enabled_p ())
2388 {
2389 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2390 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2391 }
2392 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2393 }
2394
2395 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2396 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
2397 vect_detect_hybrid_slp_stmts (child, i, stype);
2398}
2399
2400/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2401
2402static tree
2403vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2404{
2405 walk_stmt_info *wi = (walk_stmt_info *)data;
2406 struct loop *loopp = (struct loop *)wi->info;
2407
2408 if (wi->is_lhs)
2409 return NULL_TREE;
2410
2411 if (TREE_CODE (*tp) == SSA_NAME
2412 && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2413 {
2414 gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
2415 if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2416 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
2417 {
2418 if (dump_enabled_p ())
2419 {
2420 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2421 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2422 }
2423 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2424 }
2425 }
2426
2427 return NULL_TREE;
2428}
2429
2430static tree
2431vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2432 walk_stmt_info *)
2433{
2434 stmt_vec_info use_vinfo = vinfo_for_stmt (gsi_stmt (*gsi));
2435 /* If the stmt is in a SLP instance then this isn't a reason
2436 to mark use definitions in other SLP instances as hybrid. */
2437 if (! STMT_SLP_TYPE (use_vinfo)
2438 && (STMT_VINFO_RELEVANT (use_vinfo)
2439 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2440 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2441 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2442 ;
2443 else
2444 *handled = true;
2445 return NULL_TREE;
2446}
2447
2448/* Find stmts that must be both vectorized and SLPed. */
2449
2450void
2451vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2452{
2453 unsigned int i;
2454 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2455 slp_instance instance;
2456
2457 if (dump_enabled_p ())
2458 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
2459 "\n");
2460
2461 /* First walk all pattern stmt in the loop and mark defs of uses as
2462 hybrid because immediate uses in them are not recorded. */
2463 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2464 {
2465 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2466 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2467 gsi_next (&gsi))
2468 {
2469 gimple *stmt = gsi_stmt (gsi);
2470 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2471 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2472 {
2473 walk_stmt_info wi;
2474 memset (&wi, 0, sizeof (wi));
2475 wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2476 gimple_stmt_iterator gsi2
2477 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2478 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2479 vect_detect_hybrid_slp_1, &wi);
2480 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2481 vect_detect_hybrid_slp_2,
2482 vect_detect_hybrid_slp_1, &wi);
2483 }
2484 }
2485 }
2486
2487 /* Then walk the SLP instance trees marking stmts with uses in
2488 non-SLP stmts as hybrid, also propagating hybrid down the
2489 SLP tree, collecting the above info on-the-fly. */
2490 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2491 {
2492 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2493 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2494 i, pure_slp);
2495 }
2496}
2497
2498
2499/* Initialize a bb_vec_info struct for the statements between
2500 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2501
2502_bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2503 gimple_stmt_iterator region_end_in)
2504 : vec_info (vec_info::bb, init_cost (NULL)),
2505 bb (gsi_bb (region_begin_in)),
2506 region_begin (region_begin_in),
2507 region_end (region_end_in)
2508{
2509 gimple_stmt_iterator gsi;
2510
2511 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2512 gsi_next (&gsi))
2513 {
2514 gimple *stmt = gsi_stmt (gsi);
2515 gimple_set_uid (stmt, 0);
2516 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this));
2517 }
2518
2519 bb->aux = this;
2520}
2521
2522
2523/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2524 stmts in the basic block. */
2525
2526_bb_vec_info::~_bb_vec_info ()
2527{
2528 for (gimple_stmt_iterator si = region_begin;
2529 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2530 {
2531 gimple *stmt = gsi_stmt (si);
2532 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2533
2534 if (stmt_info)
2535 /* Free stmt_vec_info. */
2536 free_stmt_vec_info (stmt);
2537
2538 /* Reset region marker. */
2539 gimple_set_uid (stmt, -1);
2540 }
2541
2542 bb->aux = NULL;
2543}
2544
2545
2546/* Analyze statements contained in SLP tree NODE after recursively analyzing
2547 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2548
2549 Return true if the operations are supported. */
2550
2551static bool
2552vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2553 slp_instance node_instance)
2554{
2555 bool dummy;
2556 int i, j;
2557 gimple *stmt;
2558 slp_tree child;
2559
2560 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2561 return true;
2562
2563 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2564 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance))
2565 return false;
2566
2567 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2568 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2569 gcc_assert (stmt_info);
2570 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2571
2572 /* For BB vectorization vector types are assigned here.
2573 Memory accesses already got their vector type assigned
2574 in vect_analyze_data_refs. */
2575 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2576 if (bb_vinfo
2577 && ! STMT_VINFO_DATA_REF (stmt_info))
2578 {
2579 gcc_assert (PURE_SLP_STMT (stmt_info));
2580
2581 tree scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
2582 if (dump_enabled_p ())
2583 {
2584 dump_printf_loc (MSG_NOTE, vect_location,
2585 "get vectype for scalar type: ");
2586 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
2587 dump_printf (MSG_NOTE, "\n");
2588 }
2589
2590 tree vectype = get_vectype_for_scalar_type (scalar_type);
2591 if (!vectype)
2592 {
2593 if (dump_enabled_p ())
2594 {
2595 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2596 "not SLPed: unsupported data-type ");
2597 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
2598 scalar_type);
2599 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2600 }
2601 return false;
2602 }
2603
2604 if (dump_enabled_p ())
2605 {
2606 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
2607 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
2608 dump_printf (MSG_NOTE, "\n");
2609 }
2610
2611 gimple *sstmt;
2612 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
2613 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
2614 }
2615
2616 /* Calculate the number of vector statements to be created for the
2617 scalar stmts in this node. For SLP reductions it is equal to the
2618 number of vector statements in the children (which has already been
2619 calculated by the recursive call). Otherwise it is the number of
2620 scalar elements in one scalar iteration (GROUP_SIZE) multiplied by
2621 VF divided by the number of elements in a vector. */
2622 if (GROUP_FIRST_ELEMENT (stmt_info)
2623 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
2624 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2625 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
2626 else
2627 {
2628 int vf;
2629 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2630 vf = loop_vinfo->vectorization_factor;
2631 else
2632 vf = 1;
2633 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2634 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2635 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2636 = vf * group_size / TYPE_VECTOR_SUBPARTS (vectype);
2637 }
2638
2639 /* Push SLP node def-type to stmt operands. */
2640 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2641 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2642 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2643 = SLP_TREE_DEF_TYPE (child);
2644 bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance);
2645 /* Restore def-types. */
2646 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2647 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2648 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2649 = vect_internal_def;
2650 if (! res)
2651 return false;
2652
2653 return true;
2654}
2655
2656
2657/* Analyze statements in SLP instances of VINFO. Return true if the
2658 operations are supported. */
2659
2660bool
2661vect_slp_analyze_operations (vec_info *vinfo)
2662{
2663 slp_instance instance;
2664 int i;
2665
2666 if (dump_enabled_p ())
2667 dump_printf_loc (MSG_NOTE, vect_location,
2668 "=== vect_slp_analyze_operations ===\n");
2669
2670 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2671 {
2672 if (!vect_slp_analyze_node_operations (vinfo,
2673 SLP_INSTANCE_TREE (instance),
2674 instance))
2675 {
2676 dump_printf_loc (MSG_NOTE, vect_location,
2677 "removing SLP instance operations starting from: ");
2678 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2679 SLP_TREE_SCALAR_STMTS
2680 (SLP_INSTANCE_TREE (instance))[0], 0);
2681 vect_free_slp_instance (instance);
2682 vinfo->slp_instances.ordered_remove (i);
2683 }
2684 else
2685 {
2686 /* Compute the costs of the SLP instance. */
2687 vect_analyze_slp_cost (instance, vinfo->target_cost_data);
2688 i++;
2689 }
2690 }
2691
2692 return !vinfo->slp_instances.is_empty ();
2693}
2694
2695
2696/* Compute the scalar cost of the SLP node NODE and its children
2697 and return it. Do not account defs that are marked in LIFE and
2698 update LIFE according to uses of NODE. */
2699
2700static unsigned
2701vect_bb_slp_scalar_cost (basic_block bb,
2702 slp_tree node, vec<bool, va_heap> *life)
2703{
2704 unsigned scalar_cost = 0;
2705 unsigned i;
2706 gimple *stmt;
2707 slp_tree child;
2708
2709 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2710 {
2711 unsigned stmt_cost;
2712 ssa_op_iter op_iter;
2713 def_operand_p def_p;
2714 stmt_vec_info stmt_info;
2715
2716 if ((*life)[i])
2717 continue;
2718
2719 /* If there is a non-vectorized use of the defs then the scalar
2720 stmt is kept live in which case we do not account it or any
2721 required defs in the SLP children in the scalar cost. This
2722 way we make the vectorization more costly when compared to
2723 the scalar cost. */
2724 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2725 {
2726 imm_use_iterator use_iter;
2727 gimple *use_stmt;
2728 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2729 if (!is_gimple_debug (use_stmt)
2730 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
2731 use_stmt)
2732 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
2733 {
2734 (*life)[i] = true;
2735 BREAK_FROM_IMM_USE_STMT (use_iter);
2736 }
2737 }
2738 if ((*life)[i])
2739 continue;
2740
2741 /* Count scalar stmts only once. */
2742 if (gimple_visited_p (stmt))
2743 continue;
2744 gimple_set_visited (stmt, true);
2745
2746 stmt_info = vinfo_for_stmt (stmt);
2747 if (STMT_VINFO_DATA_REF (stmt_info))
2748 {
2749 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2750 stmt_cost = vect_get_stmt_cost (scalar_load);
2751 else
2752 stmt_cost = vect_get_stmt_cost (scalar_store);
2753 }
2754 else
2755 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2756
2757 scalar_cost += stmt_cost;
2758 }
2759
2760 auto_vec<bool, 20> subtree_life;
2761 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2762 {
2763 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2764 {
2765 /* Do not directly pass LIFE to the recursive call, copy it to
2766 confine changes in the callee to the current child/subtree. */
2767 subtree_life.safe_splice (*life);
2768 scalar_cost += vect_bb_slp_scalar_cost (bb, child, &subtree_life);
2769 subtree_life.truncate (0);
2770 }
2771 }
2772
2773 return scalar_cost;
2774}
2775
2776/* Check if vectorization of the basic block is profitable. */
2777
2778static bool
2779vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2780{
2781 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2782 slp_instance instance;
2783 int i;
2784 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2785 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2786
2787 /* Calculate scalar cost. */
2788 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2789 {
2790 auto_vec<bool, 20> life;
2791 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2792 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2793 SLP_INSTANCE_TREE (instance),
2794 &life);
2795 }
2796
2797 /* Unset visited flag. */
2798 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2799 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2800 gimple_set_visited (gsi_stmt (gsi), false);
2801
2802 /* Complete the target-specific cost calculation. */
2803 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2804 &vec_inside_cost, &vec_epilogue_cost);
2805
2806 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
2807
2808 if (dump_enabled_p ())
2809 {
2810 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2811 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2812 vec_inside_cost);
2813 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2814 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2815 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
2816 }
2817
2818 /* Vectorization is profitable if its cost is more than the cost of scalar
2819 version. Note that we err on the vector side for equal cost because
2820 the cost estimate is otherwise quite pessimistic (constant uses are
2821 free on the scalar side but cost a load on the vector side for
2822 example). */
2823 if (vec_outside_cost + vec_inside_cost > scalar_cost)
2824 return false;
2825
2826 return true;
2827}
2828
2829/* Check if the basic block can be vectorized. Returns a bb_vec_info
2830 if so and sets fatal to true if failure is independent of
2831 current_vector_size. */
2832
2833static bb_vec_info
2834vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2835 gimple_stmt_iterator region_end,
2836 vec<data_reference_p> datarefs, int n_stmts,
2837 bool &fatal)
2838{
2839 bb_vec_info bb_vinfo;
2840 slp_instance instance;
2841 int i;
2842 int min_vf = 2;
2843
2844 /* The first group of checks is independent of the vector size. */
2845 fatal = true;
2846
2847 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2848 {
2849 if (dump_enabled_p ())
2850 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2851 "not vectorized: too many instructions in "
2852 "basic block.\n");
2853 free_data_refs (datarefs);
2854 return NULL;
2855 }
2856
2857 bb_vinfo = new _bb_vec_info (region_begin, region_end);
2858 if (!bb_vinfo)
2859 return NULL;
2860
2861 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
2862
2863 /* Analyze the data references. */
2864
2865 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
2866 {
2867 if (dump_enabled_p ())
2868 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2869 "not vectorized: unhandled data-ref in basic "
2870 "block.\n");
2871
2872 delete bb_vinfo;
2873 return NULL;
2874 }
2875
2876 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
2877 {
2878 if (dump_enabled_p ())
2879 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2880 "not vectorized: not enough data-refs in "
2881 "basic block.\n");
2882
2883 delete bb_vinfo;
2884 return NULL;
2885 }
2886
2887 if (!vect_analyze_data_ref_accesses (bb_vinfo))
2888 {
2889 if (dump_enabled_p ())
2890 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2891 "not vectorized: unhandled data access in "
2892 "basic block.\n");
2893
2894 delete bb_vinfo;
2895 return NULL;
2896 }
2897
2898 /* If there are no grouped stores in the region there is no need
2899 to continue with pattern recog as vect_analyze_slp will fail
2900 anyway. */
2901 if (bb_vinfo->grouped_stores.is_empty ())
2902 {
2903 if (dump_enabled_p ())
2904 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2905 "not vectorized: no grouped stores in "
2906 "basic block.\n");
2907
2908 delete bb_vinfo;
2909 return NULL;
2910 }
2911
2912 /* While the rest of the analysis below depends on it in some way. */
2913 fatal = false;
2914
2915 vect_pattern_recog (bb_vinfo);
2916
2917 /* Check the SLP opportunities in the basic block, analyze and build SLP
2918 trees. */
2919 if (!vect_analyze_slp (bb_vinfo, n_stmts))
2920 {
2921 if (dump_enabled_p ())
2922 {
2923 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2924 "Failed to SLP the basic block.\n");
2925 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2926 "not vectorized: failed to find SLP opportunities "
2927 "in basic block.\n");
2928 }
2929
2930 delete bb_vinfo;
2931 return NULL;
2932 }
2933
2934 vect_record_base_alignments (bb_vinfo);
2935
2936 /* Analyze and verify the alignment of data references and the
2937 dependence in the SLP instances. */
2938 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2939 {
2940 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2941 || ! vect_slp_analyze_instance_dependence (instance))
2942 {
2943 dump_printf_loc (MSG_NOTE, vect_location,
2944 "removing SLP instance operations starting from: ");
2945 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2946 SLP_TREE_SCALAR_STMTS
2947 (SLP_INSTANCE_TREE (instance))[0], 0);
2948 vect_free_slp_instance (instance);
2949 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2950 continue;
2951 }
2952
2953 /* Mark all the statements that we want to vectorize as pure SLP and
2954 relevant. */
2955 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2956 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2957
2958 i++;
2959 }
2960 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2961 {
2962 delete bb_vinfo;
2963 return NULL;
2964 }
2965
2966 if (!vect_slp_analyze_operations (bb_vinfo))
2967 {
2968 if (dump_enabled_p ())
2969 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2970 "not vectorized: bad operation in basic block.\n");
2971
2972 delete bb_vinfo;
2973 return NULL;
2974 }
2975
2976 /* Cost model: check if the vectorization is worthwhile. */
2977 if (!unlimited_cost_model (NULL)
2978 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2979 {
2980 if (dump_enabled_p ())
2981 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2982 "not vectorized: vectorization is not "
2983 "profitable.\n");
2984
2985 delete bb_vinfo;
2986 return NULL;
2987 }
2988
2989 if (dump_enabled_p ())
2990 dump_printf_loc (MSG_NOTE, vect_location,
2991 "Basic block will be vectorized using SLP\n");
2992
2993 return bb_vinfo;
2994}
2995
2996
2997/* Main entry for the BB vectorizer. Analyze and transform BB, returns
2998 true if anything in the basic-block was vectorized. */
2999
3000bool
3001vect_slp_bb (basic_block bb)
3002{
3003 bb_vec_info bb_vinfo;
3004 gimple_stmt_iterator gsi;
3005 unsigned int vector_sizes;
3006 bool any_vectorized = false;
3007
3008 if (dump_enabled_p ())
3009 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
3010
3011 /* Autodetect first vector size we try. */
3012 current_vector_size = 0;
3013 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
3014
3015 gsi = gsi_start_bb (bb);
3016
3017 while (1)
3018 {
3019 if (gsi_end_p (gsi))
3020 break;
3021
3022 gimple_stmt_iterator region_begin = gsi;
3023 vec<data_reference_p> datarefs = vNULL;
3024 int insns = 0;
3025
3026 for (; !gsi_end_p (gsi); gsi_next (&gsi))
3027 {
3028 gimple *stmt = gsi_stmt (gsi);
3029 if (is_gimple_debug (stmt))
3030 continue;
3031 insns++;
3032
3033 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3034 vect_location = gimple_location (stmt);
3035
3036 if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
3037 break;
3038 }
3039
3040 /* Skip leading unhandled stmts. */
3041 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3042 {
3043 gsi_next (&gsi);
3044 continue;
3045 }
3046
3047 gimple_stmt_iterator region_end = gsi;
3048
3049 bool vectorized = false;
3050 bool fatal = false;
3051 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
3052 datarefs, insns, fatal);
3053 if (bb_vinfo
3054 && dbg_cnt (vect_slp))
3055 {
3056 if (dump_enabled_p ())
3057 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3058
3059 vect_schedule_slp (bb_vinfo);
3060
3061 if (dump_enabled_p ())
3062 dump_printf_loc (MSG_NOTE, vect_location,
3063 "basic block part vectorized\n");
3064
3065 vectorized = true;
3066 }
3067 delete bb_vinfo;
3068
3069 any_vectorized |= vectorized;
3070
3071 vector_sizes &= ~current_vector_size;
3072 if (vectorized
3073 || vector_sizes == 0
3074 || current_vector_size == 0
3075 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3076 vector sizes will fail do not bother iterating. */
3077 || fatal)
3078 {
3079 if (gsi_end_p (region_end))
3080 break;
3081
3082 /* Skip the unhandled stmt. */
3083 gsi_next (&gsi);
3084
3085 /* And reset vector sizes. */
3086 current_vector_size = 0;
3087 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
3088 }
3089 else
3090 {
3091 /* Try the next biggest vector size. */
3092 current_vector_size = 1 << floor_log2 (vector_sizes);
3093 if (dump_enabled_p ())
3094 dump_printf_loc (MSG_NOTE, vect_location,
3095 "***** Re-trying analysis with "
3096 "vector size %d\n", current_vector_size);
3097
3098 /* Start over. */
3099 gsi = region_begin;
3100 }
3101 }
3102
3103 return any_vectorized;
3104}
3105
3106
3107/* Return 1 if vector type of boolean constant which is OPNUM
3108 operand in statement STMT is a boolean vector. */
3109
3110static bool
3111vect_mask_constant_operand_p (gimple *stmt, int opnum)
3112{
3113 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3114 enum tree_code code = gimple_expr_code (stmt);
3115 tree op, vectype;
3116 gimple *def_stmt;
3117 enum vect_def_type dt;
3118
3119 /* For comparison and COND_EXPR type is chosen depending
3120 on the other comparison operand. */
3121 if (TREE_CODE_CLASS (code) == tcc_comparison)
3122 {
3123 if (opnum)
3124 op = gimple_assign_rhs1 (stmt);
3125 else
3126 op = gimple_assign_rhs2 (stmt);
3127
3128 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
3129 &dt, &vectype))
3130 gcc_unreachable ();
3131
3132 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3133 }
3134
3135 if (code == COND_EXPR)
3136 {
3137 tree cond = gimple_assign_rhs1 (stmt);
3138
3139 if (TREE_CODE (cond) == SSA_NAME)
3140 op = cond;
3141 else if (opnum)
3142 op = TREE_OPERAND (cond, 1);
3143 else
3144 op = TREE_OPERAND (cond, 0);
3145
3146 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
3147 &dt, &vectype))
3148 gcc_unreachable ();
3149
3150 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3151 }
3152
3153 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3154}
3155
3156
3157/* For constant and loop invariant defs of SLP_NODE this function returns
3158 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3159 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3160 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
3161 REDUC_INDEX is the index of the reduction operand in the statements, unless
3162 it is -1. */
3163
3164static void
3165vect_get_constant_vectors (tree op, slp_tree slp_node,
3166 vec<tree> *vec_oprnds,
3167 unsigned int op_num, unsigned int number_of_vectors)
3168{
3169 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3170 gimple *stmt = stmts[0];
3171 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3172 unsigned nunits;
3173 tree vec_cst;
3174 unsigned j, number_of_places_left_in_vector;
3175 tree vector_type;
3176 tree vop;
3177 int group_size = stmts.length ();
3178 unsigned int vec_num, i;
3179 unsigned number_of_copies = 1;
3180 vec<tree> voprnds;
3181 voprnds.create (number_of_vectors);
3182 bool constant_p, is_store;
3183 tree neutral_op = NULL;
3184 enum tree_code code = gimple_expr_code (stmt);
3185 gimple_seq ctor_seq = NULL;
3186
3187 /* Check if vector type is a boolean vector. */
3188 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3189 && vect_mask_constant_operand_p (stmt, op_num))
3190 vector_type
3191 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3192 else
3193 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
3194 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
3195
3196 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3197 {
3198 is_store = true;
3199 op = gimple_assign_rhs1 (stmt);
3200 }
3201 else
3202 is_store = false;
3203
3204 gcc_assert (op);
3205
3206 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3207 created vectors. It is greater than 1 if unrolling is performed.
3208
3209 For example, we have two scalar operands, s1 and s2 (e.g., group of
3210 strided accesses of size two), while NUNITS is four (i.e., four scalars
3211 of this type can be packed in a vector). The output vector will contain
3212 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3213 will be 2).
3214
3215 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3216 containing the operands.
3217
3218 For example, NUNITS is four as before, and the group size is 8
3219 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3220 {s5, s6, s7, s8}. */
3221
3222 number_of_copies = nunits * number_of_vectors / group_size;
3223
3224 number_of_places_left_in_vector = nunits;
3225 constant_p = true;
3226 tree_vector_builder elts (vector_type, nunits, 1);
3227 elts.quick_grow (nunits);
3228 bool place_after_defs = false;
3229 for (j = 0; j < number_of_copies; j++)
3230 {
3231 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
3232 {
3233 if (is_store)
3234 op = gimple_assign_rhs1 (stmt);
3235 else
3236 {
3237 switch (code)
3238 {
3239 case COND_EXPR:
3240 {
3241 tree cond = gimple_assign_rhs1 (stmt);
3242 if (TREE_CODE (cond) == SSA_NAME)
3243 op = gimple_op (stmt, op_num + 1);
3244 else if (op_num == 0 || op_num == 1)
3245 op = TREE_OPERAND (cond, op_num);
3246 else
3247 {
3248 if (op_num == 2)
3249 op = gimple_assign_rhs2 (stmt);
3250 else
3251 op = gimple_assign_rhs3 (stmt);
3252 }
3253 }
3254 break;
3255
3256 case CALL_EXPR:
3257 op = gimple_call_arg (stmt, op_num);
3258 break;
3259
3260 case LSHIFT_EXPR:
3261 case RSHIFT_EXPR:
3262 case LROTATE_EXPR:
3263 case RROTATE_EXPR:
3264 op = gimple_op (stmt, op_num + 1);
3265 /* Unlike the other binary operators, shifts/rotates have
3266 the shift count being int, instead of the same type as
3267 the lhs, so make sure the scalar is the right type if
3268 we are dealing with vectors of
3269 long long/long/short/char. */
3270 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
3271 op = fold_convert (TREE_TYPE (vector_type), op);
3272 break;
3273
3274 default:
3275 op = gimple_op (stmt, op_num + 1);
3276 break;
3277 }
3278 }
3279
3280 /* Create 'vect_ = {op0,op1,...,opn}'. */
3281 number_of_places_left_in_vector--;
3282 tree orig_op = op;
3283 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3284 {
3285 if (CONSTANT_CLASS_P (op))
3286 {
3287 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3288 {
3289 /* Can't use VIEW_CONVERT_EXPR for booleans because
3290 of possibly different sizes of scalar value and
3291 vector element. */
3292 if (integer_zerop (op))
3293 op = build_int_cst (TREE_TYPE (vector_type), 0);
3294 else if (integer_onep (op))
3295 op = build_all_ones_cst (TREE_TYPE (vector_type));
3296 else
3297 gcc_unreachable ();
3298 }
3299 else
3300 op = fold_unary (VIEW_CONVERT_EXPR,
3301 TREE_TYPE (vector_type), op);
3302 gcc_assert (op && CONSTANT_CLASS_P (op));
3303 }
3304 else
3305 {
3306 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3307 gimple *init_stmt;
3308 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3309 {
3310 tree true_val
3311 = build_all_ones_cst (TREE_TYPE (vector_type));
3312 tree false_val
3313 = build_zero_cst (TREE_TYPE (vector_type));
3314 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3315 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3316 op, true_val,
3317 false_val);
3318 }
3319 else
3320 {
3321 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3322 op);
3323 init_stmt
3324 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3325 op);
3326 }
3327 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3328 op = new_temp;
3329 }
3330 }
3331 elts[number_of_places_left_in_vector] = op;
3332 if (!CONSTANT_CLASS_P (op))
3333 constant_p = false;
3334 if (TREE_CODE (orig_op) == SSA_NAME
3335 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3336 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3337 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3338 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3339 place_after_defs = true;
3340
3341 if (number_of_places_left_in_vector == 0)
3342 {
3343 if (constant_p)
3344 vec_cst = elts.build ();
3345 else
3346 {
3347 vec<constructor_elt, va_gc> *v;
3348 unsigned k;
3349 vec_alloc (v, nunits);
3350 for (k = 0; k < nunits; ++k)
3351 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
3352 vec_cst = build_constructor (vector_type, v);
3353 }
3354 tree init;
3355 gimple_stmt_iterator gsi;
3356 if (place_after_defs)
3357 {
3358 gsi = gsi_for_stmt
3359 (vect_find_last_scalar_stmt_in_slp (slp_node));
3360 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
3361 }
3362 else
3363 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
3364 if (ctor_seq != NULL)
3365 {
3366 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3367 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
3368 GSI_SAME_STMT);
3369 ctor_seq = NULL;
3370 }
3371 voprnds.quick_push (init);
3372 place_after_defs = false;
3373 number_of_places_left_in_vector = nunits;
3374 constant_p = true;
3375 elts.new_vector (vector_type, nunits, 1);
3376 elts.quick_grow (nunits);
3377 }
3378 }
3379 }
3380
3381 /* Since the vectors are created in the reverse order, we should invert
3382 them. */
3383 vec_num = voprnds.length ();
3384 for (j = vec_num; j != 0; j--)
3385 {
3386 vop = voprnds[j - 1];
3387 vec_oprnds->quick_push (vop);
3388 }
3389
3390 voprnds.release ();
3391
3392 /* In case that VF is greater than the unrolling factor needed for the SLP
3393 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3394 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3395 to replicate the vectors. */
3396 while (number_of_vectors > vec_oprnds->length ())
3397 {
3398 tree neutral_vec = NULL;
3399
3400 if (neutral_op)
3401 {
3402 if (!neutral_vec)
3403 neutral_vec = build_vector_from_val (vector_type, neutral_op);
3404
3405 vec_oprnds->quick_push (neutral_vec);
3406 }
3407 else
3408 {
3409 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3410 vec_oprnds->quick_push (vop);
3411 }
3412 }
3413}
3414
3415
3416/* Get vectorized definitions from SLP_NODE that contains corresponding
3417 vectorized def-stmts. */
3418
3419static void
3420vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3421{
3422 tree vec_oprnd;
3423 gimple *vec_def_stmt;
3424 unsigned int i;
3425
3426 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3427
3428 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
3429 {
3430 gcc_assert (vec_def_stmt);
3431 if (gimple_code (vec_def_stmt) == GIMPLE_PHI)
3432 vec_oprnd = gimple_phi_result (vec_def_stmt);
3433 else
3434 vec_oprnd = gimple_get_lhs (vec_def_stmt);
3435 vec_oprnds->quick_push (vec_oprnd);
3436 }
3437}
3438
3439
3440/* Get vectorized definitions for SLP_NODE.
3441 If the scalar definitions are loop invariants or constants, collect them and
3442 call vect_get_constant_vectors() to create vector stmts.
3443 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3444 must be stored in the corresponding child of SLP_NODE, and we call
3445 vect_get_slp_vect_defs () to retrieve them. */
3446
3447void
3448vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
3449 vec<vec<tree> > *vec_oprnds)
3450{
3451 gimple *first_stmt;
3452 int number_of_vects = 0, i;
3453 unsigned int child_index = 0;
3454 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
3455 slp_tree child = NULL;
3456 vec<tree> vec_defs;
3457 tree oprnd;
3458 bool vectorized_defs;
3459
3460 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3461 FOR_EACH_VEC_ELT (ops, i, oprnd)
3462 {
3463 /* For each operand we check if it has vectorized definitions in a child
3464 node or we need to create them (for invariants and constants). We
3465 check if the LHS of the first stmt of the next child matches OPRND.
3466 If it does, we found the correct child. Otherwise, we call
3467 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
3468 to check this child node for the next operand. */
3469 vectorized_defs = false;
3470 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
3471 {
3472 child = SLP_TREE_CHILDREN (slp_node)[child_index];
3473
3474 /* We have to check both pattern and original def, if available. */
3475 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3476 {
3477 gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
3478 gimple *related
3479 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
3480 tree first_def_op;
3481
3482 if (gimple_code (first_def) == GIMPLE_PHI)
3483 first_def_op = gimple_phi_result (first_def);
3484 else
3485 first_def_op = gimple_get_lhs (first_def);
3486 if (operand_equal_p (oprnd, first_def_op, 0)
3487 || (related
3488 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
3489 {
3490 /* The number of vector defs is determined by the number of
3491 vector statements in the node from which we get those
3492 statements. */
3493 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3494 vectorized_defs = true;
3495 child_index++;
3496 }
3497 }
3498 else
3499 child_index++;
3500 }
3501
3502 if (!vectorized_defs)
3503 {
3504 if (i == 0)
3505 {
3506 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3507 /* Number of vector stmts was calculated according to LHS in
3508 vect_schedule_slp_instance (), fix it by replacing LHS with
3509 RHS, if necessary. See vect_get_smallest_scalar_type () for
3510 details. */
3511 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
3512 &rhs_size_unit);
3513 if (rhs_size_unit != lhs_size_unit)
3514 {
3515 number_of_vects *= rhs_size_unit;
3516 number_of_vects /= lhs_size_unit;
3517 }
3518 }
3519 }
3520
3521 /* Allocate memory for vectorized defs. */
3522 vec_defs = vNULL;
3523 vec_defs.create (number_of_vects);
3524
3525 /* For reduction defs we call vect_get_constant_vectors (), since we are
3526 looking for initial loop invariant values. */
3527 if (vectorized_defs)
3528 /* The defs are already vectorized. */
3529 vect_get_slp_vect_defs (child, &vec_defs);
3530 else
3531 /* Build vectors from scalar defs. */
3532 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
3533 number_of_vects);
3534
3535 vec_oprnds->quick_push (vec_defs);
3536 }
3537}
3538
3539/* Generate vector permute statements from a list of loads in DR_CHAIN.
3540 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3541 permute statements for the SLP node NODE of the SLP instance
3542 SLP_NODE_INSTANCE. */
3543
3544bool
3545vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3546 gimple_stmt_iterator *gsi, int vf,
3547 slp_instance slp_node_instance, bool analyze_only,
3548 unsigned *n_perms)
3549{
3550 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3551 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3552 tree mask_element_type = NULL_TREE, mask_type;
3553 int nunits, vec_index = 0;
3554 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3555 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3556 int mask_element;
3557 machine_mode mode;
3558
3559 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3560 return false;
3561
3562 stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
3563
3564 mode = TYPE_MODE (vectype);
3565
3566 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3567 same size as the vector element being permuted. */
3568 mask_element_type = lang_hooks.types.type_for_mode
3569 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
3570 mask_type = get_vectype_for_scalar_type (mask_element_type);
3571 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3572 auto_vec_perm_indices mask (nunits);
3573 mask.quick_grow (nunits);
3574
3575 /* Initialize the vect stmts of NODE to properly insert the generated
3576 stmts later. */
3577 if (! analyze_only)
3578 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3579 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3580 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3581
3582 /* Generate permutation masks for every NODE. Number of masks for each NODE
3583 is equal to GROUP_SIZE.
3584 E.g., we have a group of three nodes with three loads from the same
3585 location in each node, and the vector size is 4. I.e., we have a
3586 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3587 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3588 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3589 ...
3590
3591 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3592 The last mask is illegal since we assume two operands for permute
3593 operation, and the mask element values can't be outside that range.
3594 Hence, the last mask must be converted into {2,5,5,5}.
3595 For the first two permutations we need the first and the second input
3596 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3597 we need the second and the third vectors: {b1,c1,a2,b2} and
3598 {c2,a3,b3,c3}. */
3599
3600 int vect_stmts_counter = 0;
3601 int index = 0;
3602 int first_vec_index = -1;
3603 int second_vec_index = -1;
3604 bool noop_p = true;
3605 *n_perms = 0;
3606
3607 for (int j = 0; j < vf; j++)
3608 {
3609 for (int k = 0; k < group_size; k++)
3610 {
3611 int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
3612 + j * STMT_VINFO_GROUP_SIZE (stmt_info));
3613 vec_index = i / nunits;
3614 mask_element = i % nunits;
3615 if (vec_index == first_vec_index
3616 || first_vec_index == -1)
3617 {
3618 first_vec_index = vec_index;
3619 }
3620 else if (vec_index == second_vec_index
3621 || second_vec_index == -1)
3622 {
3623 second_vec_index = vec_index;
3624 mask_element += nunits;
3625 }
3626 else
3627 {
3628 if (dump_enabled_p ())
3629 {
3630 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3631 "permutation requires at "
3632 "least three vectors ");
3633 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3634 stmt, 0);
3635 }
3636 gcc_assert (analyze_only);
3637 return false;
3638 }
3639
3640 gcc_assert (mask_element >= 0
3641 && mask_element < 2 * nunits);
3642 if (mask_element != index)
3643 noop_p = false;
3644 mask[index++] = mask_element;
3645
3646 if (index == nunits)
3647 {
3648 if (! noop_p
3649 && ! can_vec_perm_p (mode, false, &mask))
3650 {
3651 if (dump_enabled_p ())
3652 {
3653 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3654 vect_location,
3655 "unsupported vect permute { ");
3656 for (i = 0; i < nunits; ++i)
3657 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ", mask[i]);
3658 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3659 }
3660 gcc_assert (analyze_only);
3661 return false;
3662 }
3663
3664 if (! noop_p)
3665 ++*n_perms;
3666
3667 if (!analyze_only)
3668 {
3669 tree mask_vec = NULL_TREE;
3670
3671 if (! noop_p)
3672 {
3673 tree_vector_builder mask_elts (mask_type, nunits, 1);
3674 for (int l = 0; l < nunits; ++l)
3675 mask_elts.quick_push (build_int_cst (mask_element_type,
3676 mask[l]));
3677 mask_vec = mask_elts.build ();
3678 }
3679
3680 if (second_vec_index == -1)
3681 second_vec_index = first_vec_index;
3682
3683 /* Generate the permute statement if necessary. */
3684 tree first_vec = dr_chain[first_vec_index];
3685 tree second_vec = dr_chain[second_vec_index];
3686 gimple *perm_stmt;
3687 if (! noop_p)
3688 {
3689 tree perm_dest
3690 = vect_create_destination_var (gimple_assign_lhs (stmt),
3691 vectype);
3692 perm_dest = make_ssa_name (perm_dest);
3693 perm_stmt = gimple_build_assign (perm_dest,
3694 VEC_PERM_EXPR,
3695 first_vec, second_vec,
3696 mask_vec);
3697 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3698 }
3699 else
3700 /* If mask was NULL_TREE generate the requested
3701 identity transform. */
3702 perm_stmt = SSA_NAME_DEF_STMT (first_vec);
3703
3704 /* Store the vector statement in NODE. */
3705 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
3706 }
3707
3708 index = 0;
3709 first_vec_index = -1;
3710 second_vec_index = -1;
3711 noop_p = true;
3712 }
3713 }
3714 }
3715
3716 return true;
3717}
3718
3719typedef hash_map <vec <gimple *>, slp_tree,
3720 simple_hashmap_traits <bst_traits, slp_tree> >
3721 scalar_stmts_to_slp_tree_map_t;
3722
3723/* Vectorize SLP instance tree in postorder. */
3724
3725static bool
3726vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3727 scalar_stmts_to_slp_tree_map_t *bst_map)
3728{
3729 gimple *stmt;
3730 bool grouped_store, is_store;
3731 gimple_stmt_iterator si;
3732 stmt_vec_info stmt_info;
3733 unsigned int group_size;
3734 tree vectype;
3735 int i, j;
3736 slp_tree child;
3737
3738 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3739 return false;
3740
3741 /* See if we have already vectorized the same set of stmts and reuse their
3742 vectorized stmts. */
3743 slp_tree &leader
3744 = bst_map->get_or_insert (SLP_TREE_SCALAR_STMTS (node).copy ());
3745 if (leader)
3746 {
3747 SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (leader));
3748 return false;
3749 }
3750
3751 leader = node;
3752 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3753 vect_schedule_slp_instance (child, instance, bst_map);
3754
3755 /* Push SLP node def-type to stmts. */
3756 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3757 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3758 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3759 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
3760
3761 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
3762 stmt_info = vinfo_for_stmt (stmt);
3763
3764 /* VECTYPE is the type of the destination. */
3765 vectype = STMT_VINFO_VECTYPE (stmt_info);
3766 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3767
3768 if (!SLP_TREE_VEC_STMTS (node).exists ())
3769 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
3770
3771 if (dump_enabled_p ())
3772 {
3773 dump_printf_loc (MSG_NOTE,vect_location,
3774 "------>vectorizing SLP node starting from: ");
3775 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3776 }
3777
3778 /* Vectorized stmts go before the last scalar stmt which is where
3779 all uses are ready. */
3780 si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
3781
3782 /* Mark the first element of the reduction chain as reduction to properly
3783 transform the node. In the analysis phase only the last element of the
3784 chain is marked as reduction. */
3785 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
3786 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3787 {
3788 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3789 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3790 }
3791
3792 /* Handle two-operation SLP nodes by vectorizing the group with
3793 both operations and then performing a merge. */
3794 if (SLP_TREE_TWO_OPERATORS (node))
3795 {
3796 enum tree_code code0 = gimple_assign_rhs_code (stmt);
3797 enum tree_code ocode = ERROR_MARK;
3798 gimple *ostmt;
3799 auto_vec_perm_indices mask (group_size);
3800 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
3801 if (gimple_assign_rhs_code (ostmt) != code0)
3802 {
3803 mask.quick_push (1);
3804 ocode = gimple_assign_rhs_code (ostmt);
3805 }
3806 else
3807 mask.quick_push (0);
3808 if (ocode != ERROR_MARK)
3809 {
3810 vec<gimple *> v0;
3811 vec<gimple *> v1;
3812 unsigned j;
3813 tree tmask = NULL_TREE;
3814 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3815 v0 = SLP_TREE_VEC_STMTS (node).copy ();
3816 SLP_TREE_VEC_STMTS (node).truncate (0);
3817 gimple_assign_set_rhs_code (stmt, ocode);
3818 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3819 gimple_assign_set_rhs_code (stmt, code0);
3820 v1 = SLP_TREE_VEC_STMTS (node).copy ();
3821 SLP_TREE_VEC_STMTS (node).truncate (0);
3822 tree meltype = build_nonstandard_integer_type
3823 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
3824 tree mvectype = get_same_sized_vectype (meltype, vectype);
3825 unsigned k = 0, l;
3826 for (j = 0; j < v0.length (); ++j)
3827 {
3828 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3829 tree_vector_builder melts (mvectype, nunits, 1);
3830 for (l = 0; l < nunits; ++l)
3831 {
3832 if (k >= group_size)
3833 k = 0;
3834 tree t = build_int_cst (meltype, mask[k++] * nunits + l);
3835 melts.quick_push (t);
3836 }
3837 tmask = melts.build ();
3838
3839 /* ??? Not all targets support a VEC_PERM_EXPR with a
3840 constant mask that would translate to a vec_merge RTX
3841 (with their vec_perm_const_ok). We can either not
3842 vectorize in that case or let veclower do its job.
3843 Unfortunately that isn't too great and at least for
3844 plus/minus we'd eventually like to match targets
3845 vector addsub instructions. */
3846 gimple *vstmt;
3847 vstmt = gimple_build_assign (make_ssa_name (vectype),
3848 VEC_PERM_EXPR,
3849 gimple_assign_lhs (v0[j]),
3850 gimple_assign_lhs (v1[j]), tmask);
3851 vect_finish_stmt_generation (stmt, vstmt, &si);
3852 SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
3853 }
3854 v0.release ();
3855 v1.release ();
3856 return false;
3857 }
3858 }
3859 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3860
3861 /* Restore stmt def-types. */
3862 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3863 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3864 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3865 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
3866
3867 return is_store;
3868}
3869
3870/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3871 For loop vectorization this is done in vectorizable_call, but for SLP
3872 it needs to be deferred until end of vect_schedule_slp, because multiple
3873 SLP instances may refer to the same scalar stmt. */
3874
3875static void
3876vect_remove_slp_scalar_calls (slp_tree node)
3877{
3878 gimple *stmt, *new_stmt;
3879 gimple_stmt_iterator gsi;
3880 int i;
3881 slp_tree child;
3882 tree lhs;
3883 stmt_vec_info stmt_info;
3884
3885 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3886 return;
3887
3888 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3889 vect_remove_slp_scalar_calls (child);
3890
3891 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
3892 {
3893 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3894 continue;
3895 stmt_info = vinfo_for_stmt (stmt);
3896 if (stmt_info == NULL
3897 || is_pattern_stmt_p (stmt_info)
3898 || !PURE_SLP_STMT (stmt_info))
3899 continue;
3900 lhs = gimple_call_lhs (stmt);
3901 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3902 set_vinfo_for_stmt (new_stmt, stmt_info);
3903 set_vinfo_for_stmt (stmt, NULL);
3904 STMT_VINFO_STMT (stmt_info) = new_stmt;
3905 gsi = gsi_for_stmt (stmt);
3906 gsi_replace (&gsi, new_stmt, false);
3907 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3908 }
3909}
3910
3911/* Generate vector code for all SLP instances in the loop/basic block. */
3912
3913bool
3914vect_schedule_slp (vec_info *vinfo)
3915{
3916 vec<slp_instance> slp_instances;
3917 slp_instance instance;
3918 unsigned int i;
3919 bool is_store = false;
3920
3921 slp_instances = vinfo->slp_instances;
3922 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3923 {
3924 /* Schedule the tree of INSTANCE. */
3925 scalar_stmts_to_slp_tree_map_t *bst_map
3926 = new scalar_stmts_to_slp_tree_map_t ();
3927 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3928 instance, bst_map);
3929 delete bst_map;
3930 if (dump_enabled_p ())
3931 dump_printf_loc (MSG_NOTE, vect_location,
3932 "vectorizing stmts using SLP.\n");
3933 }
3934
3935 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3936 {
3937 slp_tree root = SLP_INSTANCE_TREE (instance);
3938 gimple *store;
3939 unsigned int j;
3940 gimple_stmt_iterator gsi;
3941
3942 /* Remove scalar call stmts. Do not do this for basic-block
3943 vectorization as not all uses may be vectorized.
3944 ??? Why should this be necessary? DCE should be able to
3945 remove the stmts itself.
3946 ??? For BB vectorization we can as well remove scalar
3947 stmts starting from the SLP tree root if they have no
3948 uses. */
3949 if (is_a <loop_vec_info> (vinfo))
3950 vect_remove_slp_scalar_calls (root);
3951
3952 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
3953 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3954 {
3955 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3956 break;
3957
3958 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3959 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
3960 /* Free the attached stmt_vec_info and remove the stmt. */
3961 gsi = gsi_for_stmt (store);
3962 unlink_stmt_vdef (store);
3963 gsi_remove (&gsi, true);
3964 release_defs (store);
3965 free_stmt_vec_info (store);
3966 }
3967 }
3968
3969 return is_store;
3970}
3971