1/* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2024 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 3, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "backend.h"
25#include "rtl.h"
26#include "tree.h"
27#include "gimple.h"
28#include "gimple-iterator.h"
29#include "gimple-fold.h"
30#include "ssa.h"
31#include "expmed.h"
32#include "optabs-tree.h"
33#include "insn-config.h"
34#include "recog.h" /* FIXME: for insn_data */
35#include "fold-const.h"
36#include "stor-layout.h"
37#include "tree-eh.h"
38#include "gimplify.h"
39#include "gimple-iterator.h"
40#include "gimple-fold.h"
41#include "gimplify-me.h"
42#include "cfgloop.h"
43#include "tree-vectorizer.h"
44#include "dumpfile.h"
45#include "builtins.h"
46#include "internal-fn.h"
47#include "case-cfn-macros.h"
48#include "fold-const-call.h"
49#include "attribs.h"
50#include "cgraph.h"
51#include "omp-simd-clone.h"
52#include "predict.h"
53#include "tree-vector-builder.h"
54#include "vec-perm-indices.h"
55#include "gimple-range.h"
56
57
/* TODO: Note the vectorizer still builds COND_EXPRs with GENERIC compares
   in the first operand.  Disentangling this is future work; the
   IL is properly transferred to VEC_COND_EXPRs with separate compares.  */
61
62
63/* Return true if we have a useful VR_RANGE range for VAR, storing it
64 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
65
66bool
67vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
68{
69 value_range vr;
70 tree vr_min, vr_max;
71 get_range_query (cfun)->range_of_expr (r&: vr, expr: var);
72 if (vr.undefined_p ())
73 vr.set_varying (TREE_TYPE (var));
74 value_range_kind vr_type = get_legacy_range (vr, min&: vr_min, max&: vr_max);
75 *min_value = wi::to_wide (t: vr_min);
76 *max_value = wi::to_wide (t: vr_max);
77 wide_int nonzero = get_nonzero_bits (var);
78 signop sgn = TYPE_SIGN (TREE_TYPE (var));
79 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
80 nonzero, sgn) == VR_RANGE)
81 {
82 if (dump_enabled_p ())
83 {
84 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
85 dump_printf (MSG_NOTE, " has range [");
86 dump_hex (MSG_NOTE, *min_value);
87 dump_printf (MSG_NOTE, ", ");
88 dump_hex (MSG_NOTE, *max_value);
89 dump_printf (MSG_NOTE, "]\n");
90 }
91 return true;
92 }
93 else
94 {
95 if (dump_enabled_p ())
96 {
97 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
98 dump_printf (MSG_NOTE, " has no range info\n");
99 }
100 return false;
101 }
102}
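
/* A minimal illustrative sketch of a caller (hypothetical, not taken from
   this file): for an unsigned VAR, the returned bounds can prove that the
   value fits in a narrower type, e.g.

     wide_int min_value, max_value;
     if (vect_get_range_info (var, &min_value, &max_value)
         && wi::leu_p (max_value, 255))
       ...VAR is known to fit in 8 bits...  */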
103
/* Report that we've found an instance of the pattern NAME in
   statement STMT.  */
106
107static void
108vect_pattern_detected (const char *name, gimple *stmt)
109{
110 if (dump_enabled_p ())
111 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
112}
113
114/* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
115 return the pattern statement's stmt_vec_info. Set its vector type to
116 VECTYPE if it doesn't have one already. */
117
118static stmt_vec_info
119vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
120 stmt_vec_info orig_stmt_info, tree vectype)
121{
122 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
123 if (pattern_stmt_info == NULL)
124 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
125 gimple_set_bb (pattern_stmt, gimple_bb (g: orig_stmt_info->stmt));
126
127 pattern_stmt_info->pattern_stmt_p = true;
128 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
129 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
130 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
131 STMT_VINFO_TYPE (pattern_stmt_info) = STMT_VINFO_TYPE (orig_stmt_info);
132 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
133 {
134 gcc_assert (!vectype
135 || is_a <gcond *> (pattern_stmt)
136 || (VECTOR_BOOLEAN_TYPE_P (vectype)
137 == vect_use_mask_type_p (orig_stmt_info)));
138 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
139 pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
140 }
141 return pattern_stmt_info;
142}
143
144/* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
145 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
146 have one already. */
147
148static void
149vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
150 stmt_vec_info orig_stmt_info, tree vectype)
151{
152 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
153 STMT_VINFO_RELATED_STMT (orig_stmt_info)
154 = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
155}
156
157/* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
158 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
159 be different from the vector type of the final pattern statement.
160 If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
161 from which it was derived. */
162
163static inline void
164append_pattern_def_seq (vec_info *vinfo,
165 stmt_vec_info stmt_info, gimple *new_stmt,
166 tree vectype = NULL_TREE,
167 tree scalar_type_for_mask = NULL_TREE)
168{
169 gcc_assert (!scalar_type_for_mask
170 == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
171 if (vectype)
172 {
173 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
174 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
175 if (scalar_type_for_mask)
176 new_stmt_info->mask_precision
177 = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
178 }
179 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
180 new_stmt);
181}
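
/* A typical (illustrative) use, mirroring what vect_convert_input does
   further down: build an intermediate conversion, queue it as a pattern
   definition statement, and then use its result in the final pattern
   statement.  ITYPE, OPRND and VECITYPE are placeholders here.

     tree tmp = vect_recog_temp_ssa_var (itype, NULL);
     gassign *cast_stmt = gimple_build_assign (tmp, NOP_EXPR, oprnd);
     append_pattern_def_seq (vinfo, stmt_info, cast_stmt, vecitype);
     ...build the final pattern statement in terms of TMP...  */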
182
183/* The caller wants to perform new operations on vect_external variable
184 VAR, so that the result of the operations would also be vect_external.
185 Return the edge on which the operations can be performed, if one exists.
186 Return null if the operations should instead be treated as part of
187 the pattern that needs them. */
188
189static edge
190vect_get_external_def_edge (vec_info *vinfo, tree var)
191{
192 edge e = NULL;
193 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo))
194 {
195 e = loop_preheader_edge (loop_vinfo->loop);
196 if (!SSA_NAME_IS_DEFAULT_DEF (var))
197 {
198 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
199 if (bb == NULL
200 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
201 e = NULL;
202 }
203 }
204 return e;
205}
206
207/* Return true if the target supports a vector version of CODE,
208 where CODE is known to map to a direct optab with the given SUBTYPE.
209 ITYPE specifies the type of (some of) the scalar inputs and OTYPE
210 specifies the type of the scalar result.
211
   If CODE allows the inputs and outputs to have different types
   (such as for WIDEN_SUM_EXPR), it is the input mode rather
   than the output mode that determines the appropriate target pattern.
215 Operand 0 of the target pattern then specifies the mode that the output
216 must have.
217
218 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
219 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
220 is nonnull. */
221
222static bool
223vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
224 tree itype, tree *vecotype_out,
225 tree *vecitype_out = NULL,
226 enum optab_subtype subtype = optab_default)
227{
228 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
229 if (!vecitype)
230 return false;
231
232 tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
233 if (!vecotype)
234 return false;
235
236 optab optab = optab_for_tree_code (code, vecitype, subtype);
237 if (!optab)
238 return false;
239
240 insn_code icode = optab_handler (op: optab, TYPE_MODE (vecitype));
241 if (icode == CODE_FOR_nothing
242 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
243 return false;
244
245 *vecotype_out = vecotype;
246 if (vecitype_out)
247 *vecitype_out = vecitype;
248 return true;
249}
250
251/* Round bit precision PRECISION up to a full element. */
252
253static unsigned int
254vect_element_precision (unsigned int precision)
255{
256 precision = 1 << ceil_log2 (x: precision);
257 return MAX (precision, BITS_PER_UNIT);
258}
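
/* For example, vect_element_precision rounds a precision of 7 up to 8,
   12 up to 16 and 17 up to 32; anything below BITS_PER_UNIT is rounded
   up to a full byte.  */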
259
260/* If OP is defined by a statement that's being considered for vectorization,
261 return information about that statement, otherwise return NULL. */
262
263static stmt_vec_info
264vect_get_internal_def (vec_info *vinfo, tree op)
265{
266 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
267 if (def_stmt_info
268 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
269 return def_stmt_info;
270 return NULL;
271}
272
/* Check whether NAME, an ssa-name, is the result of a type conversion,
   such that:
     DEF_STMT: NAME = NOP (name0)
   On success, set *ORIG_TYPE to the type of name0, *DEF_STMT to the defining
   statement, and *PROMOTION to true if the conversion widens name0 to at
   least double its precision.  If CHECK_SIGN is TRUE, check that either both
   types are signed or both are unsigned.  */
278
279static bool
280type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
281 tree *orig_type, gimple **def_stmt, bool *promotion)
282{
283 tree type = TREE_TYPE (name);
284 tree oprnd0;
285 enum vect_def_type dt;
286
287 stmt_vec_info def_stmt_info;
288 if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
289 return false;
290
291 if (dt != vect_internal_def
292 && dt != vect_external_def && dt != vect_constant_def)
293 return false;
294
295 if (!*def_stmt)
296 return false;
297
298 if (!is_gimple_assign (gs: *def_stmt))
299 return false;
300
301 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
302 return false;
303
304 oprnd0 = gimple_assign_rhs1 (gs: *def_stmt);
305
306 *orig_type = TREE_TYPE (oprnd0);
307 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
308 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
309 return false;
310
311 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
312 *promotion = true;
313 else
314 *promotion = false;
315
316 if (!vect_is_simple_use (oprnd0, vinfo, &dt))
317 return false;
318
319 return true;
320}
321
322/* Holds information about an input operand after some sign changes
323 and type promotions have been peeled away. */
324class vect_unpromoted_value {
325public:
326 vect_unpromoted_value ();
327
328 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
329
330 /* The value obtained after peeling away zero or more casts. */
331 tree op;
332
333 /* The type of OP. */
334 tree type;
335
336 /* The definition type of OP. */
337 vect_def_type dt;
338
339 /* If OP is the result of peeling at least one cast, and if the cast
340 of OP itself is a vectorizable statement, CASTER identifies that
341 statement, otherwise it is null. */
342 stmt_vec_info caster;
343};
344
345inline vect_unpromoted_value::vect_unpromoted_value ()
346 : op (NULL_TREE),
347 type (NULL_TREE),
348 dt (vect_uninitialized_def),
349 caster (NULL)
350{
351}
352
353/* Set the operand to OP_IN, its definition type to DT_IN, and the
354 statement that casts it to CASTER_IN. */
355
356inline void
357vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
358 stmt_vec_info caster_in)
359{
360 op = op_in;
361 type = TREE_TYPE (op);
362 dt = dt_in;
363 caster = caster_in;
364}
365
366/* If OP is a vectorizable SSA name, strip a sequence of integer conversions
367 to reach some vectorizable inner operand OP', continuing as long as it
368 is possible to convert OP' back to OP using a possible sign change
369 followed by a possible promotion P. Return this OP', or null if OP is
370 not a vectorizable SSA name. If there is a promotion P, describe its
371 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
372 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
373 have more than one user.
374
375 A successful return means that it is possible to go from OP' to OP
376 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
377 whereas the cast from UNPROM to OP might be a promotion, a sign
378 change, or a nop.
379
380 E.g. say we have:
381
382 signed short *ptr = ...;
383 signed short C = *ptr;
384 unsigned short B = (unsigned short) C; // sign change
385 signed int A = (signed int) B; // unsigned promotion
386 ...possible other uses of A...
387 unsigned int OP = (unsigned int) A; // sign change
388
389 In this case it's possible to go directly from C to OP using:
390
391 OP = (unsigned int) (unsigned short) C;
392 +------------+ +--------------+
393 promotion sign change
394
395 so OP' would be C. The input to the promotion is B, so UNPROM
396 would describe B. */
397
398static tree
399vect_look_through_possible_promotion (vec_info *vinfo, tree op,
400 vect_unpromoted_value *unprom,
401 bool *single_use_p = NULL)
402{
403 tree op_type = TREE_TYPE (op);
404 if (!INTEGRAL_TYPE_P (op_type))
405 return NULL_TREE;
406
407 tree res = NULL_TREE;
408 unsigned int orig_precision = TYPE_PRECISION (op_type);
409 unsigned int min_precision = orig_precision;
410 stmt_vec_info caster = NULL;
411 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
412 {
413 /* See whether OP is simple enough to vectorize. */
414 stmt_vec_info def_stmt_info;
415 gimple *def_stmt;
416 vect_def_type dt;
417 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
418 break;
419
420 /* If OP is the input of a demotion, skip over it to see whether
421 OP is itself the result of a promotion. If so, the combined
422 effect of the promotion and the demotion might fit the required
423 pattern, otherwise neither operation fits.
424
425 This copes with cases such as the result of an arithmetic
426 operation being truncated before being stored, and where that
427 arithmetic operation has been recognized as an over-widened one. */
428 if (TYPE_PRECISION (op_type) <= min_precision)
429 {
430 /* Use OP as the UNPROM described above if we haven't yet
431 found a promotion, or if using the new input preserves the
432 sign of the previous promotion. */
433 if (!res
434 || TYPE_PRECISION (unprom->type) == orig_precision
435 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
436 {
437 unprom->set_op (op_in: op, dt_in: dt, caster_in: caster);
438 min_precision = TYPE_PRECISION (op_type);
439 }
440 /* Stop if we've already seen a promotion and if this
441 conversion does more than change the sign. */
442 else if (TYPE_PRECISION (op_type)
443 != TYPE_PRECISION (unprom->type))
444 break;
445
446 /* The sequence now extends to OP. */
447 res = op;
448 }
449
450 /* See whether OP is defined by a cast. Record it as CASTER if
451 the cast is potentially vectorizable. */
452 if (!def_stmt)
453 break;
454 caster = def_stmt_info;
455
456 /* Ignore pattern statements, since we don't link uses for them. */
457 if (caster
458 && single_use_p
459 && !STMT_VINFO_RELATED_STMT (caster)
460 && !has_single_use (var: res))
461 *single_use_p = false;
462
463 gassign *assign = dyn_cast <gassign *> (p: def_stmt);
464 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
465 break;
466
467 /* Continue with the input to the cast. */
468 op = gimple_assign_rhs1 (gs: def_stmt);
469 op_type = TREE_TYPE (op);
470 }
471 return res;
472}
473
474/* OP is an integer operand to an operation that returns TYPE, and we
475 want to treat the operation as a widening one. So far we can treat
476 it as widening from *COMMON_TYPE.
477
478 Return true if OP is suitable for such a widening operation,
479 either widening from *COMMON_TYPE or from some supertype of it.
480 Update *COMMON_TYPE to the supertype in the latter case.
481
482 SHIFT_P is true if OP is a shift amount. */
483
484static bool
485vect_joust_widened_integer (tree type, bool shift_p, tree op,
486 tree *common_type)
487{
488 /* Calculate the minimum precision required by OP, without changing
489 the sign of either operand. */
490 unsigned int precision;
491 if (shift_p)
492 {
493 if (!wi::leu_p (x: wi::to_widest (t: op), TYPE_PRECISION (type) / 2))
494 return false;
495 precision = TREE_INT_CST_LOW (op);
496 }
497 else
498 {
499 precision = wi::min_precision (x: wi::to_widest (t: op),
500 TYPE_SIGN (*common_type));
501 if (precision * 2 > TYPE_PRECISION (type))
502 return false;
503 }
504
505 /* If OP requires a wider type, switch to that type. The checks
506 above ensure that this is still narrower than the result. */
507 precision = vect_element_precision (precision);
508 if (TYPE_PRECISION (*common_type) < precision)
509 *common_type = build_nonstandard_integer_type
510 (precision, TYPE_UNSIGNED (*common_type));
511 return true;
512}
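
/* For example (illustrative): with TYPE == int, *COMMON_TYPE == unsigned char
   and OP == 1000, the constant needs 10 bits, which still leaves room for a
   widening operation producing a 32-bit result, so *COMMON_TYPE is widened
   to a 16-bit unsigned type and true is returned.  */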
513
514/* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
515 is narrower than type, storing the supertype in *COMMON_TYPE if so. */
516
517static bool
518vect_joust_widened_type (tree type, tree new_type, tree *common_type)
519{
520 if (types_compatible_p (type1: *common_type, type2: new_type))
521 return true;
522
523 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
524 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
525 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
526 return true;
527
528 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
529 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
530 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
531 {
532 *common_type = new_type;
533 return true;
534 }
535
536 /* We have mismatched signs, with the signed type being
537 no wider than the unsigned type. In this case we need
538 a wider signed type. */
539 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
540 TYPE_PRECISION (new_type));
541 precision *= 2;
542
543 if (precision * 2 > TYPE_PRECISION (type))
544 return false;
545
546 *common_type = build_nonstandard_integer_type (precision, false);
547 return true;
548}
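
/* For example (illustrative): if *COMMON_TYPE is unsigned char, NEW_TYPE is
   signed char and TYPE is a 32-bit int, neither input type can hold all
   values of the other, so *COMMON_TYPE becomes a 16-bit signed type: wide
   enough for both inputs and still narrow enough for a widening operation
   into TYPE.  */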
549
550/* Check whether STMT_INFO can be viewed as a tree of integer operations
551 in which each node either performs CODE or WIDENED_CODE, and where
552 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
553 specifies the maximum number of leaf operands. SHIFT_P says whether
554 CODE and WIDENED_CODE are some sort of shift.
555
556 If STMT_INFO is such a tree, return the number of leaf operands
557 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
558 to a type that (a) is narrower than the result of STMT_INFO and
559 (b) can hold all leaf operand values.
560
   If SUBTYPE is nonnull, allow the operands to differ in sign but not in
   precision; *SUBTYPE is updated to reflect this.
564
565 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
566 exists. */
567
568static unsigned int
569vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
570 code_helper widened_code, bool shift_p,
571 unsigned int max_nops,
572 vect_unpromoted_value *unprom, tree *common_type,
573 enum optab_subtype *subtype = NULL)
574{
575 /* Check for an integer operation with the right code. */
576 gimple* stmt = stmt_info->stmt;
577 if (!(is_gimple_assign (gs: stmt) || is_gimple_call (gs: stmt)))
578 return 0;
579
580 code_helper rhs_code;
581 if (is_gimple_assign (gs: stmt))
582 rhs_code = gimple_assign_rhs_code (gs: stmt);
583 else if (is_gimple_call (gs: stmt))
584 rhs_code = gimple_call_combined_fn (stmt);
585 else
586 return 0;
587
588 if (rhs_code != code
589 && rhs_code != widened_code)
590 return 0;
591
592 tree lhs = gimple_get_lhs (stmt);
593 tree type = TREE_TYPE (lhs);
594 if (!INTEGRAL_TYPE_P (type))
595 return 0;
596
597 /* Assume that both operands will be leaf operands. */
598 max_nops -= 2;
599
600 /* Check the operands. */
601 unsigned int next_op = 0;
602 for (unsigned int i = 0; i < 2; ++i)
603 {
604 vect_unpromoted_value *this_unprom = &unprom[next_op];
605 unsigned int nops = 1;
606 tree op = gimple_arg (gs: stmt, i);
607 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
608 {
609 /* We already have a common type from earlier operands.
610 Update it to account for OP. */
611 this_unprom->set_op (op_in: op, dt_in: vect_constant_def);
612 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
613 return 0;
614 }
615 else
616 {
617 /* Only allow shifts by constants. */
618 if (shift_p && i == 1)
619 return 0;
620
621 if (rhs_code != code)
622 {
623 /* If rhs_code is widened_code, don't look through further
624 possible promotions, there is a promotion already embedded
625 in the WIDEN_*_EXPR. */
626 if (TREE_CODE (op) != SSA_NAME
627 || !INTEGRAL_TYPE_P (TREE_TYPE (op)))
628 return 0;
629
630 stmt_vec_info def_stmt_info;
631 gimple *def_stmt;
632 vect_def_type dt;
633 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
634 &def_stmt))
635 return 0;
636 this_unprom->set_op (op_in: op, dt_in: dt, NULL);
637 }
638 else if (!vect_look_through_possible_promotion (vinfo, op,
639 unprom: this_unprom))
640 return 0;
641
642 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
643 {
644 /* The operand isn't widened. If STMT_INFO has the code
645 for an unwidened operation, recursively check whether
646 this operand is a node of the tree. */
647 if (rhs_code != code
648 || max_nops == 0
649 || this_unprom->dt != vect_internal_def)
650 return 0;
651
652 /* Give back the leaf slot allocated above now that we're
653 not treating this as a leaf operand. */
654 max_nops += 1;
655
656 /* Recursively process the definition of the operand. */
657 stmt_vec_info def_stmt_info
658 = vinfo->lookup_def (this_unprom->op);
659 nops = vect_widened_op_tree (vinfo, stmt_info: def_stmt_info, code,
660 widened_code, shift_p, max_nops,
661 unprom: this_unprom, common_type,
662 subtype);
663 if (nops == 0)
664 return 0;
665
666 max_nops -= nops;
667 }
668 else
669 {
670 /* Make sure that the operand is narrower than the result. */
671 if (TYPE_PRECISION (this_unprom->type) * 2
672 > TYPE_PRECISION (type))
673 return 0;
674
675 /* Update COMMON_TYPE for the new operand. */
676 if (i == 0)
677 *common_type = this_unprom->type;
678 else if (!vect_joust_widened_type (type, new_type: this_unprom->type,
679 common_type))
680 {
681 if (subtype)
682 {
683 /* See if we can sign extend the smaller type. */
684 if (TYPE_PRECISION (this_unprom->type)
685 > TYPE_PRECISION (*common_type))
686 *common_type = this_unprom->type;
687 *subtype = optab_vector_mixed_sign;
688 }
689 else
690 return 0;
691 }
692 }
693 }
694 next_op += nops;
695 }
696 return next_op;
697}
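
/* For example (illustrative): with CODE == PLUS_EXPR, MAX_NOPS == 3 and
   unsigned char variables a, b and c, the statement

     sum = ((int) a + (int) b) + (int) c;

   forms such a tree: the outer PLUS_EXPR has the inner PLUS_EXPR as a
   non-leaf node and c as a leaf, giving the three leaf operands a, b and c
   in UNPROM and unsigned char as *COMMON_TYPE.  */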
698
/* Helper to return a new temporary SSA name of TYPE for use in a pattern,
   with STMT as its defining statement.  If STMT is NULL, the caller must set
   SSA_NAME_DEF_STMT for the returned SSA var.  */
701
702static tree
703vect_recog_temp_ssa_var (tree type, gimple *stmt = NULL)
704{
705 return make_temp_ssa_name (type, stmt, name: "patt");
706}
707
708/* STMT2_INFO describes a type conversion that could be split into STMT1
709 followed by a version of STMT2_INFO that takes NEW_RHS as its first
710 input. Try to do this using pattern statements, returning true on
711 success. */
712
713static bool
714vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
715 gimple *stmt1, tree vectype)
716{
717 if (is_pattern_stmt_p (stmt_info: stmt2_info))
718 {
719 /* STMT2_INFO is part of a pattern. Get the statement to which
720 the pattern is attached. */
721 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
722 vect_init_pattern_stmt (vinfo, pattern_stmt: stmt1, orig_stmt_info: orig_stmt2_info, vectype);
723
724 if (dump_enabled_p ())
725 dump_printf_loc (MSG_NOTE, vect_location,
726 "Splitting pattern statement: %G", stmt2_info->stmt);
727
728 /* Since STMT2_INFO is a pattern statement, we can change it
729 in-situ without worrying about changing the code for the
730 containing block. */
731 gimple_assign_set_rhs1 (gs: stmt2_info->stmt, rhs: new_rhs);
732
733 if (dump_enabled_p ())
734 {
735 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
736 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
737 stmt2_info->stmt);
738 }
739
740 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
741 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
742 /* STMT2_INFO is the actual pattern statement. Add STMT1
743 to the end of the definition sequence. */
744 gimple_seq_add_stmt_without_update (def_seq, stmt1);
745 else
746 {
747 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
748 before it. */
749 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
750 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
751 }
752 return true;
753 }
754 else
755 {
756 /* STMT2_INFO doesn't yet have a pattern. Try to create a
757 two-statement pattern now. */
758 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
759 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
760 tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
761 if (!lhs_vectype)
762 return false;
763
764 if (dump_enabled_p ())
765 dump_printf_loc (MSG_NOTE, vect_location,
766 "Splitting statement: %G", stmt2_info->stmt);
767
768 /* Add STMT1 as a singleton pattern definition sequence. */
769 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
770 vect_init_pattern_stmt (vinfo, pattern_stmt: stmt1, orig_stmt_info: stmt2_info, vectype);
771 gimple_seq_add_stmt_without_update (def_seq, stmt1);
772
773 /* Build the second of the two pattern statements. */
774 tree new_lhs = vect_recog_temp_ssa_var (type: lhs_type, NULL);
775 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
776 vect_set_pattern_stmt (vinfo, pattern_stmt: new_stmt2, orig_stmt_info: stmt2_info, vectype: lhs_vectype);
777
778 if (dump_enabled_p ())
779 {
780 dump_printf_loc (MSG_NOTE, vect_location,
781 "into pattern statements: %G", stmt1);
782 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
783 (gimple *) new_stmt2);
784 }
785
786 return true;
787 }
788}
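
/* For example (illustrative): if STMT2_INFO is the conversion

     int a = (int) c;		// c has type signed char

   and the caller wants to tap into a 16-bit mid-way point, it can build

     STMT1:  short patt = (short) c;

   and call this function with NEW_RHS == patt, after which the pattern
   form of STMT2 becomes

     int a = (int) patt;

   This is how vect_convert_input splits an existing cast in two.  */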
789
790/* Look for the following pattern
791 X = x[i]
792 Y = y[i]
793 DIFF = X - Y
794 DAD = ABS_EXPR<DIFF>
795
   ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
   HALF_TYPE and UNPROM will be set if the statement is found to be a
   widened operation.
   DIFF_STMT will be set to the MINUS_EXPR statement that precedes ABS_STMT,
   if that statement is a MINUS_EXPR.  */
802static bool
803vect_recog_absolute_difference (vec_info *vinfo, gassign *abs_stmt,
804 tree *half_type,
805 vect_unpromoted_value unprom[2],
806 gassign **diff_stmt)
807{
808 if (!abs_stmt)
809 return false;
810
  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
813 enum tree_code code = gimple_assign_rhs_code (gs: abs_stmt);
814 if (code != ABS_EXPR && code != ABSU_EXPR)
815 return false;
816
  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  if (!abs_oprnd)
    return false;
  tree abs_type = TREE_TYPE (abs_oprnd);
821 if (!ANY_INTEGRAL_TYPE_P (abs_type)
822 || TYPE_OVERFLOW_WRAPS (abs_type)
823 || TYPE_UNSIGNED (abs_type))
824 return false;
825
826 /* Peel off conversions from the ABS input. This can involve sign
827 changes (e.g. from an unsigned subtraction to a signed ABS input)
828 or signed promotion, but it can't include unsigned promotion.
829 (Note that ABS of an unsigned promotion should have been folded
830 away before now anyway.) */
831 vect_unpromoted_value unprom_diff;
832 abs_oprnd = vect_look_through_possible_promotion (vinfo, op: abs_oprnd,
833 unprom: &unprom_diff);
834 if (!abs_oprnd)
835 return false;
836 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
837 && TYPE_UNSIGNED (unprom_diff.type))
838 return false;
839
840 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
841 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, op: abs_oprnd);
842 if (!diff_stmt_vinfo)
843 return false;
844
845 gassign *diff = dyn_cast <gassign *> (STMT_VINFO_STMT (diff_stmt_vinfo));
846 if (diff_stmt && diff
847 && gimple_assign_rhs_code (gs: diff) == MINUS_EXPR
848 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd)))
849 *diff_stmt = diff;
850
  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
853 if (vect_widened_op_tree (vinfo, stmt_info: diff_stmt_vinfo,
854 code: MINUS_EXPR, widened_code: IFN_VEC_WIDEN_MINUS,
855 shift_p: false, max_nops: 2, unprom, common_type: half_type))
856 return true;
857
858 return false;
859}
860
861/* Convert UNPROM to TYPE and return the result, adding new statements
862 to STMT_INFO's pattern definition statements if no better way is
863 available. VECTYPE is the vector form of TYPE.
864
865 If SUBTYPE then convert the type based on the subtype. */
866
867static tree
868vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
869 vect_unpromoted_value *unprom, tree vectype,
870 enum optab_subtype subtype = optab_default)
871{
872 /* Update the type if the signs differ. */
873 if (subtype == optab_vector_mixed_sign)
874 {
875 gcc_assert (!TYPE_UNSIGNED (type));
876 if (TYPE_UNSIGNED (TREE_TYPE (unprom->op)))
877 {
878 type = unsigned_type_for (type);
879 vectype = unsigned_type_for (vectype);
880 }
881 }
882
883 /* Check for a no-op conversion. */
884 if (types_compatible_p (type1: type, TREE_TYPE (unprom->op)))
885 return unprom->op;
886
887 /* Allow the caller to create constant vect_unpromoted_values. */
888 if (TREE_CODE (unprom->op) == INTEGER_CST)
889 return wide_int_to_tree (type, cst: wi::to_widest (t: unprom->op));
890
891 tree input = unprom->op;
892 if (unprom->caster)
893 {
894 tree lhs = gimple_get_lhs (unprom->caster->stmt);
895 tree lhs_type = TREE_TYPE (lhs);
896
897 /* If the result of the existing cast is the right width, use it
898 instead of the source of the cast. */
899 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
900 input = lhs;
901 /* If the precision we want is between the source and result
902 precisions of the existing cast, try splitting the cast into
903 two and tapping into a mid-way point. */
904 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
905 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
906 {
907 /* In order to preserve the semantics of the original cast,
908 give the mid-way point the same signedness as the input value.
909
910 It would be possible to use a signed type here instead if
911 TYPE is signed and UNPROM->TYPE is unsigned, but that would
912 make the sign of the midtype sensitive to the order in
913 which we process the statements, since the signedness of
914 TYPE is the signedness required by just one of possibly
915 many users. Also, unsigned promotions are usually as cheap
916 as or cheaper than signed ones, so it's better to keep an
917 unsigned promotion. */
918 tree midtype = build_nonstandard_integer_type
919 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
920 tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
921 if (vec_midtype)
922 {
923 input = vect_recog_temp_ssa_var (type: midtype, NULL);
924 gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
925 unprom->op);
926 if (!vect_split_statement (vinfo, stmt2_info: unprom->caster, new_rhs: input, stmt1: new_stmt,
927 vectype: vec_midtype))
928 append_pattern_def_seq (vinfo, stmt_info,
929 new_stmt, vectype: vec_midtype);
930 }
931 }
932
933 /* See if we can reuse an existing result. */
934 if (types_compatible_p (type1: type, TREE_TYPE (input)))
935 return input;
936 }
937
938 /* We need a new conversion statement. */
939 tree new_op = vect_recog_temp_ssa_var (type, NULL);
940 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
941
942 /* If OP is an external value, see if we can insert the new statement
943 on an incoming edge. */
944 if (input == unprom->op && unprom->dt == vect_external_def)
945 if (edge e = vect_get_external_def_edge (vinfo, var: input))
946 {
947 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
948 gcc_assert (!new_bb);
949 return new_op;
950 }
951
952 /* As a (common) last resort, add the statement to the pattern itself. */
953 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
954 return new_op;
955}
956
957/* Invoke vect_convert_input for N elements of UNPROM and store the
958 result in the corresponding elements of RESULT.
959
960 If SUBTYPE then convert the type based on the subtype. */
961
962static void
963vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
964 tree *result, tree type, vect_unpromoted_value *unprom,
965 tree vectype, enum optab_subtype subtype = optab_default)
966{
967 for (unsigned int i = 0; i < n; ++i)
968 {
969 unsigned int j;
970 for (j = 0; j < i; ++j)
971 if (unprom[j].op == unprom[i].op)
972 break;
973
974 if (j < i)
975 result[i] = result[j];
976 else
977 result[i] = vect_convert_input (vinfo, stmt_info,
978 type, unprom: &unprom[i], vectype, subtype);
979 }
980}
981
982/* The caller has created a (possibly empty) sequence of pattern definition
983 statements followed by a single statement PATTERN_STMT. Cast the result
984 of this final statement to TYPE. If a new statement is needed, add
985 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
986 and return the new statement, otherwise return PATTERN_STMT as-is.
987 VECITYPE is the vector form of PATTERN_STMT's result type. */
988
989static gimple *
990vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
991 gimple *pattern_stmt, tree vecitype)
992{
993 tree lhs = gimple_get_lhs (pattern_stmt);
994 if (!types_compatible_p (type1: type, TREE_TYPE (lhs)))
995 {
996 append_pattern_def_seq (vinfo, stmt_info, new_stmt: pattern_stmt, vectype: vecitype);
997 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
998 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
999 }
1000 return pattern_stmt;
1001}
1002
/* Return true if STMT_INFO describes a reduction for which reassociation
   is allowed.  If STMT_INFO is part of a group, assume that it's part of
   a reduction chain and optimistically assume that all statements
   except the last allow reassociation.
   Also require it to have code CODE and to be a reduction
   in the outermost loop.  When returning true, store the operands in
   *OP0_OUT and *OP1_OUT.  */
1010
1011static bool
1012vect_reassociating_reduction_p (vec_info *vinfo,
1013 stmt_vec_info stmt_info, tree_code code,
1014 tree *op0_out, tree *op1_out)
1015{
1016 loop_vec_info loop_info = dyn_cast <loop_vec_info> (p: vinfo);
1017 if (!loop_info)
1018 return false;
1019
1020 gassign *assign = dyn_cast <gassign *> (p: stmt_info->stmt);
1021 if (!assign || gimple_assign_rhs_code (gs: assign) != code)
1022 return false;
1023
1024 /* We don't allow changing the order of the computation in the inner-loop
1025 when doing outer-loop vectorization. */
1026 class loop *loop = LOOP_VINFO_LOOP (loop_info);
1027 if (loop && nested_in_vect_loop_p (loop, stmt_info))
1028 return false;
1029
1030 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
1031 {
1032 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
1033 code))
1034 return false;
1035 }
1036 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
1037 return false;
1038
1039 *op0_out = gimple_assign_rhs1 (gs: assign);
1040 *op1_out = gimple_assign_rhs2 (gs: assign);
1041 if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
1042 std::swap (a&: *op0_out, b&: *op1_out);
1043 return true;
1044}
1045
/* match.pd function to match
   (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
   with conditions:
   1) @1, @2, c, d, a, b are all of integral type.
   2) @1 and @2 each have a single use.
   3) a and c have the same precision.
   4) c and @1 have different precisions.
   5) c and d have the same type, or they may differ in sign when the
      conversion is a truncation.

   Record a, c, d and @3.  */
1057
1058extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
1059
1060/* Function vect_recog_cond_expr_convert
1061
1062 Try to find the following pattern:
1063
1064 TYPE_AB A,B;
1065 TYPE_CD C,D;
1066 TYPE_E E;
1067 TYPE_E op_true = (TYPE_E) A;
1068 TYPE_E op_false = (TYPE_E) B;
1069
1070 E = C cmp D ? op_true : op_false;
1071
1072 where
1073 TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
1074 TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
1075 single_use of op_true and op_false.
1076 TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
1077
1078 Input:
1079
1080 * STMT_VINFO: The stmt from which the pattern search begins.
1081 here it starts with E = c cmp D ? op_true : op_false;
1082
1083 Output:
1084
1085 TYPE1 E' = C cmp D ? A : B;
1086 TYPE3 E = (TYPE3) E';
1087
   There may be an extra nop_convert for A or B to handle differences in
   signedness.
1089
1090 * TYPE_OUT: The vector type of the output of this pattern.
1091
1092 * Return value: A new stmt that will be used to replace the sequence of
1093 stmts that constitute the pattern. In this case it will be:
1094 E = (TYPE3)E';
1095 E' = C cmp D ? A : B; is recorded in pattern definition statements; */
1096
1097static gimple *
1098vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
1099 stmt_vec_info stmt_vinfo, tree *type_out)
1100{
1101 gassign *last_stmt = dyn_cast <gassign *> (p: stmt_vinfo->stmt);
1102 tree lhs, match[4], temp, type, new_lhs, op2;
1103 gimple *cond_stmt;
1104 gimple *pattern_stmt;
1105
1106 if (!last_stmt)
1107 return NULL;
1108
1109 lhs = gimple_assign_lhs (gs: last_stmt);
1110
  /* Find E = C cmp D ? (TYPE3) A : (TYPE3) B;
     TYPE_PRECISION (A) == TYPE_PRECISION (C).  */
1113 if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
1114 return NULL;
1115
1116 vect_pattern_detected (name: "vect_recog_cond_expr_convert_pattern", stmt: last_stmt);
1117
1118 op2 = match[2];
1119 type = TREE_TYPE (match[1]);
1120 if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
1121 {
1122 op2 = vect_recog_temp_ssa_var (type, NULL);
1123 gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
1124 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: nop_stmt,
1125 vectype: get_vectype_for_scalar_type (vinfo, type));
1126 }
1127
1128 temp = vect_recog_temp_ssa_var (type, NULL);
1129 cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
1130 match[1], op2));
1131 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: cond_stmt,
1132 vectype: get_vectype_for_scalar_type (vinfo, type));
1133 new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
1134 pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
1135 *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
1136
1137 if (dump_enabled_p ())
1138 dump_printf_loc (MSG_NOTE, vect_location,
1139 "created pattern stmt: %G", pattern_stmt);
1140 return pattern_stmt;
1141}
1142
1143/* Function vect_recog_dot_prod_pattern
1144
1145 Try to find the following pattern:
1146
     type1a x_t;
     type1b y_t;
1149 TYPE1 prod;
1150 TYPE2 sum = init;
1151 loop:
1152 sum_0 = phi <init, sum_1>
1153 S1 x_t = ...
1154 S2 y_t = ...
1155 S3 x_T = (TYPE1) x_t;
1156 S4 y_T = (TYPE1) y_t;
1157 S5 prod = x_T * y_T;
1158 [S6 prod = (TYPE2) prod; #optional]
1159 S7 sum_1 = prod + sum_0;
1160
   where 'TYPE1' is exactly double the size of types 'type1a' and 'type1b';
   the sign of 'TYPE1' must match the sign of one of 'type1a' or 'type1b',
   but 'type1a' and 'type1b' may differ in sign from each other.
1164
1165 Input:
1166
1167 * STMT_VINFO: The stmt from which the pattern search begins. In the
1168 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
1169 will be detected.
1170
1171 Output:
1172
1173 * TYPE_OUT: The type of the output of this pattern.
1174
1175 * Return value: A new stmt that will be used to replace the sequence of
1176 stmts that constitute the pattern. In this case it will be:
1177 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
1178
1179 Note: The dot-prod idiom is a widening reduction pattern that is
1180 vectorized without preserving all the intermediate results. It
1181 produces only N/2 (widened) results (by summing up pairs of
1182 intermediate results) rather than all N results. Therefore, we
1183 cannot allow this pattern when we want to get all the results and in
1184 the correct order (as is the case when this computation is in an
   inner-loop nested in an outer-loop that is being vectorized).  */
1186
1187static gimple *
1188vect_recog_dot_prod_pattern (vec_info *vinfo,
1189 stmt_vec_info stmt_vinfo, tree *type_out)
1190{
1191 tree oprnd0, oprnd1;
1192 gimple *last_stmt = stmt_vinfo->stmt;
1193 tree type, half_type;
1194 gimple *pattern_stmt;
1195 tree var;
1196
1197 /* Look for the following pattern
1198 DX = (TYPE1) X;
1199 DY = (TYPE1) Y;
1200 DPROD = DX * DY;
1201 DDPROD = (TYPE2) DPROD;
1202 sum_1 = DDPROD + sum_0;
1203 In which
1204 - DX is double the size of X
1205 - DY is double the size of Y
1206 - DX, DY, DPROD all have the same type but the sign
1207 between X, Y and DPROD can differ.
     - sum is the same size as DPROD or bigger
1209 - sum has been recognized as a reduction variable.
1210
1211 This is equivalent to:
1212 DPROD = X w* Y; #widen mult
1213 sum_1 = DPROD w+ sum_0; #widen summation
1214 or
1215 DPROD = X w* Y; #widen mult
1216 sum_1 = DPROD + sum_0; #summation
1217 */
1218
1219 /* Starting from LAST_STMT, follow the defs of its uses in search
1220 of the above pattern. */
1221
1222 if (!vect_reassociating_reduction_p (vinfo, stmt_info: stmt_vinfo, code: PLUS_EXPR,
1223 op0_out: &oprnd0, op1_out: &oprnd1))
1224 return NULL;
1225
1226 type = TREE_TYPE (gimple_get_lhs (last_stmt));
1227
1228 vect_unpromoted_value unprom_mult;
1229 oprnd0 = vect_look_through_possible_promotion (vinfo, op: oprnd0, unprom: &unprom_mult);
1230
1231 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1232 we know that oprnd1 is the reduction variable (defined by a loop-header
1233 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1234 Left to check that oprnd0 is defined by a (widen_)mult_expr */
1235 if (!oprnd0)
1236 return NULL;
1237
1238 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, op: oprnd0);
1239 if (!mult_vinfo)
1240 return NULL;
1241
  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
1244 vect_unpromoted_value unprom0[2];
1245 enum optab_subtype subtype = optab_vector;
1246 if (!vect_widened_op_tree (vinfo, stmt_info: mult_vinfo, code: MULT_EXPR, widened_code: WIDEN_MULT_EXPR,
1247 shift_p: false, max_nops: 2, unprom: unprom0, common_type: &half_type, subtype: &subtype))
1248 return NULL;
1249
1250 /* If there are two widening operations, make sure they agree on the sign
1251 of the extension. The result of an optab_vector_mixed_sign operation
1252 is signed; otherwise, the result has the same sign as the operands. */
1253 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
1254 && (subtype == optab_vector_mixed_sign
1255 ? TYPE_UNSIGNED (unprom_mult.type)
1256 : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
1257 return NULL;
1258
1259 vect_pattern_detected (name: "vect_recog_dot_prod_pattern", stmt: last_stmt);
1260
1261 /* If the inputs have mixed signs, canonicalize on using the signed
1262 input type for analysis. This also helps when emulating mixed-sign
1263 operations using signed operations. */
1264 if (subtype == optab_vector_mixed_sign)
1265 half_type = signed_type_for (half_type);
1266
1267 tree half_vectype;
1268 if (!vect_supportable_direct_optab_p (vinfo, otype: type, code: DOT_PROD_EXPR, itype: half_type,
1269 vecotype_out: type_out, vecitype_out: &half_vectype, subtype))
1270 {
1271 /* We can emulate a mixed-sign dot-product using a sequence of
1272 signed dot-products; see vect_emulate_mixed_dot_prod for details. */
1273 if (subtype != optab_vector_mixed_sign
1274 || !vect_supportable_direct_optab_p (vinfo, otype: signed_type_for (type),
1275 code: DOT_PROD_EXPR, itype: half_type,
1276 vecotype_out: type_out, vecitype_out: &half_vectype,
1277 subtype: optab_vector))
1278 return NULL;
1279
1280 *type_out = signed_or_unsigned_type_for (TYPE_UNSIGNED (type),
1281 *type_out);
1282 }
1283
1284 /* Get the inputs in the appropriate types. */
1285 tree mult_oprnd[2];
1286 vect_convert_inputs (vinfo, stmt_info: stmt_vinfo, n: 2, result: mult_oprnd, type: half_type,
1287 unprom: unprom0, vectype: half_vectype, subtype);
1288
1289 var = vect_recog_temp_ssa_var (type, NULL);
1290 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1291 mult_oprnd[0], mult_oprnd[1], oprnd1);
1292
1293 return pattern_stmt;
1294}
1295
1296
1297/* Function vect_recog_sad_pattern
1298
1299 Try to find the following Sum of Absolute Difference (SAD) pattern:
1300
1301 type x_t, y_t;
1302 signed TYPE1 diff, abs_diff;
1303 TYPE2 sum = init;
1304 loop:
1305 sum_0 = phi <init, sum_1>
1306 S1 x_t = ...
1307 S2 y_t = ...
1308 S3 x_T = (TYPE1) x_t;
1309 S4 y_T = (TYPE1) y_t;
1310 S5 diff = x_T - y_T;
1311 S6 abs_diff = ABS_EXPR <diff>;
1312 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1313 S8 sum_1 = abs_diff + sum_0;
1314
   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.
1318
1319 Input:
1320
1321 * STMT_VINFO: The stmt from which the pattern search begins. In the
1322 example, when this function is called with S8, the pattern
1323 {S3,S4,S5,S6,S7,S8} will be detected.
1324
1325 Output:
1326
1327 * TYPE_OUT: The type of the output of this pattern.
1328
1329 * Return value: A new stmt that will be used to replace the sequence of
1330 stmts that constitute the pattern. In this case it will be:
1331 SAD_EXPR <x_t, y_t, sum_0>
1332 */
1333
1334static gimple *
1335vect_recog_sad_pattern (vec_info *vinfo,
1336 stmt_vec_info stmt_vinfo, tree *type_out)
1337{
1338 gimple *last_stmt = stmt_vinfo->stmt;
1339 tree half_type;
1340
1341 /* Look for the following pattern
1342 DX = (TYPE1) X;
1343 DY = (TYPE1) Y;
1344 DDIFF = DX - DY;
1345 DAD = ABS_EXPR <DDIFF>;
        [DAD = (TYPE2) DAD;   #optional]
1347 sum_1 = DAD + sum_0;
1348 In which
1349 - DX is at least double the size of X
1350 - DY is at least double the size of Y
1351 - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
1353 - sum has been recognized as a reduction variable.
1354
1355 This is equivalent to:
1356 DDIFF = X w- Y; #widen sub
1357 DAD = ABS_EXPR <DDIFF>;
1358 sum_1 = DAD w+ sum_0; #widen summation
1359 or
1360 DDIFF = X w- Y; #widen sub
1361 DAD = ABS_EXPR <DDIFF>;
1362 sum_1 = DAD + sum_0; #summation
1363 */
1364
1365 /* Starting from LAST_STMT, follow the defs of its uses in search
1366 of the above pattern. */
1367
1368 tree plus_oprnd0, plus_oprnd1;
1369 if (!vect_reassociating_reduction_p (vinfo, stmt_info: stmt_vinfo, code: PLUS_EXPR,
1370 op0_out: &plus_oprnd0, op1_out: &plus_oprnd1))
1371 return NULL;
1372
1373 tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
1374
1375 /* Any non-truncating sequence of conversions is OK here, since
1376 with a successful match, the result of the ABS(U) is known to fit
1377 within the nonnegative range of the result type. (It cannot be the
1378 negative of the minimum signed value due to the range of the widening
1379 MINUS_EXPR.) */
1380 vect_unpromoted_value unprom_abs;
1381 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, op: plus_oprnd0,
1382 unprom: &unprom_abs);
1383
1384 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1385 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1386 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1387 Then check that plus_oprnd0 is defined by an abs_expr. */
1388
1389 if (!plus_oprnd0)
1390 return NULL;
1391
1392 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, op: plus_oprnd0);
1393 if (!abs_stmt_vinfo)
1394 return NULL;
1395
  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
1398 gassign *abs_stmt = dyn_cast <gassign *> (p: abs_stmt_vinfo->stmt);
1399 vect_unpromoted_value unprom[2];
1400
1401 if (!abs_stmt)
1402 {
1403 gcall *abd_stmt = dyn_cast <gcall *> (p: abs_stmt_vinfo->stmt);
1404 if (!abd_stmt
1405 || !gimple_call_internal_p (gs: abd_stmt)
1406 || gimple_call_num_args (gs: abd_stmt) != 2)
1407 return NULL;
1408
1409 tree abd_oprnd0 = gimple_call_arg (gs: abd_stmt, index: 0);
1410 tree abd_oprnd1 = gimple_call_arg (gs: abd_stmt, index: 1);
1411
1412 if (gimple_call_internal_fn (gs: abd_stmt) == IFN_ABD)
1413 {
1414 if (!vect_look_through_possible_promotion (vinfo, op: abd_oprnd0,
1415 unprom: &unprom[0])
1416 || !vect_look_through_possible_promotion (vinfo, op: abd_oprnd1,
1417 unprom: &unprom[1]))
1418 return NULL;
1419 }
1420 else if (gimple_call_internal_fn (gs: abd_stmt) == IFN_VEC_WIDEN_ABD)
1421 {
1422 unprom[0].op = abd_oprnd0;
1423 unprom[0].type = TREE_TYPE (abd_oprnd0);
1424 unprom[1].op = abd_oprnd1;
1425 unprom[1].type = TREE_TYPE (abd_oprnd1);
1426 }
1427 else
1428 return NULL;
1429
1430 half_type = unprom[0].type;
1431 }
1432 else if (!vect_recog_absolute_difference (vinfo, abs_stmt, half_type: &half_type,
1433 unprom, NULL))
1434 return NULL;
1435
1436 vect_pattern_detected (name: "vect_recog_sad_pattern", stmt: last_stmt);
1437
1438 tree half_vectype;
1439 if (!vect_supportable_direct_optab_p (vinfo, otype: sum_type, code: SAD_EXPR, itype: half_type,
1440 vecotype_out: type_out, vecitype_out: &half_vectype))
1441 return NULL;
1442
1443 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1444 tree sad_oprnd[2];
1445 vect_convert_inputs (vinfo, stmt_info: stmt_vinfo, n: 2, result: sad_oprnd, type: half_type,
1446 unprom, vectype: half_vectype);
1447
1448 tree var = vect_recog_temp_ssa_var (type: sum_type, NULL);
1449 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1450 sad_oprnd[1], plus_oprnd1);
1451
1452 return pattern_stmt;
1453}
1454
1455/* Function vect_recog_abd_pattern
1456
1457 Try to find the following ABsolute Difference (ABD) or
1458 widening ABD (WIDEN_ABD) pattern:
1459
1460 TYPE1 x;
1461 TYPE2 y;
1462 TYPE3 x_cast = (TYPE3) x; // widening or no-op
1463 TYPE3 y_cast = (TYPE3) y; // widening or no-op
1464 TYPE3 diff = x_cast - y_cast;
1465 TYPE4 diff_cast = (TYPE4) diff; // widening or no-op
1466 TYPE5 abs = ABS(U)_EXPR <diff_cast>;
1467
1468 WIDEN_ABD exists to optimize the case where TYPE4 is at least
1469 twice as wide as TYPE3.
1470
1471 Input:
1472
1473 * STMT_VINFO: The stmt from which the pattern search begins
1474
1475 Output:
1476
1477 * TYPE_OUT: The type of the output of this pattern
1478
1479 * Return value: A new stmt that will be used to replace the sequence of
1480 stmts that constitute the pattern, principally:
1481 out = IFN_ABD (x, y)
1482 out = IFN_WIDEN_ABD (x, y)
1483 */
1484
1485static gimple *
1486vect_recog_abd_pattern (vec_info *vinfo,
1487 stmt_vec_info stmt_vinfo, tree *type_out)
1488{
1489 gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
1490 if (!last_stmt)
1491 return NULL;
1492
1493 tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1494
1495 vect_unpromoted_value unprom[2];
1496 gassign *diff_stmt = NULL;
1497 tree abd_in_type;
1498 if (!vect_recog_absolute_difference (vinfo, abs_stmt: last_stmt, half_type: &abd_in_type,
1499 unprom, diff_stmt: &diff_stmt))
1500 {
1501 /* We cannot try further without having a non-widening MINUS. */
1502 if (!diff_stmt)
1503 return NULL;
1504
1505 unprom[0].op = gimple_assign_rhs1 (gs: diff_stmt);
1506 unprom[1].op = gimple_assign_rhs2 (gs: diff_stmt);
1507 abd_in_type = signed_type_for (out_type);
1508 }
1509
1510 tree abd_out_type = abd_in_type;
1511
1512 tree vectype_in = get_vectype_for_scalar_type (vinfo, abd_in_type);
1513 if (!vectype_in)
1514 return NULL;
1515
1516 internal_fn ifn = IFN_ABD;
1517 tree vectype_out = vectype_in;
1518
1519 if (TYPE_PRECISION (out_type) >= TYPE_PRECISION (abd_in_type) * 2
1520 && stmt_vinfo->min_output_precision >= TYPE_PRECISION (abd_in_type) * 2)
1521 {
1522 tree mid_type
1523 = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type) * 2,
1524 TYPE_UNSIGNED (abd_in_type));
1525 tree mid_vectype = get_vectype_for_scalar_type (vinfo, mid_type);
1526
1527 code_helper dummy_code;
1528 int dummy_int;
1529 auto_vec<tree> dummy_vec;
1530 if (mid_vectype
1531 && supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD,
1532 stmt_vinfo, mid_vectype,
1533 vectype_in,
1534 &dummy_code, &dummy_code,
1535 &dummy_int, &dummy_vec))
1536 {
1537 ifn = IFN_VEC_WIDEN_ABD;
1538 abd_out_type = mid_type;
1539 vectype_out = mid_vectype;
1540 }
1541 }
1542
1543 if (ifn == IFN_ABD
1544 && !direct_internal_fn_supported_p (ifn, vectype_in,
1545 OPTIMIZE_FOR_SPEED))
1546 return NULL;
1547
1548 vect_pattern_detected (name: "vect_recog_abd_pattern", stmt: last_stmt);
1549
1550 tree abd_oprnds[2];
1551 vect_convert_inputs (vinfo, stmt_info: stmt_vinfo, n: 2, result: abd_oprnds,
1552 type: abd_in_type, unprom, vectype: vectype_in);
1553
1554 *type_out = get_vectype_for_scalar_type (vinfo, out_type);
1555
1556 tree abd_result = vect_recog_temp_ssa_var (type: abd_out_type, NULL);
1557 gcall *abd_stmt = gimple_build_call_internal (ifn, 2,
1558 abd_oprnds[0], abd_oprnds[1]);
1559 gimple_call_set_lhs (gs: abd_stmt, lhs: abd_result);
1560 gimple_set_location (g: abd_stmt, location: gimple_location (g: last_stmt));
1561
1562 gimple *stmt = abd_stmt;
1563 if (TYPE_PRECISION (abd_in_type) == TYPE_PRECISION (abd_out_type)
1564 && TYPE_PRECISION (abd_out_type) < TYPE_PRECISION (out_type)
1565 && !TYPE_UNSIGNED (abd_out_type))
1566 {
1567 tree unsign = unsigned_type_for (abd_out_type);
1568 stmt = vect_convert_output (vinfo, stmt_info: stmt_vinfo, type: unsign, pattern_stmt: stmt, vecitype: vectype_out);
1569 vectype_out = get_vectype_for_scalar_type (vinfo, unsign);
1570 }
1571
1572 return vect_convert_output (vinfo, stmt_info: stmt_vinfo, type: out_type, pattern_stmt: stmt, vecitype: vectype_out);
1573}
1574
1575/* Recognize an operation that performs ORIG_CODE on widened inputs,
1576 so that it can be treated as though it had the form:
1577
1578 A_TYPE a;
1579 B_TYPE b;
1580 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1581 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1582 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1583 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1584 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1585
1586 Try to replace the pattern with:
1587
1588 A_TYPE a;
1589 B_TYPE b;
1590 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1591 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1592 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1593 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1594
1595 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1596
1597 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1598 name of the pattern being matched, for dump purposes. */
1599
1600static gimple *
1601vect_recog_widen_op_pattern (vec_info *vinfo,
1602 stmt_vec_info last_stmt_info, tree *type_out,
1603 tree_code orig_code, code_helper wide_code,
1604 bool shift_p, const char *name)
1605{
1606 gimple *last_stmt = last_stmt_info->stmt;
1607
1608 vect_unpromoted_value unprom[2];
1609 tree half_type;
1610 if (!vect_widened_op_tree (vinfo, stmt_info: last_stmt_info, code: orig_code, widened_code: orig_code,
1611 shift_p, max_nops: 2, unprom, common_type: &half_type))
1613 return NULL;
1614
1615 /* Pattern detected. */
1616 vect_pattern_detected (name, stmt: last_stmt);
1617
1618 tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
1619 tree itype = type;
1620 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1621 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1622 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1623 TYPE_UNSIGNED (half_type));
1624
1625 /* Check target support */
1626 tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1627 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1628 tree ctype = itype;
1629 tree vecctype = vecitype;
1630 if (orig_code == MINUS_EXPR
1631 && TYPE_UNSIGNED (itype)
1632 && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
1633 {
1634 /* Subtraction is special, even if half_type is unsigned and no matter
1635 whether type is signed or unsigned, if type is wider than itype,
1636 we need to sign-extend from the widening operation result to the
1637 result type.
1638 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
1639 itype unsigned short and type either int or unsigned int.
1640 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
1641 (unsigned short) 0xffff, but for type int we want the result -1
1642 and for type unsigned int 0xffffffff rather than 0xffff. */
1643 ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
1644 vecctype = get_vectype_for_scalar_type (vinfo, ctype);
1645 }
1646
1647 code_helper dummy_code;
1648 int dummy_int;
1649 auto_vec<tree> dummy_vec;
1650 if (!vectype
1651 || !vecitype
1652 || !vecctype
1653 || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1654 vecitype, vectype,
1655 &dummy_code, &dummy_code,
1656 &dummy_int, &dummy_vec))
1657 return NULL;
1658
1659 *type_out = get_vectype_for_scalar_type (vinfo, type);
1660 if (!*type_out)
1661 return NULL;
1662
1663 tree oprnd[2];
1664 vect_convert_inputs (vinfo, stmt_info: last_stmt_info,
1665 n: 2, result: oprnd, type: half_type, unprom, vectype);
1666
1667 tree var = vect_recog_temp_ssa_var (type: itype, NULL);
1668 gimple *pattern_stmt = vect_gimple_build (var, wide_code, oprnd[0], oprnd[1]);
1669
1670 if (vecctype != vecitype)
1671 pattern_stmt = vect_convert_output (vinfo, stmt_info: last_stmt_info, type: ctype,
1672 pattern_stmt, vecitype);
1673
1674 return vect_convert_output (vinfo, stmt_info: last_stmt_info,
1675 type, pattern_stmt, vecitype: vecctype);
1676}
1677
1678/* Try to detect multiplication on widened inputs, converting MULT_EXPR
1679 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1680
1681static gimple *
1682vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1683 tree *type_out)
1684{
1685 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1686 orig_code: MULT_EXPR, wide_code: WIDEN_MULT_EXPR, shift_p: false,
1687 name: "vect_recog_widen_mult_pattern");
1688}
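
/* For illustration, with 16-bit inputs

     unsigned short a, b;
     unsigned int prod = (unsigned int) a * (unsigned int) b;

   the pattern above matches the multiplication of the two promoted
   operands and, assuming the target supports a widening multiply for the
   mode, rewrites it as a WIDEN_MULT_EXPR on the narrow operands, avoiding
   the explicit promotion of both operands to 32 bits.  */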
1689
1690/* Try to detect addition on widened inputs, converting PLUS_EXPR
1691 to IFN_VEC_WIDEN_PLUS. See vect_recog_widen_op_pattern for details. */
1692
1693static gimple *
1694vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1695 tree *type_out)
1696{
1697 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1698 orig_code: PLUS_EXPR, wide_code: IFN_VEC_WIDEN_PLUS,
1699 shift_p: false, name: "vect_recog_widen_plus_pattern");
1700}
1701
1702/* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1703 to IFN_VEC_WIDEN_MINUS. See vect_recog_widen_op_pattern for details. */
1704static gimple *
1705vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1706 tree *type_out)
1707{
1708 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1709 orig_code: MINUS_EXPR, wide_code: IFN_VEC_WIDEN_MINUS,
1710 shift_p: false, name: "vect_recog_widen_minus_pattern");
1711}
1712
1713/* Try to detect abd on widened inputs, converting IFN_ABD
1714 to IFN_VEC_WIDEN_ABD. */
1715static gimple *
1716vect_recog_widen_abd_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1717 tree *type_out)
1718{
1719 gassign *last_stmt = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_vinfo));
1720 if (!last_stmt || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt)))
1721 return NULL;
1722
1723 tree last_rhs = gimple_assign_rhs1 (gs: last_stmt);
1724
1725 tree in_type = TREE_TYPE (last_rhs);
1726 tree out_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1727 if (!INTEGRAL_TYPE_P (in_type)
1728 || !INTEGRAL_TYPE_P (out_type)
1729 || TYPE_PRECISION (in_type) * 2 != TYPE_PRECISION (out_type)
1730 || !TYPE_UNSIGNED (in_type))
1731 return NULL;
1732
1733 vect_unpromoted_value unprom;
1734 tree op = vect_look_through_possible_promotion (vinfo, op: last_rhs, unprom: &unprom);
1735 if (!op || TYPE_PRECISION (TREE_TYPE (op)) != TYPE_PRECISION (in_type))
1736 return NULL;
1737
1738 stmt_vec_info abd_pattern_vinfo = vect_get_internal_def (vinfo, op);
1739 if (!abd_pattern_vinfo)
1740 return NULL;
1741
1742 abd_pattern_vinfo = vect_stmt_to_vectorize (stmt_info: abd_pattern_vinfo);
1743 gcall *abd_stmt = dyn_cast <gcall *> (STMT_VINFO_STMT (abd_pattern_vinfo));
1744 if (!abd_stmt
1745 || !gimple_call_internal_p (gs: abd_stmt)
1746 || gimple_call_internal_fn (gs: abd_stmt) != IFN_ABD)
1747 return NULL;
1748
1749 tree vectype_in = get_vectype_for_scalar_type (vinfo, in_type);
1750 tree vectype_out = get_vectype_for_scalar_type (vinfo, out_type);
1751
1752 code_helper dummy_code;
1753 int dummy_int;
1754 auto_vec<tree> dummy_vec;
1755 if (!supportable_widening_operation (vinfo, IFN_VEC_WIDEN_ABD, stmt_vinfo,
1756 vectype_out, vectype_in,
1757 &dummy_code, &dummy_code,
1758 &dummy_int, &dummy_vec))
1759 return NULL;
1760
1761 vect_pattern_detected (name: "vect_recog_widen_abd_pattern", stmt: last_stmt);
1762
1763 *type_out = vectype_out;
1764
1765 tree abd_oprnd0 = gimple_call_arg (gs: abd_stmt, index: 0);
1766 tree abd_oprnd1 = gimple_call_arg (gs: abd_stmt, index: 1);
1767 tree widen_abd_result = vect_recog_temp_ssa_var (type: out_type, NULL);
1768 gcall *widen_abd_stmt = gimple_build_call_internal (IFN_VEC_WIDEN_ABD, 2,
1769 abd_oprnd0, abd_oprnd1);
1770 gimple_call_set_lhs (gs: widen_abd_stmt, lhs: widen_abd_result);
1771 gimple_set_location (g: widen_abd_stmt, location: gimple_location (g: last_stmt));
1772 return widen_abd_stmt;
1773}
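
/* For illustration, once the plain abd pattern has produced

     unsigned char a, b;
     unsigned char d = .ABD (a, b);
     unsigned short w = (unsigned short) d;

   this recognizer can, assuming the target supports it, fold the zero
   extension into the call and emit w = .VEC_WIDEN_ABD (a, b) directly.  */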
1774
1775/* Function vect_recog_ctz_ffs_pattern
1776
1777 Try to find the following pattern:
1778
1779 TYPE1 A;
1780 TYPE1 B;
1781
1782 B = __builtin_ctz{,l,ll} (A);
1783
1784 or
1785
1786 B = __builtin_ffs{,l,ll} (A);
1787
1788 Input:
1789
1790 * STMT_VINFO: The stmt from which the pattern search begins.
1791 here it starts with B = __builtin_* (A);
1792
1793 Output:
1794
1795 * TYPE_OUT: The vector type of the output of this pattern.
1796
1797 * Return value: A new stmt that will be used to replace the sequence of
1798 stmts that constitute the pattern, using clz or popcount builtins. */
1799
1800static gimple *
1801vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1802 tree *type_out)
1803{
1804 gimple *call_stmt = stmt_vinfo->stmt;
1805 gimple *pattern_stmt;
1806 tree rhs_oprnd, rhs_type, lhs_oprnd, lhs_type, vec_type, vec_rhs_type;
1807 tree new_var;
1808 internal_fn ifn = IFN_LAST, ifnnew = IFN_LAST;
1809 bool defined_at_zero = true, defined_at_zero_new = false;
1810 int val = 0, val_new = 0, val_cmp = 0;
1811 int prec;
1812 int sub = 0, add = 0;
1813 location_t loc;
1814
1815 if (!is_gimple_call (gs: call_stmt))
1816 return NULL;
1817
1818 if (gimple_call_num_args (gs: call_stmt) != 1
1819 && gimple_call_num_args (gs: call_stmt) != 2)
1820 return NULL;
1821
1822 rhs_oprnd = gimple_call_arg (gs: call_stmt, index: 0);
1823 rhs_type = TREE_TYPE (rhs_oprnd);
1824 lhs_oprnd = gimple_call_lhs (gs: call_stmt);
1825 if (!lhs_oprnd)
1826 return NULL;
1827 lhs_type = TREE_TYPE (lhs_oprnd);
1828 if (!INTEGRAL_TYPE_P (lhs_type)
1829 || !INTEGRAL_TYPE_P (rhs_type)
1830 || !type_has_mode_precision_p (t: rhs_type)
1831 || TREE_CODE (rhs_oprnd) != SSA_NAME)
1832 return NULL;
1833
1834 switch (gimple_call_combined_fn (call_stmt))
1835 {
1836 CASE_CFN_CTZ:
1837 ifn = IFN_CTZ;
1838 if (!gimple_call_internal_p (gs: call_stmt)
1839 || gimple_call_num_args (gs: call_stmt) != 2)
1840 defined_at_zero = false;
1841 else
1842 val = tree_to_shwi (gimple_call_arg (gs: call_stmt, index: 1));
1843 break;
1844 CASE_CFN_FFS:
1845 ifn = IFN_FFS;
1846 break;
1847 default:
1848 return NULL;
1849 }
1850
1851 prec = TYPE_PRECISION (rhs_type);
1852 loc = gimple_location (g: call_stmt);
1853
1854 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
1855 if (!vec_type)
1856 return NULL;
1857
1858 vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
1859 if (!vec_rhs_type)
1860 return NULL;
1861
1862 /* Do it only if the backend doesn't have ctz<vector_mode>2 or
1863 ffs<vector_mode>2 pattern but does have clz<vector_mode>2 or
1864 popcount<vector_mode>2. */
1865 if (!vec_type
1866 || direct_internal_fn_supported_p (ifn, vec_rhs_type,
1867 OPTIMIZE_FOR_SPEED))
1868 return NULL;
1869
1870 if (ifn == IFN_FFS
1871 && direct_internal_fn_supported_p (IFN_CTZ, vec_rhs_type,
1872 OPTIMIZE_FOR_SPEED))
1873 {
1874 ifnnew = IFN_CTZ;
1875 defined_at_zero_new
1876 = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
1877 val_new) == 2;
1878 }
1879 else if (direct_internal_fn_supported_p (IFN_CLZ, vec_rhs_type,
1880 OPTIMIZE_FOR_SPEED))
1881 {
1882 ifnnew = IFN_CLZ;
1883 defined_at_zero_new
1884 = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
1885 val_new) == 2;
1886 }
1887 if ((ifnnew == IFN_LAST
1888 || (defined_at_zero && !defined_at_zero_new))
1889 && direct_internal_fn_supported_p (IFN_POPCOUNT, vec_rhs_type,
1890 OPTIMIZE_FOR_SPEED))
1891 {
1892 ifnnew = IFN_POPCOUNT;
1893 defined_at_zero_new = true;
1894 val_new = prec;
1895 }
1896 if (ifnnew == IFN_LAST)
1897 return NULL;
1898
1899 vect_pattern_detected (name: "vec_recog_ctz_ffs_pattern", stmt: call_stmt);
1900
1901 val_cmp = val_new;
1902 if ((ifnnew == IFN_CLZ
1903 && defined_at_zero
1904 && defined_at_zero_new
1905 && val == prec
1906 && val_new == prec)
1907 || (ifnnew == IFN_POPCOUNT && ifn == IFN_CTZ))
1908 {
1909 /* .CTZ (X) = PREC - .CLZ ((X - 1) & ~X)
1910 .CTZ (X) = .POPCOUNT ((X - 1) & ~X). */
1911 if (ifnnew == IFN_CLZ)
1912 sub = prec;
1913 val_cmp = prec;
1914
1915 if (!TYPE_UNSIGNED (rhs_type))
1916 {
1917 rhs_type = unsigned_type_for (rhs_type);
1918 vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type);
1919 new_var = vect_recog_temp_ssa_var (type: rhs_type, NULL);
1920 pattern_stmt = gimple_build_assign (new_var, NOP_EXPR, rhs_oprnd);
1921 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt,
1922 vectype: vec_rhs_type);
1923 rhs_oprnd = new_var;
1924 }
1925
1926 tree m1 = vect_recog_temp_ssa_var (type: rhs_type, NULL);
1927 pattern_stmt = gimple_build_assign (m1, PLUS_EXPR, rhs_oprnd,
1928 build_int_cst (rhs_type, -1));
1929 gimple_set_location (g: pattern_stmt, location: loc);
1930 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt, vectype: vec_rhs_type);
1931
1932 new_var = vect_recog_temp_ssa_var (type: rhs_type, NULL);
1933 pattern_stmt = gimple_build_assign (new_var, BIT_NOT_EXPR, rhs_oprnd);
1934 gimple_set_location (g: pattern_stmt, location: loc);
1935 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt, vectype: vec_rhs_type);
1936 rhs_oprnd = new_var;
1937
1938 new_var = vect_recog_temp_ssa_var (type: rhs_type, NULL);
1939 pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
1940 m1, rhs_oprnd);
1941 gimple_set_location (g: pattern_stmt, location: loc);
1942 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt, vectype: vec_rhs_type);
1943 rhs_oprnd = new_var;
1944 }
1945 else if (ifnnew == IFN_CLZ)
1946 {
1947 /* .CTZ (X) = (PREC - 1) - .CLZ (X & -X)
1948 .FFS (X) = PREC - .CLZ (X & -X). */
1949 sub = prec - (ifn == IFN_CTZ);
1950 val_cmp = sub - val_new;
1951
1952 tree neg = vect_recog_temp_ssa_var (type: rhs_type, NULL);
1953 pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
1954 gimple_set_location (g: pattern_stmt, location: loc);
1955 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt, vectype: vec_rhs_type);
1956
1957 new_var = vect_recog_temp_ssa_var (type: rhs_type, NULL);
1958 pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR,
1959 rhs_oprnd, neg);
1960 gimple_set_location (g: pattern_stmt, location: loc);
1961 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt, vectype: vec_rhs_type);
1962 rhs_oprnd = new_var;
1963 }
1964 else if (ifnnew == IFN_POPCOUNT)
1965 {
1966 /* .CTZ (X) = PREC - .POPCOUNT (X | -X)
1967 .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X). */
1968 sub = prec + (ifn == IFN_FFS);
1969 val_cmp = sub;
1970
1971 tree neg = vect_recog_temp_ssa_var (type: rhs_type, NULL);
1972 pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
1973 gimple_set_location (g: pattern_stmt, location: loc);
1974 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt, vectype: vec_rhs_type);
1975
1976 new_var = vect_recog_temp_ssa_var (type: rhs_type, NULL);
1977 pattern_stmt = gimple_build_assign (new_var, BIT_IOR_EXPR,
1978 rhs_oprnd, neg);
1979 gimple_set_location (g: pattern_stmt, location: loc);
1980 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt, vectype: vec_rhs_type);
1981 rhs_oprnd = new_var;
1982 }
1983 else if (ifnnew == IFN_CTZ)
1984 {
1985 /* .FFS (X) = .CTZ (X) + 1. */
1986 add = 1;
1987 val_cmp++;
1988 }
1989
1990 /* Create B = .IFNNEW (A). */
1991 new_var = vect_recog_temp_ssa_var (type: lhs_type, NULL);
1992 if ((ifnnew == IFN_CLZ || ifnnew == IFN_CTZ) && defined_at_zero_new)
1993 pattern_stmt
1994 = gimple_build_call_internal (ifnnew, 2, rhs_oprnd,
1995 build_int_cst (integer_type_node,
1996 val_new));
1997 else
1998 pattern_stmt = gimple_build_call_internal (ifnnew, 1, rhs_oprnd);
1999 gimple_call_set_lhs (gs: pattern_stmt, lhs: new_var);
2000 gimple_set_location (g: pattern_stmt, location: loc);
2001 *type_out = vec_type;
2002
2003 if (sub)
2004 {
2005 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt, vectype: vec_type);
2006 tree ret_var = vect_recog_temp_ssa_var (type: lhs_type, NULL);
2007 pattern_stmt = gimple_build_assign (ret_var, MINUS_EXPR,
2008 build_int_cst (lhs_type, sub),
2009 new_var);
2010 gimple_set_location (g: pattern_stmt, location: loc);
2011 new_var = ret_var;
2012 }
2013 else if (add)
2014 {
2015 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt, vectype: vec_type);
2016 tree ret_var = vect_recog_temp_ssa_var (type: lhs_type, NULL);
2017 pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
2018 build_int_cst (lhs_type, add));
2019 gimple_set_location (g: pattern_stmt, location: loc);
2020 new_var = ret_var;
2021 }
2022
2023 if (defined_at_zero
2024 && (!defined_at_zero_new || val != val_cmp))
2025 {
2026 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt, vectype: vec_type);
2027 tree ret_var = vect_recog_temp_ssa_var (type: lhs_type, NULL);
2028 rhs_oprnd = gimple_call_arg (gs: call_stmt, index: 0);
2029 rhs_type = TREE_TYPE (rhs_oprnd);
2030 tree cmp = build2_loc (loc, code: NE_EXPR, boolean_type_node,
2031 arg0: rhs_oprnd, arg1: build_zero_cst (rhs_type));
2032 pattern_stmt = gimple_build_assign (ret_var, COND_EXPR, cmp,
2033 new_var,
2034 build_int_cst (lhs_type, val));
2035 }
2036
2037 if (dump_enabled_p ())
2038 dump_printf_loc (MSG_NOTE, vect_location,
2039 "created pattern stmt: %G", pattern_stmt);
2040
2041 return pattern_stmt;
2042}
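
/* A worked instance of the identities used above, with PREC == 8 and
   X == 0b00101000 (40):

     X - 1        == 0b00100111
     ~X           == 0b11010111
     (X - 1) & ~X == 0b00000111   .POPCOUNT == 3 == .CTZ (X)
     -X           == 0b11011000
     X & -X       == 0b00001000   .CLZ == 4, so (PREC - 1) - 4 == 3 == .CTZ (X)
     X | -X       == 0b11111000   .POPCOUNT == 5, so PREC - 5 == 3 == .CTZ (X)

   and .FFS (X) == .CTZ (X) + 1 == 4 for this nonzero X.  */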
2043
2044/* Function vect_recog_popcount_clz_ctz_ffs_pattern
2045
2046 Try to find the following pattern:
2047
2048 UTYPE1 A;
2049 TYPE1 B;
2050 UTYPE2 temp_in;
2051 TYPE3 temp_out;
2052 temp_in = (UTYPE2)A;
2053
2054 temp_out = __builtin_popcount{,l,ll} (temp_in);
2055 B = (TYPE1) temp_out;
2056
2057   UTYPE2 may or may not be equal to TYPE3,
2058   e.g. UTYPE2 is equal to TYPE3 for __builtin_popcount
2059   but is not equal to TYPE3 for __builtin_popcountll.
2060
2061 Input:
2062
2063 * STMT_VINFO: The stmt from which the pattern search begins.
2064 here it starts with B = (TYPE1) temp_out;
2065
2066 Output:
2067
2068 * TYPE_OUT: The vector type of the output of this pattern.
2069
2070 * Return value: A new stmt that will be used to replace the sequence of
2071 stmts that constitute the pattern. In this case it will be:
2072 B = .POPCOUNT (A);
2073
2074 Similarly for clz, ctz and ffs.
2075*/
2076
2077static gimple *
2078vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo,
2079 stmt_vec_info stmt_vinfo,
2080 tree *type_out)
2081{
2082 gassign *last_stmt = dyn_cast <gassign *> (p: stmt_vinfo->stmt);
2083 gimple *call_stmt, *pattern_stmt;
2084 tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
2085 internal_fn ifn = IFN_LAST;
2086 int addend = 0;
2087
2088 /* Find B = (TYPE1) temp_out. */
2089 if (!last_stmt)
2090 return NULL;
2091 tree_code code = gimple_assign_rhs_code (gs: last_stmt);
2092 if (!CONVERT_EXPR_CODE_P (code))
2093 return NULL;
2094
2095 lhs_oprnd = gimple_assign_lhs (gs: last_stmt);
2096 lhs_type = TREE_TYPE (lhs_oprnd);
2097 if (!INTEGRAL_TYPE_P (lhs_type))
2098 return NULL;
2099
2100 rhs_oprnd = gimple_assign_rhs1 (gs: last_stmt);
2101 if (TREE_CODE (rhs_oprnd) != SSA_NAME
2102 || !has_single_use (var: rhs_oprnd))
2103 return NULL;
2104 call_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
2105
2106 /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
2107 if (!is_gimple_call (gs: call_stmt))
2108 return NULL;
2109 switch (gimple_call_combined_fn (call_stmt))
2110 {
2111 int val;
2112 CASE_CFN_POPCOUNT:
2113 ifn = IFN_POPCOUNT;
2114 break;
2115 CASE_CFN_CLZ:
2116 ifn = IFN_CLZ;
2117 /* Punt if call result is unsigned and defined value at zero
2118 is negative, as the negative value doesn't extend correctly. */
2119 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
2120 && gimple_call_internal_p (gs: call_stmt)
2121 && CLZ_DEFINED_VALUE_AT_ZERO
2122 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
2123 && val < 0)
2124 return NULL;
2125 break;
2126 CASE_CFN_CTZ:
2127 ifn = IFN_CTZ;
2128 /* Punt if call result is unsigned and defined value at zero
2129 is negative, as the negative value doesn't extend correctly. */
2130 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd))
2131 && gimple_call_internal_p (gs: call_stmt)
2132 && CTZ_DEFINED_VALUE_AT_ZERO
2133 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2
2134 && val < 0)
2135 return NULL;
2136 break;
2137 CASE_CFN_FFS:
2138 ifn = IFN_FFS;
2139 break;
2140 default:
2141 return NULL;
2142 }
2143
2144 if (gimple_call_num_args (gs: call_stmt) != 1
2145 && gimple_call_num_args (gs: call_stmt) != 2)
2146 return NULL;
2147
2148 rhs_oprnd = gimple_call_arg (gs: call_stmt, index: 0);
2149 vect_unpromoted_value unprom_diff;
2150 rhs_origin
2151 = vect_look_through_possible_promotion (vinfo, op: rhs_oprnd, unprom: &unprom_diff);
2152
2153 if (!rhs_origin)
2154 return NULL;
2155
2156 /* Input and output of .POPCOUNT should be same-precision integer. */
2157 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type))
2158 return NULL;
2159
2160  /* Also A should be unsigned or of the same precision as temp_in, otherwise
2161     different builtins/internal functions have different behaviors.  */

2162 if (TYPE_PRECISION (unprom_diff.type)
2163 != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))
2164 switch (ifn)
2165 {
2166 case IFN_POPCOUNT:
2167 /* For popcount require zero extension, which doesn't add any
2168 further bits to the count. */
2169 if (!TYPE_UNSIGNED (unprom_diff.type))
2170 return NULL;
2171 break;
2172 case IFN_CLZ:
2173 /* clzll (x) == clz (x) + 32 for unsigned x != 0, so ok
2174 if it is undefined at zero or if it matches also for the
2175 defined value there. */
2176 if (!TYPE_UNSIGNED (unprom_diff.type))
2177 return NULL;
2178 if (!type_has_mode_precision_p (t: lhs_type)
2179 || !type_has_mode_precision_p (TREE_TYPE (rhs_oprnd)))
2180 return NULL;
2181 addend = (TYPE_PRECISION (TREE_TYPE (rhs_oprnd))
2182 - TYPE_PRECISION (lhs_type));
2183 if (gimple_call_internal_p (gs: call_stmt)
2184 && gimple_call_num_args (gs: call_stmt) == 2)
2185 {
2186 int val1, val2;
2187 val1 = tree_to_shwi (gimple_call_arg (gs: call_stmt, index: 1));
2188 int d2
2189 = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2190 val2);
2191 if (d2 != 2 || val1 != val2 + addend)
2192 return NULL;
2193 }
2194 break;
2195 case IFN_CTZ:
2196 /* ctzll (x) == ctz (x) for unsigned or signed x != 0, so ok
2197 if it is undefined at zero or if it matches also for the
2198 defined value there. */
2199 if (gimple_call_internal_p (gs: call_stmt)
2200 && gimple_call_num_args (gs: call_stmt) == 2)
2201 {
2202 int val1, val2;
2203 val1 = tree_to_shwi (gimple_call_arg (gs: call_stmt, index: 1));
2204 int d2
2205 = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2206 val2);
2207 if (d2 != 2 || val1 != val2)
2208 return NULL;
2209 }
2210 break;
2211 case IFN_FFS:
2212 /* ffsll (x) == ffs (x) for unsigned or signed x. */
2213 break;
2214 default:
2215 gcc_unreachable ();
2216 }
2217
2218 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
2219 /* Do it only if the backend has popcount<vector_mode>2 etc. pattern. */
2220 if (!vec_type)
2221 return NULL;
2222
2223 bool supported
2224 = direct_internal_fn_supported_p (ifn, vec_type, OPTIMIZE_FOR_SPEED);
2225 if (!supported)
2226 switch (ifn)
2227 {
2228 case IFN_POPCOUNT:
2229 case IFN_CLZ:
2230 return NULL;
2231 case IFN_FFS:
2232 /* vect_recog_ctz_ffs_pattern can implement ffs using ctz. */
2233 if (direct_internal_fn_supported_p (IFN_CTZ, vec_type,
2234 OPTIMIZE_FOR_SPEED))
2235 break;
2236 /* FALLTHRU */
2237 case IFN_CTZ:
2238 /* vect_recog_ctz_ffs_pattern can implement ffs or ctz using
2239 clz or popcount. */
2240 if (direct_internal_fn_supported_p (IFN_CLZ, vec_type,
2241 OPTIMIZE_FOR_SPEED))
2242 break;
2243 if (direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
2244 OPTIMIZE_FOR_SPEED))
2245 break;
2246 return NULL;
2247 default:
2248 gcc_unreachable ();
2249 }
2250
2251 vect_pattern_detected (name: "vec_recog_popcount_clz_ctz_ffs_pattern",
2252 stmt: call_stmt);
2253
2254 /* Create B = .POPCOUNT (A). */
2255 new_var = vect_recog_temp_ssa_var (type: lhs_type, NULL);
2256 tree arg2 = NULL_TREE;
2257 int val;
2258 if (ifn == IFN_CLZ
2259 && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2260 val) == 2)
2261 arg2 = build_int_cst (integer_type_node, val);
2262 else if (ifn == IFN_CTZ
2263 && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
2264 val) == 2)
2265 arg2 = build_int_cst (integer_type_node, val);
2266 if (arg2)
2267 pattern_stmt = gimple_build_call_internal (ifn, 2, unprom_diff.op, arg2);
2268 else
2269 pattern_stmt = gimple_build_call_internal (ifn, 1, unprom_diff.op);
2270 gimple_call_set_lhs (gs: pattern_stmt, lhs: new_var);
2271 gimple_set_location (g: pattern_stmt, location: gimple_location (g: last_stmt));
2272 *type_out = vec_type;
2273
2274 if (dump_enabled_p ())
2275 dump_printf_loc (MSG_NOTE, vect_location,
2276 "created pattern stmt: %G", pattern_stmt);
2277
2278 if (addend)
2279 {
2280 gcc_assert (supported);
2281 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt, vectype: vec_type);
2282 tree ret_var = vect_recog_temp_ssa_var (type: lhs_type, NULL);
2283 pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var,
2284 build_int_cst (lhs_type, addend));
2285 }
2286 else if (!supported)
2287 {
2288 stmt_vec_info new_stmt_info = vinfo->add_stmt (pattern_stmt);
2289 STMT_VINFO_VECTYPE (new_stmt_info) = vec_type;
2290 pattern_stmt
2291 = vect_recog_ctz_ffs_pattern (vinfo, stmt_vinfo: new_stmt_info, type_out);
2292 if (pattern_stmt == NULL)
2293 return NULL;
2294 if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (new_stmt_info))
2295 {
2296 gimple_seq *pseq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
2297 gimple_seq_add_seq_without_update (pseq, seq);
2298 }
2299 }
2300 return pattern_stmt;
2301}
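
/* For illustration, the shape matched here is e.g.

     unsigned int a;
     unsigned int b = (unsigned int) __builtin_popcountll (a);

   where the zero extension of A to the wider argument type does not change
   the popcount and the result fits the narrower lhs, so the whole sequence
   can become b = .POPCOUNT (a) on the 32-bit type, assuming the target has
   a vector popcount for that mode (with the analogous caveats described
   above for clz, ctz and ffs).  */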
2302
2303/* Function vect_recog_pow_pattern
2304
2305 Try to find the following pattern:
2306
2307 x = POW (y, N);
2308
2309 with POW being one of pow, powf, powi, powif and N being
2310 either 2 or 0.5.
2311
2312 Input:
2313
2314 * STMT_VINFO: The stmt from which the pattern search begins.
2315
2316 Output:
2317
2318 * TYPE_OUT: The type of the output of this pattern.
2319
2320 * Return value: A new stmt that will be used to replace the sequence of
2321 stmts that constitute the pattern. In this case it will be:
2322 x = x * x
2323 or
2324 x = sqrt (x)
2325*/
2326
2327static gimple *
2328vect_recog_pow_pattern (vec_info *vinfo,
2329 stmt_vec_info stmt_vinfo, tree *type_out)
2330{
2331 gimple *last_stmt = stmt_vinfo->stmt;
2332 tree base, exp;
2333 gimple *stmt;
2334 tree var;
2335
2336 if (!is_gimple_call (gs: last_stmt) || gimple_call_lhs (gs: last_stmt) == NULL)
2337 return NULL;
2338
2339 switch (gimple_call_combined_fn (last_stmt))
2340 {
2341 CASE_CFN_POW:
2342 CASE_CFN_POWI:
2343 break;
2344
2345 default:
2346 return NULL;
2347 }
2348
2349 base = gimple_call_arg (gs: last_stmt, index: 0);
2350 exp = gimple_call_arg (gs: last_stmt, index: 1);
2351 if (TREE_CODE (exp) != REAL_CST
2352 && TREE_CODE (exp) != INTEGER_CST)
2353 {
2354 if (flag_unsafe_math_optimizations
2355 && TREE_CODE (base) == REAL_CST
2356 && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
2357 {
2358 combined_fn log_cfn;
2359 built_in_function exp_bfn;
2360 switch (DECL_FUNCTION_CODE (decl: gimple_call_fndecl (gs: last_stmt)))
2361 {
2362 case BUILT_IN_POW:
2363 log_cfn = CFN_BUILT_IN_LOG;
2364 exp_bfn = BUILT_IN_EXP;
2365 break;
2366 case BUILT_IN_POWF:
2367 log_cfn = CFN_BUILT_IN_LOGF;
2368 exp_bfn = BUILT_IN_EXPF;
2369 break;
2370 case BUILT_IN_POWL:
2371 log_cfn = CFN_BUILT_IN_LOGL;
2372 exp_bfn = BUILT_IN_EXPL;
2373 break;
2374 default:
2375 return NULL;
2376 }
2377 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
2378 tree exp_decl = builtin_decl_implicit (fncode: exp_bfn);
2379 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
2380 does that, but if C is a power of 2, we want to use
2381 exp2 (log2 (C) * x) in the non-vectorized version, but for
2382 vectorization we don't have vectorized exp2. */
2383 if (logc
2384 && TREE_CODE (logc) == REAL_CST
2385 && exp_decl
2386 && lookup_attribute (attr_name: "omp declare simd",
2387 DECL_ATTRIBUTES (exp_decl)))
2388 {
2389 cgraph_node *node = cgraph_node::get_create (exp_decl);
2390 if (node->simd_clones == NULL)
2391 {
2392 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
2393 || node->definition)
2394 return NULL;
2395 expand_simd_clones (node);
2396 if (node->simd_clones == NULL)
2397 return NULL;
2398 }
2399 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
2400 if (!*type_out)
2401 return NULL;
2402 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2403 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
2404 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: g);
2405 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2406 g = gimple_build_call (exp_decl, 1, def);
2407 gimple_call_set_lhs (gs: g, lhs: res);
2408 return g;
2409 }
2410 }
2411
2412 return NULL;
2413 }
2414
2415 /* We now have a pow or powi builtin function call with a constant
2416 exponent. */
2417
2418 /* Catch squaring. */
2419 if ((tree_fits_shwi_p (exp)
2420 && tree_to_shwi (exp) == 2)
2421 || (TREE_CODE (exp) == REAL_CST
2422 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
2423 {
2424 if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), code: MULT_EXPR,
2425 TREE_TYPE (base), vecotype_out: type_out))
2426 return NULL;
2427
2428 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
2429 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
2430 return stmt;
2431 }
2432
2433 /* Catch square root. */
2434 if (TREE_CODE (exp) == REAL_CST
2435 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
2436 {
2437 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
2438 if (*type_out
2439 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
2440 OPTIMIZE_FOR_SPEED))
2441 {
2442 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
2443 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
2444 gimple_call_set_lhs (gs: stmt, lhs: var);
2445 gimple_call_set_nothrow (s: stmt, nothrow_p: true);
2446 return stmt;
2447 }
2448 }
2449
2450 return NULL;
2451}
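
/* For illustration, the two simple cases handled above are

     y = pow (x, 2.0);   ->   y = x * x;
     y = pow (x, 0.5);   ->   y = .SQRT (x);  (given vector sqrt support)

   while pow (C, x) with a constant base is only rewritten to
   exp (log (C) * x) under -funsafe-math-optimizations and only when a
   usable "omp declare simd" clone of exp exists.  */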
2452
2453
2454/* Function vect_recog_widen_sum_pattern
2455
2456 Try to find the following pattern:
2457
2458 type x_t;
2459 TYPE x_T, sum = init;
2460 loop:
2461 sum_0 = phi <init, sum_1>
2462 S1 x_t = *p;
2463 S2 x_T = (TYPE) x_t;
2464 S3 sum_1 = x_T + sum_0;
2465
2466   where type 'TYPE' is at least double the size of type 'type', i.e. we're
2467 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
2468 a special case of a reduction computation.
2469
2470 Input:
2471
2472 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
2473 when this function is called with S3, the pattern {S2,S3} will be detected.
2474
2475 Output:
2476
2477 * TYPE_OUT: The type of the output of this pattern.
2478
2479 * Return value: A new stmt that will be used to replace the sequence of
2480 stmts that constitute the pattern. In this case it will be:
2481 WIDEN_SUM <x_t, sum_0>
2482
2483 Note: The widening-sum idiom is a widening reduction pattern that is
2484 vectorized without preserving all the intermediate results. It
2485 produces only N/2 (widened) results (by summing up pairs of
2486 intermediate results) rather than all N results. Therefore, we
2487 cannot allow this pattern when we want to get all the results and in
2488 the correct order (as is the case when this computation is in an
2489   inner-loop nested in an outer-loop that is being vectorized). */
2490
2491static gimple *
2492vect_recog_widen_sum_pattern (vec_info *vinfo,
2493 stmt_vec_info stmt_vinfo, tree *type_out)
2494{
2495 gimple *last_stmt = stmt_vinfo->stmt;
2496 tree oprnd0, oprnd1;
2497 tree type;
2498 gimple *pattern_stmt;
2499 tree var;
2500
2501 /* Look for the following pattern
2502 DX = (TYPE) X;
2503 sum_1 = DX + sum_0;
2504 In which DX is at least double the size of X, and sum_1 has been
2505 recognized as a reduction variable.
2506 */
2507
2508 /* Starting from LAST_STMT, follow the defs of its uses in search
2509 of the above pattern. */
2510
2511 if (!vect_reassociating_reduction_p (vinfo, stmt_info: stmt_vinfo, code: PLUS_EXPR,
2512 op0_out: &oprnd0, op1_out: &oprnd1)
2513 || TREE_CODE (oprnd0) != SSA_NAME
2514 || !vinfo->lookup_def (oprnd0))
2515 return NULL;
2516
2517 type = TREE_TYPE (gimple_get_lhs (last_stmt));
2518
2519 /* So far so good. Since last_stmt was detected as a (summation) reduction,
2520 we know that oprnd1 is the reduction variable (defined by a loop-header
2521 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
2522 Left to check that oprnd0 is defined by a cast from type 'type' to type
2523 'TYPE'. */
2524
2525 vect_unpromoted_value unprom0;
2526 if (!vect_look_through_possible_promotion (vinfo, op: oprnd0, unprom: &unprom0)
2527 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
2528 return NULL;
2529
2530 vect_pattern_detected (name: "vect_recog_widen_sum_pattern", stmt: last_stmt);
2531
2532 if (!vect_supportable_direct_optab_p (vinfo, otype: type, code: WIDEN_SUM_EXPR,
2533 itype: unprom0.type, vecotype_out: type_out))
2534 return NULL;
2535
2536 var = vect_recog_temp_ssa_var (type, NULL);
2537 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
2538
2539 return pattern_stmt;
2540}
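
/* For illustration, a reduction of the rough shape

     unsigned char x[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += x[i];

   matches this pattern: each x[i] is promoted to int before the addition,
   and with target support for WIDEN_SUM_EXPR the narrow elements are summed
   directly into the wider accumulator, at the cost of not preserving the
   individual intermediate results (see the note above).  */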
2541
2542/* Function vect_recog_bitfield_ref_pattern
2543
2544 Try to find the following pattern:
2545
2546 bf_value = BIT_FIELD_REF (container, bitsize, bitpos);
2547 result = (type_out) bf_value;
2548
2549 or
2550
2551 if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
2552
2553   where type_out is a non-bitfield type, that is to say, its precision matches
2554 2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).
2555
2556 Input:
2557
2558 * STMT_VINFO: The stmt from which the pattern search begins.
2559 here it starts with:
2560 result = (type_out) bf_value;
2561
2562 or
2563
2564 if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
2565
2566 Output:
2567
2568 * TYPE_OUT: The vector type of the output of this pattern.
2569
2570 * Return value: A new stmt that will be used to replace the sequence of
2571 stmts that constitute the pattern. If the precision of type_out is bigger
2572   than the precision of the container we perform the widening before the shifting,
2573 since the new precision will be large enough to shift the value and moving
2574 widening operations up the statement chain enables the generation of
2575 widening loads. If we are widening and the operation after the pattern is
2576 an addition then we mask first and shift later, to enable the generation of
2577 shifting adds. In the case of narrowing we will always mask first, shift
2578 last and then perform a narrowing operation. This will enable the
2579 generation of narrowing shifts.
2580
2581 Widening with mask first, shift later:
2582 container = (type_out) container;
2583 masked = container & (((1 << bitsize) - 1) << bitpos);
2584 result = masked >> bitpos;
2585
2586 Widening with shift first, mask last:
2587 container = (type_out) container;
2588 shifted = container >> bitpos;
2589 result = shifted & ((1 << bitsize) - 1);
2590
2591 Narrowing:
2592 masked = container & (((1 << bitsize) - 1) << bitpos);
2593 result = masked >> bitpos;
2594 result = (type_out) result;
2595
2596 If the bitfield is signed and it's wider than type_out, we need to
2597 keep the result sign-extended:
2598 container = (type) container;
2599 masked = container << (prec - bitsize - bitpos);
2600 result = (type_out) (masked >> (prec - bitsize));
2601
2602 Here type is the signed variant of the wider of type_out and the type
2603 of container.
2604
2605 The shifting is always optional depending on whether bitpos != 0.
2606
2607   When the original bitfield was inside a gcond then a new gcond is also
2608   generated with the new `result` as the operand to the comparison.
2609
2610*/
2611
2612static gimple *
2613vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2614 tree *type_out)
2615{
2616 gimple *bf_stmt = NULL;
2617 tree lhs = NULL_TREE;
2618 tree ret_type = NULL_TREE;
2619 gimple *stmt = STMT_VINFO_STMT (stmt_info);
2620 if (gcond *cond_stmt = dyn_cast <gcond *> (p: stmt))
2621 {
2622 tree op = gimple_cond_lhs (gs: cond_stmt);
2623 if (TREE_CODE (op) != SSA_NAME)
2624 return NULL;
2625 bf_stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op));
2626 if (TREE_CODE (gimple_cond_rhs (cond_stmt)) != INTEGER_CST)
2627 return NULL;
2628 }
2629 else if (is_gimple_assign (gs: stmt)
2630 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
2631 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
2632 {
2633 gimple *second_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
2634 bf_stmt = dyn_cast <gassign *> (p: second_stmt);
2635 lhs = gimple_assign_lhs (gs: stmt);
2636 ret_type = TREE_TYPE (lhs);
2637 }
2638
2639 if (!bf_stmt
2640 || gimple_assign_rhs_code (gs: bf_stmt) != BIT_FIELD_REF)
2641 return NULL;
2642
2643 tree bf_ref = gimple_assign_rhs1 (gs: bf_stmt);
2644 tree container = TREE_OPERAND (bf_ref, 0);
2645 ret_type = ret_type ? ret_type : TREE_TYPE (container);
2646
2647 if (!bit_field_offset (t: bf_ref).is_constant ()
2648 || !bit_field_size (t: bf_ref).is_constant ()
2649 || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container))))
2650 return NULL;
2651
2652 if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref))
2653 || !INTEGRAL_TYPE_P (TREE_TYPE (container))
2654 || TYPE_MODE (TREE_TYPE (container)) == E_BLKmode)
2655 return NULL;
2656
2657 gimple *use_stmt, *pattern_stmt;
2658 use_operand_p use_p;
2659 bool shift_first = true;
2660 tree container_type = TREE_TYPE (container);
2661 tree vectype = get_vectype_for_scalar_type (vinfo, container_type);
2662
2663 /* Calculate shift_n before the adjustments for widening loads, otherwise
2664 the container may change and we have to consider offset change for
2665 widening loads on big endianness. The shift_n calculated here can be
2666 independent of widening. */
2667 unsigned HOST_WIDE_INT shift_n = bit_field_offset (t: bf_ref).to_constant ();
2668 unsigned HOST_WIDE_INT mask_width = bit_field_size (t: bf_ref).to_constant ();
2669 unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2670 if (BYTES_BIG_ENDIAN)
2671 shift_n = prec - shift_n - mask_width;
2672
2673 bool ref_sext = (!TYPE_UNSIGNED (TREE_TYPE (bf_ref)) &&
2674 TYPE_PRECISION (ret_type) > mask_width);
2675 bool load_widen = (TYPE_PRECISION (TREE_TYPE (container)) <
2676 TYPE_PRECISION (ret_type));
2677
2678 /* We move the conversion earlier if the loaded type is smaller than the
2679 return type to enable the use of widening loads. And if we need a
2680 sign extension, we need to convert the loaded value early to a signed
2681 type as well. */
2682 if (ref_sext || load_widen)
2683 {
2684 tree type = load_widen ? ret_type : container_type;
2685 if (ref_sext)
2686 type = gimple_signed_type (type);
2687 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type),
2688 NOP_EXPR, container);
2689 container = gimple_get_lhs (pattern_stmt);
2690 container_type = TREE_TYPE (container);
2691 prec = tree_to_uhwi (TYPE_SIZE (container_type));
2692 vectype = get_vectype_for_scalar_type (vinfo, container_type);
2693 append_pattern_def_seq (vinfo, stmt_info, new_stmt: pattern_stmt, vectype);
2694 }
2695 else if (!useless_type_conversion_p (TREE_TYPE (container), ret_type))
2696 /* If we are doing the conversion last then also delay the shift as we may
2697 be able to combine the shift and conversion in certain cases. */
2698 shift_first = false;
2699
2700 /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
2701 PLUS_EXPR then do the shift last as some targets can combine the shift and
2702 add into a single instruction. */
2703 if (lhs && single_imm_use (var: lhs, use_p: &use_p, stmt: &use_stmt))
2704 {
2705 if (gimple_code (g: use_stmt) == GIMPLE_ASSIGN
2706 && gimple_assign_rhs_code (gs: use_stmt) == PLUS_EXPR)
2707 shift_first = false;
2708 }
2709
2710 /* If we don't have to shift we only generate the mask, so just fix the
2711 code-path to shift_first. */
2712 if (shift_n == 0)
2713 shift_first = true;
2714
2715 tree result;
2716 if (shift_first && !ref_sext)
2717 {
2718 tree shifted = container;
2719 if (shift_n)
2720 {
2721 pattern_stmt
2722 = gimple_build_assign (vect_recog_temp_ssa_var (type: container_type),
2723 RSHIFT_EXPR, container,
2724 build_int_cst (sizetype, shift_n));
2725 shifted = gimple_assign_lhs (gs: pattern_stmt);
2726 append_pattern_def_seq (vinfo, stmt_info, new_stmt: pattern_stmt, vectype);
2727 }
2728
2729 tree mask = wide_int_to_tree (type: container_type,
2730 cst: wi::mask (width: mask_width, negate_p: false, precision: prec));
2731
2732 pattern_stmt
2733 = gimple_build_assign (vect_recog_temp_ssa_var (type: container_type),
2734 BIT_AND_EXPR, shifted, mask);
2735 result = gimple_assign_lhs (gs: pattern_stmt);
2736 }
2737 else
2738 {
2739 tree temp = vect_recog_temp_ssa_var (type: container_type);
2740 if (!ref_sext)
2741 {
2742 tree mask = wide_int_to_tree (type: container_type,
2743 cst: wi::shifted_mask (start: shift_n,
2744 width: mask_width,
2745 negate_p: false, precision: prec));
2746 pattern_stmt = gimple_build_assign (temp, BIT_AND_EXPR,
2747 container, mask);
2748 }
2749 else
2750 {
2751 HOST_WIDE_INT shl = prec - shift_n - mask_width;
2752 shift_n += shl;
2753 pattern_stmt = gimple_build_assign (temp, LSHIFT_EXPR,
2754 container,
2755 build_int_cst (sizetype,
2756 shl));
2757 }
2758
2759 tree masked = gimple_assign_lhs (gs: pattern_stmt);
2760 append_pattern_def_seq (vinfo, stmt_info, new_stmt: pattern_stmt, vectype);
2761 pattern_stmt
2762 = gimple_build_assign (vect_recog_temp_ssa_var (type: container_type),
2763 RSHIFT_EXPR, masked,
2764 build_int_cst (sizetype, shift_n));
2765 result = gimple_assign_lhs (gs: pattern_stmt);
2766 }
2767
2768 if (!useless_type_conversion_p (TREE_TYPE (result), ret_type))
2769 {
2770 append_pattern_def_seq (vinfo, stmt_info, new_stmt: pattern_stmt, vectype);
2771 pattern_stmt
2772 = gimple_build_assign (vect_recog_temp_ssa_var (type: ret_type),
2773 NOP_EXPR, result);
2774 }
2775
2776 if (!lhs)
2777 {
2778 if (!vectype)
2779 return NULL;
2780
2781 append_pattern_def_seq (vinfo, stmt_info, new_stmt: pattern_stmt, vectype);
2782 vectype = truth_type_for (vectype);
2783
2784 /* FIXME: This part extracts the boolean value out of the bitfield in the
2785 same way as vect_recog_gcond_pattern does. However because
2786 patterns cannot match the same root twice, when we handle and
2787 lower the bitfield in the gcond, vect_recog_gcond_pattern can't
2788 apply anymore. We should really fix it so that we don't need to
2789 duplicate transformations like these. */
2790 tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
2791 gcond *cond_stmt = dyn_cast <gcond *> (p: stmt_info->stmt);
2792 tree cond_cst = gimple_cond_rhs (gs: cond_stmt);
2793 gimple *new_stmt
2794 = gimple_build_assign (new_lhs, gimple_cond_code (gs: cond_stmt),
2795 gimple_get_lhs (pattern_stmt),
2796 fold_convert (container_type, cond_cst));
2797 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype, scalar_type_for_mask: container_type);
2798 pattern_stmt
2799 = gimple_build_cond (NE_EXPR, new_lhs,
2800 build_zero_cst (TREE_TYPE (new_lhs)),
2801 NULL_TREE, NULL_TREE);
2802 }
2803
2804 *type_out = STMT_VINFO_VECTYPE (stmt_info);
2805 vect_pattern_detected (name: "bitfield_ref pattern", stmt: stmt_info->stmt);
2806
2807 return pattern_stmt;
2808}
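
/* A worked instance, little-endian, no widening and no sign extension:
   for BIT_FIELD_REF (container, 5, 3) on a 32-bit container, shift_n == 3
   and mask_width == 5, so the "shift first" form is

     shifted = container >> 3;
     result  = shifted & 0x1f;

   and the "mask first" form is

     masked  = container & (0x1f << 3);
     result  = masked >> 3;

   both extracting the same five bits.  */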
2809
2810/* Function vect_recog_bit_insert_pattern
2811
2812 Try to find the following pattern:
2813
2814 written = BIT_INSERT_EXPR (container, value, bitpos);
2815
2816 Input:
2817
2818 * STMT_VINFO: The stmt we want to replace.
2819
2820 Output:
2821
2822 * TYPE_OUT: The vector type of the output of this pattern.
2823
2824 * Return value: A new stmt that will be used to replace the sequence of
2825 stmts that constitute the pattern. In this case it will be:
2826 value = (container_type) value; // Make sure
2827 shifted = value << bitpos; // Shift value into place
2828 masked = shifted & (mask << bitpos); // Mask off the non-relevant bits in
2829 // the 'to-write value'.
2830 cleared = container & ~(mask << bitpos); // Clearing the bits we want to
2831 // write to from the value we want
2832 // to write to.
2833 written = cleared | masked; // Write bits.
2834
2835
2836 where mask = ((1 << TYPE_PRECISION (value)) - 1), a mask to keep the number of
2837 bits corresponding to the real size of the bitfield value we are writing to.
2838 The shifting is always optional depending on whether bitpos != 0.
2839
2840*/
2841
2842static gimple *
2843vect_recog_bit_insert_pattern (vec_info *vinfo, stmt_vec_info stmt_info,
2844 tree *type_out)
2845{
2846 gassign *bf_stmt = dyn_cast <gassign *> (p: stmt_info->stmt);
2847 if (!bf_stmt || gimple_assign_rhs_code (gs: bf_stmt) != BIT_INSERT_EXPR)
2848 return NULL;
2849
2850 tree container = gimple_assign_rhs1 (gs: bf_stmt);
2851 tree value = gimple_assign_rhs2 (gs: bf_stmt);
2852 tree shift = gimple_assign_rhs3 (gs: bf_stmt);
2853
2854 tree bf_type = TREE_TYPE (value);
2855 tree container_type = TREE_TYPE (container);
2856
2857 if (!INTEGRAL_TYPE_P (container_type)
2858 || !tree_fits_uhwi_p (TYPE_SIZE (container_type)))
2859 return NULL;
2860
2861 gimple *pattern_stmt;
2862
2863 vect_unpromoted_value unprom;
2864 unprom.set_op (op_in: value, dt_in: vect_internal_def);
2865 value = vect_convert_input (vinfo, stmt_info, type: container_type, unprom: &unprom,
2866 vectype: get_vectype_for_scalar_type (vinfo,
2867 container_type));
2868
2869 unsigned HOST_WIDE_INT mask_width = TYPE_PRECISION (bf_type);
2870 unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type));
2871 unsigned HOST_WIDE_INT shift_n = tree_to_uhwi (shift);
2872 if (BYTES_BIG_ENDIAN)
2873 {
2874 shift_n = prec - shift_n - mask_width;
2875 shift = build_int_cst (TREE_TYPE (shift), shift_n);
2876 }
2877
2878 if (!useless_type_conversion_p (TREE_TYPE (value), container_type))
2879 {
2880 pattern_stmt =
2881 gimple_build_assign (vect_recog_temp_ssa_var (type: container_type),
2882 NOP_EXPR, value);
2883 append_pattern_def_seq (vinfo, stmt_info, new_stmt: pattern_stmt);
2884 value = gimple_get_lhs (pattern_stmt);
2885 }
2886
2887 /* Shift VALUE into place. */
2888 tree shifted = value;
2889 if (shift_n)
2890 {
2891 gimple_seq stmts = NULL;
2892 shifted
2893 = gimple_build (seq: &stmts, code: LSHIFT_EXPR, type: container_type, ops: value, ops: shift);
2894 if (!gimple_seq_empty_p (s: stmts))
2895 append_pattern_def_seq (vinfo, stmt_info,
2896 new_stmt: gimple_seq_first_stmt (s: stmts));
2897 }
2898
2899 tree mask_t
2900 = wide_int_to_tree (type: container_type,
2901 cst: wi::shifted_mask (start: shift_n, width: mask_width, negate_p: false, precision: prec));
2902
2903 /* Clear bits we don't want to write back from SHIFTED. */
2904 gimple_seq stmts = NULL;
2905 tree masked = gimple_build (seq: &stmts, code: BIT_AND_EXPR, type: container_type, ops: shifted,
2906 ops: mask_t);
2907 if (!gimple_seq_empty_p (s: stmts))
2908 {
2909 pattern_stmt = gimple_seq_first_stmt (s: stmts);
2910 append_pattern_def_seq (vinfo, stmt_info, new_stmt: pattern_stmt);
2911 }
2912
2913 /* Mask off the bits in the container that we are to write to. */
2914 mask_t = wide_int_to_tree (type: container_type,
2915 cst: wi::shifted_mask (start: shift_n, width: mask_width, negate_p: true, precision: prec));
2916 tree cleared = vect_recog_temp_ssa_var (type: container_type);
2917 pattern_stmt = gimple_build_assign (cleared, BIT_AND_EXPR, container, mask_t);
2918 append_pattern_def_seq (vinfo, stmt_info, new_stmt: pattern_stmt);
2919
2920 /* Write MASKED into CLEARED. */
2921 pattern_stmt
2922 = gimple_build_assign (vect_recog_temp_ssa_var (type: container_type),
2923 BIT_IOR_EXPR, cleared, masked);
2924
2925 *type_out = STMT_VINFO_VECTYPE (stmt_info);
2926 vect_pattern_detected (name: "bit_insert pattern", stmt: stmt_info->stmt);
2927
2928 return pattern_stmt;
2929}
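
/* A worked instance, little-endian: for
   BIT_INSERT_EXPR (container, value, 3) with a 5-bit value and a 32-bit
   container, the generated sequence computes

     shifted = (container_type) value << 3;
     masked  = shifted & (0x1f << 3);
     cleared = container & ~(0x1f << 3);
     written = cleared | masked;

   i.e. bits 3..7 of the container are replaced by the low five bits of
   VALUE while all other bits are preserved.  */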
2930
2931
2932/* Recognize cases in which an operation is performed in one type WTYPE
2933 but could be done more efficiently in a narrower type NTYPE. For example,
2934 if we have:
2935
2936 ATYPE a; // narrower than NTYPE
2937 BTYPE b; // narrower than NTYPE
2938 WTYPE aw = (WTYPE) a;
2939 WTYPE bw = (WTYPE) b;
2940 WTYPE res = aw + bw; // only uses of aw and bw
2941
2942 then it would be more efficient to do:
2943
2944 NTYPE an = (NTYPE) a;
2945 NTYPE bn = (NTYPE) b;
2946 NTYPE resn = an + bn;
2947 WTYPE res = (WTYPE) resn;
2948
2949 Other situations include things like:
2950
2951 ATYPE a; // NTYPE or narrower
2952 WTYPE aw = (WTYPE) a;
2953 WTYPE res = aw + b;
2954
2955 when only "(NTYPE) res" is significant. In that case it's more efficient
2956 to truncate "b" and do the operation on NTYPE instead:
2957
2958 NTYPE an = (NTYPE) a;
2959 NTYPE bn = (NTYPE) b; // truncation
2960 NTYPE resn = an + bn;
2961 WTYPE res = (WTYPE) resn;
2962
2963 All users of "res" should then use "resn" instead, making the final
2964 statement dead (not marked as relevant). The final statement is still
2965 needed to maintain the type correctness of the IR.
2966
2967 vect_determine_precisions has already determined the minimum
2968   precision of the operation and the minimum precision required
2969 by users of the result. */
2970
2971static gimple *
2972vect_recog_over_widening_pattern (vec_info *vinfo,
2973 stmt_vec_info last_stmt_info, tree *type_out)
2974{
2975 gassign *last_stmt = dyn_cast <gassign *> (p: last_stmt_info->stmt);
2976 if (!last_stmt)
2977 return NULL;
2978
2979 /* See whether we have found that this operation can be done on a
2980 narrower type without changing its semantics. */
2981 unsigned int new_precision = last_stmt_info->operation_precision;
2982 if (!new_precision)
2983 return NULL;
2984
2985 tree lhs = gimple_assign_lhs (gs: last_stmt);
2986 tree type = TREE_TYPE (lhs);
2987 tree_code code = gimple_assign_rhs_code (gs: last_stmt);
2988
2989 /* Punt for reductions where we don't handle the type conversions. */
2990 if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
2991 return NULL;
2992
2993 /* Keep the first operand of a COND_EXPR as-is: only the other two
2994 operands are interesting. */
2995 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
2996
2997 /* Check the operands. */
2998 unsigned int nops = gimple_num_ops (gs: last_stmt) - first_op;
2999 auto_vec <vect_unpromoted_value, 3> unprom (nops);
3000 unprom.quick_grow_cleared (len: nops);
3001 unsigned int min_precision = 0;
3002 bool single_use_p = false;
3003 for (unsigned int i = 0; i < nops; ++i)
3004 {
3005 tree op = gimple_op (gs: last_stmt, i: first_op + i);
3006 if (TREE_CODE (op) == INTEGER_CST)
3007 unprom[i].set_op (op_in: op, dt_in: vect_constant_def);
3008 else if (TREE_CODE (op) == SSA_NAME)
3009 {
3010 bool op_single_use_p = true;
3011 if (!vect_look_through_possible_promotion (vinfo, op, unprom: &unprom[i],
3012 single_use_p: &op_single_use_p))
3013 return NULL;
3014 /* If:
3015
3016 (1) N bits of the result are needed;
3017 (2) all inputs are widened from M<N bits; and
3018 (3) one operand OP is a single-use SSA name
3019
3020 we can shift the M->N widening from OP to the output
3021 without changing the number or type of extensions involved.
3022 This then reduces the number of copies of STMT_INFO.
3023
3024 If instead of (3) more than one operand is a single-use SSA name,
3025 shifting the extension to the output is even more of a win.
3026
3027 If instead:
3028
3029 (1) N bits of the result are needed;
3030 (2) one operand OP2 is widened from M2<N bits;
3031 (3) another operand OP1 is widened from M1<M2 bits; and
3032 (4) both OP1 and OP2 are single-use
3033
3034 the choice is between:
3035
3036 (a) truncating OP2 to M1, doing the operation on M1,
3037 and then widening the result to N
3038
3039 (b) widening OP1 to M2, doing the operation on M2, and then
3040 widening the result to N
3041
3042 Both shift the M2->N widening of the inputs to the output.
3043 (a) additionally shifts the M1->M2 widening to the output;
3044 it requires fewer copies of STMT_INFO but requires an extra
3045 M2->M1 truncation.
3046
3047 Which is better will depend on the complexity and cost of
3048 STMT_INFO, which is hard to predict at this stage. However,
3049 a clear tie-breaker in favor of (b) is the fact that the
3050 truncation in (a) increases the length of the operation chain.
3051
3052 If instead of (4) only one of OP1 or OP2 is single-use,
3053 (b) is still a win over doing the operation in N bits:
3054 it still shifts the M2->N widening on the single-use operand
3055 to the output and reduces the number of STMT_INFO copies.
3056
3057 If neither operand is single-use then operating on fewer than
3058 N bits might lead to more extensions overall. Whether it does
3059 or not depends on global information about the vectorization
3060 region, and whether that's a good trade-off would again
3061 depend on the complexity and cost of the statements involved,
3062 as well as things like register pressure that are not normally
3063 modelled at this stage. We therefore ignore these cases
3064 and just optimize the clear single-use wins above.
3065
3066 Thus we take the maximum precision of the unpromoted operands
3067 and record whether any operand is single-use. */
3068 if (unprom[i].dt == vect_internal_def)
3069 {
3070 min_precision = MAX (min_precision,
3071 TYPE_PRECISION (unprom[i].type));
3072 single_use_p |= op_single_use_p;
3073 }
3074 }
3075 else
3076 return NULL;
3077 }
3078
3079 /* Although the operation could be done in operation_precision, we have
3080 to balance that against introducing extra truncations or extensions.
3081 Calculate the minimum precision that can be handled efficiently.
3082
3083 The loop above determined that the operation could be handled
3084 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
3085 extension from the inputs to the output without introducing more
3086 instructions, and would reduce the number of instructions required
3087 for STMT_INFO itself.
3088
3089 vect_determine_precisions has also determined that the result only
3090 needs min_output_precision bits. Truncating by a factor of N times
3091 requires a tree of N - 1 instructions, so if TYPE is N times wider
3092 than min_output_precision, doing the operation in TYPE and truncating
3093 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
3094 In contrast:
3095
3096 - truncating the input to a unary operation and doing the operation
3097 in the new type requires at most N - 1 + 1 = N instructions per
3098 output vector
3099
3100 - doing the same for a binary operation requires at most
3101 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
3102
3103 Both unary and binary operations require fewer instructions than
3104 this if the operands were extended from a suitable truncated form.
3105 Thus there is usually nothing to lose by doing operations in
3106 min_output_precision bits, but there can be something to gain. */
3107 if (!single_use_p)
3108 min_precision = last_stmt_info->min_output_precision;
3109 else
3110 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
3111
3112 /* Apply the minimum efficient precision we just calculated. */
3113 if (new_precision < min_precision)
3114 new_precision = min_precision;
3115 new_precision = vect_element_precision (precision: new_precision);
3116 if (new_precision >= TYPE_PRECISION (type))
3117 return NULL;
3118
3119 vect_pattern_detected (name: "vect_recog_over_widening_pattern", stmt: last_stmt);
3120
3121 *type_out = get_vectype_for_scalar_type (vinfo, type);
3122 if (!*type_out)
3123 return NULL;
3124
3125 /* We've found a viable pattern. Get the new type of the operation. */
3126 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
3127 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
3128
3129 /* If we're truncating an operation, we need to make sure that we
3130 don't introduce new undefined overflow. The codes tested here are
3131 a subset of those accepted by vect_truncatable_operation_p. */
3132 tree op_type = new_type;
3133 if (TYPE_OVERFLOW_UNDEFINED (new_type)
3134 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
3135 op_type = build_nonstandard_integer_type (new_precision, true);
3136
3137 /* We specifically don't check here whether the target supports the
3138 new operation, since it might be something that a later pattern
3139 wants to rewrite anyway. If targets have a minimum element size
3140 for some optabs, we should pattern-match smaller ops to larger ops
3141 where beneficial. */
3142 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3143 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
3144 if (!new_vectype || !op_vectype)
3145 return NULL;
3146
3147 if (dump_enabled_p ())
3148 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
3149 type, new_type);
3150
3151 /* Calculate the rhs operands for an operation on OP_TYPE. */
3152 tree ops[3] = {};
3153 for (unsigned int i = 1; i < first_op; ++i)
3154 ops[i - 1] = gimple_op (gs: last_stmt, i);
3155 vect_convert_inputs (vinfo, stmt_info: last_stmt_info, n: nops, result: &ops[first_op - 1],
3156 type: op_type, unprom: &unprom[0], vectype: op_vectype);
3157
3158 /* Use the operation to produce a result of type OP_TYPE. */
3159 tree new_var = vect_recog_temp_ssa_var (type: op_type, NULL);
3160 gimple *pattern_stmt = gimple_build_assign (new_var, code,
3161 ops[0], ops[1], ops[2]);
3162 gimple_set_location (g: pattern_stmt, location: gimple_location (g: last_stmt));
3163
3164 if (dump_enabled_p ())
3165 dump_printf_loc (MSG_NOTE, vect_location,
3166 "created pattern stmt: %G", pattern_stmt);
3167
3168 /* Convert back to the original signedness, if OP_TYPE is different
3169 from NEW_TYPE. */
3170 if (op_type != new_type)
3171 pattern_stmt = vect_convert_output (vinfo, stmt_info: last_stmt_info, type: new_type,
3172 pattern_stmt, vecitype: op_vectype);
3173
3174 /* Promote the result to the original type. */
3175 pattern_stmt = vect_convert_output (vinfo, stmt_info: last_stmt_info, type,
3176 pattern_stmt, vecitype: new_vectype);
3177
3178 return pattern_stmt;
3179}
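
/* For illustration, with

     unsigned char a, b, *out;
     int res = (int) a + (int) b;
     *out = res;

   only the low 8 bits of RES are ever used, so the addition can be done in
   a type much narrower than int; the conversion back to int that this
   function emits last is kept only to preserve the types in the IL and is
   normally dead.  */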
3180
3181/* Recognize the following patterns:
3182
3183 ATYPE a; // narrower than TYPE
3184 BTYPE b; // narrower than TYPE
3185
3186 1) Multiply high with scaling
3187 TYPE res = ((TYPE) a * (TYPE) b) >> c;
3188 Here, c is bitsize (TYPE) / 2 - 1.
3189
3190 2) ... or also with rounding
3191 TYPE res = (((TYPE) a * (TYPE) b) >> d + 1) >> 1;
3192 Here, d is bitsize (TYPE) / 2 - 2.
3193
3194 3) Normal multiply high
3195 TYPE res = ((TYPE) a * (TYPE) b) >> e;
3196 Here, e is bitsize (TYPE) / 2.
3197
3198 where only the bottom half of res is used. */
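
/* For illustration, case 3 with 16-bit inputs is roughly

     short a, b;
     short res = ((int) a * (int) b) >> 16;

   i.e. the high half of the 32-bit product, which targets with a
   highpart-multiply instruction can compute without forming the full
   32-bit product; cases 1 and 2 similarly map to the fixed-point
   MULHS/MULHRS style operations.  */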
3199
3200static gimple *
3201vect_recog_mulhs_pattern (vec_info *vinfo,
3202 stmt_vec_info last_stmt_info, tree *type_out)
3203{
3204 /* Check for a right shift. */
3205 gassign *last_stmt = dyn_cast <gassign *> (p: last_stmt_info->stmt);
3206 if (!last_stmt
3207 || gimple_assign_rhs_code (gs: last_stmt) != RSHIFT_EXPR)
3208 return NULL;
3209
3210 /* Check that the shift result is wider than the users of the
3211 result need (i.e. that narrowing would be a natural choice). */
3212 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
3213 unsigned int target_precision
3214 = vect_element_precision (precision: last_stmt_info->min_output_precision);
3215 if (!INTEGRAL_TYPE_P (lhs_type)
3216 || target_precision >= TYPE_PRECISION (lhs_type))
3217 return NULL;
3218
3219 /* Look through any change in sign on the outer shift input. */
3220 vect_unpromoted_value unprom_rshift_input;
3221 tree rshift_input = vect_look_through_possible_promotion
3222 (vinfo, op: gimple_assign_rhs1 (gs: last_stmt), unprom: &unprom_rshift_input);
3223 if (!rshift_input
3224 || TYPE_PRECISION (TREE_TYPE (rshift_input))
3225 != TYPE_PRECISION (lhs_type))
3226 return NULL;
3227
3228 /* Get the definition of the shift input. */
3229 stmt_vec_info rshift_input_stmt_info
3230 = vect_get_internal_def (vinfo, op: rshift_input);
3231 if (!rshift_input_stmt_info)
3232 return NULL;
3233 gassign *rshift_input_stmt
3234 = dyn_cast <gassign *> (p: rshift_input_stmt_info->stmt);
3235 if (!rshift_input_stmt)
3236 return NULL;
3237
3238 stmt_vec_info mulh_stmt_info;
3239 tree scale_term;
3240 bool rounding_p = false;
3241
3242 /* Check for the presence of the rounding term. */
3243 if (gimple_assign_rhs_code (gs: rshift_input_stmt) == PLUS_EXPR)
3244 {
3245 /* Check that the outer shift was by 1. */
3246 if (!integer_onep (gimple_assign_rhs2 (gs: last_stmt)))
3247 return NULL;
3248
3249 /* Check that the second operand of the PLUS_EXPR is 1. */
3250 if (!integer_onep (gimple_assign_rhs2 (gs: rshift_input_stmt)))
3251 return NULL;
3252
3253 /* Look through any change in sign on the addition input. */
3254 vect_unpromoted_value unprom_plus_input;
3255 tree plus_input = vect_look_through_possible_promotion
3256 (vinfo, op: gimple_assign_rhs1 (gs: rshift_input_stmt), unprom: &unprom_plus_input);
3257 if (!plus_input
3258 || TYPE_PRECISION (TREE_TYPE (plus_input))
3259 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
3260 return NULL;
3261
3262 /* Get the definition of the multiply-high-scale part. */
3263 stmt_vec_info plus_input_stmt_info
3264 = vect_get_internal_def (vinfo, op: plus_input);
3265 if (!plus_input_stmt_info)
3266 return NULL;
3267 gassign *plus_input_stmt
3268 = dyn_cast <gassign *> (p: plus_input_stmt_info->stmt);
3269 if (!plus_input_stmt
3270 || gimple_assign_rhs_code (gs: plus_input_stmt) != RSHIFT_EXPR)
3271 return NULL;
3272
3273 /* Look through any change in sign on the scaling input. */
3274 vect_unpromoted_value unprom_scale_input;
3275 tree scale_input = vect_look_through_possible_promotion
3276 (vinfo, op: gimple_assign_rhs1 (gs: plus_input_stmt), unprom: &unprom_scale_input);
3277 if (!scale_input
3278 || TYPE_PRECISION (TREE_TYPE (scale_input))
3279 != TYPE_PRECISION (TREE_TYPE (plus_input)))
3280 return NULL;
3281
3282 /* Get the definition of the multiply-high part. */
3283 mulh_stmt_info = vect_get_internal_def (vinfo, op: scale_input);
3284 if (!mulh_stmt_info)
3285 return NULL;
3286
3287 /* Get the scaling term. */
3288 scale_term = gimple_assign_rhs2 (gs: plus_input_stmt);
3289 rounding_p = true;
3290 }
3291 else
3292 {
3293 mulh_stmt_info = rshift_input_stmt_info;
3294 scale_term = gimple_assign_rhs2 (gs: last_stmt);
3295 }
3296
3297 /* Check that the scaling factor is constant. */
3298 if (TREE_CODE (scale_term) != INTEGER_CST)
3299 return NULL;
3300
3301 /* Check whether the scaling input term can be seen as two widened
3302 inputs multiplied together. */
3303 vect_unpromoted_value unprom_mult[2];
3304 tree new_type;
3305 unsigned int nops
3306 = vect_widened_op_tree (vinfo, stmt_info: mulh_stmt_info, code: MULT_EXPR, widened_code: WIDEN_MULT_EXPR,
3307 shift_p: false, max_nops: 2, unprom: unprom_mult, common_type: &new_type);
3308 if (nops != 2)
3309 return NULL;
3310
3311 /* Adjust output precision. */
3312 if (TYPE_PRECISION (new_type) < target_precision)
3313 new_type = build_nonstandard_integer_type
3314 (target_precision, TYPE_UNSIGNED (new_type));
3315
3316 unsigned mult_precision = TYPE_PRECISION (new_type);
3317 internal_fn ifn;
3318 /* Check that the scaling factor is as expected. Instead of
3319 target_precision, we should use the one that we actually
3320 use for the internal function. */
3321 if (rounding_p)
3322 {
3323 /* Check pattern 2). */
3324 if (wi::to_widest (t: scale_term) + mult_precision + 2
3325 != TYPE_PRECISION (lhs_type))
3326 return NULL;
3327
3328 ifn = IFN_MULHRS;
3329 }
3330 else
3331 {
3332 /* Check for pattern 1). */
3333 if (wi::to_widest (t: scale_term) + mult_precision + 1
3334 == TYPE_PRECISION (lhs_type))
3335 ifn = IFN_MULHS;
3336 /* Check for pattern 3). */
3337 else if (wi::to_widest (t: scale_term) + mult_precision
3338 == TYPE_PRECISION (lhs_type))
3339 ifn = IFN_MULH;
3340 else
3341 return NULL;
3342 }
3343
3344 vect_pattern_detected (name: "vect_recog_mulhs_pattern", stmt: last_stmt);
3345
3346 /* Check for target support. */
3347 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3348 if (!new_vectype
3349 || !direct_internal_fn_supported_p
3350 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
3351 return NULL;
3352
3353 /* The IR requires a valid vector type for the cast result, even though
3354 it's likely to be discarded. */
3355 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
3356 if (!*type_out)
3357 return NULL;
3358
3359 /* Generate the IFN_MULH, IFN_MULHS or IFN_MULHRS call. */
3360 tree new_var = vect_recog_temp_ssa_var (type: new_type, NULL);
3361 tree new_ops[2];
3362 vect_convert_inputs (vinfo, stmt_info: last_stmt_info, n: 2, result: new_ops, type: new_type,
3363 unprom: unprom_mult, vectype: new_vectype);
3364 gcall *mulhrs_stmt
3365 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
3366 gimple_call_set_lhs (gs: mulhrs_stmt, lhs: new_var);
3367 gimple_set_location (g: mulhrs_stmt, location: gimple_location (g: last_stmt));
3368
3369 if (dump_enabled_p ())
3370 dump_printf_loc (MSG_NOTE, vect_location,
3371 "created pattern stmt: %G", (gimple *) mulhrs_stmt);
3372
3373 return vect_convert_output (vinfo, stmt_info: last_stmt_info, type: lhs_type,
3374 pattern_stmt: mulhrs_stmt, vecitype: new_vectype);
3375}
3376
3377/* Recognize the patterns:
3378
3379 ATYPE a; // narrower than TYPE
3380 BTYPE b; // narrower than TYPE
3381 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
3382 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
3383
3384 where only the bottom half of avg is used. Try to transform them into:
3385
3386 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
3387 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
3388
3389 followed by:
3390
3391 TYPE avg = (TYPE) avg';
3392
3393 where NTYPE is no wider than half of TYPE. Since only the bottom half
3394 of avg is used, all or part of the cast of avg' should become redundant.
3395
3396 If there is no target support available, generate code to distribute rshift
3397 over plus and add a carry. */
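
/* For instance (one possible instantiation, assuming 8-bit ATYPE/BTYPE
   and 32-bit TYPE), pattern (2) above covers:

     uint8_t a, b;
     int avg = ((int) a + (int) b + 1) >> 1;   // only the low 8 bits used

   which becomes

     uint8_t avg' = .AVG_CEIL (a, b);

   The 9-bit intermediate sum (e.g. a = 200, b = 201 gives 401) is handled
   inside the IFN, so no wider vector elements are needed.  */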
3398
3399static gimple *
3400vect_recog_average_pattern (vec_info *vinfo,
3401 stmt_vec_info last_stmt_info, tree *type_out)
3402{
3403 /* Check for a shift right by one bit. */
3404 gassign *last_stmt = dyn_cast <gassign *> (p: last_stmt_info->stmt);
3405 if (!last_stmt
3406 || gimple_assign_rhs_code (gs: last_stmt) != RSHIFT_EXPR
3407 || !integer_onep (gimple_assign_rhs2 (gs: last_stmt)))
3408 return NULL;
3409
3410 /* Check that the shift result is wider than the users of the
3411 result need (i.e. that narrowing would be a natural choice). */
3412 tree lhs = gimple_assign_lhs (gs: last_stmt);
3413 tree type = TREE_TYPE (lhs);
3414 unsigned int target_precision
3415 = vect_element_precision (precision: last_stmt_info->min_output_precision);
3416 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
3417 return NULL;
3418
3419 /* Look through any change in sign on the shift input. */
3420 tree rshift_rhs = gimple_assign_rhs1 (gs: last_stmt);
3421 vect_unpromoted_value unprom_plus;
3422 rshift_rhs = vect_look_through_possible_promotion (vinfo, op: rshift_rhs,
3423 unprom: &unprom_plus);
3424 if (!rshift_rhs
3425 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
3426 return NULL;
3427
3428 /* Get the definition of the shift input. */
3429 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, op: rshift_rhs);
3430 if (!plus_stmt_info)
3431 return NULL;
3432
3433 /* Check whether the shift input can be seen as a tree of additions on
3434 2 or 3 widened inputs.
3435
3436 Note that the pattern should be a win even if the result of one or
3437 more additions is reused elsewhere: if the pattern matches, we'd be
3438 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
3439 internal_fn ifn = IFN_AVG_FLOOR;
3440 vect_unpromoted_value unprom[3];
3441 tree new_type;
3442 unsigned int nops = vect_widened_op_tree (vinfo, stmt_info: plus_stmt_info, code: PLUS_EXPR,
3443 widened_code: IFN_VEC_WIDEN_PLUS, shift_p: false, max_nops: 3,
3444 unprom, common_type: &new_type);
3445 if (nops == 0)
3446 return NULL;
3447 if (nops == 3)
3448 {
3449 /* Check that one operand is 1. */
3450 unsigned int i;
3451 for (i = 0; i < 3; ++i)
3452 if (integer_onep (unprom[i].op))
3453 break;
3454 if (i == 3)
3455 return NULL;
3456 /* Throw away the 1 operand and keep the other two. */
3457 if (i < 2)
3458 unprom[i] = unprom[2];
3459 ifn = IFN_AVG_CEIL;
3460 }
3461
3462 vect_pattern_detected (name: "vect_recog_average_pattern", stmt: last_stmt);
3463
3464 /* We know that:
3465
3466 (a) the operation can be viewed as:
3467
3468 TYPE widened0 = (TYPE) UNPROM[0];
3469 TYPE widened1 = (TYPE) UNPROM[1];
3470 TYPE tmp1 = widened0 + widened1 {+ 1};
3471 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
3472
3473 (b) the first two statements are equivalent to:
3474
3475 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
3476 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
3477
3478 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
3479 where sensible;
3480
3481 (d) all the operations can be performed correctly at twice the width of
3482 NEW_TYPE, due to the nature of the average operation; and
3483
3484 (e) users of the result of the right shift need only TARGET_PRECISION
3485 bits, where TARGET_PRECISION is no more than half of TYPE's
3486 precision.
3487
3488 Under these circumstances, the only situation in which NEW_TYPE
3489 could be narrower than TARGET_PRECISION is if widened0, widened1
3490 and an addition result are all used more than once. Thus we can
3491 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
3492 as "free", whereas widening the result of the average instruction
3493 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
3494 therefore better not to go narrower than TARGET_PRECISION. */
3495 if (TYPE_PRECISION (new_type) < target_precision)
3496 new_type = build_nonstandard_integer_type (target_precision,
3497 TYPE_UNSIGNED (new_type));
3498
3499 /* Check for target support. */
3500 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
3501 if (!new_vectype)
3502 return NULL;
3503
3504 bool fallback_p = false;
3505
3506 if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
3507 ;
3508 else if (TYPE_UNSIGNED (new_type)
3509 && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
3510 && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
3511 && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
3512 && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
3513 fallback_p = true;
3514 else
3515 return NULL;
3516
3517 /* The IR requires a valid vector type for the cast result, even though
3518 it's likely to be discarded. */
3519 *type_out = get_vectype_for_scalar_type (vinfo, type);
3520 if (!*type_out)
3521 return NULL;
3522
3523 tree new_var = vect_recog_temp_ssa_var (type: new_type, NULL);
3524 tree new_ops[2];
3525 vect_convert_inputs (vinfo, stmt_info: last_stmt_info, n: 2, result: new_ops, type: new_type,
3526 unprom, vectype: new_vectype);
3527
3528 if (fallback_p)
3529 {
3530 /* As a fallback, generate code for the following sequence:
3531
3532 shifted_op0 = new_ops[0] >> 1;
3533 shifted_op1 = new_ops[1] >> 1;
3534 sum_of_shifted = shifted_op0 + shifted_op1;
3535 unmasked_carry = new_ops[0] and/or new_ops[1];
3536 carry = unmasked_carry & 1;
3537 new_var = sum_of_shifted + carry;
3538 */
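
      /* This decomposition is exact: writing a = 2*(a >> 1) + (a & 1) and
         b = 2*(b >> 1) + (b & 1) gives
           floor ((a + b) / 2) == (a >> 1) + (b >> 1) + ((a & b) & 1)
           ceil  ((a + b) / 2) == (a >> 1) + (b >> 1) + ((a | b) & 1)
         so only the low bit of the AND/IOR below is needed as the carry.  */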
3539
3540 tree one_cst = build_one_cst (new_type);
3541 gassign *g;
3542
3543 tree shifted_op0 = vect_recog_temp_ssa_var (type: new_type, NULL);
3544 g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
3545 append_pattern_def_seq (vinfo, stmt_info: last_stmt_info, new_stmt: g, vectype: new_vectype);
3546
3547 tree shifted_op1 = vect_recog_temp_ssa_var (type: new_type, NULL);
3548 g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
3549 append_pattern_def_seq (vinfo, stmt_info: last_stmt_info, new_stmt: g, vectype: new_vectype);
3550
3551 tree sum_of_shifted = vect_recog_temp_ssa_var (type: new_type, NULL);
3552 g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
3553 shifted_op0, shifted_op1);
3554 append_pattern_def_seq (vinfo, stmt_info: last_stmt_info, new_stmt: g, vectype: new_vectype);
3555
3556 tree unmasked_carry = vect_recog_temp_ssa_var (type: new_type, NULL);
3557 tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
3558 g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
3559 append_pattern_def_seq (vinfo, stmt_info: last_stmt_info, new_stmt: g, vectype: new_vectype);
3560
3561 tree carry = vect_recog_temp_ssa_var (type: new_type, NULL);
3562 g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
3563 append_pattern_def_seq (vinfo, stmt_info: last_stmt_info, new_stmt: g, vectype: new_vectype);
3564
3565 g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
3566 return vect_convert_output (vinfo, stmt_info: last_stmt_info, type, pattern_stmt: g, vecitype: new_vectype);
3567 }
3568
3569 /* Generate the IFN_AVG* call. */
3570 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
3571 new_ops[1]);
3572 gimple_call_set_lhs (gs: average_stmt, lhs: new_var);
3573 gimple_set_location (g: average_stmt, location: gimple_location (g: last_stmt));
3574
3575 if (dump_enabled_p ())
3576 dump_printf_loc (MSG_NOTE, vect_location,
3577 "created pattern stmt: %G", (gimple *) average_stmt);
3578
3579 return vect_convert_output (vinfo, stmt_info: last_stmt_info,
3580 type, pattern_stmt: average_stmt, vecitype: new_vectype);
3581}
3582
3583/* Recognize cases in which the input to a cast is wider than its
3584 output, and the input is fed by a widening operation. Fold this
3585 by removing the unnecessary intermediate widening. E.g.:
3586
3587 unsigned char a;
3588 unsigned int b = (unsigned int) a;
3589 unsigned short c = (unsigned short) b;
3590
3591 -->
3592
3593 unsigned short c = (unsigned short) a;
3594
3595 Although this is rare in input IR, it is an expected side-effect
3596 of the over-widening pattern above.
3597
3598 This is also beneficial for integer-to-float conversions, if the
3599 widened integer has more bits than the float, and if the unwidened
3600 input doesn't. */
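
/* For example, one instance of the integer-to-float case is:

     unsigned char a;
     unsigned long long b = (unsigned long long) a;
     float c = (float) b;

   -->

     float c = (float) a;

   where the 64-bit intermediate has more bits than the 32-bit float mode
   but the original 8-bit input does not.  */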
3601
3602static gimple *
3603vect_recog_cast_forwprop_pattern (vec_info *vinfo,
3604 stmt_vec_info last_stmt_info, tree *type_out)
3605{
3606 /* Check for a cast, including an integer-to-float conversion. */
3607 gassign *last_stmt = dyn_cast <gassign *> (p: last_stmt_info->stmt);
3608 if (!last_stmt)
3609 return NULL;
3610 tree_code code = gimple_assign_rhs_code (gs: last_stmt);
3611 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
3612 return NULL;
3613
3614 /* Make sure that the rhs is a scalar with a natural bitsize. */
3615 tree lhs = gimple_assign_lhs (gs: last_stmt);
3616 if (!lhs)
3617 return NULL;
3618 tree lhs_type = TREE_TYPE (lhs);
3619 scalar_mode lhs_mode;
3620 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
3621 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), result: &lhs_mode))
3622 return NULL;
3623
3624 /* Check for a narrowing operation (from a vector point of view). */
3625 tree rhs = gimple_assign_rhs1 (gs: last_stmt);
3626 tree rhs_type = TREE_TYPE (rhs);
3627 if (!INTEGRAL_TYPE_P (rhs_type)
3628 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
3629 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (mode: lhs_mode))
3630 return NULL;
3631
3632 /* Try to find an unpromoted input. */
3633 vect_unpromoted_value unprom;
3634 if (!vect_look_through_possible_promotion (vinfo, op: rhs, unprom: &unprom)
3635 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
3636 return NULL;
3637
3638 /* If the bits above RHS_TYPE matter, make sure that they're the
3639 same when extending from UNPROM as they are when extending from RHS. */
3640 if (!INTEGRAL_TYPE_P (lhs_type)
3641 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
3642 return NULL;
3643
3644 /* We can get the same result by casting UNPROM directly, to avoid
3645 the unnecessary widening and narrowing. */
3646 vect_pattern_detected (name: "vect_recog_cast_forwprop_pattern", stmt: last_stmt);
3647
3648 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
3649 if (!*type_out)
3650 return NULL;
3651
3652 tree new_var = vect_recog_temp_ssa_var (type: lhs_type, NULL);
3653 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
3654 gimple_set_location (g: pattern_stmt, location: gimple_location (g: last_stmt));
3655
3656 return pattern_stmt;
3657}
3658
3659/* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
3660 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
3661
3662static gimple *
3663vect_recog_widen_shift_pattern (vec_info *vinfo,
3664 stmt_vec_info last_stmt_info, tree *type_out)
3665{
3666 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
3667 orig_code: LSHIFT_EXPR, wide_code: WIDEN_LSHIFT_EXPR, shift_p: true,
3668 name: "vect_recog_widen_shift_pattern");
3669}
3670
3671/* Detect a rotate pattern that wouldn't otherwise be vectorized:
3672
3673 type a_t, b_t, c_t;
3674
3675 S0 a_t = b_t r<< c_t;
3676
3677 Input/Output:
3678
3679 * STMT_VINFO: The stmt from which the pattern search begins,
3680 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
3681 with a sequence:
3682
3683 S1 d_t = -c_t;
3684 S2 e_t = d_t & (B - 1);
3685 S3 f_t = b_t << c_t;
3686 S4 g_t = b_t >> e_t;
3687 S0 a_t = f_t | g_t;
3688
3689 where B is element bitsize of type.
3690
3691 Output:
3692
3693 * TYPE_OUT: The type of the output of this pattern.
3694
3695 * Return value: A new stmt that will be used to replace the rotate
3696 S0 stmt. */
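
/* For instance, with a 32-bit unsigned type (B == 32) and c_t == 8, the
   sequence computes e_t = -8 & 31 == 24 and then
   (b_t << 8) | (b_t >> 24), i.e. the left-rotation by 8.  Masking the
   negated amount with B - 1 also keeps the shift count in range when
   c_t == 0.  */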
3697
3698static gimple *
3699vect_recog_rotate_pattern (vec_info *vinfo,
3700 stmt_vec_info stmt_vinfo, tree *type_out)
3701{
3702 gimple *last_stmt = stmt_vinfo->stmt;
3703 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
3704 gimple *pattern_stmt, *def_stmt;
3705 enum tree_code rhs_code;
3706 enum vect_def_type dt;
3707 optab optab1, optab2;
3708 edge ext_def = NULL;
3709 bool bswap16_p = false;
3710
3711 if (is_gimple_assign (gs: last_stmt))
3712 {
3713 rhs_code = gimple_assign_rhs_code (gs: last_stmt);
3714 switch (rhs_code)
3715 {
3716 case LROTATE_EXPR:
3717 case RROTATE_EXPR:
3718 break;
3719 default:
3720 return NULL;
3721 }
3722
3723 lhs = gimple_assign_lhs (gs: last_stmt);
3724 oprnd0 = gimple_assign_rhs1 (gs: last_stmt);
3725 type = TREE_TYPE (oprnd0);
3726 oprnd1 = gimple_assign_rhs2 (gs: last_stmt);
3727 }
3728 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
3729 {
3730 /* __builtin_bswap16 (x) is another form of x r>> 8.
3731 The vectorizer has bswap support, but only if the argument isn't
3732 promoted. */
3733 lhs = gimple_call_lhs (gs: last_stmt);
3734 oprnd0 = gimple_call_arg (gs: last_stmt, index: 0);
3735 type = TREE_TYPE (oprnd0);
3736 if (!lhs
3737 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
3738 || TYPE_PRECISION (type) <= 16
3739 || TREE_CODE (oprnd0) != SSA_NAME
3740 || BITS_PER_UNIT != 8)
3741 return NULL;
3742
3743 stmt_vec_info def_stmt_info;
3744 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
3745 return NULL;
3746
3747 if (dt != vect_internal_def)
3748 return NULL;
3749
3750 if (gimple_assign_cast_p (s: def_stmt))
3751 {
3752 def = gimple_assign_rhs1 (gs: def_stmt);
3753 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
3754 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
3755 oprnd0 = def;
3756 }
3757
3758 type = TREE_TYPE (lhs);
3759 vectype = get_vectype_for_scalar_type (vinfo, type);
3760 if (vectype == NULL_TREE)
3761 return NULL;
3762
3763 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
3764 {
3765 /* The encoding uses one stepped pattern for each byte in the
3766 16-bit word. */
3767 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (node: char_vectype), 2, 3);
3768 for (unsigned i = 0; i < 3; ++i)
3769 for (unsigned j = 0; j < 2; ++j)
3770 elts.quick_push (obj: (i + 1) * 2 - j - 1);
3771
3772 vec_perm_indices indices (elts, 1,
3773 TYPE_VECTOR_SUBPARTS (node: char_vectype));
3774 machine_mode vmode = TYPE_MODE (char_vectype);
3775 if (can_vec_perm_const_p (vmode, vmode, indices))
3776 {
3777 /* vectorizable_bswap can handle the __builtin_bswap16 if we
3778 undo the argument promotion. */
3779 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3780 {
3781 def = vect_recog_temp_ssa_var (type, NULL);
3782 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3783 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
3784 oprnd0 = def;
3785 }
3786
3787 /* Pattern detected. */
3788 vect_pattern_detected (name: "vect_recog_rotate_pattern", stmt: last_stmt);
3789
3790 *type_out = vectype;
3791
3792 /* Pattern supported. Create a stmt to be used to replace the
3793 pattern, with the unpromoted argument. */
3794 var = vect_recog_temp_ssa_var (type, NULL);
3795 pattern_stmt = gimple_build_call (gimple_call_fndecl (gs: last_stmt),
3796 1, oprnd0);
3797 gimple_call_set_lhs (gs: pattern_stmt, lhs: var);
3798 gimple_call_set_fntype (call_stmt: as_a <gcall *> (p: pattern_stmt),
3799 fntype: gimple_call_fntype (gs: last_stmt));
3800 return pattern_stmt;
3801 }
3802 }
3803
3804 oprnd1 = build_int_cst (integer_type_node, 8);
3805 rhs_code = LROTATE_EXPR;
3806 bswap16_p = true;
3807 }
3808 else
3809 return NULL;
3810
3811 if (TREE_CODE (oprnd0) != SSA_NAME
3812 || !INTEGRAL_TYPE_P (type)
3813 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type))
3814 return NULL;
3815
3816 stmt_vec_info def_stmt_info;
3817 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
3818 return NULL;
3819
3820 if (dt != vect_internal_def
3821 && dt != vect_constant_def
3822 && dt != vect_external_def)
3823 return NULL;
3824
3825 vectype = get_vectype_for_scalar_type (vinfo, type);
3826 if (vectype == NULL_TREE)
3827 return NULL;
3828
3829 /* If vector/vector or vector/scalar rotate is supported by the target,
3830 don't do anything here. */
3831 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
3832 if (optab1
3833 && optab_handler (op: optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3834 {
3835 use_rotate:
3836 if (bswap16_p)
3837 {
3838 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
3839 {
3840 def = vect_recog_temp_ssa_var (type, NULL);
3841 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3842 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
3843 oprnd0 = def;
3844 }
3845
3846 /* Pattern detected. */
3847 vect_pattern_detected (name: "vect_recog_rotate_pattern", stmt: last_stmt);
3848
3849 *type_out = vectype;
3850
3851 /* Pattern supported. Create a stmt to be used to replace the
3852 pattern. */
3853 var = vect_recog_temp_ssa_var (type, NULL);
3854 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
3855 oprnd1);
3856 return pattern_stmt;
3857 }
3858 return NULL;
3859 }
3860
3861 if (is_a <bb_vec_info> (p: vinfo) || dt != vect_internal_def)
3862 {
3863 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
3864 if (optab2
3865 && optab_handler (op: optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3866 goto use_rotate;
3867 }
3868
3869 tree utype = unsigned_type_for (type);
3870 tree uvectype = get_vectype_for_scalar_type (vinfo, utype);
3871 if (!uvectype)
3872 return NULL;
3873
3874 /* If vector/vector or vector/scalar shifts aren't supported by the target,
3875 don't do anything here either. */
3876 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_vector);
3877 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_vector);
3878 if (!optab1
3879 || optab_handler (op: optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3880 || !optab2
3881 || optab_handler (op: optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3882 {
3883 if (! is_a <bb_vec_info> (p: vinfo) && dt == vect_internal_def)
3884 return NULL;
3885 optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_scalar);
3886 optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_scalar);
3887 if (!optab1
3888 || optab_handler (op: optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
3889 || !optab2
3890 || optab_handler (op: optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
3891 return NULL;
3892 }
3893
3894 *type_out = vectype;
3895
3896 if (!useless_type_conversion_p (utype, TREE_TYPE (oprnd0)))
3897 {
3898 def = vect_recog_temp_ssa_var (type: utype, NULL);
3899 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
3900 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt, vectype: uvectype);
3901 oprnd0 = def;
3902 }
3903
3904 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
3905 ext_def = vect_get_external_def_edge (vinfo, var: oprnd1);
3906
3907 def = NULL_TREE;
3908 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
3909 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
3910 def = oprnd1;
3911 else if (def_stmt && gimple_assign_cast_p (s: def_stmt))
3912 {
3913 tree rhs1 = gimple_assign_rhs1 (gs: def_stmt);
3914 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
3915 && TYPE_PRECISION (TREE_TYPE (rhs1))
3916 == TYPE_PRECISION (type))
3917 def = rhs1;
3918 }
3919
3920 if (def == NULL_TREE)
3921 {
3922 def = vect_recog_temp_ssa_var (type: utype, NULL);
3923 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
3924 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt, vectype: uvectype);
3925 }
3926 stype = TREE_TYPE (def);
3927
3928 if (TREE_CODE (def) == INTEGER_CST)
3929 {
3930 if (!tree_fits_uhwi_p (def)
3931 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
3932 || integer_zerop (def))
3933 return NULL;
3934 def2 = build_int_cst (stype,
3935 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
3936 }
3937 else
3938 {
3939 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
3940
3941 if (vecstype == NULL_TREE)
3942 return NULL;
3943 def2 = vect_recog_temp_ssa_var (type: stype, NULL);
3944 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
3945 if (ext_def)
3946 {
3947 basic_block new_bb
3948 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3949 gcc_assert (!new_bb);
3950 }
3951 else
3952 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt, vectype: vecstype);
3953
3954 def2 = vect_recog_temp_ssa_var (type: stype, NULL);
3955 tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
3956 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
3957 gimple_assign_lhs (gs: def_stmt), mask);
3958 if (ext_def)
3959 {
3960 basic_block new_bb
3961 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
3962 gcc_assert (!new_bb);
3963 }
3964 else
3965 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt, vectype: vecstype);
3966 }
3967
3968 var1 = vect_recog_temp_ssa_var (type: utype, NULL);
3969 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
3970 ? LSHIFT_EXPR : RSHIFT_EXPR,
3971 oprnd0, def);
3972 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt, vectype: uvectype);
3973
3974 var2 = vect_recog_temp_ssa_var (type: utype, NULL);
3975 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
3976 ? RSHIFT_EXPR : LSHIFT_EXPR,
3977 oprnd0, def2);
3978 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt, vectype: uvectype);
3979
3980 /* Pattern detected. */
3981 vect_pattern_detected (name: "vect_recog_rotate_pattern", stmt: last_stmt);
3982
3983 /* Pattern supported. Create a stmt to be used to replace the pattern. */
3984 var = vect_recog_temp_ssa_var (type: utype, NULL);
3985 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
3986
3987 if (!useless_type_conversion_p (type, utype))
3988 {
3989 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt, vectype: uvectype);
3990 tree result = vect_recog_temp_ssa_var (type, NULL);
3991 pattern_stmt = gimple_build_assign (result, NOP_EXPR, var);
3992 }
3993 return pattern_stmt;
3994}
3995
3996/* Detect a vector by vector shift pattern that wouldn't otherwise be
3997 vectorized:
3998
3999 type a_t;
4000 TYPE b_T, res_T;
4001
4002 S1 a_t = ;
4003 S2 b_T = ;
4004 S3 res_T = b_T op a_t;
4005
4006 where type 'TYPE' is a type with different size than 'type',
4007 and op is <<, >> or rotate.
4008
4009 Also detect cases:
4010
4011 type a_t;
4012 TYPE b_T, c_T, res_T;
4013
4014 S0 c_T = ;
4015 S1 a_t = (type) c_T;
4016 S2 b_T = ;
4017 S3 res_T = b_T op a_t;
4018
4019 Input/Output:
4020
4021 * STMT_VINFO: The stmt from which the pattern search begins,
4022 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
4023 with a shift/rotate which has same type on both operands, in the
4024 second case just b_T op c_T, in the first case with added cast
4025 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
4026
4027 Output:
4028
4029 * TYPE_OUT: The type of the output of this pattern.
4030
4031 * Return value: A new stmt that will be used to replace the shift/rotate
4032 S3 stmt. */
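
/* A typical instance at the GIMPLE level (assuming 32-bit TYPE and an
   8-bit type) is:

     unsigned char a_t;
     unsigned int b_T, res_T;
     res_T = b_T << a_t;

   where the shift amount is cast (or the cast S1 is looked through) so
   that both shift operands end up with the same vector element type.  */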
4033
4034static gimple *
4035vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
4036 stmt_vec_info stmt_vinfo,
4037 tree *type_out)
4038{
4039 gimple *last_stmt = stmt_vinfo->stmt;
4040 tree oprnd0, oprnd1, lhs, var;
4041 gimple *pattern_stmt;
4042 enum tree_code rhs_code;
4043
4044 if (!is_gimple_assign (gs: last_stmt))
4045 return NULL;
4046
4047 rhs_code = gimple_assign_rhs_code (gs: last_stmt);
4048 switch (rhs_code)
4049 {
4050 case LSHIFT_EXPR:
4051 case RSHIFT_EXPR:
4052 case LROTATE_EXPR:
4053 case RROTATE_EXPR:
4054 break;
4055 default:
4056 return NULL;
4057 }
4058
4059 lhs = gimple_assign_lhs (gs: last_stmt);
4060 oprnd0 = gimple_assign_rhs1 (gs: last_stmt);
4061 oprnd1 = gimple_assign_rhs2 (gs: last_stmt);
4062 if (TREE_CODE (oprnd0) != SSA_NAME
4063 || TREE_CODE (oprnd1) != SSA_NAME
4064 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
4065 || !INTEGRAL_TYPE_P (TREE_TYPE (oprnd0))
4066 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
4067 || TYPE_PRECISION (TREE_TYPE (lhs))
4068 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
4069 return NULL;
4070
4071 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, op: oprnd1);
4072 if (!def_vinfo)
4073 return NULL;
4074
4075 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
4076 if (*type_out == NULL_TREE)
4077 return NULL;
4078
4079 tree def = NULL_TREE;
4080 gassign *def_stmt = dyn_cast <gassign *> (p: def_vinfo->stmt);
4081 if (def_stmt && gimple_assign_cast_p (s: def_stmt))
4082 {
4083 tree rhs1 = gimple_assign_rhs1 (gs: def_stmt);
4084 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
4085 && TYPE_PRECISION (TREE_TYPE (rhs1))
4086 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
4087 {
4088 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
4089 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
4090 def = rhs1;
4091 else
4092 {
4093 tree mask
4094 = build_low_bits_mask (TREE_TYPE (rhs1),
4095 TYPE_PRECISION (TREE_TYPE (oprnd1)));
4096 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4097 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
4098 tree vecstype = get_vectype_for_scalar_type (vinfo,
4099 TREE_TYPE (rhs1));
4100 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt, vectype: vecstype);
4101 }
4102 }
4103 }
4104
4105 if (def == NULL_TREE)
4106 {
4107 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
4108 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
4109 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4110 }
4111
4112 /* Pattern detected. */
4113 vect_pattern_detected (name: "vect_recog_vector_vector_shift_pattern", stmt: last_stmt);
4114
4115 /* Pattern supported. Create a stmt to be used to replace the pattern. */
4116 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
4117 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
4118
4119 return pattern_stmt;
4120}
4121
4122/* Return true iff the target has a vector optab implementing the operation
4123 CODE on type VECTYPE. */
4124
4125static bool
4126target_has_vecop_for_code (tree_code code, tree vectype)
4127{
4128 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
4129 return voptab
4130 && optab_handler (op: voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
4131}
4132
4133/* Verify that the target has optabs of VECTYPE to perform all the steps
4134 needed by the multiplication-by-immediate synthesis algorithm described by
4135 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
4136 present. Return true iff the target supports all the steps. */
4137
4138static bool
4139target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
4140 tree vectype, bool synth_shift_p)
4141{
4142 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
4143 return false;
4144
4145 bool supports_vminus = target_has_vecop_for_code (code: MINUS_EXPR, vectype);
4146 bool supports_vplus = target_has_vecop_for_code (code: PLUS_EXPR, vectype);
4147
4148 if (var == negate_variant
4149 && !target_has_vecop_for_code (code: NEGATE_EXPR, vectype))
4150 return false;
4151
4152 /* If we must synthesize shifts with additions make sure that vector
4153 addition is available. */
4154 if ((var == add_variant || synth_shift_p) && !supports_vplus)
4155 return false;
4156
4157 for (int i = 1; i < alg->ops; i++)
4158 {
4159 switch (alg->op[i])
4160 {
4161 case alg_shift:
4162 break;
4163 case alg_add_t_m2:
4164 case alg_add_t2_m:
4165 case alg_add_factor:
4166 if (!supports_vplus)
4167 return false;
4168 break;
4169 case alg_sub_t_m2:
4170 case alg_sub_t2_m:
4171 case alg_sub_factor:
4172 if (!supports_vminus)
4173 return false;
4174 break;
4175 case alg_unknown:
4176 case alg_m:
4177 case alg_zero:
4178 case alg_impossible:
4179 return false;
4180 default:
4181 gcc_unreachable ();
4182 }
4183 }
4184
4185 return true;
4186}
4187
4188/* Synthesize a left shift of OP by AMNT bits using a series of additions and
4189 putting the final result in DEST. Append all statements but the last into
4190 VINFO. Return the last statement. */
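
/* E.g. a shift by 3 becomes three doublings:

     t1 = op + op;        // op << 1
     t2 = t1 + t1;        // op << 2
     dest = t2 + t2;      // op << 3
   */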
4191
4192static gimple *
4193synth_lshift_by_additions (vec_info *vinfo,
4194 tree dest, tree op, HOST_WIDE_INT amnt,
4195 stmt_vec_info stmt_info)
4196{
4197 HOST_WIDE_INT i;
4198 tree itype = TREE_TYPE (op);
4199 tree prev_res = op;
4200 gcc_assert (amnt >= 0);
4201 for (i = 0; i < amnt; i++)
4202 {
4203 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (type: itype, NULL)
4204 : dest;
4205 gimple *stmt
4206 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
4207 prev_res = tmp_var;
4208 if (i < amnt - 1)
4209 append_pattern_def_seq (vinfo, stmt_info, new_stmt: stmt);
4210 else
4211 return stmt;
4212 }
4213 gcc_unreachable ();
4214 return NULL;
4215}
4216
4217/* Helper for vect_synth_mult_by_constant. Apply a binary operation
4218 CODE to operands OP1 and OP2, creating a new temporary SSA var in
4219 the process if necessary. Append the resulting assignment statements
4220 to the sequence in STMT_VINFO. Return the SSA variable that holds the
4221 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
4222 left shifts using additions. */
4223
4224static tree
4225apply_binop_and_append_stmt (vec_info *vinfo,
4226 tree_code code, tree op1, tree op2,
4227 stmt_vec_info stmt_vinfo, bool synth_shift_p)
4228{
4229 if (integer_zerop (op2)
4230 && (code == LSHIFT_EXPR
4231 || code == PLUS_EXPR))
4232 {
4233 gcc_assert (TREE_CODE (op1) == SSA_NAME);
4234 return op1;
4235 }
4236
4237 gimple *stmt;
4238 tree itype = TREE_TYPE (op1);
4239 tree tmp_var = vect_recog_temp_ssa_var (type: itype, NULL);
4240
4241 if (code == LSHIFT_EXPR
4242 && synth_shift_p)
4243 {
4244 stmt = synth_lshift_by_additions (vinfo, dest: tmp_var, op: op1,
4245 TREE_INT_CST_LOW (op2), stmt_info: stmt_vinfo);
4246 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: stmt);
4247 return tmp_var;
4248 }
4249
4250 stmt = gimple_build_assign (tmp_var, code, op1, op2);
4251 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: stmt);
4252 return tmp_var;
4253}
4254
4255/* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
4256 and simple arithmetic operations to be vectorized. Record the statements
4257 produced in STMT_VINFO and return the last statement in the sequence or
4258 NULL if it's not possible to synthesize such a multiplication.
4259 This function mirrors the behavior of expand_mult_const in expmed.cc but
4260 works on tree-ssa form. */
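
/* E.g. a multiplication by 10 might be synthesized (depending on what
   choose_mult_variant picks for the target) as:

     t1 = op << 2;        // op * 4
     t2 = t1 + op;        // op * 5
     result = t2 << 1;    // op * 10

   with each shift itself expanded into repeated additions if the target
   only has vector additions (see synth_lshift_by_additions above).  */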
4261
4262static gimple *
4263vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
4264 stmt_vec_info stmt_vinfo)
4265{
4266 tree itype = TREE_TYPE (op);
4267 machine_mode mode = TYPE_MODE (itype);
4268 struct algorithm alg;
4269 mult_variant variant;
4270 if (!tree_fits_shwi_p (val))
4271 return NULL;
4272
4273 /* Multiplication synthesis by shifts, adds and subs can introduce
4274 signed overflow where the original operation didn't. Perform the
4275 operations on an unsigned type and cast back to avoid this.
4276 In the future we may want to relax this for synthesis algorithms
4277 that we can prove do not cause unexpected overflow. */
4278 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
4279
4280 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
4281 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
4282 if (!vectype)
4283 return NULL;
4284
4285 /* Targets that don't support vector shifts but support vector additions
4286 can synthesize shifts that way. */
4287 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
4288
4289 HOST_WIDE_INT hwval = tree_to_shwi (val);
4290 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
4291 The vectorizer's benefit analysis will decide whether it's beneficial
4292 to do this. */
4293 bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
4294 ? TYPE_MODE (vectype) : mode,
4295 hwval, &alg, &variant, MAX_COST);
4296 if (!possible)
4297 return NULL;
4298
4299 if (!target_supports_mult_synth_alg (alg: &alg, var: variant, vectype, synth_shift_p))
4300 return NULL;
4301
4302 tree accumulator;
4303
4304 /* Clear out the sequence of statements so we can populate it below. */
4305 gimple *stmt = NULL;
4306
4307 if (cast_to_unsigned_p)
4308 {
4309 tree tmp_op = vect_recog_temp_ssa_var (type: multtype, NULL);
4310 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
4311 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: stmt);
4312 op = tmp_op;
4313 }
4314
4315 if (alg.op[0] == alg_zero)
4316 accumulator = build_int_cst (multtype, 0);
4317 else
4318 accumulator = op;
4319
4320 bool needs_fixup = (variant == negate_variant)
4321 || (variant == add_variant);
4322
4323 for (int i = 1; i < alg.ops; i++)
4324 {
4325 tree shft_log = build_int_cst (multtype, alg.log[i]);
4326 tree accum_tmp = vect_recog_temp_ssa_var (type: multtype, NULL);
4327 tree tmp_var = NULL_TREE;
4328
4329 switch (alg.op[i])
4330 {
4331 case alg_shift:
4332 if (synth_shift_p)
4333 stmt
4334 = synth_lshift_by_additions (vinfo, dest: accum_tmp, op: accumulator,
4335 amnt: alg.log[i], stmt_info: stmt_vinfo);
4336 else
4337 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
4338 shft_log);
4339 break;
4340 case alg_add_t_m2:
4341 tmp_var
4342 = apply_binop_and_append_stmt (vinfo, code: LSHIFT_EXPR, op1: op, op2: shft_log,
4343 stmt_vinfo, synth_shift_p);
4344 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
4345 tmp_var);
4346 break;
4347 case alg_sub_t_m2:
4348 tmp_var = apply_binop_and_append_stmt (vinfo, code: LSHIFT_EXPR, op1: op,
4349 op2: shft_log, stmt_vinfo,
4350 synth_shift_p);
4351 /* In some algorithms the first step involves zeroing the
4352 accumulator. If subtracting from such an accumulator,
4353 just emit the negation directly. */
4354 if (integer_zerop (accumulator))
4355 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
4356 else
4357 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
4358 tmp_var);
4359 break;
4360 case alg_add_t2_m:
4361 tmp_var
4362 = apply_binop_and_append_stmt (vinfo, code: LSHIFT_EXPR, op1: accumulator,
4363 op2: shft_log, stmt_vinfo, synth_shift_p);
4364 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
4365 break;
4366 case alg_sub_t2_m:
4367 tmp_var
4368 = apply_binop_and_append_stmt (vinfo, code: LSHIFT_EXPR, op1: accumulator,
4369 op2: shft_log, stmt_vinfo, synth_shift_p);
4370 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
4371 break;
4372 case alg_add_factor:
4373 tmp_var
4374 = apply_binop_and_append_stmt (vinfo, code: LSHIFT_EXPR, op1: accumulator,
4375 op2: shft_log, stmt_vinfo, synth_shift_p);
4376 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
4377 tmp_var);
4378 break;
4379 case alg_sub_factor:
4380 tmp_var
4381 = apply_binop_and_append_stmt (vinfo, code: LSHIFT_EXPR, op1: accumulator,
4382 op2: shft_log, stmt_vinfo, synth_shift_p);
4383 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
4384 accumulator);
4385 break;
4386 default:
4387 gcc_unreachable ();
4388 }
4389 /* We don't want to append the last stmt in the sequence to stmt_vinfo
4390 but rather return it directly. */
4391
4392 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
4393 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: stmt);
4394 accumulator = accum_tmp;
4395 }
4396 if (variant == negate_variant)
4397 {
4398 tree accum_tmp = vect_recog_temp_ssa_var (type: multtype, NULL);
4399 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
4400 accumulator = accum_tmp;
4401 if (cast_to_unsigned_p)
4402 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: stmt);
4403 }
4404 else if (variant == add_variant)
4405 {
4406 tree accum_tmp = vect_recog_temp_ssa_var (type: multtype, NULL);
4407 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
4408 accumulator = accum_tmp;
4409 if (cast_to_unsigned_p)
4410 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: stmt);
4411 }
4412 /* Move back to a signed type if needed. */
4413 if (cast_to_unsigned_p)
4414 {
4415 tree accum_tmp = vect_recog_temp_ssa_var (type: itype, NULL);
4416 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
4417 }
4418
4419 return stmt;
4420}
4421
4422/* Detect multiplication by a constant and convert it into a sequence of
4423 shifts, additions, subtractions and negations. We reuse the
4424 choose_mult_variant algorithms from expmed.cc.
4425
4426 Input/Output:
4427
4428 STMT_VINFO: The stmt from which the pattern search begins,
4429 i.e. the mult stmt.
4430
4431 Output:
4432
4433 * TYPE_OUT: The type of the output of this pattern.
4434
4435 * Return value: A new stmt that will be used to replace
4436 the multiplication. */
4437
4438static gimple *
4439vect_recog_mult_pattern (vec_info *vinfo,
4440 stmt_vec_info stmt_vinfo, tree *type_out)
4441{
4442 gimple *last_stmt = stmt_vinfo->stmt;
4443 tree oprnd0, oprnd1, vectype, itype;
4444 gimple *pattern_stmt;
4445
4446 if (!is_gimple_assign (gs: last_stmt))
4447 return NULL;
4448
4449 if (gimple_assign_rhs_code (gs: last_stmt) != MULT_EXPR)
4450 return NULL;
4451
4452 oprnd0 = gimple_assign_rhs1 (gs: last_stmt);
4453 oprnd1 = gimple_assign_rhs2 (gs: last_stmt);
4454 itype = TREE_TYPE (oprnd0);
4455
4456 if (TREE_CODE (oprnd0) != SSA_NAME
4457 || TREE_CODE (oprnd1) != INTEGER_CST
4458 || !INTEGRAL_TYPE_P (itype)
4459 || !type_has_mode_precision_p (t: itype))
4460 return NULL;
4461
4462 vectype = get_vectype_for_scalar_type (vinfo, itype);
4463 if (vectype == NULL_TREE)
4464 return NULL;
4465
4466 /* If the target can handle vectorized multiplication natively,
4467 don't attempt to optimize this. */
4468 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
4469 if (mul_optab != unknown_optab)
4470 {
4471 machine_mode vec_mode = TYPE_MODE (vectype);
4472 int icode = (int) optab_handler (op: mul_optab, mode: vec_mode);
4473 if (icode != CODE_FOR_nothing)
4474 return NULL;
4475 }
4476
4477 pattern_stmt = vect_synth_mult_by_constant (vinfo,
4478 op: oprnd0, val: oprnd1, stmt_vinfo);
4479 if (!pattern_stmt)
4480 return NULL;
4481
4482 /* Pattern detected. */
4483 vect_pattern_detected (name: "vect_recog_mult_pattern", stmt: last_stmt);
4484
4485 *type_out = vectype;
4486
4487 return pattern_stmt;
4488}
4489
4490/* Detect a signed division by a constant that wouldn't be
4491 otherwise vectorized:
4492
4493 type a_t, b_t;
4494
4495 S1 a_t = b_t / N;
4496
4497 where type 'type' is an integral type and N is a constant.
4498
4499 Similarly handle modulo by a constant:
4500
4501 S4 a_t = b_t % N;
4502
4503 Input/Output:
4504
4505 * STMT_VINFO: The stmt from which the pattern search begins,
4506 i.e. the division stmt. If N is a power of two constant
4507 and the type is signed, S1 is replaced by:
4508 S3 y_t = b_t < 0 ? N - 1 : 0;
4509 S2 x_t = b_t + y_t;
4510 S1' a_t = x_t >> log2 (N);
4511
4512 If N is a power of two constant and the type is signed, S4 is
4513 replaced by (where the *_T temporaries have unsigned type):
4514 S9 y_T = b_t < 0 ? -1U : 0U;
4515 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
4516 S7 z_t = (type) z_T;
4517 S6 w_t = b_t + z_t;
4518 S5 x_t = w_t & (N - 1);
4519 S4' a_t = x_t - z_t;
4520
4521 Output:
4522
4523 * TYPE_OUT: The type of the output of this pattern.
4524
4525 * Return value: A new stmt that will be used to replace the division
4526 S1 or modulo S4 stmt. */
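
/* As a worked example of the signed power-of-two case, with N == 8:

     S3  y_t = b_t < 0 ? 7 : 0;
     S2  x_t = b_t + y_t;
     S1' a_t = x_t >> 3;

   e.g. b_t == -13 gives x_t == -6 and a_t == -1, matching the truncating
   division -13 / 8.  */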
4527
4528static gimple *
4529vect_recog_divmod_pattern (vec_info *vinfo,
4530 stmt_vec_info stmt_vinfo, tree *type_out)
4531{
4532 gimple *last_stmt = stmt_vinfo->stmt;
4533 tree oprnd0, oprnd1, vectype, itype, cond;
4534 gimple *pattern_stmt, *def_stmt;
4535 enum tree_code rhs_code;
4536 optab optab;
4537 tree q, cst;
4538 int dummy_int, prec;
4539
4540 if (!is_gimple_assign (gs: last_stmt))
4541 return NULL;
4542
4543 rhs_code = gimple_assign_rhs_code (gs: last_stmt);
4544 switch (rhs_code)
4545 {
4546 case TRUNC_DIV_EXPR:
4547 case EXACT_DIV_EXPR:
4548 case TRUNC_MOD_EXPR:
4549 break;
4550 default:
4551 return NULL;
4552 }
4553
4554 oprnd0 = gimple_assign_rhs1 (gs: last_stmt);
4555 oprnd1 = gimple_assign_rhs2 (gs: last_stmt);
4556 itype = TREE_TYPE (oprnd0);
4557 if (TREE_CODE (oprnd0) != SSA_NAME
4558 || TREE_CODE (oprnd1) != INTEGER_CST
4559 || TREE_CODE (itype) != INTEGER_TYPE
4560 || !type_has_mode_precision_p (t: itype))
4561 return NULL;
4562
4563 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
4564 vectype = get_vectype_for_scalar_type (vinfo, itype);
4565 if (vectype == NULL_TREE)
4566 return NULL;
4567
4568 if (optimize_bb_for_size_p (gimple_bb (g: last_stmt)))
4569 {
4570 /* If the target can handle vectorized division or modulo natively,
4571 don't attempt to optimize this, since native division is likely
4572 to give smaller code. */
4573 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
4574 if (optab != unknown_optab)
4575 {
4576 machine_mode vec_mode = TYPE_MODE (vectype);
4577 int icode = (int) optab_handler (op: optab, mode: vec_mode);
4578 if (icode != CODE_FOR_nothing)
4579 return NULL;
4580 }
4581 }
4582
4583 prec = TYPE_PRECISION (itype);
4584 if (integer_pow2p (oprnd1))
4585 {
4586 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
4587 return NULL;
4588
4589 /* Pattern detected. */
4590 vect_pattern_detected (name: "vect_recog_divmod_pattern", stmt: last_stmt);
4591
4592 *type_out = vectype;
4593
4594 /* Check if the target supports this internal function. */
4595 internal_fn ifn = IFN_DIV_POW2;
4596 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
4597 {
4598 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
4599
4600 tree var_div = vect_recog_temp_ssa_var (type: itype, NULL);
4601 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
4602 gimple_call_set_lhs (gs: div_stmt, lhs: var_div);
4603
4604 if (rhs_code == TRUNC_MOD_EXPR)
4605 {
4606 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: div_stmt);
4607 def_stmt
4608 = gimple_build_assign (vect_recog_temp_ssa_var (type: itype, NULL),
4609 LSHIFT_EXPR, var_div, shift);
4610 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4611 pattern_stmt
4612 = gimple_build_assign (vect_recog_temp_ssa_var (type: itype, NULL),
4613 MINUS_EXPR, oprnd0,
4614 gimple_assign_lhs (gs: def_stmt));
4615 }
4616 else
4617 pattern_stmt = div_stmt;
4618 gimple_set_location (g: pattern_stmt, location: gimple_location (g: last_stmt));
4619
4620 return pattern_stmt;
4621 }
4622
4623 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
4624 build_int_cst (itype, 0));
4625 if (rhs_code == TRUNC_DIV_EXPR
4626 || rhs_code == EXACT_DIV_EXPR)
4627 {
4628 tree var = vect_recog_temp_ssa_var (type: itype, NULL);
4629 tree shift;
4630 def_stmt
4631 = gimple_build_assign (var, COND_EXPR, cond,
4632 fold_build2 (MINUS_EXPR, itype, oprnd1,
4633 build_int_cst (itype, 1)),
4634 build_int_cst (itype, 0));
4635 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4636 var = vect_recog_temp_ssa_var (type: itype, NULL);
4637 def_stmt
4638 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
4639 gimple_assign_lhs (gs: def_stmt));
4640 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4641
4642 shift = build_int_cst (itype, tree_log2 (oprnd1));
4643 pattern_stmt
4644 = gimple_build_assign (vect_recog_temp_ssa_var (type: itype, NULL),
4645 RSHIFT_EXPR, var, shift);
4646 }
4647 else
4648 {
4649 tree signmask;
4650 if (compare_tree_int (oprnd1, 2) == 0)
4651 {
4652 signmask = vect_recog_temp_ssa_var (type: itype, NULL);
4653 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
4654 build_int_cst (itype, 1),
4655 build_int_cst (itype, 0));
4656 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4657 }
4658 else
4659 {
4660 tree utype
4661 = build_nonstandard_integer_type (prec, 1);
4662 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
4663 tree shift
4664 = build_int_cst (utype, GET_MODE_BITSIZE (mode: itype_mode)
4665 - tree_log2 (oprnd1));
4666 tree var = vect_recog_temp_ssa_var (type: utype, NULL);
4667
4668 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
4669 build_int_cst (utype, -1),
4670 build_int_cst (utype, 0));
4671 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt, vectype: vecutype);
4672 var = vect_recog_temp_ssa_var (type: utype, NULL);
4673 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
4674 gimple_assign_lhs (gs: def_stmt),
4675 shift);
4676 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt, vectype: vecutype);
4677 signmask = vect_recog_temp_ssa_var (type: itype, NULL);
4678 def_stmt
4679 = gimple_build_assign (signmask, NOP_EXPR, var);
4680 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4681 }
4682 def_stmt
4683 = gimple_build_assign (vect_recog_temp_ssa_var (type: itype, NULL),
4684 PLUS_EXPR, oprnd0, signmask);
4685 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4686 def_stmt
4687 = gimple_build_assign (vect_recog_temp_ssa_var (type: itype, NULL),
4688 BIT_AND_EXPR, gimple_assign_lhs (gs: def_stmt),
4689 fold_build2 (MINUS_EXPR, itype, oprnd1,
4690 build_int_cst (itype, 1)));
4691 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4692
4693 pattern_stmt
4694 = gimple_build_assign (vect_recog_temp_ssa_var (type: itype, NULL),
4695 MINUS_EXPR, gimple_assign_lhs (gs: def_stmt),
4696 signmask);
4697 }
4698
4699 return pattern_stmt;
4700 }
4701
4702 if ((cst = uniform_integer_cst_p (oprnd1))
4703 && TYPE_UNSIGNED (itype)
4704 && rhs_code == TRUNC_DIV_EXPR
4705 && vectype
4706 && targetm.vectorize.preferred_div_as_shifts_over_mult (vectype))
4707 {
4708 /* We can use the relationship:
4709
4710 x // N == ((x+N+2) // (N+1) + x) // (N+1) for 0 <= x < N(N+3)
4711
4712 to optimize cases where N+1 is a power of 2, and where // (N+1)
4713 is therefore a shift right. When operating in modes that are
4714 multiples of a byte in size, there are two cases:
4715
4716 (1) N(N+3) is not representable, in which case the question
4717 becomes whether the replacement expression overflows.
4718 It is enough to test that x+N+2 does not overflow,
4719 i.e. that x < MAX-(N+1).
4720
4721 (2) N(N+3) is representable, in which case it is the (only)
4722 bound that we need to check.
4723
4724 ??? For now we just handle the case where // (N+1) is a shift
4725 right by half the precision, since some architectures can
4726 optimize the associated addition and shift combinations
4727 into single instructions. */
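
      /* As an illustration, for a 16-bit unsigned division by N == 255
         (so N + 1 == 256 and the shift is by half the precision) the
         generated sequence is:

           t1 = x + 257;    // x + N + 2
           t2 = t1 >> 8;
           t3 = t2 + x;
           q  = t3 >> 8;

         used only when the ranger query below proves that x + 257 does
         not wrap, i.e. that x <= 65535 - 257.  */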
4728
4729 auto wcst = wi::to_wide (t: cst);
4730 int pow = wi::exact_log2 (wcst + 1);
4731 if (pow == prec / 2)
4732 {
4733 gimple *stmt = SSA_NAME_DEF_STMT (oprnd0);
4734
4735 gimple_ranger ranger;
4736 int_range_max r;
4737
4738 /* Check that no overflow will occur. If we don't have range
4739 information we can't perform the optimization. */
4740
4741 if (ranger.range_of_expr (r, name: oprnd0, stmt) && !r.undefined_p ())
4742 {
4743 wide_int max = r.upper_bound ();
4744 wide_int one = wi::shwi (val: 1, precision: prec);
4745 wide_int adder = wi::add (x: one, y: wi::lshift (x: one, y: pow));
4746 wi::overflow_type ovf;
4747 wi::add (x: max, y: adder, sgn: UNSIGNED, overflow: &ovf);
4748 if (ovf == wi::OVF_NONE)
4749 {
4750 *type_out = vectype;
4751 tree tadder = wide_int_to_tree (type: itype, cst: adder);
4752 tree rshift = wide_int_to_tree (type: itype, cst: pow);
4753
4754 tree new_lhs1 = vect_recog_temp_ssa_var (type: itype, NULL);
4755 gassign *patt1
4756 = gimple_build_assign (new_lhs1, PLUS_EXPR, oprnd0, tadder);
4757 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: patt1, vectype);
4758
4759 tree new_lhs2 = vect_recog_temp_ssa_var (type: itype, NULL);
4760 patt1 = gimple_build_assign (new_lhs2, RSHIFT_EXPR, new_lhs1,
4761 rshift);
4762 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: patt1, vectype);
4763
4764 tree new_lhs3 = vect_recog_temp_ssa_var (type: itype, NULL);
4765 patt1 = gimple_build_assign (new_lhs3, PLUS_EXPR, new_lhs2,
4766 oprnd0);
4767 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: patt1, vectype);
4768
4769 tree new_lhs4 = vect_recog_temp_ssa_var (type: itype, NULL);
4770 pattern_stmt = gimple_build_assign (new_lhs4, RSHIFT_EXPR,
4771 new_lhs3, rshift);
4772
4773 return pattern_stmt;
4774 }
4775 }
4776 }
4777 }
4778
4779 if (prec > HOST_BITS_PER_WIDE_INT
4780 || integer_zerop (oprnd1))
4781 return NULL;
4782
4783 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
4784 return NULL;
4785
4786 if (TYPE_UNSIGNED (itype))
4787 {
4788 unsigned HOST_WIDE_INT mh, ml;
4789 int pre_shift, post_shift;
4790 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
4791 & GET_MODE_MASK (itype_mode));
4792 tree t1, t2, t3, t4;
4793
4794 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
4795 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
4796 return NULL;
4797
4798 /* Find a suitable multiplier and right shift count
4799 instead of multiplying with D. */
4800 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
4801
4802 /* If the suggested multiplier is more than PREC bits, we can do better
4803 for even divisors, using an initial right shift. */
4804 if (mh != 0 && (d & 1) == 0)
4805 {
4806 pre_shift = ctz_or_zero (x: d);
4807 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
4808 &ml, &post_shift, &dummy_int);
4809 gcc_assert (!mh);
4810 }
4811 else
4812 pre_shift = 0;
4813
4814 if (mh != 0)
4815 {
4816 if (post_shift - 1 >= prec)
4817 return NULL;
4818
4819 /* t1 = oprnd0 h* ml;
4820 t2 = oprnd0 - t1;
4821 t3 = t2 >> 1;
4822 t4 = t1 + t3;
4823 q = t4 >> (post_shift - 1); */
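 /* (Explanatory note, added for clarity; not from the original comment.)
    Here the real multiplier is 2^prec + ML, which does not fit in prec
    bits, so the sequence reconstructs

      q = (oprnd0 * (2^prec + ML)) >> (prec + post_shift)
	= ((oprnd0 h* ML) + oprnd0) >> post_shift

    where the sum t1 + oprnd0 could wrap; it is therefore computed as
    t1 + ((oprnd0 - t1) >> 1) == (oprnd0 + t1) >> 1, which cannot
    overflow because t1 <= oprnd0.  */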
4824 t1 = vect_recog_temp_ssa_var (type: itype, NULL);
4825 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
4826 build_int_cst (itype, ml));
4827 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4828
4829 t2 = vect_recog_temp_ssa_var (type: itype, NULL);
4830 def_stmt
4831 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
4832 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4833
4834 t3 = vect_recog_temp_ssa_var (type: itype, NULL);
4835 def_stmt
4836 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
4837 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4838
4839 t4 = vect_recog_temp_ssa_var (type: itype, NULL);
4840 def_stmt
4841 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
4842
4843 if (post_shift != 1)
4844 {
4845 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4846
4847 q = vect_recog_temp_ssa_var (type: itype, NULL);
4848 pattern_stmt
4849 = gimple_build_assign (q, RSHIFT_EXPR, t4,
4850 build_int_cst (itype, post_shift - 1));
4851 }
4852 else
4853 {
4854 q = t4;
4855 pattern_stmt = def_stmt;
4856 }
4857 }
4858 else
4859 {
4860 if (pre_shift >= prec || post_shift >= prec)
4861 return NULL;
4862
4863 /* t1 = oprnd0 >> pre_shift;
4864 t2 = t1 h* ml;
4865 q = t2 >> post_shift; */
4866 if (pre_shift)
4867 {
4868 t1 = vect_recog_temp_ssa_var (type: itype, NULL);
4869 def_stmt
4870 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
4871 build_int_cst (NULL, pre_shift));
4872 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4873 }
4874 else
4875 t1 = oprnd0;
4876
4877 t2 = vect_recog_temp_ssa_var (type: itype, NULL);
4878 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
4879 build_int_cst (itype, ml));
4880
4881 if (post_shift)
4882 {
4883 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4884
4885 q = vect_recog_temp_ssa_var (type: itype, NULL);
4886 def_stmt
4887 = gimple_build_assign (q, RSHIFT_EXPR, t2,
4888 build_int_cst (itype, post_shift));
4889 }
4890 else
4891 q = t2;
4892
4893 pattern_stmt = def_stmt;
4894 }
4895 }
4896 else
4897 {
4898 unsigned HOST_WIDE_INT ml;
4899 int post_shift;
4900 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
4901 unsigned HOST_WIDE_INT abs_d;
4902 bool add = false;
4903 tree t1, t2, t3, t4;
4904
4905 /* Give up for -1. */
4906 if (d == -1)
4907 return NULL;
4908
4909 /* Since d might be INT_MIN, we have to cast to
4910 unsigned HOST_WIDE_INT before negating to avoid
4911 undefined signed overflow. */
4912 abs_d = (d >= 0
4913 ? (unsigned HOST_WIDE_INT) d
4914 : - (unsigned HOST_WIDE_INT) d);
4915
4916 /* n rem d = n rem -d */
4917 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
4918 {
4919 d = abs_d;
4920 oprnd1 = build_int_cst (itype, abs_d);
4921 }
4922 if (HOST_BITS_PER_WIDE_INT >= prec
4923 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
4924 /* This case is not handled correctly below. */
4925 return NULL;
4926
4927 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
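 /* (Explanatory note, added for clarity; not from the original comment.)
    When the chosen multiplier ML has its top bit set it cannot be used
    directly as a signed high-part multiplicand.  The sign extension
    below makes the constant read as ML - 2^prec, and the extra PLUS of
    oprnd0 compensates, since

      (oprnd0 * ML) >> prec == ((oprnd0 * (ML - 2^prec)) >> prec) + oprnd0.  */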
4928 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
4929 {
4930 add = true;
4931 ml |= HOST_WIDE_INT_M1U << (prec - 1);
4932 }
4933 if (post_shift >= prec)
4934 return NULL;
4935
4936 /* t1 = oprnd0 h* ml; */
4937 t1 = vect_recog_temp_ssa_var (type: itype, NULL);
4938 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
4939 build_int_cst (itype, ml));
4940
4941 if (add)
4942 {
4943 /* t2 = t1 + oprnd0; */
4944 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4945 t2 = vect_recog_temp_ssa_var (type: itype, NULL);
4946 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
4947 }
4948 else
4949 t2 = t1;
4950
4951 if (post_shift)
4952 {
4953 /* t3 = t2 >> post_shift; */
4954 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4955 t3 = vect_recog_temp_ssa_var (type: itype, NULL);
4956 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
4957 build_int_cst (itype, post_shift));
4958 }
4959 else
4960 t3 = t2;
4961
4962 int msb = 1;
4963 value_range r;
4964 get_range_query (cfun)->range_of_expr (r, expr: oprnd0);
4965 if (!r.varying_p () && !r.undefined_p ())
4966 {
4967 if (!wi::neg_p (x: r.lower_bound (), TYPE_SIGN (itype)))
4968 msb = 0;
4969 else if (wi::neg_p (x: r.upper_bound (), TYPE_SIGN (itype)))
4970 msb = -1;
4971 }
4972
4973 if (msb == 0 && d >= 0)
4974 {
4975 /* q = t3; */
4976 q = t3;
4977 pattern_stmt = def_stmt;
4978 }
4979 else
4980 {
4981 /* t4 = oprnd0 >> (prec - 1);
4982 or if we know from VRP that oprnd0 >= 0
4983 t4 = 0;
4984 or if we know from VRP that oprnd0 < 0
4985 t4 = -1; */
4986 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4987 t4 = vect_recog_temp_ssa_var (type: itype, NULL);
4988 if (msb != 1)
4989 def_stmt = gimple_build_assign (t4, INTEGER_CST,
4990 build_int_cst (itype, msb));
4991 else
4992 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
4993 build_int_cst (itype, prec - 1));
4994 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
4995
4996 /* q = t3 - t4; or q = t4 - t3; */
4997 q = vect_recog_temp_ssa_var (type: itype, NULL);
4998 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
4999 d < 0 ? t3 : t4);
5000 }
5001 }
5002
5003 if (rhs_code == TRUNC_MOD_EXPR)
5004 {
5005 tree r, t1;
5006
5007 /* We divided. Now finish by:
5008 t1 = q * oprnd1;
5009 r = oprnd0 - t1; */
5010 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt);
5011
5012 t1 = vect_recog_temp_ssa_var (type: itype, NULL);
5013 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
5014 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt);
5015
5016 r = vect_recog_temp_ssa_var (type: itype, NULL);
5017 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
5018 }
5019
5020 /* Pattern detected. */
5021 vect_pattern_detected (name: "vect_recog_divmod_pattern", stmt: last_stmt);
5022
5023 *type_out = vectype;
5024 return pattern_stmt;
5025}
5026
5027/* Function vect_recog_mixed_size_cond_pattern
5028
5029 Try to find the following pattern:
5030
5031 type x_t, y_t;
5032 TYPE a_T, b_T, c_T;
5033 loop:
5034 S1 a_T = x_t CMP y_t ? b_T : c_T;
5035
5036 where type 'TYPE' is an integral type which has different size
5037 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
5038 than 'type', the constants need to fit into an integer type
5039 with the same width as 'type') or results of conversion from 'type'.
5040
5041 Input:
5042
5043 * STMT_VINFO: The stmt from which the pattern search begins.
5044
5045 Output:
5046
5047 * TYPE_OUT: The type of the output of this pattern.
5048
5049 * Return value: A new stmt that will be used to replace the pattern.
5050 Additionally a def_stmt is added.
5051
5052 a_it = x_t CMP y_t ? b_it : c_it;
5053 a_T = (TYPE) a_it; */
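/* A concrete instance (illustrative, not part of the original comment):

     short x_t, y_t;
     int a_T;
     ...
     a_T = x_t < y_t ? 5 : 12;

   becomes

     a_it = x_t < y_t ? 5 : 12;   (computed in 16-bit elements)
     a_T = (int) a_it;

   so the COND_EXPR operates on elements of the same width as the
   comparison, avoiding packing/unpacking of the comparison mask.  */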
5054
5055static gimple *
5056vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
5057 stmt_vec_info stmt_vinfo, tree *type_out)
5058{
5059 gimple *last_stmt = stmt_vinfo->stmt;
5060 tree cond_expr, then_clause, else_clause;
5061 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
5062 gimple *pattern_stmt, *def_stmt;
5063 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
5064 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
5065 bool promotion;
5066 tree comp_scalar_type;
5067
5068 if (!is_gimple_assign (gs: last_stmt)
5069 || gimple_assign_rhs_code (gs: last_stmt) != COND_EXPR
5070 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
5071 return NULL;
5072
5073 cond_expr = gimple_assign_rhs1 (gs: last_stmt);
5074 then_clause = gimple_assign_rhs2 (gs: last_stmt);
5075 else_clause = gimple_assign_rhs3 (gs: last_stmt);
5076
5077 if (!COMPARISON_CLASS_P (cond_expr))
5078 return NULL;
5079
5080 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
5081 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
5082 if (comp_vectype == NULL_TREE)
5083 return NULL;
5084
5085 type = TREE_TYPE (gimple_assign_lhs (last_stmt));
5086 if (types_compatible_p (type1: type, type2: comp_scalar_type)
5087 || ((TREE_CODE (then_clause) != INTEGER_CST
5088 || TREE_CODE (else_clause) != INTEGER_CST)
5089 && !INTEGRAL_TYPE_P (comp_scalar_type))
5090 || !INTEGRAL_TYPE_P (type))
5091 return NULL;
5092
5093 if ((TREE_CODE (then_clause) != INTEGER_CST
5094 && !type_conversion_p (vinfo, name: then_clause, check_sign: false,
5095 orig_type: &orig_type0, def_stmt: &def_stmt0, promotion: &promotion))
5096 || (TREE_CODE (else_clause) != INTEGER_CST
5097 && !type_conversion_p (vinfo, name: else_clause, check_sign: false,
5098 orig_type: &orig_type1, def_stmt: &def_stmt1, promotion: &promotion)))
5099 return NULL;
5100
5101 if (orig_type0 && orig_type1
5102 && !types_compatible_p (type1: orig_type0, type2: orig_type1))
5103 return NULL;
5104
5105 if (orig_type0)
5106 {
5107 if (!types_compatible_p (type1: orig_type0, type2: comp_scalar_type))
5108 return NULL;
5109 then_clause = gimple_assign_rhs1 (gs: def_stmt0);
5110 itype = orig_type0;
5111 }
5112
5113 if (orig_type1)
5114 {
5115 if (!types_compatible_p (type1: orig_type1, type2: comp_scalar_type))
5116 return NULL;
5117 else_clause = gimple_assign_rhs1 (gs: def_stmt1);
5118 itype = orig_type1;
5119 }
5120
5121
5122 HOST_WIDE_INT cmp_mode_size
5123 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
5124
5125 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
5126 if (GET_MODE_BITSIZE (mode: type_mode) == cmp_mode_size)
5127 return NULL;
5128
5129 vectype = get_vectype_for_scalar_type (vinfo, type);
5130 if (vectype == NULL_TREE)
5131 return NULL;
5132
5133 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
5134 return NULL;
5135
5136 if (itype == NULL_TREE)
5137 itype = build_nonstandard_integer_type (cmp_mode_size,
5138 TYPE_UNSIGNED (type));
5139
5140 if (itype == NULL_TREE
5141 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
5142 return NULL;
5143
5144 vecitype = get_vectype_for_scalar_type (vinfo, itype);
5145 if (vecitype == NULL_TREE)
5146 return NULL;
5147
5148 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
5149 return NULL;
5150
5151 if (GET_MODE_BITSIZE (mode: type_mode) > cmp_mode_size)
5152 {
5153 if ((TREE_CODE (then_clause) == INTEGER_CST
5154 && !int_fits_type_p (then_clause, itype))
5155 || (TREE_CODE (else_clause) == INTEGER_CST
5156 && !int_fits_type_p (else_clause, itype)))
5157 return NULL;
5158 }
5159
5160 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type: itype, NULL),
5161 COND_EXPR, unshare_expr (cond_expr),
5162 fold_convert (itype, then_clause),
5163 fold_convert (itype, else_clause));
5164 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
5165 NOP_EXPR, gimple_assign_lhs (gs: def_stmt));
5166
5167 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: def_stmt, vectype: vecitype);
5168 *type_out = vectype;
5169
5170 vect_pattern_detected (name: "vect_recog_mixed_size_cond_pattern", stmt: last_stmt);
5171
5172 return pattern_stmt;
5173}
5174
5175
5176/* Helper function of vect_recog_bool_pattern. Called recursively; return
5177 true if bool VAR can and should be optimized that way. Assume it shouldn't
5178 in case it's a result of a comparison which can be directly vectorized into
5179 a vector comparison. Fills in STMTS with all stmts visited during the
5180 walk. */
5181
5182static bool
5183check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
5184{
5185 tree rhs1;
5186 enum tree_code rhs_code;
5187
5188 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, op: var);
5189 if (!def_stmt_info)
5190 return false;
5191
5192 gassign *def_stmt = dyn_cast <gassign *> (p: def_stmt_info->stmt);
5193 if (!def_stmt)
5194 return false;
5195
5196 if (stmts.contains (k: def_stmt))
5197 return true;
5198
5199 rhs1 = gimple_assign_rhs1 (gs: def_stmt);
5200 rhs_code = gimple_assign_rhs_code (gs: def_stmt);
5201 switch (rhs_code)
5202 {
5203 case SSA_NAME:
5204 if (! check_bool_pattern (var: rhs1, vinfo, stmts))
5205 return false;
5206 break;
5207
5208 CASE_CONVERT:
5209 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
5210 return false;
5211 if (! check_bool_pattern (var: rhs1, vinfo, stmts))
5212 return false;
5213 break;
5214
5215 case BIT_NOT_EXPR:
5216 if (! check_bool_pattern (var: rhs1, vinfo, stmts))
5217 return false;
5218 break;
5219
5220 case BIT_AND_EXPR:
5221 case BIT_IOR_EXPR:
5222 case BIT_XOR_EXPR:
5223 if (! check_bool_pattern (var: rhs1, vinfo, stmts)
5224 || ! check_bool_pattern (var: gimple_assign_rhs2 (gs: def_stmt), vinfo, stmts))
5225 return false;
5226 break;
5227
5228 default:
5229 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5230 {
5231 tree vecitype, comp_vectype;
5232
5233 /* If the comparison can throw, then is_gimple_condexpr will be
5234 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
5235 if (stmt_could_throw_p (cfun, def_stmt))
5236 return false;
5237
5238 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
5239 if (comp_vectype == NULL_TREE)
5240 return false;
5241
5242 tree mask_type = get_mask_type_for_scalar_type (vinfo,
5243 TREE_TYPE (rhs1));
5244 if (mask_type
5245 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
5246 return false;
5247
5248 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
5249 {
5250 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
5251 tree itype
5252 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
5253 vecitype = get_vectype_for_scalar_type (vinfo, itype);
5254 if (vecitype == NULL_TREE)
5255 return false;
5256 }
5257 else
5258 vecitype = comp_vectype;
5259 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
5260 return false;
5261 }
5262 else
5263 return false;
5264 break;
5265 }
5266
5267 bool res = stmts.add (k: def_stmt);
5268 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
5269 gcc_assert (!res);
5270
5271 return true;
5272}
5273
5274
5275/* Helper function of adjust_bool_pattern. Cast VAR, the result of a
5276 previous stmt (SSA_NAME_DEF_STMT of VAR), to TYPE, adding the cast
5277 stmt to STMT_INFO's pattern sequence. */
5278
5279static tree
5280adjust_bool_pattern_cast (vec_info *vinfo,
5281 tree type, tree var, stmt_vec_info stmt_info)
5282{
5283 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
5284 NOP_EXPR, var);
5285 append_pattern_def_seq (vinfo, stmt_info, new_stmt: cast_stmt,
5286 vectype: get_vectype_for_scalar_type (vinfo, type));
5287 return gimple_assign_lhs (gs: cast_stmt);
5288}
5289
5290/* Helper function of vect_recog_bool_pattern. Do the actual transformations.
5291 VAR is an SSA_NAME that should be transformed from bool to a wider integer
5292 type, OUT_TYPE is the desired final integer type of the whole pattern.
5293 STMT_INFO is the info of the pattern root, with which the pattern stmts
5294 should be associated. DEFS is a map of pattern defs. */
5295
5296static void
5297adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
5298 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
5299{
5300 gimple *stmt = SSA_NAME_DEF_STMT (var);
5301 enum tree_code rhs_code, def_rhs_code;
5302 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
5303 location_t loc;
5304 gimple *pattern_stmt, *def_stmt;
5305 tree trueval = NULL_TREE;
5306
5307 rhs1 = gimple_assign_rhs1 (gs: stmt);
5308 rhs2 = gimple_assign_rhs2 (gs: stmt);
5309 rhs_code = gimple_assign_rhs_code (gs: stmt);
5310 loc = gimple_location (g: stmt);
5311 switch (rhs_code)
5312 {
5313 case SSA_NAME:
5314 CASE_CONVERT:
5315 irhs1 = *defs.get (k: rhs1);
5316 itype = TREE_TYPE (irhs1);
5317 pattern_stmt
5318 = gimple_build_assign (vect_recog_temp_ssa_var (type: itype, NULL),
5319 SSA_NAME, irhs1);
5320 break;
5321
5322 case BIT_NOT_EXPR:
5323 irhs1 = *defs.get (k: rhs1);
5324 itype = TREE_TYPE (irhs1);
5325 pattern_stmt
5326 = gimple_build_assign (vect_recog_temp_ssa_var (type: itype, NULL),
5327 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
5328 break;
5329
5330 case BIT_AND_EXPR:
5331 /* Try to optimize x = y & (a < b ? 1 : 0); into
5332 x = (a < b ? y : 0);
5333
5334 E.g. for:
5335 bool a_b, b_b, c_b;
5336 TYPE d_T;
5337
5338 S1 a_b = x1 CMP1 y1;
5339 S2 b_b = x2 CMP2 y2;
5340 S3 c_b = a_b & b_b;
5341 S4 d_T = (TYPE) c_b;
5342
5343 we would normally emit:
5344
5345 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5346 S2' b_T = x2 CMP2 y2 ? 1 : 0;
5347 S3' c_T = a_T & b_T;
5348 S4' d_T = c_T;
5349
5350 but we can save one stmt by using the
5351 result of one of the COND_EXPRs in the other COND_EXPR and leave
5352 BIT_AND_EXPR stmt out:
5353
5354 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5355 S3' c_T = x2 CMP2 y2 ? a_T : 0;
5356 S4' d_T = c_T;
5357
5358 At least when VEC_COND_EXPR is implemented using masks,
5359 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
5360 computes the comparison mask and ANDs it, in one case with an
5361 all-ones vector, in the other case with a vector register.
5362 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
5363 often more expensive. */
5364 def_stmt = SSA_NAME_DEF_STMT (rhs2);
5365 def_rhs_code = gimple_assign_rhs_code (gs: def_stmt);
5366 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
5367 {
5368 irhs1 = *defs.get (k: rhs1);
5369 tree def_rhs1 = gimple_assign_rhs1 (gs: def_stmt);
5370 if (TYPE_PRECISION (TREE_TYPE (irhs1))
5371 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
5372 {
5373 rhs_code = def_rhs_code;
5374 rhs1 = def_rhs1;
5375 rhs2 = gimple_assign_rhs2 (gs: def_stmt);
5376 trueval = irhs1;
5377 goto do_compare;
5378 }
5379 else
5380 irhs2 = *defs.get (k: rhs2);
5381 goto and_ior_xor;
5382 }
5383 def_stmt = SSA_NAME_DEF_STMT (rhs1);
5384 def_rhs_code = gimple_assign_rhs_code (gs: def_stmt);
5385 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
5386 {
5387 irhs2 = *defs.get (k: rhs2);
5388 tree def_rhs1 = gimple_assign_rhs1 (gs: def_stmt);
5389 if (TYPE_PRECISION (TREE_TYPE (irhs2))
5390 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
5391 {
5392 rhs_code = def_rhs_code;
5393 rhs1 = def_rhs1;
5394 rhs2 = gimple_assign_rhs2 (gs: def_stmt);
5395 trueval = irhs2;
5396 goto do_compare;
5397 }
5398 else
5399 irhs1 = *defs.get (k: rhs1);
5400 goto and_ior_xor;
5401 }
5402 /* FALLTHRU */
5403 case BIT_IOR_EXPR:
5404 case BIT_XOR_EXPR:
5405 irhs1 = *defs.get (k: rhs1);
5406 irhs2 = *defs.get (k: rhs2);
5407 and_ior_xor:
5408 if (TYPE_PRECISION (TREE_TYPE (irhs1))
5409 != TYPE_PRECISION (TREE_TYPE (irhs2)))
5410 {
5411 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
5412 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
5413 int out_prec = TYPE_PRECISION (out_type);
5414 if (absu_hwi (x: out_prec - prec1) < absu_hwi (x: out_prec - prec2))
5415 irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), var: irhs2,
5416 stmt_info);
5417 else if (absu_hwi (x: out_prec - prec1) > absu_hwi (x: out_prec - prec2))
5418 irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), var: irhs1,
5419 stmt_info);
5420 else
5421 {
5422 irhs1 = adjust_bool_pattern_cast (vinfo,
5423 type: out_type, var: irhs1, stmt_info);
5424 irhs2 = adjust_bool_pattern_cast (vinfo,
5425 type: out_type, var: irhs2, stmt_info);
5426 }
5427 }
5428 itype = TREE_TYPE (irhs1);
5429 pattern_stmt
5430 = gimple_build_assign (vect_recog_temp_ssa_var (type: itype, NULL),
5431 rhs_code, irhs1, irhs2);
5432 break;
5433
5434 default:
5435 do_compare:
5436 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
5437 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
5438 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
5439 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
5440 b: GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
5441 {
5442 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
5443 itype
5444 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
5445 }
5446 else
5447 itype = TREE_TYPE (rhs1);
5448 cond_expr = build2_loc (loc, code: rhs_code, type: itype, arg0: rhs1, arg1: rhs2);
5449 if (trueval == NULL_TREE)
5450 trueval = build_int_cst (itype, 1);
5451 else
5452 gcc_checking_assert (useless_type_conversion_p (itype,
5453 TREE_TYPE (trueval)));
5454 pattern_stmt
5455 = gimple_build_assign (vect_recog_temp_ssa_var (type: itype, NULL),
5456 COND_EXPR, cond_expr, trueval,
5457 build_int_cst (itype, 0));
5458 break;
5459 }
5460
5461 gimple_set_location (g: pattern_stmt, location: loc);
5462 append_pattern_def_seq (vinfo, stmt_info, new_stmt: pattern_stmt,
5463 vectype: get_vectype_for_scalar_type (vinfo, itype));
5464 defs.put (k: var, v: gimple_assign_lhs (gs: pattern_stmt));
5465}
5466
5467/* Comparison function to qsort a vector of gimple stmts by UID. */
5468
5469static int
5470sort_after_uid (const void *p1, const void *p2)
5471{
5472 const gimple *stmt1 = *(const gimple * const *)p1;
5473 const gimple *stmt2 = *(const gimple * const *)p2;
5474 return gimple_uid (g: stmt1) - gimple_uid (g: stmt2);
5475}
5476
5477/* Create pattern stmts for all stmts participating in the bool pattern
5478 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
5479 OUT_TYPE. Return the def of the pattern root. */
5480
5481static tree
5482adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
5483 tree out_type, stmt_vec_info stmt_info)
5484{
5485 /* Gather original stmts in the bool pattern in their order of appearance
5486 in the IL. */
5487 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
5488 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
5489 i != bool_stmt_set.end (); ++i)
5490 bool_stmts.quick_push (obj: *i);
5491 bool_stmts.qsort (sort_after_uid);
5492
5493 /* Now process them in that order, producing pattern stmts. */
5494 hash_map <tree, tree> defs;
5495 for (unsigned i = 0; i < bool_stmts.length (); ++i)
5496 adjust_bool_pattern (vinfo, var: gimple_assign_lhs (gs: bool_stmts[i]),
5497 out_type, stmt_info, defs);
5498
5499 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
5500 gimple *pattern_stmt
5501 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5502 return gimple_assign_lhs (gs: pattern_stmt);
5503}
5504
5505/* Return the proper type for converting bool VAR into
5506 an integer value or NULL_TREE if no such type exists.
5507 The type is chosen so that the converted value has the
5508 same number of elements as VAR's vector type. */
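/* For example (illustrative): a mask produced by comparing two 'int'
   vectors has mask precision 32, so the function returns a 32-bit
   unsigned integer type, and the integer form of the mask has the same
   number of lanes as the comparison itself.  */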
5509
5510static tree
5511integer_type_for_mask (tree var, vec_info *vinfo)
5512{
5513 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
5514 return NULL_TREE;
5515
5516 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, op: var);
5517 if (!def_stmt_info || !vect_use_mask_type_p (stmt_info: def_stmt_info))
5518 return NULL_TREE;
5519
5520 return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
5521}
5522
5523/* Function vect_recog_gcond_pattern
5524
5525 Try to find a pattern like the following:
5526
5527 if (a op b)
5528
5529 where operator 'op' is not !=, and convert it to an adjusted boolean pattern
5530
5531 mask = a op b
5532 if (mask != 0)
5533
5534 and set the mask type on MASK.
5535
5536 Input:
5537
5538 * STMT_VINFO: The stmt from which the pattern
5539 search begins, i.e. the exit condition
5540 statement (a gcond).
5541
5542 Output:
5543
5544 * TYPE_OUT: The type of the output of this pattern.
5545
5546 * Return value: A new stmt that will be used to replace the pattern. */
5547
5548static gimple *
5549vect_recog_gcond_pattern (vec_info *vinfo,
5550 stmt_vec_info stmt_vinfo, tree *type_out)
5551{
5552 /* Currently we only support this for loop vectorization and only when the
5553 loop has multiple exits. */
5554 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
5555 if (!loop_vinfo || !LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
5556 return NULL;
5557
5558 gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
5559 gcond* cond = NULL;
5560 if (!(cond = dyn_cast <gcond *> (p: last_stmt)))
5561 return NULL;
5562
5563 auto lhs = gimple_cond_lhs (gs: cond);
5564 auto rhs = gimple_cond_rhs (gs: cond);
5565 auto code = gimple_cond_code (gs: cond);
5566
5567 tree scalar_type = TREE_TYPE (lhs);
5568 if (VECTOR_TYPE_P (scalar_type))
5569 return NULL;
5570
5571 if (code == NE_EXPR
5572 && zerop (rhs)
5573 && VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
5574 return NULL;
5575
5576 tree vecitype = get_vectype_for_scalar_type (vinfo, scalar_type);
5577 if (vecitype == NULL_TREE)
5578 return NULL;
5579
5580 tree vectype = truth_type_for (vecitype);
5581
5582 tree new_lhs = vect_recog_temp_ssa_var (boolean_type_node, NULL);
5583 gimple *new_stmt = gimple_build_assign (new_lhs, code, lhs, rhs);
5584 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt, vectype, scalar_type_for_mask: scalar_type);
5585
5586 gimple *pattern_stmt
5587 = gimple_build_cond (NE_EXPR, new_lhs,
5588 build_int_cst (TREE_TYPE (new_lhs), 0),
5589 NULL_TREE, NULL_TREE);
5590 *type_out = vectype;
5591 vect_pattern_detected (name: "vect_recog_gcond_pattern", stmt: last_stmt);
5592 return pattern_stmt;
5593}
5594
5595/* Function vect_recog_bool_pattern
5596
5597 Try to find pattern like following:
5598
5599 bool a_b, b_b, c_b, d_b, e_b;
5600 TYPE f_T;
5601 loop:
5602 S1 a_b = x1 CMP1 y1;
5603 S2 b_b = x2 CMP2 y2;
5604 S3 c_b = a_b & b_b;
5605 S4 d_b = x3 CMP3 y3;
5606 S5 e_b = c_b | d_b;
5607 S6 f_T = (TYPE) e_b;
5608
5609 where type 'TYPE' is an integral type. Or a similar pattern
5610 ending in
5611
5612 S6 f_Y = e_b ? r_Y : s_Y;
5613
5614 as results from if-conversion of a complex condition.
5615
5616 Input:
5617
5618 * STMT_VINFO: The stmt at the end from which the pattern
5619 search begins, i.e. cast of a bool to
5620 an integer type.
5621
5622 Output:
5623
5624 * TYPE_OUT: The type of the output of this pattern.
5625
5626 * Return value: A new stmt that will be used to replace the pattern.
5627
5628 Assuming the size of TYPE is the same as the size of all comparisons
5629 (otherwise some casts would be added where needed), for the above
5630 sequence we create the related pattern stmts:
5631 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5632 S3' c_T = x2 CMP2 y2 ? a_T : 0;
5633 S4' d_T = x3 CMP3 y3 ? 1 : 0;
5634 S5' e_T = c_T | d_T;
5635 S6' f_T = e_T;
5636
5637 Instead of the above S3' we could emit:
5638 S2' b_T = x2 CMP2 y2 ? 1 : 0;
5639 S3' c_T = a_T | b_T;
5640 but the above is more efficient. */
5641
5642static gimple *
5643vect_recog_bool_pattern (vec_info *vinfo,
5644 stmt_vec_info stmt_vinfo, tree *type_out)
5645{
5646 gimple *last_stmt = stmt_vinfo->stmt;
5647 enum tree_code rhs_code;
5648 tree var, lhs, rhs, vectype;
5649 gimple *pattern_stmt;
5650
5651 if (!is_gimple_assign (gs: last_stmt))
5652 return NULL;
5653
5654 var = gimple_assign_rhs1 (gs: last_stmt);
5655 lhs = gimple_assign_lhs (gs: last_stmt);
5656 rhs_code = gimple_assign_rhs_code (gs: last_stmt);
5657
5658 if (rhs_code == VIEW_CONVERT_EXPR)
5659 var = TREE_OPERAND (var, 0);
5660
5661 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
5662 return NULL;
5663
5664 hash_set<gimple *> bool_stmts;
5665
5666 if (CONVERT_EXPR_CODE_P (rhs_code)
5667 || rhs_code == VIEW_CONVERT_EXPR)
5668 {
5669 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
5670 || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5671 return NULL;
5672 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5673
5674 if (check_bool_pattern (var, vinfo, stmts&: bool_stmts))
5675 {
5676 rhs = adjust_bool_stmts (vinfo, bool_stmt_set&: bool_stmts,
5677 TREE_TYPE (lhs), stmt_info: stmt_vinfo);
5678 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5679 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
5680 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
5681 else
5682 pattern_stmt
5683 = gimple_build_assign (lhs, NOP_EXPR, rhs);
5684 }
5685 else
5686 {
5687 tree type = integer_type_for_mask (var, vinfo);
5688 tree cst0, cst1, tmp;
5689
5690 if (!type)
5691 return NULL;
5692
5693 /* We may use the cond directly with the narrowed type, avoiding
5694 multiple cond exprs followed by result packing, and instead
5695 perform a single cond with a packed mask. In the widening
5696 case it is better to do the cond first and then extract the
5697 results. */
5698 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
5699 type = TREE_TYPE (lhs);
5700
5701 cst0 = build_int_cst (type, 0);
5702 cst1 = build_int_cst (type, 1);
5703 tmp = vect_recog_temp_ssa_var (type, NULL);
5704 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
5705
5706 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
5707 {
5708 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
5709 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo,
5710 new_stmt: pattern_stmt, vectype: new_vectype);
5711
5712 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5713 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
5714 }
5715 }
5716
5717 *type_out = vectype;
5718 vect_pattern_detected (name: "vect_recog_bool_pattern", stmt: last_stmt);
5719
5720 return pattern_stmt;
5721 }
5722 else if (rhs_code == COND_EXPR
5723 && TREE_CODE (var) == SSA_NAME)
5724 {
5725 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5726 if (vectype == NULL_TREE)
5727 return NULL;
5728
5729 /* Build a scalar type for the boolean result that when
5730 vectorized matches the vector type of the result in
5731 size and number of elements. */
5732 unsigned prec
5733 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
5734 TYPE_VECTOR_SUBPARTS (vectype));
5735
5736 tree type
5737 = build_nonstandard_integer_type (prec,
5738 TYPE_UNSIGNED (TREE_TYPE (var)));
5739 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
5740 return NULL;
5741
5742 if (check_bool_pattern (var, vinfo, stmts&: bool_stmts))
5743 var = adjust_bool_stmts (vinfo, bool_stmt_set&: bool_stmts, out_type: type, stmt_info: stmt_vinfo);
5744 else if (integer_type_for_mask (var, vinfo))
5745 return NULL;
5746
5747 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5748 pattern_stmt
5749 = gimple_build_assign (lhs, COND_EXPR,
5750 build2 (NE_EXPR, boolean_type_node,
5751 var, build_int_cst (TREE_TYPE (var), 0)),
5752 gimple_assign_rhs2 (gs: last_stmt),
5753 gimple_assign_rhs3 (gs: last_stmt));
5754 *type_out = vectype;
5755 vect_pattern_detected (name: "vect_recog_bool_pattern", stmt: last_stmt);
5756
5757 return pattern_stmt;
5758 }
5759 else if (rhs_code == SSA_NAME
5760 && STMT_VINFO_DATA_REF (stmt_vinfo))
5761 {
5762 stmt_vec_info pattern_stmt_info;
5763 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5764 if (!vectype || !VECTOR_MODE_P (TYPE_MODE (vectype)))
5765 return NULL;
5766
5767 if (check_bool_pattern (var, vinfo, stmts&: bool_stmts))
5768 rhs = adjust_bool_stmts (vinfo, bool_stmt_set&: bool_stmts,
5769 TREE_TYPE (vectype), stmt_info: stmt_vinfo);
5770 else
5771 {
5772 tree type = integer_type_for_mask (var, vinfo);
5773 tree cst0, cst1, new_vectype;
5774
5775 if (!type)
5776 return NULL;
5777
5778 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
5779 type = TREE_TYPE (vectype);
5780
5781 cst0 = build_int_cst (type, 0);
5782 cst1 = build_int_cst (type, 1);
5783 new_vectype = get_vectype_for_scalar_type (vinfo, type);
5784
5785 rhs = vect_recog_temp_ssa_var (type, NULL);
5786 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
5787 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt, vectype: new_vectype);
5788 }
5789
5790 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
5791 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
5792 {
5793 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5794 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
5795 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: cast_stmt);
5796 rhs = rhs2;
5797 }
5798 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
5799 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
5800 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
5801 *type_out = vectype;
5802 vect_pattern_detected (name: "vect_recog_bool_pattern", stmt: last_stmt);
5803
5804 return pattern_stmt;
5805 }
5806 else
5807 return NULL;
5808}
5809
5810
5811/* A helper for vect_recog_mask_conversion_pattern. Build a
5812 conversion of MASK to a type suitable for masking VECTYPE.
5813 The built statement gets the required vectype and is appended
5814 to STMT_VINFO's pattern sequence.
5815
5816 Return the converted mask. */
5817
5818static tree
5819build_mask_conversion (vec_info *vinfo,
5820 tree mask, tree vectype, stmt_vec_info stmt_vinfo)
5821{
5822 gimple *stmt;
5823 tree masktype, tmp;
5824
5825 masktype = truth_type_for (vectype);
5826 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
5827 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
5828 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo,
5829 new_stmt: stmt, vectype: masktype, TREE_TYPE (vectype));
5830
5831 return tmp;
5832}
5833
5834
5835/* Function vect_recog_mask_conversion_pattern
5836
5837 Try to find statements which require boolean type
5838 conversion. Additional conversion statements are
5839 added to handle such cases. For example:
5840
5841 bool m_1, m_2, m_3;
5842 int i_4, i_5;
5843 double d_6, d_7;
5844 char c_1, c_2, c_3;
5845
5846 S1 m_1 = i_4 > i_5;
5847 S2 m_2 = d_6 < d_7;
5848 S3 m_3 = m_1 & m_2;
5849 S4 c_1 = m_3 ? c_2 : c_3;
5850
5851 Will be transformed into:
5852
5853 S1 m_1 = i_4 > i_5;
5854 S2 m_2 = d_6 < d_7;
5855 S3'' m_2' = (_Bool[bitsize=32])m_2
5856 S3' m_3' = m_1 & m_2';
5857 S4'' m_3'' = (_Bool[bitsize=8])m_3'
5858 S4' c_1' = m_3'' ? c_2 : c_3; */
5859
5860static gimple *
5861vect_recog_mask_conversion_pattern (vec_info *vinfo,
5862 stmt_vec_info stmt_vinfo, tree *type_out)
5863{
5864 gimple *last_stmt = stmt_vinfo->stmt;
5865 enum tree_code rhs_code;
5866 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
5867 tree vectype1, vectype2;
5868 stmt_vec_info pattern_stmt_info;
5869 tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
5870 tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
5871
5872 /* Check for MASK_LOAD and MASK_STORE as well as COND_OP calls requiring mask
5873 conversion. */
5874 if (is_gimple_call (gs: last_stmt)
5875 && gimple_call_internal_p (gs: last_stmt))
5876 {
5877 gcall *pattern_stmt;
5878
5879 internal_fn ifn = gimple_call_internal_fn (gs: last_stmt);
5880 int mask_argno = internal_fn_mask_index (ifn);
5881 if (mask_argno < 0)
5882 return NULL;
5883
5884 bool store_p = internal_store_fn_p (ifn);
5885 bool load_p = internal_load_fn_p (ifn);
5886 if (store_p)
5887 {
5888 int rhs_index = internal_fn_stored_value_index (ifn);
5889 tree rhs = gimple_call_arg (gs: last_stmt, index: rhs_index);
5890 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
5891 }
5892 else
5893 {
5894 lhs = gimple_call_lhs (gs: last_stmt);
5895 if (!lhs)
5896 return NULL;
5897 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5898 }
5899
5900 if (!vectype1)
5901 return NULL;
5902
5903 tree mask_arg = gimple_call_arg (gs: last_stmt, index: mask_argno);
5904 tree mask_arg_type = integer_type_for_mask (var: mask_arg, vinfo);
5905 if (mask_arg_type)
5906 {
5907 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
5908
5909 if (!vectype2
5910 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
5911 TYPE_VECTOR_SUBPARTS (vectype2)))
5912 return NULL;
5913 }
5914 else if (store_p || load_p)
5915 return NULL;
5916
5917 tmp = build_mask_conversion (vinfo, mask: mask_arg, vectype: vectype1, stmt_vinfo);
5918
5919 auto_vec<tree, 8> args;
5920 unsigned int nargs = gimple_call_num_args (gs: last_stmt);
5921 args.safe_grow (len: nargs, exact: true);
5922 for (unsigned int i = 0; i < nargs; ++i)
5923 args[i] = ((int) i == mask_argno
5924 ? tmp
5925 : gimple_call_arg (gs: last_stmt, index: i));
5926 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
5927
5928 if (!store_p)
5929 {
5930 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
5931 gimple_call_set_lhs (gs: pattern_stmt, lhs);
5932 }
5933
5934 if (load_p || store_p)
5935 gimple_call_set_nothrow (s: pattern_stmt, nothrow_p: true);
5936
5937 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
5938 if (STMT_VINFO_DATA_REF (stmt_vinfo))
5939 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
5940
5941 *type_out = vectype1;
5942 vect_pattern_detected (name: "vect_recog_mask_conversion_pattern", stmt: last_stmt);
5943
5944 return pattern_stmt;
5945 }
5946
5947 if (!is_gimple_assign (gs: last_stmt))
5948 return NULL;
5949
5950 gimple *pattern_stmt;
5951 lhs = gimple_assign_lhs (gs: last_stmt);
5952 rhs1 = gimple_assign_rhs1 (gs: last_stmt);
5953 rhs_code = gimple_assign_rhs_code (gs: last_stmt);
5954
5955 /* Check for cond expression requiring mask conversion. */
5956 if (rhs_code == COND_EXPR)
5957 {
5958 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
5959
5960 if (TREE_CODE (rhs1) == SSA_NAME)
5961 {
5962 rhs1_type = integer_type_for_mask (var: rhs1, vinfo);
5963 if (!rhs1_type)
5964 return NULL;
5965 }
5966 else if (COMPARISON_CLASS_P (rhs1))
5967 {
5968 /* Check whether we're comparing scalar booleans and (if so)
5969 whether a better mask type exists than the mask associated
5970 with boolean-sized elements. This avoids unnecessary packs
5971 and unpacks if the booleans are set from comparisons of
5972 wider types. E.g. in:
5973
5974 int x1, x2, x3, x4, y1, y2;
5975 ...
5976 bool b1 = (x1 == x2);
5977 bool b2 = (x3 == x4);
5978 ... = b1 == b2 ? y1 : y2;
5979
5980 it is better for b1 and b2 to use the mask type associated
5981 with int elements rather than bool (byte) elements. */
5982 rhs1_op0 = TREE_OPERAND (rhs1, 0);
5983 rhs1_op1 = TREE_OPERAND (rhs1, 1);
5984 if (!rhs1_op0 || !rhs1_op1)
5985 return NULL;
5986 rhs1_op0_type = integer_type_for_mask (var: rhs1_op0, vinfo);
5987 rhs1_op1_type = integer_type_for_mask (var: rhs1_op1, vinfo);
5988
5989 if (!rhs1_op0_type)
5990 rhs1_type = TREE_TYPE (rhs1_op0);
5991 else if (!rhs1_op1_type)
5992 rhs1_type = TREE_TYPE (rhs1_op1);
5993 else if (TYPE_PRECISION (rhs1_op0_type)
5994 != TYPE_PRECISION (rhs1_op1_type))
5995 {
5996 int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
5997 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
5998 int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
5999 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
6000 if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
6001 {
6002 if (abs (x: tmp0) > abs (x: tmp1))
6003 rhs1_type = rhs1_op1_type;
6004 else
6005 rhs1_type = rhs1_op0_type;
6006 }
6007 else
6008 rhs1_type = build_nonstandard_integer_type
6009 (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
6010 }
6011 else
6012 rhs1_type = rhs1_op0_type;
6013 }
6014 else
6015 return NULL;
6016
6017 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
6018
6019 if (!vectype1 || !vectype2)
6020 return NULL;
6021
6022 /* Continue if a conversion is needed. Also continue if we have
6023 a comparison whose vector type would normally be different from
6024 VECTYPE2 when considered in isolation. In that case we'll
6025 replace the comparison with an SSA name (so that we can record
6026 its vector type) and behave as though the comparison was an SSA
6027 name from the outset. */
6028 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
6029 TYPE_VECTOR_SUBPARTS (vectype2))
6030 && !rhs1_op0_type
6031 && !rhs1_op1_type)
6032 return NULL;
6033
6034 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
6035 in place; we can handle it in vectorizable_condition. This avoids
6036 unnecessary promotion stmts and an increased vectorization factor. */
6037 if (COMPARISON_CLASS_P (rhs1)
6038 && INTEGRAL_TYPE_P (rhs1_type)
6039 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
6040 TYPE_VECTOR_SUBPARTS (vectype2)))
6041 {
6042 enum vect_def_type dt;
6043 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
6044 && dt == vect_external_def
6045 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
6046 && (dt == vect_external_def
6047 || dt == vect_constant_def))
6048 {
6049 tree wide_scalar_type = build_nonstandard_integer_type
6050 (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
6051 tree vectype3 = get_vectype_for_scalar_type (vinfo,
6052 wide_scalar_type);
6053 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
6054 return NULL;
6055 }
6056 }
6057
6058 /* If rhs1 is a comparison we need to move it into a
6059 separate statement. */
6060 if (TREE_CODE (rhs1) != SSA_NAME)
6061 {
6062 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
6063 if (rhs1_op0_type
6064 && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
6065 rhs1_op0 = build_mask_conversion (vinfo, mask: rhs1_op0,
6066 vectype: vectype2, stmt_vinfo);
6067 if (rhs1_op1_type
6068 && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
6069 rhs1_op1 = build_mask_conversion (vinfo, mask: rhs1_op1,
6070 vectype: vectype2, stmt_vinfo);
6071 pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
6072 rhs1_op0, rhs1_op1);
6073 rhs1 = tmp;
6074 append_pattern_def_seq (vinfo, stmt_info: stmt_vinfo, new_stmt: pattern_stmt, vectype: vectype2,
6075 scalar_type_for_mask: rhs1_type);
6076 }
6077
6078 if (maybe_ne (a: TYPE_VECTOR_SUBPARTS (node: vectype1),
6079 b: TYPE_VECTOR_SUBPARTS (node: vectype2)))
6080 tmp = build_mask_conversion (vinfo, mask: rhs1, vectype: vectype1, stmt_vinfo);
6081 else
6082 tmp = rhs1;
6083
6084 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6085 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
6086 gimple_assign_rhs2 (gs: last_stmt),
6087 gimple_assign_rhs3 (gs: last_stmt));
6088
6089 *type_out = vectype1;
6090 vect_pattern_detected (name: "vect_recog_mask_conversion_pattern", stmt: last_stmt);
6091
6092 return pattern_stmt;
6093 }
6094
6095 /* Now check for binary boolean operations requiring conversion for
6096 one of the operands. */
6097 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
6098 return NULL;
6099
6100 if (rhs_code != BIT_IOR_EXPR
6101 && rhs_code != BIT_XOR_EXPR
6102 && rhs_code != BIT_AND_EXPR
6103 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
6104 return NULL;
6105
6106 rhs2 = gimple_assign_rhs2 (gs: last_stmt);
6107
6108 rhs1_type = integer_type_for_mask (var: rhs1, vinfo);
6109 rhs2_type = integer_type_for_mask (var: rhs2, vinfo);
6110
6111 if (!rhs1_type || !rhs2_type
6112 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
6113 return NULL;
6114
6115 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
6116 {
6117 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
6118 if (!vectype1)
6119 return NULL;
6120 rhs2 = build_mask_conversion (vinfo, mask: rhs2, vectype: vectype1, stmt_vinfo);
6121 }
6122 else
6123 {
6124 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
6125 if (!vectype1)
6126 return NULL;
6127 rhs1 = build_mask_conversion (vinfo, mask: rhs1, vectype: vectype1, stmt_vinfo);
6128 }
6129
6130 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
6131 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
6132
6133 *type_out = vectype1;
6134 vect_pattern_detected (name: "vect_recog_mask_conversion_pattern", stmt: last_stmt);
6135
6136 return pattern_stmt;
6137}
6138
6139/* STMT_INFO is a load or store. If the load or store is conditional, return
6140 the boolean condition under which it occurs, otherwise return null. */
6141
6142static tree
6143vect_get_load_store_mask (stmt_vec_info stmt_info)
6144{
6145 if (gassign *def_assign = dyn_cast <gassign *> (p: stmt_info->stmt))
6146 {
6147 gcc_assert (gimple_assign_single_p (def_assign));
6148 return NULL_TREE;
6149 }
6150
6151 if (gcall *def_call = dyn_cast <gcall *> (p: stmt_info->stmt))
6152 {
6153 internal_fn ifn = gimple_call_internal_fn (gs: def_call);
6154 int mask_index = internal_fn_mask_index (ifn);
6155 return gimple_call_arg (gs: def_call, index: mask_index);
6156 }
6157
6158 gcc_unreachable ();
6159}
6160
6161/* Return MASK if MASK is suitable for masking an operation on vectors
6162 of type VECTYPE, otherwise convert it into such a form and return
6163 the result. Associate any conversion statements with STMT_INFO's
6164 pattern. */
6165
6166static tree
6167vect_convert_mask_for_vectype (tree mask, tree vectype,
6168 stmt_vec_info stmt_info, vec_info *vinfo)
6169{
6170 tree mask_type = integer_type_for_mask (var: mask, vinfo);
6171 if (mask_type)
6172 {
6173 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
6174 if (mask_vectype
6175 && maybe_ne (a: TYPE_VECTOR_SUBPARTS (node: vectype),
6176 b: TYPE_VECTOR_SUBPARTS (node: mask_vectype)))
6177 mask = build_mask_conversion (vinfo, mask, vectype, stmt_vinfo: stmt_info);
6178 }
6179 return mask;
6180}
6181
6182/* Return the equivalent of:
6183
6184 fold_convert (TYPE, VALUE)
6185
6186 with the expectation that the operation will be vectorized.
6187 If new statements are needed, add them as pattern statements
6188 to STMT_INFO. */
6189
6190static tree
6191vect_add_conversion_to_pattern (vec_info *vinfo,
6192 tree type, tree value, stmt_vec_info stmt_info)
6193{
6194 if (useless_type_conversion_p (type, TREE_TYPE (value)))
6195 return value;
6196
6197 tree new_value = vect_recog_temp_ssa_var (type, NULL);
6198 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
6199 append_pattern_def_seq (vinfo, stmt_info, new_stmt: conversion,
6200 vectype: get_vectype_for_scalar_type (vinfo, type));
6201 return new_value;
6202}
6203
6204/* Try to convert STMT_INFO into a call to a gather load or scatter store
6205 internal function. Return the final statement on success and set
6206 *TYPE_OUT to the vector type being loaded or stored.
6207
6208 This function only handles gathers and scatters that were recognized
6209 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
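/* For example (illustrative of the calls built below): a masked gather
   load recognized here ends up as

     lhs = .MASK_GATHER_LOAD (base, offset, scale, zero, mask);

   and a masked scatter store as

     .MASK_SCATTER_STORE (base, offset, scale, rhs, mask);

   with OFFSET first converted to the element type of the offset vector.  */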
6210
6211static gimple *
6212vect_recog_gather_scatter_pattern (vec_info *vinfo,
6213 stmt_vec_info stmt_info, tree *type_out)
6214{
6215 /* Currently we only support this for loop vectorization. */
6216 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
6217 if (!loop_vinfo)
6218 return NULL;
6219
6220 /* Make sure that we're looking at a gather load or scatter store. */
6221 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
6222 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6223 return NULL;
6224
6225 /* Get the boolean that controls whether the load or store happens.
6226 This is null if the operation is unconditional. */
6227 tree mask = vect_get_load_store_mask (stmt_info);
6228
6229 /* Make sure that the target supports an appropriate internal
6230 function for the gather/scatter operation. */
6231 gather_scatter_info gs_info;
6232 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
6233 || gs_info.ifn == IFN_LAST)
6234 return NULL;
6235
6236 /* Convert the mask to the right form. */
6237 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
6238 gs_info.element_type);
6239 if (mask)
6240 mask = vect_convert_mask_for_vectype (mask, vectype: gs_vectype, stmt_info,
6241 vinfo: loop_vinfo);
6242 else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
6243 || gs_info.ifn == IFN_MASK_GATHER_LOAD
6244 || gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE
6245 || gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
6246 mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
6247
6248 /* Get the invariant base and non-invariant offset, converting the
6249 latter to the same width as the vector elements. */
6250 tree base = gs_info.base;
6251 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
6252 tree offset = vect_add_conversion_to_pattern (vinfo, type: offset_type,
6253 value: gs_info.offset, stmt_info);
6254
6255 /* Build the new pattern statement. */
6256 tree scale = size_int (gs_info.scale);
6257 gcall *pattern_stmt;
6258 if (DR_IS_READ (dr))
6259 {
6260 tree zero = build_zero_cst (gs_info.element_type);
6261 if (mask != NULL)
6262 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
6263 offset, scale, zero, mask);
6264 else
6265 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
6266 offset, scale, zero);
6267 tree load_lhs = vect_recog_temp_ssa_var (type: gs_info.element_type, NULL);
6268 gimple_call_set_lhs (gs: pattern_stmt, lhs: load_lhs);
6269 }
6270 else
6271 {
6272 tree rhs = vect_get_store_rhs (stmt_info);
6273 if (mask != NULL)
6274 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
6275 base, offset, scale, rhs,
6276 mask);
6277 else
6278 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
6279 base, offset, scale, rhs);
6280 }
6281 gimple_call_set_nothrow (s: pattern_stmt, nothrow_p: true);
6282
6283 /* Copy across relevant vectorization info and associate DR with the
6284 new pattern statement instead of the original statement. */
6285 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
6286 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
6287
6288 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6289 *type_out = vectype;
6290 vect_pattern_detected (name: "gather/scatter pattern", stmt: stmt_info->stmt);
6291
6292 return pattern_stmt;
6293}
6294
6295/* Return true if TYPE is a non-boolean integer type. These are the types
6296 that we want to consider for narrowing. */
6297
6298static bool
6299vect_narrowable_type_p (tree type)
6300{
6301 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
6302}
6303
6304/* Return true if the operation given by CODE can be truncated to N bits
6305 when only N bits of the output are needed. This is only true if bits
6306 above the low N bits of the inputs cannot affect the low N bits of the result. */
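/* For instance (illustrative): with N == 8, (0x1ff + 0x001) & 0xff and
   (0xff + 0x01) & 0xff are both 0, so PLUS_EXPR can be evaluated in 8 bits
   when only 8 bits of the result are needed; but 0x1ff / 3 == 0xaa while
   0xff / 3 == 0x55, so TRUNC_DIV_EXPR cannot.  */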
6307
6308static bool
6309vect_truncatable_operation_p (tree_code code)
6310{
6311 switch (code)
6312 {
6313 case NEGATE_EXPR:
6314 case PLUS_EXPR:
6315 case MINUS_EXPR:
6316 case MULT_EXPR:
6317 case BIT_NOT_EXPR:
6318 case BIT_AND_EXPR:
6319 case BIT_IOR_EXPR:
6320 case BIT_XOR_EXPR:
6321 case COND_EXPR:
6322 return true;
6323
6324 default:
6325 return false;
6326 }
6327}
6328
6329/* Record that STMT_INFO could be changed from operating on TYPE to
6330 operating on a type with the precision and sign given by PRECISION
6331 and SIGN respectively. PRECISION is an arbitrary bit precision;
6332 it might not be a whole number of bytes. */
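/* For example (illustrative): an operation on 'int' whose result is known
   to need only 12 significant bits is recorded with an operation precision
   of 16, because of the rounding done below.  */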
6333
6334static void
6335vect_set_operation_type (stmt_vec_info stmt_info, tree type,
6336 unsigned int precision, signop sign)
6337{
6338 /* Round the precision up to a whole number of bytes. */
6339 precision = vect_element_precision (precision);
6340 if (precision < TYPE_PRECISION (type)
6341 && (!stmt_info->operation_precision
6342 || stmt_info->operation_precision > precision))
6343 {
6344 stmt_info->operation_precision = precision;
6345 stmt_info->operation_sign = sign;
6346 }
6347}
6348
6349/* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
6350 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
6351 is an arbitrary bit precision; it might not be a whole number of bytes. */
6352
6353static void
6354vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
6355 unsigned int min_input_precision)
6356{
6357 /* This operation in isolation only requires the inputs to have
6358 MIN_INPUT_PRECISION bits of precision. However, that doesn't mean
6359 that MIN_INPUT_PRECISION is a natural precision for the chain
6360 as a whole. E.g. consider something like:
6361
6362 unsigned short *x, *y;
6363 *y = ((*x & 0xf0) >> 4) | (*y << 4);
6364
6365 The right shift can be done on unsigned chars, and only requires the
6366 result of "*x & 0xf0" to be done on unsigned chars. But taking that
6367 approach would mean turning a natural chain of single-vector unsigned
6368 short operations into one that truncates "*x" and then extends
6369 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
6370 operation and one vector for each unsigned char operation.
6371 This would be a significant pessimization.
6372
6373 Instead only propagate the maximum of this precision and the precision
6374 required by the users of the result. This means that we don't pessimize
6375 the case above but continue to optimize things like:
6376
6377 unsigned char *y;
6378 unsigned short *x;
6379 *y = ((*x & 0xf0) >> 4) | (*y << 4);
6380
6381 Here we would truncate two vectors of *x to a single vector of
6382 unsigned chars and use single-vector unsigned char operations for
6383 everything else, rather than doing two unsigned short copies of
6384 "(*x & 0xf0) >> 4" and then truncating the result. */
6385 min_input_precision = MAX (min_input_precision,
6386 stmt_info->min_output_precision);
6387
6388 if (min_input_precision < TYPE_PRECISION (type)
6389 && (!stmt_info->min_input_precision
6390 || stmt_info->min_input_precision > min_input_precision))
6391 stmt_info->min_input_precision = min_input_precision;
6392}
6393
6394/* Subroutine of vect_determine_min_output_precision. Return true if
6395 we can calculate a reduced number of output bits for STMT_INFO,
6396 whose result is LHS. */
6397
6398static bool
6399vect_determine_min_output_precision_1 (vec_info *vinfo,
6400 stmt_vec_info stmt_info, tree lhs)
6401{
6402 /* Take the maximum precision required by users of the result. */
6403 unsigned int precision = 0;
6404 imm_use_iterator iter;
6405 use_operand_p use;
6406 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
6407 {
6408 gimple *use_stmt = USE_STMT (use);
6409 if (is_gimple_debug (gs: use_stmt))
6410 continue;
6411 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
6412 if (!use_stmt_info || !use_stmt_info->min_input_precision)
6413 return false;
6414 /* The input precision recorded for COND_EXPRs applies only to the
6415 "then" and "else" values. */
6416 gassign *assign = dyn_cast <gassign *> (p: stmt_info->stmt);
6417 if (assign
6418 && gimple_assign_rhs_code (gs: assign) == COND_EXPR
6419 && use->use != gimple_assign_rhs2_ptr (gs: assign)
6420 && use->use != gimple_assign_rhs3_ptr (gs: assign))
6421 return false;
6422 precision = MAX (precision, use_stmt_info->min_input_precision);
6423 }
6424
6425 if (dump_enabled_p ())
6426 dump_printf_loc (MSG_NOTE, vect_location,
6427 "only the low %d bits of %T are significant\n",
6428 precision, lhs);
6429 stmt_info->min_output_precision = precision;
6430 return true;
6431}
6432
6433/* Calculate min_output_precision for STMT_INFO. */
6434
6435static void
6436vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
6437{
6438 /* We're only interested in statements with a narrowable result. */
6439 tree lhs = gimple_get_lhs (stmt_info->stmt);
6440 if (!lhs
6441 || TREE_CODE (lhs) != SSA_NAME
6442 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
6443 return;
6444
6445 if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
6446 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
6447}
6448
6449/* Use range information to decide whether STMT (described by STMT_INFO)
6450 could be done in a narrower type. This is effectively a forward
6451 propagation, since it uses context-independent information that applies
6452 to all users of an SSA name. */
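/* For example (illustrative): if range information shows that the result
   of an 'int' addition always lies in [0, 200], the addition can be done
   in unsigned 8-bit arithmetic and the result widened afterwards.  */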
6453
6454static void
6455vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
6456{
6457 tree lhs = gimple_assign_lhs (gs: stmt);
6458 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
6459 return;
6460
6461 tree type = TREE_TYPE (lhs);
6462 if (!vect_narrowable_type_p (type))
6463 return;
6464
6465 /* First see whether we have any useful range information for the result. */
6466 unsigned int precision = TYPE_PRECISION (type);
6467 signop sign = TYPE_SIGN (type);
6468 wide_int min_value, max_value;
6469 if (!vect_get_range_info (var: lhs, min_value: &min_value, max_value: &max_value))
6470 return;
6471
6472 tree_code code = gimple_assign_rhs_code (gs: stmt);
6473 unsigned int nops = gimple_num_ops (gs: stmt);
6474
6475 if (!vect_truncatable_operation_p (code))
6476 {
6477 /* Handle operations that can be computed in type T if all inputs
6478 and outputs can be represented in type T. Also handle left and
6479 right shifts, where (in addition) the maximum shift amount must
6480 be less than the number of bits in T. */
6481 bool is_shift;
6482 switch (code)
6483 {
6484 case LSHIFT_EXPR:
6485 case RSHIFT_EXPR:
6486 is_shift = true;
6487 break;
6488
6489 case ABS_EXPR:
6490 case MIN_EXPR:
6491 case MAX_EXPR:
6492 case TRUNC_DIV_EXPR:
6493 case CEIL_DIV_EXPR:
6494 case FLOOR_DIV_EXPR:
6495 case ROUND_DIV_EXPR:
6496 case EXACT_DIV_EXPR:
6497 /* Modulus is excluded because it is typically calculated by doing
6498 a division, for which minimum signed / -1 isn't representable in
6499 the original signed type. We could take the division range into
6500 account instead, if handling modulus ever becomes important. */
6501 is_shift = false;
6502 break;
6503
6504 default:
6505 return;
6506 }
6507 for (unsigned int i = 1; i < nops; ++i)
6508 {
6509 tree op = gimple_op (gs: stmt, i);
6510 wide_int op_min_value, op_max_value;
6511 if (TREE_CODE (op) == INTEGER_CST)
6512 {
6513 unsigned int op_precision = TYPE_PRECISION (TREE_TYPE (op));
6514 op_min_value = op_max_value = wi::to_wide (t: op, prec: op_precision);
6515 }
6516 else if (TREE_CODE (op) == SSA_NAME)
6517 {
6518 if (!vect_get_range_info (var: op, min_value: &op_min_value, max_value: &op_max_value))
6519 return;
6520 }
6521 else
6522 return;
6523
6524 if (is_shift && i == 2)
6525 {
6526 /* There needs to be one more bit than the maximum shift amount.
6527
6528 If the maximum shift amount is already 1 less than PRECISION
6529 then we can't narrow the shift further. Dealing with that
6530 case first ensures that we can safely use an unsigned range
6531 below.
6532
6533 op_min_value isn't relevant, since shifts by negative amounts
6534 are UB. */
6535 if (wi::geu_p (x: op_max_value, y: precision - 1))
6536 return;
6537 unsigned int min_bits = op_max_value.to_uhwi () + 1;
6538
6539 /* As explained below, we can convert a signed shift into an
6540 unsigned shift if the sign bit is always clear. At this
6541 point we've already processed the ranges of the output and
6542 the first input. */
6543 auto op_sign = sign;
6544 if (sign == SIGNED && !wi::neg_p (x: min_value))
6545 op_sign = UNSIGNED;
6546 op_min_value = wide_int::from (x: wi::min_value (min_bits, op_sign),
6547 precision, sgn: op_sign);
6548 op_max_value = wide_int::from (x: wi::max_value (min_bits, op_sign),
6549 precision, sgn: op_sign);
6550 }
6551 min_value = wi::min (x: min_value, y: op_min_value, sgn: sign);
6552 max_value = wi::max (x: max_value, y: op_max_value, sgn: sign);
6553 }
6554 }
6555
6556 /* Try to switch signed types for unsigned types if we can.
6557 This is better for two reasons. First, unsigned ops tend
6558 to be cheaper than signed ops. Second, it means that we can
6559 handle things like:
6560
6561 signed char c;
6562 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
6563
6564 as:
6565
6566 signed char c;
6567 unsigned short res_1 = (unsigned short) c & 0xff00;
6568 int res = (int) res_1;
6569
6570 where the intermediate result res_1 has unsigned rather than
6571 signed type. */
6572 if (sign == SIGNED && !wi::neg_p (x: min_value))
6573 sign = UNSIGNED;
6574
6575 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
6576 unsigned int precision1 = wi::min_precision (x: min_value, sgn: sign);
6577 unsigned int precision2 = wi::min_precision (x: max_value, sgn: sign);
6578 unsigned int value_precision = MAX (precision1, precision2);
6579 if (value_precision >= precision)
6580 return;
6581
6582 if (dump_enabled_p ())
6583 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
6584 " without loss of precision: %G",
6585 sign == SIGNED ? "signed" : "unsigned",
6586 value_precision, (gimple *) stmt);
6587
6588 vect_set_operation_type (stmt_info, type, precision: value_precision, sign);
6589 vect_set_min_input_precision (stmt_info, type, min_input_precision: value_precision);
6590}
6591
6592/* Use information about the users of STMT's result to decide whether
6593 STMT (described by STMT_INFO) could be done in a narrower type.
6594 This is effectively a backward propagation. */
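/* A hypothetical sketch of this backward direction (made up for
   exposition): in

     int prod = a * b;
     unsigned char res = (unsigned char) prod;

   only the low 8 bits of PROD are consumed, so the multiplication
   itself could be performed in a narrower type, regardless of the
   ranges of A and B.  */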
6595
6596static void
6597vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
6598{
6599 tree_code code = gimple_assign_rhs_code (gs: stmt);
6600 unsigned int opno = (code == COND_EXPR ? 2 : 1);
6601 tree type = TREE_TYPE (gimple_op (stmt, opno));
6602 if (!vect_narrowable_type_p (type))
6603 return;
6604
6605 unsigned int precision = TYPE_PRECISION (type);
6606 unsigned int operation_precision, min_input_precision;
6607 switch (code)
6608 {
6609 CASE_CONVERT:
6610 /* Only the bits that contribute to the output matter. Don't change
6611 the precision of the operation itself. */
6612 operation_precision = precision;
6613 min_input_precision = stmt_info->min_output_precision;
6614 break;
6615
6616 case LSHIFT_EXPR:
6617 case RSHIFT_EXPR:
6618 {
6619 tree shift = gimple_assign_rhs2 (gs: stmt);
6620 if (TREE_CODE (shift) != INTEGER_CST
6621 || !wi::ltu_p (x: wi::to_widest (t: shift), y: precision))
6622 return;
6623 unsigned int const_shift = TREE_INT_CST_LOW (shift);
6624 if (code == LSHIFT_EXPR)
6625 {
6626 /* Avoid creating an undefined shift.
6627
6628 ??? We could instead use min_output_precision as-is and
6629 optimize out-of-range shifts to zero. However, only
6630 degenerate testcases shift away all their useful input data,
6631 and it isn't natural to drop input operations in the middle
6632 of vectorization. This sort of thing should really be
6633 handled before vectorization. */
6634 operation_precision = MAX (stmt_info->min_output_precision,
6635 const_shift + 1);
6636 /* We need CONST_SHIFT fewer bits of the input. */
6637 min_input_precision = (MAX (operation_precision, const_shift)
6638 - const_shift);
6639 }
6640 else
6641 {
6642 /* We need CONST_SHIFT extra bits to do the operation. */
6643 operation_precision = (stmt_info->min_output_precision
6644 + const_shift);
6645 min_input_precision = operation_precision;
6646 }
6647 break;
6648 }
6649
6650 default:
6651 if (vect_truncatable_operation_p (code))
6652 {
6653 /* Input bit N has no effect on output bits N-1 and lower. */
6654 operation_precision = stmt_info->min_output_precision;
6655 min_input_precision = operation_precision;
6656 break;
6657 }
6658 return;
6659 }
6660
6661 if (operation_precision < precision)
6662 {
6663 if (dump_enabled_p ())
6664 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
6665 " without affecting users: %G",
6666 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
6667 operation_precision, (gimple *) stmt);
6668 vect_set_operation_type (stmt_info, type, precision: operation_precision,
6669 TYPE_SIGN (type));
6670 }
6671 vect_set_min_input_precision (stmt_info, type, min_input_precision);
6672}
6673
6674/* Return true if the statement described by STMT_INFO sets a boolean
6675 SSA_NAME and if we know how to vectorize this kind of statement using
6676 vector mask types. */
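/* For example, in the hypothetical GIMPLE fragment

     _1 = a_5 < b_6;
     _2 = c_7 != 0;
     _3 = _1 & _2;

   each statement sets a boolean SSA_NAME and could be carried out on
   vector masks, as could a PHI that merges such values.  */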
6677
6678static bool
6679possible_vector_mask_operation_p (stmt_vec_info stmt_info)
6680{
6681 tree lhs = gimple_get_lhs (stmt_info->stmt);
6682 tree_code code = ERROR_MARK;
6683 gassign *assign = NULL;
6684 gcond *cond = NULL;
6685
6686 if ((assign = dyn_cast <gassign *> (p: stmt_info->stmt)))
6687 code = gimple_assign_rhs_code (gs: assign);
6688 else if ((cond = dyn_cast <gcond *> (p: stmt_info->stmt)))
6689 {
6690 lhs = gimple_cond_lhs (gs: cond);
6691 code = gimple_cond_code (gs: cond);
6692 }
6693
6694 if (!lhs
6695 || TREE_CODE (lhs) != SSA_NAME
6696 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
6697 return false;
6698
6699 if (code != ERROR_MARK)
6700 {
6701 switch (code)
6702 {
6703 CASE_CONVERT:
6704 case SSA_NAME:
6705 case BIT_NOT_EXPR:
6706 case BIT_IOR_EXPR:
6707 case BIT_XOR_EXPR:
6708 case BIT_AND_EXPR:
6709 return true;
6710
6711 default:
6712 return TREE_CODE_CLASS (code) == tcc_comparison;
6713 }
6714 }
6715 else if (is_a <gphi *> (p: stmt_info->stmt))
6716 return true;
6717 return false;
6718}
6719
6720/* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
6721 a vector mask type instead of a normal vector type. Record the
6722 result in STMT_INFO->mask_precision. */
6723
6724static void
6725vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
6726{
6727 if (!possible_vector_mask_operation_p (stmt_info))
6728 return;
6729
6730 /* If at least one boolean input uses a vector mask type,
6731 pick the mask type with the narrowest elements.
6732
6733 ??? This is the traditional behavior. It should always produce
6734 the smallest number of operations, but isn't necessarily the
6735 optimal choice. For example, if we have:
6736
6737 a = b & c
6738
6739 where:
6740
6741 - the user of a wants it to have a mask type for 16-bit elements (M16)
6742 - b also uses M16
6743 - c uses a mask type for 8-bit elements (M8)
6744
6745 then picking M8 gives:
6746
6747 - 1 M16->M8 pack for b
6748 - 1 M8 AND for a
6749 - 2 M8->M16 unpacks for the user of a
6750
6751 whereas picking M16 would have given:
6752
6753 - 2 M8->M16 unpacks for c
6754 - 2 M16 ANDs for a
6755
6756     The number of operations is equal, but M16 would have given
6757 a shorter dependency chain and allowed more ILP. */
6758 unsigned int precision = ~0U;
6759 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6760
6761 /* If the statement compares two values that shouldn't use vector masks,
6762 try comparing the values as normal scalars instead. */
6763 tree_code code = ERROR_MARK;
6764 tree op0_type;
6765 unsigned int nops = -1;
6766 unsigned int ops_start = 0;
6767
6768 if (gassign *assign = dyn_cast <gassign *> (p: stmt))
6769 {
6770 code = gimple_assign_rhs_code (gs: assign);
6771 op0_type = TREE_TYPE (gimple_assign_rhs1 (assign));
6772 nops = gimple_num_ops (gs: assign);
6773 ops_start = 1;
6774 }
6775 else if (gcond *cond = dyn_cast <gcond *> (p: stmt))
6776 {
6777 code = gimple_cond_code (gs: cond);
6778 op0_type = TREE_TYPE (gimple_cond_lhs (cond));
6779 nops = 2;
6780 ops_start = 0;
6781 }
6782
6783 if (code != ERROR_MARK)
6784 {
6785 for (unsigned int i = ops_start; i < nops; ++i)
6786 {
6787 tree rhs = gimple_op (gs: stmt, i);
6788 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
6789 continue;
6790
6791 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
6792 if (!def_stmt_info)
6793 /* Don't let external or constant operands influence the choice.
6794 We can convert them to whichever vector type we pick. */
6795 continue;
6796
6797 if (def_stmt_info->mask_precision)
6798 {
6799 if (precision > def_stmt_info->mask_precision)
6800 precision = def_stmt_info->mask_precision;
6801 }
6802 }
6803
6804 if (precision == ~0U
6805 && TREE_CODE_CLASS (code) == tcc_comparison)
6806 {
6807 scalar_mode mode;
6808 tree vectype, mask_type;
6809 if (is_a <scalar_mode> (TYPE_MODE (op0_type), result: &mode)
6810 && (vectype = get_vectype_for_scalar_type (vinfo, op0_type))
6811 && (mask_type = get_mask_type_for_scalar_type (vinfo, op0_type))
6812 && expand_vec_cmp_expr_p (vectype, mask_type, code))
6813 precision = GET_MODE_BITSIZE (mode);
6814 }
6815 }
6816 else
6817 {
6818 gphi *phi = as_a <gphi *> (p: stmt_info->stmt);
6819 for (unsigned i = 0; i < gimple_phi_num_args (gs: phi); ++i)
6820 {
6821 tree rhs = gimple_phi_arg_def (gs: phi, index: i);
6822
6823 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
6824 if (!def_stmt_info)
6825 /* Don't let external or constant operands influence the choice.
6826 We can convert them to whichever vector type we pick. */
6827 continue;
6828
6829 if (def_stmt_info->mask_precision)
6830 {
6831 if (precision > def_stmt_info->mask_precision)
6832 precision = def_stmt_info->mask_precision;
6833 }
6834 }
6835 }
6836
6837 if (dump_enabled_p ())
6838 {
6839 if (precision == ~0U)
6840 dump_printf_loc (MSG_NOTE, vect_location,
6841 "using normal nonmask vectors for %G",
6842 stmt_info->stmt);
6843 else
6844 dump_printf_loc (MSG_NOTE, vect_location,
6845 "using boolean precision %d for %G",
6846 precision, stmt_info->stmt);
6847 }
6848
6849 stmt_info->mask_precision = precision;
6850}
6851
6852/* Handle vect_determine_precisions for STMT_INFO, given that we
6853 have already done so for the users of its result. */
6854
6855void
6856vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
6857{
6858 vect_determine_min_output_precision (vinfo, stmt_info);
6859 if (gassign *stmt = dyn_cast <gassign *> (p: stmt_info->stmt))
6860 {
6861 vect_determine_precisions_from_range (stmt_info, stmt);
6862 vect_determine_precisions_from_users (stmt_info, stmt);
6863 }
6864}
6865
6866/* Walk backwards through the vectorizable region to determine the
6867 values of these fields:
6868
6869 - min_output_precision
6870 - min_input_precision
6871 - operation_precision
6872 - operation_sign. */
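/* A hypothetical end-to-end example (made up for exposition): for

     unsigned char x;
     unsigned char res = (unsigned char) (x * 3);

   the backward walk first sees the truncation and records that only
   the low 8 bits of the product matter (its min_output_precision),
   which in turn allows the multiplication to use a narrower
   operation_precision and reduces the min_input_precision required
   of the extended value of X.  */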
6873
6874void
6875vect_determine_precisions (vec_info *vinfo)
6876{
6877 DUMP_VECT_SCOPE ("vect_determine_precisions");
6878
6879 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo))
6880 {
6881 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6882 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
6883 unsigned int nbbs = loop->num_nodes;
6884
6885 for (unsigned int i = 0; i < nbbs; i++)
6886 {
6887 basic_block bb = bbs[i];
6888 for (auto gsi = gsi_start_phis (bb);
6889 !gsi_end_p (i: gsi); gsi_next (i: &gsi))
6890 {
6891 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6892 if (stmt_info)
6893 vect_determine_mask_precision (vinfo, stmt_info);
6894 }
6895 for (auto si = gsi_start_bb (bb); !gsi_end_p (i: si); gsi_next (i: &si))
6896 if (!is_gimple_debug (gs: gsi_stmt (i: si)))
6897 vect_determine_mask_precision
6898 (vinfo, stmt_info: vinfo->lookup_stmt (gsi_stmt (i: si)));
6899 }
6900 for (unsigned int i = 0; i < nbbs; i++)
6901 {
6902 basic_block bb = bbs[nbbs - i - 1];
6903 for (gimple_stmt_iterator si = gsi_last_bb (bb);
6904 !gsi_end_p (i: si); gsi_prev (i: &si))
6905 if (!is_gimple_debug (gs: gsi_stmt (i: si)))
6906 vect_determine_stmt_precisions
6907 (vinfo, stmt_info: vinfo->lookup_stmt (gsi_stmt (i: si)));
6908 for (auto gsi = gsi_start_phis (bb);
6909 !gsi_end_p (i: gsi); gsi_next (i: &gsi))
6910 {
6911 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6912 if (stmt_info)
6913 vect_determine_stmt_precisions (vinfo, stmt_info);
6914 }
6915 }
6916 }
6917 else
6918 {
6919 bb_vec_info bb_vinfo = as_a <bb_vec_info> (p: vinfo);
6920 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
6921 {
6922 basic_block bb = bb_vinfo->bbs[i];
6923 for (auto gsi = gsi_start_phis (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
6924 {
6925 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6926 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6927 vect_determine_mask_precision (vinfo, stmt_info);
6928 }
6929 for (auto gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
6930 {
6931 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (i: gsi));
6932 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6933 vect_determine_mask_precision (vinfo, stmt_info);
6934 }
6935 }
6936 for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
6937 {
6938 for (gimple_stmt_iterator gsi = gsi_last_bb (bb: bb_vinfo->bbs[i]);
6939 !gsi_end_p (i: gsi); gsi_prev (i: &gsi))
6940 {
6941 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (i: gsi));
6942 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6943 vect_determine_stmt_precisions (vinfo, stmt_info);
6944 }
6945 for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
6946 !gsi_end_p (i: gsi); gsi_next (i: &gsi))
6947 {
6948 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
6949 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
6950 vect_determine_stmt_precisions (vinfo, stmt_info);
6951 }
6952 }
6953 }
6954}
6955
6956typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
6957
6958struct vect_recog_func
6959{
6960 vect_recog_func_ptr fn;
6961 const char *name;
6962};
6963
6964/* Note that ordering matters - the first pattern matching on a stmt is
6965   taken, which means the more complex patterns usually need to precede the
6966   less complex ones (widen_sum only after dot_prod or sad, for example).  */
6967static vect_recog_func vect_vect_recog_func_ptrs[] = {
6968 { .fn: vect_recog_bitfield_ref_pattern, .name: "bitfield_ref" },
6969 { .fn: vect_recog_bit_insert_pattern, .name: "bit_insert" },
6970 { .fn: vect_recog_abd_pattern, .name: "abd" },
6971 { .fn: vect_recog_over_widening_pattern, .name: "over_widening" },
6972 /* Must come after over_widening, which narrows the shift as much as
6973 possible beforehand. */
6974 { .fn: vect_recog_average_pattern, .name: "average" },
6975 { .fn: vect_recog_cond_expr_convert_pattern, .name: "cond_expr_convert" },
6976 { .fn: vect_recog_mulhs_pattern, .name: "mult_high" },
6977 { .fn: vect_recog_cast_forwprop_pattern, .name: "cast_forwprop" },
6978 { .fn: vect_recog_widen_mult_pattern, .name: "widen_mult" },
6979 { .fn: vect_recog_dot_prod_pattern, .name: "dot_prod" },
6980 { .fn: vect_recog_sad_pattern, .name: "sad" },
6981 { .fn: vect_recog_widen_sum_pattern, .name: "widen_sum" },
6982 { .fn: vect_recog_pow_pattern, .name: "pow" },
6983 { .fn: vect_recog_popcount_clz_ctz_ffs_pattern, .name: "popcount_clz_ctz_ffs" },
6984 { .fn: vect_recog_ctz_ffs_pattern, .name: "ctz_ffs" },
6985 { .fn: vect_recog_widen_shift_pattern, .name: "widen_shift" },
6986 { .fn: vect_recog_rotate_pattern, .name: "rotate" },
6987 { .fn: vect_recog_vector_vector_shift_pattern, .name: "vector_vector_shift" },
6988 { .fn: vect_recog_divmod_pattern, .name: "divmod" },
6989 { .fn: vect_recog_mult_pattern, .name: "mult" },
6990 { .fn: vect_recog_mixed_size_cond_pattern, .name: "mixed_size_cond" },
6991 { .fn: vect_recog_gcond_pattern, .name: "gcond" },
6992 { .fn: vect_recog_bool_pattern, .name: "bool" },
6993 /* This must come before mask conversion, and includes the parts
6994 of mask conversion that are needed for gather and scatter
6995 internal functions. */
6996 { .fn: vect_recog_gather_scatter_pattern, .name: "gather_scatter" },
6997 { .fn: vect_recog_mask_conversion_pattern, .name: "mask_conversion" },
6998 { .fn: vect_recog_widen_plus_pattern, .name: "widen_plus" },
6999 { .fn: vect_recog_widen_minus_pattern, .name: "widen_minus" },
7000 { .fn: vect_recog_widen_abd_pattern, .name: "widen_abd" },
7001 /* These must come after the double widening ones. */
7002};
7003
7004const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
7005
7006/* Mark statements that are involved in a pattern. */
7007
7008void
7009vect_mark_pattern_stmts (vec_info *vinfo,
7010 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
7011 tree pattern_vectype)
7012{
7013 stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
7014 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
7015
7016 gimple *orig_pattern_stmt = NULL;
7017 if (is_pattern_stmt_p (stmt_info: orig_stmt_info))
7018 {
7019 /* We're replacing a statement in an existing pattern definition
7020 sequence. */
7021 orig_pattern_stmt = orig_stmt_info->stmt;
7022 if (dump_enabled_p ())
7023 dump_printf_loc (MSG_NOTE, vect_location,
7024 "replacing earlier pattern %G", orig_pattern_stmt);
7025
7026 /* To keep the book-keeping simple, just swap the lhs of the
7027 old and new statements, so that the old one has a valid but
7028 unused lhs. */
7029 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
7030 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
7031 gimple_set_lhs (pattern_stmt, old_lhs);
7032
7033 if (dump_enabled_p ())
7034 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
7035
7036 /* Switch to the statement that ORIG replaces. */
7037 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
7038
7039 /* We shouldn't be replacing the main pattern statement. */
7040 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
7041 != orig_pattern_stmt);
7042 }
7043
7044 if (def_seq)
7045 for (gimple_stmt_iterator si = gsi_start (seq&: def_seq);
7046 !gsi_end_p (i: si); gsi_next (i: &si))
7047 {
7048 if (dump_enabled_p ())
7049 dump_printf_loc (MSG_NOTE, vect_location,
7050 "extra pattern stmt: %G", gsi_stmt (i: si));
7051 stmt_vec_info pattern_stmt_info
7052 = vect_init_pattern_stmt (vinfo, pattern_stmt: gsi_stmt (i: si),
7053 orig_stmt_info, vectype: pattern_vectype);
7054 /* Stmts in the def sequence are not vectorizable cycle or
7055	   induction defs; instead they should all be vect_internal_def,
7056	   feeding the main pattern stmt, which retains this def type.  */
7057 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
7058 }
7059
7060 if (orig_pattern_stmt)
7061 {
7062 vect_init_pattern_stmt (vinfo, pattern_stmt,
7063 orig_stmt_info, vectype: pattern_vectype);
7064
7065 /* Insert all the new pattern statements before the original one. */
7066 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
7067 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
7068 orig_def_seq);
7069 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
7070 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
7071
7072 /* Remove the pattern statement that this new pattern replaces. */
7073 gsi_remove (&gsi, false);
7074 }
7075 else
7076 vect_set_pattern_stmt (vinfo,
7077 pattern_stmt, orig_stmt_info, vectype: pattern_vectype);
7078
7079 /* For any conditionals mark them as vect_condition_def. */
7080 if (is_a <gcond *> (p: pattern_stmt))
7081 STMT_VINFO_DEF_TYPE (STMT_VINFO_RELATED_STMT (orig_stmt_info)) = vect_condition_def;
7082
7083 /* Transfer reduction path info to the pattern. */
7084 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
7085 {
7086 gimple_match_op op;
7087 if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
7088 gcc_unreachable ();
7089 tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
7090 /* Search the pattern def sequence and the main pattern stmt. Note
7091	 we may have inserted everything into a containing pattern def
7092	 sequence, so the following is a bit awkward.  */
7093 gimple_stmt_iterator si;
7094 gimple *s;
7095 if (def_seq)
7096 {
7097 si = gsi_start (seq&: def_seq);
7098 s = gsi_stmt (i: si);
7099 gsi_next (i: &si);
7100 }
7101 else
7102 {
7103 si = gsi_none ();
7104 s = pattern_stmt;
7105 }
7106 do
7107 {
7108 bool found = false;
7109 if (gimple_extract_op (s, &op))
7110 for (unsigned i = 0; i < op.num_ops; ++i)
7111 if (op.ops[i] == lookfor)
7112 {
7113 STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
7114 lookfor = gimple_get_lhs (s);
7115 found = true;
7116 break;
7117 }
7118 if (s == pattern_stmt)
7119 {
7120 if (!found && dump_enabled_p ())
7121 dump_printf_loc (MSG_NOTE, vect_location,
7122 "failed to update reduction index.\n");
7123 break;
7124 }
7125 if (gsi_end_p (i: si))
7126 s = pattern_stmt;
7127 else
7128 {
7129 s = gsi_stmt (i: si);
7130 if (s == pattern_stmt)
7131 /* Found the end inside a bigger pattern def seq. */
7132 si = gsi_none ();
7133 else
7134 gsi_next (i: &si);
7135 }
7136 } while (1);
7137 }
7138}
7139
7140/* Function vect_pattern_recog_1
7141
7142 Input:
7143 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
7144 computation pattern.
7145 STMT_INFO: A stmt from which the pattern search should start.
7146
7147 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
7148 a sequence of statements that has the same functionality and can be
7149 used to replace STMT_INFO. It returns the last statement in the sequence
7150 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
7151 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
7152 statement, having first checked that the target supports the new operation
7153 in that type.
7154
7155 This function also does some bookkeeping, as explained in the documentation
7156   for vect_pattern_recog.  */
7157
7158static void
7159vect_pattern_recog_1 (vec_info *vinfo,
7160 vect_recog_func *recog_func, stmt_vec_info stmt_info)
7161{
7162 gimple *pattern_stmt;
7163 loop_vec_info loop_vinfo;
7164 tree pattern_vectype;
7165
7166 /* If this statement has already been replaced with pattern statements,
7167 leave the original statement alone, since the first match wins.
7168 Instead try to match against the definition statements that feed
7169 the main pattern statement. */
7170 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7171 {
7172 gimple_stmt_iterator gsi;
7173 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
7174 !gsi_end_p (i: gsi); gsi_next (i: &gsi))
7175 vect_pattern_recog_1 (vinfo, recog_func,
7176 stmt_info: vinfo->lookup_stmt (gsi_stmt (i: gsi)));
7177 return;
7178 }
7179
7180 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
7181 pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
7182 if (!pattern_stmt)
7183 {
7184 /* Clear any half-formed pattern definition sequence. */
7185 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
7186 return;
7187 }
7188
7189 loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
7190
7191 /* Found a vectorizable pattern. */
7192 if (dump_enabled_p ())
7193 dump_printf_loc (MSG_NOTE, vect_location,
7194 "%s pattern recognized: %G",
7195 recog_func->name, pattern_stmt);
7196
7197 /* Mark the stmts that are involved in the pattern. */
7198 vect_mark_pattern_stmts (vinfo, orig_stmt_info: stmt_info, pattern_stmt, pattern_vectype);
7199
7200 /* Patterns cannot be vectorized using SLP, because they change the order of
7201 computation. */
7202 if (loop_vinfo)
7203 {
7204 unsigned ix, ix2;
7205 stmt_vec_info *elem_ptr;
7206 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
7207 elem_ptr, *elem_ptr == stmt_info);
7208 }
7209}
7210
7211
7212/* Function vect_pattern_recog
7213
7214 Input:
7215   LOOP_VINFO - a loop_vec_info for a loop in which we want to look for
7216 computation idioms.
7217
7218 Output - for each computation idiom that is detected we create a new stmt
7219 that provides the same functionality and that can be vectorized. We
7220   also record some information in the stmt_vec_info of the relevant
7221 stmts, as explained below:
7222
7223 At the entry to this function we have the following stmts, with the
7224 following initial value in the STMT_VINFO fields:
7225
7226 stmt in_pattern_p related_stmt vec_stmt
7227 S1: a_i = .... - - -
7228 S2: a_2 = ..use(a_i).. - - -
7229 S3: a_1 = ..use(a_2).. - - -
7230 S4: a_0 = ..use(a_1).. - - -
7231 S5: ... = ..use(a_0).. - - -
7232
7233 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
7234 represented by a single stmt. We then:
7235 - create a new stmt S6 equivalent to the pattern (the stmt is not
7236 inserted into the code)
7237 - fill in the STMT_VINFO fields as follows:
7238
7239 in_pattern_p related_stmt vec_stmt
7240 S1: a_i = .... - - -
7241 S2: a_2 = ..use(a_i).. - - -
7242 S3: a_1 = ..use(a_2).. - - -
7243 S4: a_0 = ..use(a_1).. true S6 -
7244 '---> S6: a_new = .... - S4 -
7245 S5: ... = ..use(a_0).. - - -
7246
7247 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
7248 to each other through the RELATED_STMT field).
7249
7250 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
7251 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
7252 remain irrelevant unless used by stmts other than S4.
7253
7254 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
7255 (because they are marked as irrelevant). It will vectorize S6, and record
7256 a pointer to the new vector stmt VS6 from S6 (as usual).
7257 S4 will be skipped, and S5 will be vectorized as usual:
7258
7259 in_pattern_p related_stmt vec_stmt
7260 S1: a_i = .... - - -
7261 S2: a_2 = ..use(a_i).. - - -
7262 S3: a_1 = ..use(a_2).. - - -
7263 > VS6: va_new = .... - - -
7264 S4: a_0 = ..use(a_1).. true S6 VS6
7265 '---> S6: a_new = .... - S4 VS6
7266 > VS5: ... = ..vuse(va_new).. - - -
7267 S5: ... = ..use(a_0).. - - -
7268
7269 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
7270 elsewhere), and we'll end up with:
7271
7272 VS6: va_new = ....
7273 VS5: ... = ..vuse(va_new)..
7274
7275   In the case of more than one pattern statement, e.g., widen-mult with
7276 intermediate type:
7277
7278 S1 a_t = ;
7279 S2 a_T = (TYPE) a_t;
7280 '--> S3: a_it = (interm_type) a_t;
7281 S4 prod_T = a_T * CONST;
7282 '--> S5: prod_T' = a_it w* CONST;
7283
7284 there may be other users of a_T outside the pattern. In that case S2 will
7285 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
7286 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
7287 be recorded in S3. */
7288
7289void
7290vect_pattern_recog (vec_info *vinfo)
7291{
7292 class loop *loop;
7293 basic_block *bbs;
7294 unsigned int nbbs;
7295 gimple_stmt_iterator si;
7296 unsigned int i, j;
7297
7298 vect_determine_precisions (vinfo);
7299
7300 DUMP_VECT_SCOPE ("vect_pattern_recog");
7301
7302 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo))
7303 {
7304 loop = LOOP_VINFO_LOOP (loop_vinfo);
7305 bbs = LOOP_VINFO_BBS (loop_vinfo);
7306 nbbs = loop->num_nodes;
7307
7308 /* Scan through the loop stmts, applying the pattern recognition
7309 functions starting at each stmt visited: */
7310 for (i = 0; i < nbbs; i++)
7311 {
7312 basic_block bb = bbs[i];
7313 for (si = gsi_start_bb (bb); !gsi_end_p (i: si); gsi_next (i: &si))
7314 {
7315 if (is_gimple_debug (gs: gsi_stmt (i: si)))
7316 continue;
7317 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (i: si));
7318 /* Scan over all generic vect_recog_xxx_pattern functions. */
7319 for (j = 0; j < NUM_PATTERNS; j++)
7320 vect_pattern_recog_1 (vinfo, recog_func: &vect_vect_recog_func_ptrs[j],
7321 stmt_info);
7322 }
7323 }
7324 }
7325 else
7326 {
7327 bb_vec_info bb_vinfo = as_a <bb_vec_info> (p: vinfo);
7328 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
7329 for (gimple_stmt_iterator gsi = gsi_start_bb (bb: bb_vinfo->bbs[i]);
7330 !gsi_end_p (i: gsi); gsi_next (i: &gsi))
7331 {
7332 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (i: gsi));
7333 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
7334 continue;
7335
7336 /* Scan over all generic vect_recog_xxx_pattern functions. */
7337 for (j = 0; j < NUM_PATTERNS; j++)
7338 vect_pattern_recog_1 (vinfo,
7339 recog_func: &vect_vect_recog_func_ptrs[j], stmt_info);
7340 }
7341 }
7342
7343 /* After this no more add_stmt calls are allowed. */
7344 vinfo->stmt_vec_info_ro = true;
7345}
7346
7347/* Build a GIMPLE_ASSIGN or GIMPLE_CALL with the tree_code or
7348   internal_fn contained in CH, respectively.  */
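/* A hypothetical usage sketch (assuming LHS, OP0 and OP1 are SSA names
   or constants of suitable types):

     vect_gimple_build (lhs, PLUS_EXPR, op0, op1);  // lhs = op0 + op1
     vect_gimple_build (lhs, IFN_FMAX, op0, op1);   // lhs = .FMAX (op0, op1)

   The first form produces a GIMPLE_ASSIGN, the second a GIMPLE_CALL
   to the internal function.  */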
7349gimple *
7350vect_gimple_build (tree lhs, code_helper ch, tree op0, tree op1)
7351{
7352 gcc_assert (op0 != NULL_TREE);
7353 if (ch.is_tree_code ())
7354 return gimple_build_assign (lhs, (tree_code) ch, op0, op1);
7355
7356 gcc_assert (ch.is_internal_fn ());
7357 gimple* stmt = gimple_build_call_internal (as_internal_fn (code: (combined_fn) ch),
7358 op1 == NULL_TREE ? 1 : 2,
7359 op0, op1);
7360 gimple_call_set_lhs (gs: stmt, lhs);
7361 return stmt;
7362}
7363
