1/* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 3, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "backend.h"
25#include "rtl.h"
26#include "tree.h"
27#include "gimple.h"
28#include "ssa.h"
29#include "expmed.h"
30#include "optabs-tree.h"
31#include "insn-config.h"
32#include "recog.h" /* FIXME: for insn_data */
33#include "fold-const.h"
34#include "stor-layout.h"
35#include "tree-eh.h"
36#include "gimplify.h"
37#include "gimple-iterator.h"
38#include "cfgloop.h"
39#include "tree-vectorizer.h"
40#include "dumpfile.h"
41#include "builtins.h"
42#include "internal-fn.h"
43#include "case-cfn-macros.h"
44
45/* Pattern recognition functions */
46static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
47 tree *);
48static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
49 tree *);
50static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
51 tree *);
52static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
53 tree *);
54static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
55static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
56 tree *);
57static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
58 tree *, tree *);
59static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
60static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
61 tree *, tree *);
62static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
63 tree *, tree *);
64
65static gimple *vect_recog_mult_pattern (vec<gimple *> *,
66 tree *, tree *);
67
68static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
69 tree *, tree *);
70static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
71static gimple *vect_recog_mask_conversion_pattern (vec<gimple *> *, tree *, tree *);
72
73struct vect_recog_func
74{
75 vect_recog_func_ptr fn;
76 const char *name;
77};
78
79/* Note that ordering matters - the first pattern matching on a stmt
80 is taken, which means the more complex pattern usually needs to precede
81 the less complex ones (widen_sum only after dot_prod or sad, for example). */
82static vect_recog_func vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
83 { vect_recog_widen_mult_pattern, "widen_mult" },
84 { vect_recog_dot_prod_pattern, "dot_prod" },
85 { vect_recog_sad_pattern, "sad" },
86 { vect_recog_widen_sum_pattern, "widen_sum" },
87 { vect_recog_pow_pattern, "pow" },
88 { vect_recog_widen_shift_pattern, "widen_shift" },
89 { vect_recog_over_widening_pattern, "over_widening" },
90 { vect_recog_rotate_pattern, "rotate" },
91 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
92 { vect_recog_divmod_pattern, "divmod" },
93 { vect_recog_mult_pattern, "mult" },
94 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
95 { vect_recog_bool_pattern, "bool" },
96 { vect_recog_mask_conversion_pattern, "mask_conversion" }
97};
98
99static inline void
100append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
101{
102 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
103 stmt);
104}
105
106static inline void
107new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
108{
109 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
110 append_pattern_def_seq (stmt_info, stmt);
111}
112
113/* Check whether STMT2 is in the same loop or basic block as STMT1.
114 Which of the two applies depends on whether we're currently doing
115 loop-based or basic-block-based vectorization, as determined by
116 the vinfo_for_stmt for STMT1 (which must be defined).
117
118 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
119 to be defined as well. */
120
121static bool
122vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
123{
124 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
125 return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2);
126}
127
128/* If the LHS of DEF_STMT has a single use, and that statement is
129 in the same loop or basic block, return it. */
130
131static gimple *
132vect_single_imm_use (gimple *def_stmt)
133{
134 tree lhs = gimple_assign_lhs (def_stmt);
135 use_operand_p use_p;
136 gimple *use_stmt;
137
138 if (!single_imm_use (lhs, &use_p, &use_stmt))
139 return NULL;
140
141 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
142 return NULL;
143
144 return use_stmt;
145}
146
147/* Check whether NAME, an ssa-name used in USE_STMT,
148 is a result of a type promotion, such that:
149 DEF_STMT: NAME = NOP (name0)
150 If CHECK_SIGN is TRUE, check that either both types are signed or both are
151 unsigned. */
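/* Illustrative example (an assumption, not part of the original comment):
   for a use such as

     short s_1 = ...;
     int i_2 = (int) s_1;
     int sum_3 = i_2 + sum_0;

   calling this function with NAME = i_2 and USE_STMT being the addition
   would return true, set *ORIG_TYPE to 'short', *DEF_STMT to the conversion
   stmt, and *PROMOTION to true because 'int' is at least twice as wide as
   'short'.  */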
152
153static bool
154type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
155 tree *orig_type, gimple **def_stmt, bool *promotion)
156{
157 gimple *dummy_gimple;
158 stmt_vec_info stmt_vinfo;
159 tree type = TREE_TYPE (name);
160 tree oprnd0;
161 enum vect_def_type dt;
162
163 stmt_vinfo = vinfo_for_stmt (use_stmt);
164 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
165 return false;
166
167 if (dt != vect_internal_def
168 && dt != vect_external_def && dt != vect_constant_def)
169 return false;
170
171 if (!*def_stmt)
172 return false;
173
174 if (dt == vect_internal_def)
175 {
176 stmt_vec_info def_vinfo = vinfo_for_stmt (*def_stmt);
177 if (STMT_VINFO_IN_PATTERN_P (def_vinfo))
178 return false;
179 }
180
181 if (!is_gimple_assign (*def_stmt))
182 return false;
183
184 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
185 return false;
186
187 oprnd0 = gimple_assign_rhs1 (*def_stmt);
188
189 *orig_type = TREE_TYPE (oprnd0);
190 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
191 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
192 return false;
193
194 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
195 *promotion = true;
196 else
197 *promotion = false;
198
199 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
200 return false;
201
202 return true;
203}
204
205/* Helper to return a new temporary SSA name of type TYPE for STMT. If STMT
206 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
207
208static tree
209vect_recog_temp_ssa_var (tree type, gimple *stmt)
210{
211 return make_temp_ssa_name (type, stmt, "patt");
212}
213
214/* Function vect_recog_dot_prod_pattern
215
216 Try to find the following pattern:
217
218 type x_t, y_t;
219 TYPE1 prod;
220 TYPE2 sum = init;
221 loop:
222 sum_0 = phi <init, sum_1>
223 S1 x_t = ...
224 S2 y_t = ...
225 S3 x_T = (TYPE1) x_t;
226 S4 y_T = (TYPE1) y_t;
227 S5 prod = x_T * y_T;
228 [S6 prod = (TYPE2) prod; #optional]
229 S7 sum_1 = prod + sum_0;
230
231 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
232 same size as 'TYPE1' or bigger. This is a special case of a reduction
233 computation.
234
235 Input:
236
237 * STMTS: Contains a stmt from which the pattern search begins. In the
238 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
239 will be detected.
240
241 Output:
242
243 * TYPE_IN: The type of the input arguments to the pattern.
244
245 * TYPE_OUT: The type of the output of this pattern.
246
247 * Return value: A new stmt that will be used to replace the sequence of
248 stmts that constitute the pattern. In this case it will be:
249 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
250
251 Note: The dot-prod idiom is a widening reduction pattern that is
252 vectorized without preserving all the intermediate results. It
253 produces only N/2 (widened) results (by summing up pairs of
254 intermediate results) rather than all N results. Therefore, we
255 cannot allow this pattern when we want to get all the results and in
256 the correct order (as is the case when this computation is in an
257 inner-loop nested in an outer-loop that is being vectorized). */
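/* As an illustration only (an assumption, not taken from the original
   sources), a C loop of the kind this pattern is meant to recognize:

     short x[256], y[256];

     int
     dot_prod (int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
         sum += x[i] * y[i];
       return sum;
     }

   Here 'type' is short, the products are computed in int ('TYPE1', exactly
   twice as wide), and the int accumulator sum plays the role of 'TYPE2'.  */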
258
259static gimple *
260vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
261 tree *type_out)
262{
263 gimple *stmt, *last_stmt = (*stmts)[0];
264 tree oprnd0, oprnd1;
265 tree oprnd00, oprnd01;
266 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
267 tree type, half_type;
268 gimple *pattern_stmt;
269 tree prod_type;
270 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
271 struct loop *loop;
272 tree var;
273 bool promotion;
274
275 if (!loop_info)
276 return NULL;
277
278 loop = LOOP_VINFO_LOOP (loop_info);
279
280 /* We don't allow changing the order of the computation in the inner-loop
281 when doing outer-loop vectorization. */
282 if (loop && nested_in_vect_loop_p (loop, last_stmt))
283 return NULL;
284
285 if (!is_gimple_assign (last_stmt))
286 return NULL;
287
288 type = gimple_expr_type (last_stmt);
289
290 /* Look for the following pattern
291 DX = (TYPE1) X;
292 DY = (TYPE1) Y;
293 DPROD = DX * DY;
294 DDPROD = (TYPE2) DPROD;
295 sum_1 = DDPROD + sum_0;
296 In which
297 - DX is double the size of X
298 - DY is double the size of Y
299 - DX, DY, DPROD all have the same type
300 - sum is the same size as DPROD or bigger
301 - sum has been recognized as a reduction variable.
302
303 This is equivalent to:
304 DPROD = X w* Y; #widen mult
305 sum_1 = DPROD w+ sum_0; #widen summation
306 or
307 DPROD = X w* Y; #widen mult
308 sum_1 = DPROD + sum_0; #summation
309 */
310
311 /* Starting from LAST_STMT, follow the defs of its uses in search
312 of the above pattern. */
313
314 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
315 return NULL;
316
317 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
318 {
319 /* Has been detected as widening-summation? */
320
321 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
322 type = gimple_expr_type (stmt);
323 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
324 return NULL;
325 oprnd0 = gimple_assign_rhs1 (stmt);
326 oprnd1 = gimple_assign_rhs2 (stmt);
327 half_type = TREE_TYPE (oprnd0);
328 }
329 else
330 {
331 gimple *def_stmt;
332
333 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
334 && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
335 return NULL;
336 oprnd0 = gimple_assign_rhs1 (last_stmt);
337 oprnd1 = gimple_assign_rhs2 (last_stmt);
338 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
339 || !types_compatible_p (TREE_TYPE (oprnd1), type))
340 return NULL;
341 stmt = last_stmt;
342
343 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
344 &promotion)
345 && promotion)
346 {
347 stmt = def_stmt;
348 oprnd0 = gimple_assign_rhs1 (stmt);
349 }
350 else
351 half_type = type;
352 }
353
354 /* So far so good. Since last_stmt was detected as a (summation) reduction,
355 we know that oprnd1 is the reduction variable (defined by a loop-header
356 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
357 It remains to check that oprnd0 is defined by a (widen_)mult_expr. */
358 if (TREE_CODE (oprnd0) != SSA_NAME)
359 return NULL;
360
361 prod_type = half_type;
362 stmt = SSA_NAME_DEF_STMT (oprnd0);
363
364 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
365 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
366 return NULL;
367
368 /* FORNOW. We cannot continue analyzing the def-use chain when this stmt
369 is a phi inside the loop (in case we are analyzing an outer-loop). */
370 if (!is_gimple_assign (stmt))
371 return NULL;
372 stmt_vinfo = vinfo_for_stmt (stmt);
373 gcc_assert (stmt_vinfo);
374 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
375 return NULL;
376 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
377 return NULL;
378 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
379 {
380 /* Has been detected as a widening multiplication? */
381
382 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
383 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
384 return NULL;
385 stmt_vinfo = vinfo_for_stmt (stmt);
386 gcc_assert (stmt_vinfo);
387 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
388 oprnd00 = gimple_assign_rhs1 (stmt);
389 oprnd01 = gimple_assign_rhs2 (stmt);
390 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
391 = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
392 }
393 else
394 {
395 tree half_type0, half_type1;
396 gimple *def_stmt;
397 tree oprnd0, oprnd1;
398
399 oprnd0 = gimple_assign_rhs1 (stmt);
400 oprnd1 = gimple_assign_rhs2 (stmt);
401 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
402 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
403 return NULL;
404 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
405 &promotion)
406 || !promotion)
407 return NULL;
408 oprnd00 = gimple_assign_rhs1 (def_stmt);
409 if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
410 &promotion)
411 || !promotion)
412 return NULL;
413 oprnd01 = gimple_assign_rhs1 (def_stmt);
414 if (!types_compatible_p (half_type0, half_type1))
415 return NULL;
416 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
417 return NULL;
418 }
419
420 half_type = TREE_TYPE (oprnd00);
421 *type_in = half_type;
422 *type_out = type;
423
424 /* Pattern detected. Create a stmt to be used to replace the pattern: */
425 var = vect_recog_temp_ssa_var (type, NULL);
426 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
427 oprnd00, oprnd01, oprnd1);
428
429 if (dump_enabled_p ())
430 {
431 dump_printf_loc (MSG_NOTE, vect_location,
432 "vect_recog_dot_prod_pattern: detected: ");
433 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
434 }
435
436 return pattern_stmt;
437}
438
439
440/* Function vect_recog_sad_pattern
441
442 Try to find the following Sum of Absolute Difference (SAD) pattern:
443
444 type x_t, y_t;
445 signed TYPE1 diff, abs_diff;
446 TYPE2 sum = init;
447 loop:
448 sum_0 = phi <init, sum_1>
449 S1 x_t = ...
450 S2 y_t = ...
451 S3 x_T = (TYPE1) x_t;
452 S4 y_T = (TYPE1) y_t;
453 S5 diff = x_T - y_T;
454 S6 abs_diff = ABS_EXPR <diff>;
455 [S7 abs_diff = (TYPE2) abs_diff; #optional]
456 S8 sum_1 = abs_diff + sum_0;
457
458 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
459 same size as 'TYPE1' or bigger. This is a special case of a reduction
460 computation.
461
462 Input:
463
464 * STMTS: Contains a stmt from which the pattern search begins. In the
465 example, when this function is called with S8, the pattern
466 {S3,S4,S5,S6,S7,S8} will be detected.
467
468 Output:
469
470 * TYPE_IN: The type of the input arguments to the pattern.
471
472 * TYPE_OUT: The type of the output of this pattern.
473
474 * Return value: A new stmt that will be used to replace the sequence of
475 stmts that constitute the pattern. In this case it will be:
476 SAD_EXPR <x_t, y_t, sum_0>
477 */
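/* As an illustration only (an assumption, not taken from the original
   sources), a C loop of the kind this pattern is meant to recognize:

     unsigned char a[256], b[256];

     int
     sad (int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
         {
           int diff = a[i] - b[i];
           sum += __builtin_abs (diff);
         }
       return sum;
     }

   'type' is unsigned char, 'diff' is the signed TYPE1 value, the
   __builtin_abs call becomes the ABS_EXPR of S6, and sum is the TYPE2
   accumulator.  */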
478
479static gimple *
480vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
481 tree *type_out)
482{
483 gimple *last_stmt = (*stmts)[0];
484 tree sad_oprnd0, sad_oprnd1;
485 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
486 tree half_type;
487 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
488 struct loop *loop;
489 bool promotion;
490
491 if (!loop_info)
492 return NULL;
493
494 loop = LOOP_VINFO_LOOP (loop_info);
495
496 /* We don't allow changing the order of the computation in the inner-loop
497 when doing outer-loop vectorization. */
498 if (loop && nested_in_vect_loop_p (loop, last_stmt))
499 return NULL;
500
501 if (!is_gimple_assign (last_stmt))
502 return NULL;
503
504 tree sum_type = gimple_expr_type (last_stmt);
505
506 /* Look for the following pattern
507 DX = (TYPE1) X;
508 DY = (TYPE1) Y;
509 DDIFF = DX - DY;
510 DAD = ABS_EXPR <DDIFF>;
511 DAD = (TYPE2) DAD;
512 sum_1 = DAD + sum_0;
513 In which
514 - DX is at least double the size of X
515 - DY is at least double the size of Y
516 - DX, DY, DDIFF, DAD all have the same type
517 - sum is the same size as DAD or bigger
518 - sum has been recognized as a reduction variable.
519
520 This is equivalent to:
521 DDIFF = X w- Y; #widen sub
522 DAD = ABS_EXPR <DDIFF>;
523 sum_1 = DAD w+ sum_0; #widen summation
524 or
525 DDIFF = X w- Y; #widen sub
526 DAD = ABS_EXPR <DDIFF>;
527 sum_1 = DAD + sum_0; #summation
528 */
529
530 /* Starting from LAST_STMT, follow the defs of its uses in search
531 of the above pattern. */
532
533 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
534 return NULL;
535
536 tree plus_oprnd0, plus_oprnd1;
537
538 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
539 {
540 /* Has been detected as widening-summation? */
541
542 gimple *stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
543 sum_type = gimple_expr_type (stmt);
544 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
545 return NULL;
546 plus_oprnd0 = gimple_assign_rhs1 (stmt);
547 plus_oprnd1 = gimple_assign_rhs2 (stmt);
548 half_type = TREE_TYPE (plus_oprnd0);
549 }
550 else
551 {
552 gimple *def_stmt;
553
554 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
555 && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
556 return NULL;
557 plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
558 plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
559 if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
560 || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
561 return NULL;
562
563 /* The type conversion could be promotion, demotion,
564 or just signed -> unsigned. */
565 if (type_conversion_p (plus_oprnd0, last_stmt, false,
566 &half_type, &def_stmt, &promotion))
567 plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
568 else
569 half_type = sum_type;
570 }
571
572 /* So far so good. Since last_stmt was detected as a (summation) reduction,
573 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
574 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
575 Then check that plus_oprnd0 is defined by an abs_expr. */
576
577 if (TREE_CODE (plus_oprnd0) != SSA_NAME)
578 return NULL;
579
580 tree abs_type = half_type;
581 gimple *abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);
582
583 /* It could not be the sad pattern if the abs_stmt is outside the loop. */
584 if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
585 return NULL;
586
587 /* FORNOW. We cannot continue analyzing the def-use chain when this stmt
588 is a phi inside the loop (in case we are analyzing an outer-loop). */
589 if (!is_gimple_assign (abs_stmt))
590 return NULL;
591
592 stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
593 gcc_assert (abs_stmt_vinfo);
594 if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
595 return NULL;
596 if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
597 return NULL;
598
599 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
600 if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
601 return NULL;
602 if (TYPE_UNSIGNED (abs_type))
603 return NULL;
604
605 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
606
607 if (TREE_CODE (abs_oprnd) != SSA_NAME)
608 return NULL;
609
610 gimple *diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);
611
612 /* It could not be the sad pattern if the diff_stmt is outside the loop. */
613 if (!gimple_bb (diff_stmt)
614 || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
615 return NULL;
616
617 /* FORNOW. We cannot continue analyzing the def-use chain when this stmt
618 is a phi inside the loop (in case we are analyzing an outer-loop). */
619 if (!is_gimple_assign (diff_stmt))
620 return NULL;
621
622 stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
623 gcc_assert (diff_stmt_vinfo);
624 if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
625 return NULL;
626 if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
627 return NULL;
628
629 tree half_type0, half_type1;
630 gimple *def_stmt;
631
632 tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
633 tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);
634
635 if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
636 || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
637 return NULL;
638 if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
639 &half_type0, &def_stmt, &promotion)
640 || !promotion)
641 return NULL;
642 sad_oprnd0 = gimple_assign_rhs1 (def_stmt);
643
644 if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
645 &half_type1, &def_stmt, &promotion)
646 || !promotion)
647 return NULL;
648 sad_oprnd1 = gimple_assign_rhs1 (def_stmt);
649
650 if (!types_compatible_p (half_type0, half_type1))
651 return NULL;
652 if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
653 || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
654 return NULL;
655
656 *type_in = TREE_TYPE (sad_oprnd0);
657 *type_out = sum_type;
658
659 /* Pattern detected. Create a stmt to be used to replace the pattern: */
660 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
661 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
662 sad_oprnd1, plus_oprnd1);
663
664 if (dump_enabled_p ())
665 {
666 dump_printf_loc (MSG_NOTE, vect_location,
667 "vect_recog_sad_pattern: detected: ");
668 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
669 }
670
671 return pattern_stmt;
672}
673
674
675/* Handle widening operation by a constant. At the moment we support MULT_EXPR
676 and LSHIFT_EXPR.
677
678 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
679 we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.
680
681 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
682 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
683 that satisfies the above restrictions, we can perform a widening operation
684 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
685 with a_it = (interm_type) a_t; Store such an operation in *WSTMT. */
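/* A hypothetical worked example (not from the original comment): with
   HALF_TYPE being 'unsigned char' and TYPE being 'unsigned int', a shift
   such as

     unsigned char a_t = ...;
     unsigned int a_T = (unsigned int) a_t;
     unsigned int res_T = a_T << 10;

   cannot use HALF_TYPE directly because the shift amount 10 exceeds the
   8-bit precision of unsigned char, but since TYPE is four times wider we
   can retry with the intermediate type 'unsigned short': 10 <= 16, so a
   new promotion a_it = (unsigned short) a_t is built and stored in *WSTMT,
   and *HALF_TYPE is updated to the intermediate type.  */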
686
687static bool
688vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
689 tree const_oprnd, tree *oprnd,
690 gimple **wstmt, tree type,
691 tree *half_type, gimple *def_stmt)
692{
693 tree new_type, new_oprnd;
694
695 if (code != MULT_EXPR && code != LSHIFT_EXPR)
696 return false;
697
698 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
699 || (code == LSHIFT_EXPR
700 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
701 != 1))
702 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
703 {
704 /* CONST_OPRND is a constant of HALF_TYPE. */
705 *oprnd = gimple_assign_rhs1 (def_stmt);
706 return true;
707 }
708
709 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
710 return false;
711
712 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
713 return false;
714
715 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
716 a type 2 times bigger than HALF_TYPE. */
717 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
718 TYPE_UNSIGNED (type));
719 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
720 || (code == LSHIFT_EXPR
721 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
722 return false;
723
724 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
725 *oprnd = gimple_assign_rhs1 (def_stmt);
726 new_oprnd = make_ssa_name (new_type);
727 *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
728 *oprnd = new_oprnd;
729
730 *half_type = new_type;
731 return true;
732}
733
734
735/* Function vect_recog_widen_mult_pattern
736
737 Try to find the following pattern:
738
739 type1 a_t;
740 type2 b_t;
741 TYPE a_T, b_T, prod_T;
742
743 S1 a_t = ;
744 S2 b_t = ;
745 S3 a_T = (TYPE) a_t;
746 S4 b_T = (TYPE) b_t;
747 S5 prod_T = a_T * b_T;
748
749 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
750
751 Also detect unsigned cases:
752
753 unsigned type1 a_t;
754 unsigned type2 b_t;
755 unsigned TYPE u_prod_T;
756 TYPE a_T, b_T, prod_T;
757
758 S1 a_t = ;
759 S2 b_t = ;
760 S3 a_T = (TYPE) a_t;
761 S4 b_T = (TYPE) b_t;
762 S5 prod_T = a_T * b_T;
763 S6 u_prod_T = (unsigned TYPE) prod_T;
764
765 and multiplication by constants:
766
767 type a_t;
768 TYPE a_T, prod_T;
769
770 S1 a_t = ;
771 S3 a_T = (TYPE) a_t;
772 S5 prod_T = a_T * CONST;
773
774 A special case of multiplication by constants is when 'TYPE' is 4 times
775 bigger than 'type', but CONST fits an intermediate type 2 times smaller
776 than 'TYPE'. In that case we create an additional pattern stmt for S3
777 to create a variable of the intermediate type, and perform widen-mult
778 on the intermediate type as well:
779
780 type a_t;
781 interm_type a_it;
782 TYPE a_T, prod_T, prod_T';
783
784 S1 a_t = ;
785 S3 a_T = (TYPE) a_t;
786 '--> a_it = (interm_type) a_t;
787 S5 prod_T = a_T * CONST;
788 '--> prod_T' = a_it w* CONST;
789
790 Input/Output:
791
792 * STMTS: Contains a stmt from which the pattern search begins. In the
793 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
794 is detected. In case of unsigned widen-mult, the original stmt (S5) is
795 replaced with S6 in STMTS. In case of multiplication by a constant
796 of an intermediate type (the last case above), STMTS also contains S3
797 (inserted before S5).
798
799 Output:
800
801 * TYPE_IN: The type of the input arguments to the pattern.
802
803 * TYPE_OUT: The type of the output of this pattern.
804
805 * Return value: A new stmt that will be used to replace the sequence of
806 stmts that constitute the pattern. In this case it will be:
807 WIDEN_MULT <a_t, b_t>
808 If the result of WIDEN_MULT needs to be converted to a larger type, the
809 returned stmt will be this type conversion stmt.
810*/
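/* As an illustration only (an assumption, not taken from the original
   sources), a loop this pattern is meant to recognize:

     short a[256], b[256];
     int c[256];

     void
     widen_mult (int n)
     {
       for (int i = 0; i < n; i++)
         c[i] = a[i] * b[i];
     }

   Both operands are promoted from short to int before the multiplication,
   so the product can be computed directly as WIDEN_MULT <a[i], b[i]>.  */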
811
812static gimple *
813vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
814 tree *type_in, tree *type_out)
815{
816 gimple *last_stmt = stmts->pop ();
817 gimple *def_stmt0, *def_stmt1;
818 tree oprnd0, oprnd1;
819 tree type, half_type0, half_type1;
820 gimple *new_stmt = NULL, *pattern_stmt = NULL;
821 tree vectype, vecitype;
822 tree var;
823 enum tree_code dummy_code;
824 int dummy_int;
825 vec<tree> dummy_vec;
826 bool op1_ok;
827 bool promotion;
828
829 if (!is_gimple_assign (last_stmt))
830 return NULL;
831
832 type = gimple_expr_type (last_stmt);
833
834 /* Starting from LAST_STMT, follow the defs of its uses in search
835 of the above pattern. */
836
837 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
838 return NULL;
839
840 oprnd0 = gimple_assign_rhs1 (last_stmt);
841 oprnd1 = gimple_assign_rhs2 (last_stmt);
842 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
843 || !types_compatible_p (TREE_TYPE (oprnd1), type))
844 return NULL;
845
846 /* Check argument 0. */
847 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
848 &promotion)
849 || !promotion)
850 return NULL;
851 /* Check argument 1. */
852 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
853 &def_stmt1, &promotion);
854
855 if (op1_ok && promotion)
856 {
857 oprnd0 = gimple_assign_rhs1 (def_stmt0);
858 oprnd1 = gimple_assign_rhs1 (def_stmt1);
859 }
860 else
861 {
862 if (TREE_CODE (oprnd1) == INTEGER_CST
863 && TREE_CODE (half_type0) == INTEGER_TYPE
864 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
865 &oprnd0, &new_stmt, type,
866 &half_type0, def_stmt0))
867 {
868 half_type1 = half_type0;
869 oprnd1 = fold_convert (half_type1, oprnd1);
870 }
871 else
872 return NULL;
873 }
874
875 /* If the two arguments have different sizes, convert the one with
876 the smaller type into the larger type. */
877 if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
878 {
879 /* If we already used up the single-stmt slot give up. */
880 if (new_stmt)
881 return NULL;
882
883 tree* oprnd = NULL;
884 gimple *def_stmt = NULL;
885
886 if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
887 {
888 def_stmt = def_stmt0;
889 half_type0 = half_type1;
890 oprnd = &oprnd0;
891 }
892 else
893 {
894 def_stmt = def_stmt1;
895 half_type1 = half_type0;
896 oprnd = &oprnd1;
897 }
898
899 tree old_oprnd = gimple_assign_rhs1 (def_stmt);
900 tree new_oprnd = make_ssa_name (half_type0);
901 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
902 *oprnd = new_oprnd;
903 }
904
905 /* Handle unsigned case. Look for
906 S6 u_prod_T = (unsigned TYPE) prod_T;
907 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
908 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
909 {
910 gimple *use_stmt;
911 tree use_lhs;
912 tree use_type;
913
914 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
915 return NULL;
916
917 use_stmt = vect_single_imm_use (last_stmt);
918 if (!use_stmt || !is_gimple_assign (use_stmt)
919 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
920 return NULL;
921
922 use_lhs = gimple_assign_lhs (use_stmt);
923 use_type = TREE_TYPE (use_lhs);
924 if (!INTEGRAL_TYPE_P (use_type)
925 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
926 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
927 return NULL;
928
929 type = use_type;
930 last_stmt = use_stmt;
931 }
932
933 if (!types_compatible_p (half_type0, half_type1))
934 return NULL;
935
936 /* If TYPE is more than twice as large as HALF_TYPE, we use WIDEN_MULT
937 to get an intermediate result of type ITYPE. In this case we need
938 to build a statement to convert this intermediate result to type TYPE. */
939 tree itype = type;
940 if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
941 itype = build_nonstandard_integer_type
942 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (half_type0)) * 2,
943 TYPE_UNSIGNED (type));
944
945 /* Pattern detected. */
946 if (dump_enabled_p ())
947 dump_printf_loc (MSG_NOTE, vect_location,
948 "vect_recog_widen_mult_pattern: detected:\n");
949
950 /* Check target support */
951 vectype = get_vectype_for_scalar_type (half_type0);
952 vecitype = get_vectype_for_scalar_type (itype);
953 if (!vectype
954 || !vecitype
955 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
956 vecitype, vectype,
957 &dummy_code, &dummy_code,
958 &dummy_int, &dummy_vec))
959 return NULL;
960
961 *type_in = vectype;
962 *type_out = get_vectype_for_scalar_type (type);
963
964 /* Pattern supported. Create a stmt to be used to replace the pattern: */
965 var = vect_recog_temp_ssa_var (itype, NULL);
966 pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);
967
968 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
969 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
970
971 /* If the original two operands have different sizes, we may need to convert
972 the smaller one into the larger type. If this is the case, at this point
973 the new stmt is already built. */
974 if (new_stmt)
975 {
976 append_pattern_def_seq (stmt_vinfo, new_stmt);
977 stmt_vec_info new_stmt_info
978 = new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
979 set_vinfo_for_stmt (new_stmt, new_stmt_info);
980 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
981 }
982
983 /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
984 the result of the widen-mult operation into type TYPE. */
985 if (itype != type)
986 {
987 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
988 stmt_vec_info pattern_stmt_info
989 = new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
990 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
991 STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
992 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
993 NOP_EXPR,
994 gimple_assign_lhs (pattern_stmt));
995 }
996
997 if (dump_enabled_p ())
998 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
999
1000 stmts->safe_push (last_stmt);
1001 return pattern_stmt;
1002}
1003
1004
1005/* Function vect_recog_pow_pattern
1006
1007 Try to find the following pattern:
1008
1009 x = POW (y, N);
1010
1011 with POW being one of pow, powf, powi, powif and N being
1012 either 2 or 0.5.
1013
1014 Input:
1015
1016 * LAST_STMT: A stmt from which the pattern search begins.
1017
1018 Output:
1019
1020 * TYPE_IN: The type of the input arguments to the pattern.
1021
1022 * TYPE_OUT: The type of the output of this pattern.
1023
1024 * Return value: A new stmt that will be used to replace the sequence of
1025 stmts that constitute the pattern. In this case it will be:
1026 x = x * x
1027 or
1028 x = sqrt (x)
1029*/
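/* As an illustration only (an assumption, not taken from the original
   sources), calls of the kind this pattern rewrites:

     #include <math.h>

     void
     squares_and_roots (double *x, double *y, double *z, int n)
     {
       for (int i = 0; i < n; i++)
         {
           y[i] = pow (x[i], 2.0);   // replaced by x[i] * x[i]
           z[i] = pow (x[i], 0.5);   // replaced by sqrt (x[i]) when supported
         }
     }
   */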
1030
1031static gimple *
1032vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
1033 tree *type_out)
1034{
1035 gimple *last_stmt = (*stmts)[0];
1036 tree base, exp = NULL;
1037 gimple *stmt;
1038 tree var;
1039
1040 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1041 return NULL;
1042
1043 switch (gimple_call_combined_fn (last_stmt))
1044 {
1045 CASE_CFN_POW:
1046 CASE_CFN_POWI:
1047 base = gimple_call_arg (last_stmt, 0);
1048 exp = gimple_call_arg (last_stmt, 1);
1049 if (TREE_CODE (exp) != REAL_CST
1050 && TREE_CODE (exp) != INTEGER_CST)
1051 return NULL;
1052 break;
1053
1054 default:
1055 return NULL;
1056 }
1057
1058 /* We now have a pow or powi builtin function call with a constant
1059 exponent. */
1060
1061 *type_out = NULL_TREE;
1062
1063 /* Catch squaring. */
1064 if ((tree_fits_shwi_p (exp)
1065 && tree_to_shwi (exp) == 2)
1066 || (TREE_CODE (exp) == REAL_CST
1067 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1068 {
1069 *type_in = TREE_TYPE (base);
1070
1071 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1072 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1073 return stmt;
1074 }
1075
1076 /* Catch square root. */
1077 if (TREE_CODE (exp) == REAL_CST
1078 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1079 {
1080 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
1081 if (*type_in
1082 && direct_internal_fn_supported_p (IFN_SQRT, *type_in,
1083 OPTIMIZE_FOR_SPEED))
1084 {
1085 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1086 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1087 gimple_call_set_lhs (stmt, var);
1088 gimple_call_set_nothrow (stmt, true);
1089 return stmt;
1090 }
1091 }
1092
1093 return NULL;
1094}
1095
1096
1097/* Function vect_recog_widen_sum_pattern
1098
1099 Try to find the following pattern:
1100
1101 type x_t;
1102 TYPE x_T, sum = init;
1103 loop:
1104 sum_0 = phi <init, sum_1>
1105 S1 x_t = *p;
1106 S2 x_T = (TYPE) x_t;
1107 S3 sum_1 = x_T + sum_0;
1108
1109 where type 'TYPE' is at least double the size of type 'type', i.e. we're
1110 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1111 a special case of a reduction computation.
1112
1113 Input:
1114
1115 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1116 when this function is called with S3, the pattern {S2,S3} will be detected.
1117
1118 Output:
1119
1120 * TYPE_IN: The type of the input arguments to the pattern.
1121
1122 * TYPE_OUT: The type of the output of this pattern.
1123
1124 * Return value: A new stmt that will be used to replace the sequence of
1125 stmts that constitute the pattern. In this case it will be:
1126 WIDEN_SUM <x_t, sum_0>
1127
1128 Note: The widening-sum idiom is a widening reduction pattern that is
1129 vectorized without preserving all the intermediate results. It
1130 produces only N/2 (widened) results (by summing up pairs of
1131 intermediate results) rather than all N results. Therefore, we
1132 cannot allow this pattern when we want to get all the results and in
1133 the correct order (as is the case when this computation is in an
1134 inner-loop nested in an outer-loop that is being vectorized). */
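/* As an illustration only (an assumption, not taken from the original
   sources), a loop this pattern is meant to recognize:

     unsigned short a[256];

     unsigned int
     widen_sum (int n)
     {
       unsigned int sum = 0;
       for (int i = 0; i < n; i++)
         sum += a[i];
       return sum;
     }

   Each element is promoted from unsigned short to unsigned int before the
   addition, which is replaced by WIDEN_SUM <a[i], sum_0>.  */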
1135
1136static gimple *
1137vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
1138 tree *type_out)
1139{
1140 gimple *stmt, *last_stmt = (*stmts)[0];
1141 tree oprnd0, oprnd1;
1142 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1143 tree type, half_type;
1144 gimple *pattern_stmt;
1145 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1146 struct loop *loop;
1147 tree var;
1148 bool promotion;
1149
1150 if (!loop_info)
1151 return NULL;
1152
1153 loop = LOOP_VINFO_LOOP (loop_info);
1154
1155 /* We don't allow changing the order of the computation in the inner-loop
1156 when doing outer-loop vectorization. */
1157 if (loop && nested_in_vect_loop_p (loop, last_stmt))
1158 return NULL;
1159
1160 if (!is_gimple_assign (last_stmt))
1161 return NULL;
1162
1163 type = gimple_expr_type (last_stmt);
1164
1165 /* Look for the following pattern
1166 DX = (TYPE) X;
1167 sum_1 = DX + sum_0;
1168 In which DX is at least double the size of X, and sum_1 has been
1169 recognized as a reduction variable.
1170 */
1171
1172 /* Starting from LAST_STMT, follow the defs of its uses in search
1173 of the above pattern. */
1174
1175 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
1176 return NULL;
1177
1178 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def
1179 && ! STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_vinfo))
1180 return NULL;
1181
1182 oprnd0 = gimple_assign_rhs1 (last_stmt);
1183 oprnd1 = gimple_assign_rhs2 (last_stmt);
1184 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
1185 || !types_compatible_p (TREE_TYPE (oprnd1), type))
1186 return NULL;
1187
1188 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1189 we know that oprnd1 is the reduction variable (defined by a loop-header
1190 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1191 Left to check that oprnd0 is defined by a cast from type 'type' to type
1192 'TYPE'. */
1193
1194 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
1195 &promotion)
1196 || !promotion)
1197 return NULL;
1198
1199 oprnd0 = gimple_assign_rhs1 (stmt);
1200 *type_in = half_type;
1201 *type_out = type;
1202
1203 /* Pattern detected. Create a stmt to be used to replace the pattern: */
1204 var = vect_recog_temp_ssa_var (type, NULL);
1205 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);
1206
1207 if (dump_enabled_p ())
1208 {
1209 dump_printf_loc (MSG_NOTE, vect_location,
1210 "vect_recog_widen_sum_pattern: detected: ");
1211 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1212 }
1213
1214 return pattern_stmt;
1215}
1216
1217
1218/* Return TRUE if the operation in STMT can be performed on a smaller type.
1219
1220 Input:
1221 STMT - a statement to check.
1222 DEF - we support operations with two operands, one of which is constant.
1223 The other operand can be defined by a demotion operation, or by a
1224 previous statement in a sequence of over-promoted operations. In the
1225 latter case DEF is used to replace that operand. (It is defined by a
1226 pattern statement we created for the previous statement in the
1227 sequence).
1228
1229 Input/output:
1230 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1231 NULL, it's the type of DEF.
1232 STMTS - additional pattern statements. If a pattern statement (type
1233 conversion) is created in this function, its original statement is
1234 added to STMTS.
1235
1236 Output:
1237 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1238 operands to use in the new pattern statement for STMT (will be created
1239 in vect_recog_over_widening_pattern ()).
1240 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1241 statements for STMT: the first one is a type promotion and the second
1242 one is the operation itself. We return the type promotion statement
1243 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1244 the second pattern statement. */
1245
1246static bool
1247vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
1248 tree *op0, tree *op1, gimple **new_def_stmt,
1249 vec<gimple *> *stmts)
1250{
1251 enum tree_code code;
1252 tree const_oprnd, oprnd;
1253 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
1254 gimple *def_stmt, *new_stmt;
1255 bool first = false;
1256 bool promotion;
1257
1258 *op0 = NULL_TREE;
1259 *op1 = NULL_TREE;
1260 *new_def_stmt = NULL;
1261
1262 if (!is_gimple_assign (stmt))
1263 return false;
1264
1265 code = gimple_assign_rhs_code (stmt);
1266 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
1267 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
1268 return false;
1269
1270 oprnd = gimple_assign_rhs1 (stmt);
1271 const_oprnd = gimple_assign_rhs2 (stmt);
1272 type = gimple_expr_type (stmt);
1273
1274 if (TREE_CODE (oprnd) != SSA_NAME
1275 || TREE_CODE (const_oprnd) != INTEGER_CST)
1276 return false;
1277
1278 /* If oprnd has other uses besides that in stmt we cannot mark it
1279 as being part of a pattern only. */
1280 if (!has_single_use (oprnd))
1281 return false;
1282
1283 /* If we are in the middle of a sequence, we use DEF from a previous
1284 statement. Otherwise, OPRND has to be a result of type promotion. */
1285 if (*new_type)
1286 {
1287 half_type = *new_type;
1288 oprnd = def;
1289 }
1290 else
1291 {
1292 first = true;
1293 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
1294 &promotion)
1295 || !promotion
1296 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1297 return false;
1298 }
1299
1300 /* Can we perform the operation on a smaller type? */
1301 switch (code)
1302 {
1303 case BIT_IOR_EXPR:
1304 case BIT_XOR_EXPR:
1305 case BIT_AND_EXPR:
1306 if (!int_fits_type_p (const_oprnd, half_type))
1307 {
1308 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1309 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1310 return false;
1311
1312 interm_type = build_nonstandard_integer_type (
1313 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1314 if (!int_fits_type_p (const_oprnd, interm_type))
1315 return false;
1316 }
1317
1318 break;
1319
1320 case LSHIFT_EXPR:
1321 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1322 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1323 return false;
1324
1325 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1326 (e.g., if the original value was char, the shift amount is at most 8
1327 if we want to use short). */
1328 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1329 return false;
1330
1331 interm_type = build_nonstandard_integer_type (
1332 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1333
1334 if (!vect_supportable_shift (code, interm_type))
1335 return false;
1336
1337 break;
1338
1339 case RSHIFT_EXPR:
1340 if (vect_supportable_shift (code, half_type))
1341 break;
1342
1343 /* Try intermediate type - HALF_TYPE is not supported. */
1344 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1345 return false;
1346
1347 interm_type = build_nonstandard_integer_type (
1348 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1349
1350 if (!vect_supportable_shift (code, interm_type))
1351 return false;
1352
1353 break;
1354
1355 default:
1356 gcc_unreachable ();
1357 }
1358
1359 /* There are four possible cases:
1360 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1361 the first statement in the sequence)
1362 a. The original, HALF_TYPE, is not enough - we replace the promotion
1363 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1364 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1365 promotion.
1366 2. OPRND is defined by a pattern statement we created.
1367 a. Its type is not sufficient for the operation, we create a new stmt:
1368 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1369 this statement in NEW_DEF_STMT, and it is later put in
1370 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1371 b. OPRND is good to use in the new statement. */
1372 if (first)
1373 {
1374 if (interm_type)
1375 {
1376 /* Replace the original type conversion HALF_TYPE->TYPE with
1377 HALF_TYPE->INTERM_TYPE. */
1378 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1379 {
1380 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1381 /* Check if the already created pattern stmt is what we need. */
1382 if (!is_gimple_assign (new_stmt)
1383 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
1384 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1385 return false;
1386
1387 stmts->safe_push (def_stmt);
1388 oprnd = gimple_assign_lhs (new_stmt);
1389 }
1390 else
1391 {
1392 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1393 oprnd = gimple_assign_rhs1 (def_stmt);
1394 new_oprnd = make_ssa_name (interm_type);
1395 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1396 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1397 stmts->safe_push (def_stmt);
1398 oprnd = new_oprnd;
1399 }
1400 }
1401 else
1402 {
1403 /* Retrieve the operand before the type promotion. */
1404 oprnd = gimple_assign_rhs1 (def_stmt);
1405 }
1406 }
1407 else
1408 {
1409 if (interm_type)
1410 {
1411 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1412 new_oprnd = make_ssa_name (interm_type);
1413 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1414 oprnd = new_oprnd;
1415 *new_def_stmt = new_stmt;
1416 }
1417
1418 /* Otherwise, OPRND is already set. */
1419 }
1420
1421 if (interm_type)
1422 *new_type = interm_type;
1423 else
1424 *new_type = half_type;
1425
1426 *op0 = oprnd;
1427 *op1 = fold_convert (*new_type, const_oprnd);
1428
1429 return true;
1430}
1431
1432
1433/* Try to find a statement or a sequence of statements that can be performed
1434 on a smaller type:
1435
1436 type x_t;
1437 TYPE x_T, res0_T, res1_T;
1438 loop:
1439 S1 x_t = *p;
1440 S2 x_T = (TYPE) x_t;
1441 S3 res0_T = op (x_T, C0);
1442 S4 res1_T = op (res0_T, C1);
1443 S5 ... = () res1_T; - type demotion
1444
1445 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1446 constants.
1447 Check whether S3 and S4 can be done on a smaller type than 'TYPE'; it can either
1448 be 'type' or some intermediate type. For now, we expect S5 to be a type
1449 demotion operation. We also check that S3 and S4 have only one use. */
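/* As an illustration only (an assumption, not taken from the original
   sources), a loop with such over-promoted operations:

     unsigned char in[256], out[256];

     void
     over_widen (int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = (in[i] << 2) | 3;
     }

   The shift and the bitwise OR are performed in int only because of C
   integer promotion; the final store demotes the result back to unsigned
   char, so both operations can be done in a narrower type.  */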
1450
1451static gimple *
1452vect_recog_over_widening_pattern (vec<gimple *> *stmts,
1453 tree *type_in, tree *type_out)
1454{
1455 gimple *stmt = stmts->pop ();
1456 gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
1457 *use_stmt = NULL;
1458 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1459 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1460 bool first;
1461 tree type = NULL;
1462
1463 first = true;
1464 while (1)
1465 {
1466 if (!vinfo_for_stmt (stmt)
1467 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1468 return NULL;
1469
1470 new_def_stmt = NULL;
1471 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1472 &op0, &op1, &new_def_stmt,
1473 stmts))
1474 {
1475 if (first)
1476 return NULL;
1477 else
1478 break;
1479 }
1480
1481 /* STMT can be performed on a smaller type. Check its uses. */
1482 use_stmt = vect_single_imm_use (stmt);
1483 if (!use_stmt || !is_gimple_assign (use_stmt))
1484 return NULL;
1485
1486 /* Create pattern statement for STMT. */
1487 vectype = get_vectype_for_scalar_type (new_type);
1488 if (!vectype)
1489 return NULL;
1490
1491 /* We want to collect all the statements for which we create pattern
1492 statements, except for the case when the last statement in the
1493 sequence doesn't have a corresponding pattern statement. In such a
1494 case we associate the last pattern statement with the last statement
1495 in the sequence. Therefore, we only add the original statement to
1496 the list if we know that it is not the last. */
1497 if (prev_stmt)
1498 stmts->safe_push (prev_stmt);
1499
1500 var = vect_recog_temp_ssa_var (new_type, NULL);
1501 pattern_stmt
1502 = gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
1503 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1504 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1505
1506 if (dump_enabled_p ())
1507 {
1508 dump_printf_loc (MSG_NOTE, vect_location,
1509 "created pattern stmt: ");
1510 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1511 }
1512
1513 type = gimple_expr_type (stmt);
1514 prev_stmt = stmt;
1515 stmt = use_stmt;
1516
1517 first = false;
1518 }
1519
1520 /* We got a sequence. We expect it to end with a type demotion operation.
1521 Otherwise, we quit (for now). There are three possible cases: the
1522 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1523 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1524 NEW_TYPE differs (we create a new conversion statement). */
1525 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1526 {
1527 use_lhs = gimple_assign_lhs (use_stmt);
1528 use_type = TREE_TYPE (use_lhs);
1529 /* Support only type demotion or signedness change. */
1530 if (!INTEGRAL_TYPE_P (use_type)
1531 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1532 return NULL;
1533
1534 /* Check that NEW_TYPE is not bigger than the conversion result. */
1535 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1536 return NULL;
1537
1538 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1539 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1540 {
1541 /* Create NEW_TYPE->USE_TYPE conversion. */
1542 new_oprnd = make_ssa_name (use_type);
1543 pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
1544 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1545
1546 *type_in = get_vectype_for_scalar_type (new_type);
1547 *type_out = get_vectype_for_scalar_type (use_type);
1548
1549 /* We created a pattern statement for the last statement in the
1550 sequence, so we don't need to associate it with the pattern
1551 statement created for PREV_STMT. Therefore, we add PREV_STMT
1552 to the list in order to mark it later in vect_pattern_recog_1. */
1553 if (prev_stmt)
1554 stmts->safe_push (prev_stmt);
1555 }
1556 else
1557 {
1558 if (prev_stmt)
1559 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1560 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1561
1562 *type_in = vectype;
1563 *type_out = NULL_TREE;
1564 }
1565
1566 stmts->safe_push (use_stmt);
1567 }
1568 else
1569 /* TODO: support general case, create a conversion to the correct type. */
1570 return NULL;
1571
1572 /* Pattern detected. */
1573 if (dump_enabled_p ())
1574 {
1575 dump_printf_loc (MSG_NOTE, vect_location,
1576 "vect_recog_over_widening_pattern: detected: ");
1577 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1578 }
1579
1580 return pattern_stmt;
1581}
1582
1583/* Detect widening shift pattern:
1584
1585 type a_t;
1586 TYPE a_T, res_T;
1587
1588 S1 a_t = ;
1589 S2 a_T = (TYPE) a_t;
1590 S3 res_T = a_T << CONST;
1591
1592 where type 'TYPE' is at least double the size of type 'type'.
1593
1594 Also detect cases where the shift result is immediately converted
1595 to another type 'result_type' that is no larger in size than 'TYPE'.
1596 In those cases we perform a widen-shift that directly results in
1597 'result_type', to avoid a possible over-widening situation:
1598
1599 type a_t;
1600 TYPE a_T, res_T;
1601 result_type res_result;
1602
1603 S1 a_t = ;
1604 S2 a_T = (TYPE) a_t;
1605 S3 res_T = a_T << CONST;
1606 S4 res_result = (result_type) res_T;
1607 '--> res_result' = a_t w<< CONST;
1608
1609 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1610 create an additional pattern stmt for S2 to create a variable of an
1611 intermediate type, and perform widen-shift on the intermediate type:
1612
1613 type a_t;
1614 interm_type a_it;
1615 TYPE a_T, res_T, res_T';
1616
1617 S1 a_t = ;
1618 S2 a_T = (TYPE) a_t;
1619 '--> a_it = (interm_type) a_t;
1620 S3 res_T = a_T << CONST;
1621 '--> res_T' = a_it <<* CONST;
1622
1623 Input/Output:
1624
1625 * STMTS: Contains a stmt from which the pattern search begins.
1626 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1627 in STMTS. When an intermediate type is used and a pattern statement is
1628 created for S2, we also put S2 here (before S3).
1629
1630 Output:
1631
1632 * TYPE_IN: The type of the input arguments to the pattern.
1633
1634 * TYPE_OUT: The type of the output of this pattern.
1635
1636 * Return value: A new stmt that will be used to replace the sequence of
1637 stmts that constitute the pattern. In this case it will be:
1638 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
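/* As an illustration only (an assumption, not taken from the original
   sources), a loop this pattern is meant to recognize:

     unsigned short a[256];
     unsigned int b[256];

     void
     widen_shift (int n)
     {
       for (int i = 0; i < n; i++)
         b[i] = (unsigned int) a[i] << 8;
     }

   'a[i]' is promoted from unsigned short to unsigned int (S2) and then
   shifted (S3), which is replaced by WIDEN_LSHIFT_EXPR <a[i], 8>.  */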
1639
1640static gimple *
1641vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
1642 tree *type_in, tree *type_out)
1643{
1644 gimple *last_stmt = stmts->pop ();
1645 gimple *def_stmt0;
1646 tree oprnd0, oprnd1;
1647 tree type, half_type0;
1648 gimple *pattern_stmt;
1649 tree vectype, vectype_out = NULL_TREE;
1650 tree var;
1651 enum tree_code dummy_code;
1652 int dummy_int;
1653 vec<tree> dummy_vec;
1654 gimple *use_stmt;
1655 bool promotion;
1656
1657 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1658 return NULL;
1659
1660 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1661 return NULL;
1662
1663 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1664 return NULL;
1665
1666 oprnd0 = gimple_assign_rhs1 (last_stmt);
1667 oprnd1 = gimple_assign_rhs2 (last_stmt);
1668 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1669 return NULL;
1670
1671 /* Check operand 0: it has to be defined by a type promotion. */
1672 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1673 &promotion)
1674 || !promotion)
1675 return NULL;
1676
1677 /* Check operand 1: has to be positive. We check that it fits the type
1678 in vect_handle_widen_op_by_const (). */
1679 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1680 return NULL;
1681
1682 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1683 type = gimple_expr_type (last_stmt);
1684
1685 /* Check for subsequent conversion to another type. */
1686 use_stmt = vect_single_imm_use (last_stmt);
1687 if (use_stmt && is_gimple_assign (use_stmt)
1688 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1689 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1690 {
1691 tree use_lhs = gimple_assign_lhs (use_stmt);
1692 tree use_type = TREE_TYPE (use_lhs);
1693
1694 if (INTEGRAL_TYPE_P (use_type)
1695 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1696 {
1697 last_stmt = use_stmt;
1698 type = use_type;
1699 }
1700 }
1701
1702 /* Check if this is a widening operation. */
1703 gimple *wstmt = NULL;
1704 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1705 &oprnd0, &wstmt,
1706 type, &half_type0, def_stmt0))
1707 return NULL;
1708
1709 /* Pattern detected. */
1710 if (dump_enabled_p ())
1711 dump_printf_loc (MSG_NOTE, vect_location,
1712 "vect_recog_widen_shift_pattern: detected:\n");
1713
1714 /* Check target support. */
1715 vectype = get_vectype_for_scalar_type (half_type0);
1716 vectype_out = get_vectype_for_scalar_type (type);
1717
1718 if (!vectype
1719 || !vectype_out
1720 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1721 vectype_out, vectype,
1722 &dummy_code, &dummy_code,
1723 &dummy_int, &dummy_vec))
1724 return NULL;
1725
1726 *type_in = vectype;
1727 *type_out = vectype_out;
1728
1729 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1730 var = vect_recog_temp_ssa_var (type, NULL);
1731 pattern_stmt =
1732 gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
1733 if (wstmt)
1734 {
1735 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1736 new_pattern_def_seq (stmt_vinfo, wstmt);
1737 stmt_vec_info new_stmt_info
1738 = new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
1739 set_vinfo_for_stmt (wstmt, new_stmt_info);
1740 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1741 }
1742
1743 if (dump_enabled_p ())
1744 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1745
1746 stmts->safe_push (last_stmt);
1747 return pattern_stmt;
1748}
1749
1750/* Detect a rotate pattern that wouldn't otherwise be vectorized:
1751
1752 type a_t, b_t, c_t;
1753
1754 S0 a_t = b_t r<< c_t;
1755
1756 Input/Output:
1757
1758 * STMTS: Contains a stmt from which the pattern search begins,
1759 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1760 with a sequence:
1761
1762 S1 d_t = -c_t;
1763 S2 e_t = d_t & (B - 1);
1764 S3 f_t = b_t << c_t;
1765 S4 g_t = b_t >> e_t;
1766 S0 a_t = f_t | g_t;
1767
1768 where B is element bitsize of type.
1769
1770 Output:
1771
1772 * TYPE_IN: The type of the input arguments to the pattern.
1773
1774 * TYPE_OUT: The type of the output of this pattern.
1775
1776 * Return value: A new stmt that will be used to replace the rotate
1777 S0 stmt. */
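/* Purely as an illustration of the sequence above: with an unsigned 8-bit
   element type (so B == 8) and c_t == 3, the emitted statements compute
     e_t = (-3) & 7 == 5
   and therefore
     a_t = (b_t << 3) | (b_t >> 5)
   which is b_t rotated left by 3 bits.  Masking the negated shift count
   with B - 1 also means neither shift is ever by the full element width,
   even when c_t == 0.  */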
1778
1779static gimple *
1780vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
1781{
1782 gimple *last_stmt = stmts->pop ();
1783 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1784 gimple *pattern_stmt, *def_stmt;
1785 enum tree_code rhs_code;
1786 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1787 vec_info *vinfo = stmt_vinfo->vinfo;
1788 enum vect_def_type dt;
1789 optab optab1, optab2;
1790 edge ext_def = NULL;
1791
1792 if (!is_gimple_assign (last_stmt))
1793 return NULL;
1794
1795 rhs_code = gimple_assign_rhs_code (last_stmt);
1796 switch (rhs_code)
1797 {
1798 case LROTATE_EXPR:
1799 case RROTATE_EXPR:
1800 break;
1801 default:
1802 return NULL;
1803 }
1804
1805 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1806 return NULL;
1807
1808 lhs = gimple_assign_lhs (last_stmt);
1809 oprnd0 = gimple_assign_rhs1 (last_stmt);
1810 type = TREE_TYPE (oprnd0);
1811 oprnd1 = gimple_assign_rhs2 (last_stmt);
1812 if (TREE_CODE (oprnd0) != SSA_NAME
1813 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1814 || !INTEGRAL_TYPE_P (type)
1815 || !TYPE_UNSIGNED (type))
1816 return NULL;
1817
1818 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
1819 return NULL;
1820
1821 if (dt != vect_internal_def
1822 && dt != vect_constant_def
1823 && dt != vect_external_def)
1824 return NULL;
1825
1826 vectype = get_vectype_for_scalar_type (type);
1827 if (vectype == NULL_TREE)
1828 return NULL;
1829
1830 /* If vector/vector or vector/scalar rotate is supported by the target,
1831 don't do anything here. */
1832 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1833 if (optab1
1834 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1835 return NULL;
1836
1837 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
1838 {
1839 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1840 if (optab2
1841 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1842 return NULL;
1843 }
1844
1845 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1846 don't do anything here either. */
1847 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1848 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1849 if (!optab1
1850 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1851 || !optab2
1852 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1853 {
1854 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
1855 return NULL;
1856 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1857 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1858 if (!optab1
1859 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1860 || !optab2
1861 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1862 return NULL;
1863 }
1864
1865 *type_in = vectype;
1866 *type_out = vectype;
1867 if (*type_in == NULL_TREE)
1868 return NULL;
1869
1870 if (dt == vect_external_def
1871 && TREE_CODE (oprnd1) == SSA_NAME
1872 && is_a <loop_vec_info> (vinfo))
1873 {
1874 struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
1875 ext_def = loop_preheader_edge (loop);
1876 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1877 {
1878 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1879 if (bb == NULL
1880 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1881 ext_def = NULL;
1882 }
1883 }
1884
1885 def = NULL_TREE;
1886 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
1887 if (TREE_CODE (oprnd1) == INTEGER_CST
1888 || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
1889 def = oprnd1;
1890 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1891 {
1892 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1893 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
1894 && TYPE_PRECISION (TREE_TYPE (rhs1))
1895 == TYPE_PRECISION (type))
1896 def = rhs1;
1897 }
1898
1899 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1900 if (def == NULL_TREE)
1901 {
1902 def = vect_recog_temp_ssa_var (type, NULL);
1903 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
1904 if (ext_def)
1905 {
1906 basic_block new_bb
1907 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1908 gcc_assert (!new_bb);
1909 }
1910 else
1911 append_pattern_def_seq (stmt_vinfo, def_stmt);
1912 }
1913 stype = TREE_TYPE (def);
1914 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
1915
1916 if (TREE_CODE (def) == INTEGER_CST)
1917 {
1918 if (!tree_fits_uhwi_p (def)
1919 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
1920 || integer_zerop (def))
1921 return NULL;
1922 def2 = build_int_cst (stype,
1923 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
1924 }
1925 else
1926 {
1927 tree vecstype = get_vectype_for_scalar_type (stype);
1928 stmt_vec_info def_stmt_vinfo;
1929
1930 if (vecstype == NULL_TREE)
1931 return NULL;
1932 def2 = vect_recog_temp_ssa_var (stype, NULL);
1933 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
1934 if (ext_def)
1935 {
1936 basic_block new_bb
1937 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1938 gcc_assert (!new_bb);
1939 }
1940 else
1941 {
1942 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
1943 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1944 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1945 append_pattern_def_seq (stmt_vinfo, def_stmt);
1946 }
1947
1948 def2 = vect_recog_temp_ssa_var (stype, NULL);
1949 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
1950 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
1951 gimple_assign_lhs (def_stmt), mask);
1952 if (ext_def)
1953 {
1954 basic_block new_bb
1955 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1956 gcc_assert (!new_bb);
1957 }
1958 else
1959 {
1960 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
1961 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1962 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1963 append_pattern_def_seq (stmt_vinfo, def_stmt);
1964 }
1965 }
1966
1967 var1 = vect_recog_temp_ssa_var (type, NULL);
1968 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
1969 ? LSHIFT_EXPR : RSHIFT_EXPR,
1970 oprnd0, def);
1971 append_pattern_def_seq (stmt_vinfo, def_stmt);
1972
1973 var2 = vect_recog_temp_ssa_var (type, NULL);
1974 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
1975 ? RSHIFT_EXPR : LSHIFT_EXPR,
1976 oprnd0, def2);
1977 append_pattern_def_seq (stmt_vinfo, def_stmt);
1978
1979 /* Pattern detected. */
1980 if (dump_enabled_p ())
1981 dump_printf_loc (MSG_NOTE, vect_location,
1982 "vect_recog_rotate_pattern: detected:\n");
1983
1984 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1985 var = vect_recog_temp_ssa_var (type, NULL);
1986 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
1987
1988 if (dump_enabled_p ())
1989 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1990
1991 stmts->safe_push (last_stmt);
1992 return pattern_stmt;
1993}
1994
1995/* Detect a vector by vector shift pattern that wouldn't be otherwise
1996 vectorized:
1997
1998 type a_t;
1999 TYPE b_T, res_T;
2000
2001 S1 a_t = ;
2002 S2 b_T = ;
2003 S3 res_T = b_T op a_t;
2004
2005 where type 'TYPE' is a type with different size than 'type',
2006 and op is <<, >> or rotate.
2007
2008 Also detect cases:
2009
2010 type a_t;
2011 TYPE b_T, c_T, res_T;
2012
2013 S0 c_T = ;
2014 S1 a_t = (type) c_T;
2015 S2 b_T = ;
2016 S3 res_T = b_T op a_t;
2017
2018 Input/Output:
2019
2020 * STMTS: Contains a stmt from which the pattern search begins,
2021 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2022     with a shift/rotate which has the same type on both operands, in the
2023     second case just b_T op c_T, in the first case with an added cast
2024     of a_t to 'TYPE' in STMT_VINFO_PATTERN_DEF_SEQ.
2025
2026 Output:
2027
2028 * TYPE_IN: The type of the input arguments to the pattern.
2029
2030 * TYPE_OUT: The type of the output of this pattern.
2031
2032 * Return value: A new stmt that will be used to replace the shift/rotate
2033 S3 stmt. */
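/* As a purely illustrative example (assuming 'type' is unsigned char and
   'TYPE' is unsigned int), the first case above is rewritten as

   S2' a'_T = (TYPE) a_t;     <-- added to STMT_VINFO_PATTERN_DEF_SEQ
   S3' res_T = b_T op a'_T;

   so that both operands of the shift/rotate have the same vector type.  */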
2034
2035static gimple *
2036vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
2037 tree *type_in, tree *type_out)
2038{
2039 gimple *last_stmt = stmts->pop ();
2040 tree oprnd0, oprnd1, lhs, var;
2041 gimple *pattern_stmt, *def_stmt;
2042 enum tree_code rhs_code;
2043 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2044 vec_info *vinfo = stmt_vinfo->vinfo;
2045 enum vect_def_type dt;
2046
2047 if (!is_gimple_assign (last_stmt))
2048 return NULL;
2049
2050 rhs_code = gimple_assign_rhs_code (last_stmt);
2051 switch (rhs_code)
2052 {
2053 case LSHIFT_EXPR:
2054 case RSHIFT_EXPR:
2055 case LROTATE_EXPR:
2056 case RROTATE_EXPR:
2057 break;
2058 default:
2059 return NULL;
2060 }
2061
2062 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2063 return NULL;
2064
2065 lhs = gimple_assign_lhs (last_stmt);
2066 oprnd0 = gimple_assign_rhs1 (last_stmt);
2067 oprnd1 = gimple_assign_rhs2 (last_stmt);
2068 if (TREE_CODE (oprnd0) != SSA_NAME
2069 || TREE_CODE (oprnd1) != SSA_NAME
2070 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2071 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2072 || TYPE_PRECISION (TREE_TYPE (lhs))
2073 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2074 return NULL;
2075
2076 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
2077 return NULL;
2078
2079 if (dt != vect_internal_def)
2080 return NULL;
2081
2082 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2083 *type_out = *type_in;
2084 if (*type_in == NULL_TREE)
2085 return NULL;
2086
2087 tree def = NULL_TREE;
2088 stmt_vec_info def_vinfo = vinfo_for_stmt (def_stmt);
2089 if (!STMT_VINFO_IN_PATTERN_P (def_vinfo) && gimple_assign_cast_p (def_stmt))
2090 {
2091 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2092 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2093 && TYPE_PRECISION (TREE_TYPE (rhs1))
2094 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2095 {
2096 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2097 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2098 def = rhs1;
2099 else
2100 {
2101 tree mask
2102 = build_low_bits_mask (TREE_TYPE (rhs1),
2103 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2104 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2105 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2106 new_pattern_def_seq (stmt_vinfo, def_stmt);
2107 }
2108 }
2109 }
2110
2111 if (def == NULL_TREE)
2112 {
2113 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2114 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2115 new_pattern_def_seq (stmt_vinfo, def_stmt);
2116 }
2117
2118 /* Pattern detected. */
2119 if (dump_enabled_p ())
2120 dump_printf_loc (MSG_NOTE, vect_location,
2121 "vect_recog_vector_vector_shift_pattern: detected:\n");
2122
2123 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2124 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2125 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2126
2127 if (dump_enabled_p ())
2128 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2129
2130 stmts->safe_push (last_stmt);
2131 return pattern_stmt;
2132}
2133
2134/* Return true iff the target has a vector optab implementing the operation
2135 CODE on type VECTYPE. */
2136
2137static bool
2138target_has_vecop_for_code (tree_code code, tree vectype)
2139{
2140 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2141 return voptab
2142 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2143}
2144
2145/* Verify that the target has optabs of VECTYPE to perform all the steps
2146 needed by the multiplication-by-immediate synthesis algorithm described by
2147 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2148 present. Return true iff the target supports all the steps. */
2149
2150static bool
2151target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2152 tree vectype, bool synth_shift_p)
2153{
2154 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2155 return false;
2156
2157 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2158 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2159
2160 if (var == negate_variant
2161 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2162 return false;
2163
2164 /* If we must synthesize shifts with additions make sure that vector
2165 addition is available. */
2166 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2167 return false;
2168
2169 for (int i = 1; i < alg->ops; i++)
2170 {
2171 switch (alg->op[i])
2172 {
2173 case alg_shift:
2174 break;
2175 case alg_add_t_m2:
2176 case alg_add_t2_m:
2177 case alg_add_factor:
2178 if (!supports_vplus)
2179 return false;
2180 break;
2181 case alg_sub_t_m2:
2182 case alg_sub_t2_m:
2183 case alg_sub_factor:
2184 if (!supports_vminus)
2185 return false;
2186 break;
2187 case alg_unknown:
2188 case alg_m:
2189 case alg_zero:
2190 case alg_impossible:
2191 return false;
2192 default:
2193 gcc_unreachable ();
2194 }
2195 }
2196
2197 return true;
2198}
2199
2200/* Synthesize a left shift of OP by AMNT bits using a series of additions and
2201 putting the final result in DEST. Append all statements but the last into
2202 VINFO. Return the last statement. */
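/* For example, a shift by 3 is emitted as the doubling sequence

     t1 = op + op;
     t2 = t1 + t1;
     dest = t2 + t2;

   i.e. op * 8; only the last statement is returned to the caller rather
   than appended.  */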
2203
2204static gimple *
2205synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2206 stmt_vec_info vinfo)
2207{
2208 HOST_WIDE_INT i;
2209 tree itype = TREE_TYPE (op);
2210 tree prev_res = op;
2211 gcc_assert (amnt >= 0);
2212 for (i = 0; i < amnt; i++)
2213 {
2214 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2215 : dest;
2216 gimple *stmt
2217 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2218 prev_res = tmp_var;
2219 if (i < amnt - 1)
2220 append_pattern_def_seq (vinfo, stmt);
2221 else
2222 return stmt;
2223 }
2224 gcc_unreachable ();
2225 return NULL;
2226}
2227
2228/* Helper for vect_synth_mult_by_constant. Apply a binary operation
2229 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2230 the process if necessary. Append the resulting assignment statements
2231 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2232 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2233 left shifts using additions. */
2234
2235static tree
2236apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2237 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2238{
2239 if (integer_zerop (op2)
2240 && (code == LSHIFT_EXPR
2241 || code == PLUS_EXPR))
2242 {
2243 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2244 return op1;
2245 }
2246
2247 gimple *stmt;
2248 tree itype = TREE_TYPE (op1);
2249 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2250
2251 if (code == LSHIFT_EXPR
2252 && synth_shift_p)
2253 {
2254 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2255 stmt_vinfo);
2256 append_pattern_def_seq (stmt_vinfo, stmt);
2257 return tmp_var;
2258 }
2259
2260 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2261 append_pattern_def_seq (stmt_vinfo, stmt);
2262 return tmp_var;
2263}
2264
2265/* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2266 and simple arithmetic operations to be vectorized. Record the statements
2267 produced in STMT_VINFO and return the last statement in the sequence or
2268 NULL if it's not possible to synthesize such a multiplication.
2269 This function mirrors the behavior of expand_mult_const in expmed.c but
2270 works on tree-ssa form. */
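/* For illustration only: a multiplication by 10 might be synthesized as

     t1 = op << 2;
     t2 = t1 + op;      5 * op
     res = t2 << 1;    10 * op

   (with the shifts themselves turned into additions when the target lacks
   vector shifts); the exact sequence is whatever choose_mult_variant
   picks.  */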
2271
2272static gimple *
2273vect_synth_mult_by_constant (tree op, tree val,
2274 stmt_vec_info stmt_vinfo)
2275{
2276 tree itype = TREE_TYPE (op);
2277 machine_mode mode = TYPE_MODE (itype);
2278 struct algorithm alg;
2279 mult_variant variant;
2280 if (!tree_fits_shwi_p (val))
2281 return NULL;
2282
2283 /* Multiplication synthesis by shifts, adds and subs can introduce
2284 signed overflow where the original operation didn't. Perform the
2285 operations on an unsigned type and cast back to avoid this.
2286 In the future we may want to relax this for synthesis algorithms
2287 that we can prove do not cause unexpected overflow. */
2288 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2289
2290 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2291
2292 /* Targets that don't support vector shifts but support vector additions
2293 can synthesize shifts that way. */
2294 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2295
2296 HOST_WIDE_INT hwval = tree_to_shwi (val);
2297 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2298 The vectorizer's benefit analysis will decide whether it's beneficial
2299 to do this. */
2300 bool possible = choose_mult_variant (mode, hwval, &alg,
2301 &variant, MAX_COST);
2302 if (!possible)
2303 return NULL;
2304
2305 tree vectype = get_vectype_for_scalar_type (multtype);
2306
2307 if (!vectype
2308 || !target_supports_mult_synth_alg (&alg, variant,
2309 vectype, synth_shift_p))
2310 return NULL;
2311
2312 tree accumulator;
2313
2314 /* Clear out the sequence of statements so we can populate it below. */
2315 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2316 gimple *stmt = NULL;
2317
2318 if (cast_to_unsigned_p)
2319 {
2320 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2321 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2322 append_pattern_def_seq (stmt_vinfo, stmt);
2323 op = tmp_op;
2324 }
2325
2326 if (alg.op[0] == alg_zero)
2327 accumulator = build_int_cst (multtype, 0);
2328 else
2329 accumulator = op;
2330
2331 bool needs_fixup = (variant == negate_variant)
2332 || (variant == add_variant);
2333
2334 for (int i = 1; i < alg.ops; i++)
2335 {
2336 tree shft_log = build_int_cst (multtype, alg.log[i]);
2337 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2338 tree tmp_var = NULL_TREE;
2339
2340 switch (alg.op[i])
2341 {
2342 case alg_shift:
2343 if (synth_shift_p)
2344 stmt
2345 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2346 stmt_vinfo);
2347 else
2348 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2349 shft_log);
2350 break;
2351 case alg_add_t_m2:
2352 tmp_var
2353 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2354 stmt_vinfo, synth_shift_p);
2355 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2356 tmp_var);
2357 break;
2358 case alg_sub_t_m2:
2359 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2360 shft_log, stmt_vinfo,
2361 synth_shift_p);
2362 /* In some algorithms the first step involves zeroing the
2363 accumulator. If subtracting from such an accumulator
2364 just emit the negation directly. */
2365 if (integer_zerop (accumulator))
2366 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2367 else
2368 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2369 tmp_var);
2370 break;
2371 case alg_add_t2_m:
2372 tmp_var
2373 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2374 stmt_vinfo, synth_shift_p);
2375 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2376 break;
2377 case alg_sub_t2_m:
2378 tmp_var
2379 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2380 stmt_vinfo, synth_shift_p);
2381 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2382 break;
2383 case alg_add_factor:
2384 tmp_var
2385 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2386 stmt_vinfo, synth_shift_p);
2387 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2388 tmp_var);
2389 break;
2390 case alg_sub_factor:
2391 tmp_var
2392 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2393 stmt_vinfo, synth_shift_p);
2394 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2395 accumulator);
2396 break;
2397 default:
2398 gcc_unreachable ();
2399 }
2400 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2401 but rather return it directly. */
2402
2403 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2404 append_pattern_def_seq (stmt_vinfo, stmt);
2405 accumulator = accum_tmp;
2406 }
2407 if (variant == negate_variant)
2408 {
2409 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2410 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2411 accumulator = accum_tmp;
2412 if (cast_to_unsigned_p)
2413 append_pattern_def_seq (stmt_vinfo, stmt);
2414 }
2415 else if (variant == add_variant)
2416 {
2417 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2418 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2419 accumulator = accum_tmp;
2420 if (cast_to_unsigned_p)
2421 append_pattern_def_seq (stmt_vinfo, stmt);
2422 }
2423  /* Move back to a signed type if needed.  */
2424 if (cast_to_unsigned_p)
2425 {
2426 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2427 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2428 }
2429
2430 return stmt;
2431}
2432
2433/* Detect multiplication by a constant and convert it into a sequence of
2434   shifts, additions, subtractions and negations.  We reuse the
2435   choose_mult_variant algorithms from expmed.c.
2436
2437 Input/Output:
2438
2439 STMTS: Contains a stmt from which the pattern search begins,
2440 i.e. the mult stmt.
2441
2442 Output:
2443
2444 * TYPE_IN: The type of the input arguments to the pattern.
2445
2446 * TYPE_OUT: The type of the output of this pattern.
2447
2448 * Return value: A new stmt that will be used to replace
2449 the multiplication. */
2450
2451static gimple *
2452vect_recog_mult_pattern (vec<gimple *> *stmts,
2453 tree *type_in, tree *type_out)
2454{
2455 gimple *last_stmt = stmts->pop ();
2456 tree oprnd0, oprnd1, vectype, itype;
2457 gimple *pattern_stmt;
2458 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2459
2460 if (!is_gimple_assign (last_stmt))
2461 return NULL;
2462
2463 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2464 return NULL;
2465
2466 oprnd0 = gimple_assign_rhs1 (last_stmt);
2467 oprnd1 = gimple_assign_rhs2 (last_stmt);
2468 itype = TREE_TYPE (oprnd0);
2469
2470 if (TREE_CODE (oprnd0) != SSA_NAME
2471 || TREE_CODE (oprnd1) != INTEGER_CST
2472 || !INTEGRAL_TYPE_P (itype)
2473 || !type_has_mode_precision_p (itype))
2474 return NULL;
2475
2476 vectype = get_vectype_for_scalar_type (itype);
2477 if (vectype == NULL_TREE)
2478 return NULL;
2479
2480 /* If the target can handle vectorized multiplication natively,
2481 don't attempt to optimize this. */
2482 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2483 if (mul_optab != unknown_optab)
2484 {
2485 machine_mode vec_mode = TYPE_MODE (vectype);
2486 int icode = (int) optab_handler (mul_optab, vec_mode);
2487 if (icode != CODE_FOR_nothing)
2488 return NULL;
2489 }
2490
2491 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2492 if (!pattern_stmt)
2493 return NULL;
2494
2495 /* Pattern detected. */
2496 if (dump_enabled_p ())
2497 dump_printf_loc (MSG_NOTE, vect_location,
2498 "vect_recog_mult_pattern: detected:\n");
2499
2500 if (dump_enabled_p ())
2501 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
2502 pattern_stmt,0);
2503
2504 stmts->safe_push (last_stmt);
2505 *type_in = vectype;
2506 *type_out = vectype;
2507
2508 return pattern_stmt;
2509}
2510
2511/* Detect a signed division by a constant that wouldn't be
2512 otherwise vectorized:
2513
2514 type a_t, b_t;
2515
2516 S1 a_t = b_t / N;
2517
2518 where type 'type' is an integral type and N is a constant.
2519
2520 Similarly handle modulo by a constant:
2521
2522 S4 a_t = b_t % N;
2523
2524 Input/Output:
2525
2526 * STMTS: Contains a stmt from which the pattern search begins,
2527     i.e. the division stmt.  If N is a power-of-two constant and the
2528     type is signed, S1 is replaced by:
2529 S3 y_t = b_t < 0 ? N - 1 : 0;
2530 S2 x_t = b_t + y_t;
2531 S1' a_t = x_t >> log2 (N);
2532
2533   If N is a power-of-two constant and the type is signed, S4 is
2534   replaced by (where *_T temporaries have unsigned type):
2535 S9 y_T = b_t < 0 ? -1U : 0U;
2536 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2537 S7 z_t = (type) z_T;
2538 S6 w_t = b_t + z_t;
2539 S5 x_t = w_t & (N - 1);
2540 S4' a_t = x_t - z_t;
2541
2542 Output:
2543
2544 * TYPE_IN: The type of the input arguments to the pattern.
2545
2546 * TYPE_OUT: The type of the output of this pattern.
2547
2548 * Return value: A new stmt that will be used to replace the division
2549 S1 or modulo S4 stmt. */
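/* As a worked illustration of the power-of-two case, with 32-bit int and
   N == 4, b_t == -5 gives for S1:
     y_t = 3;  x_t = -2;  a_t = -2 >> 2 == -1 == -5 / 4
   and for S4:
     y_T = 0xffffffff;  z_T = y_T >> 30 == 3;  z_t = 3;
     w_t = -2;  x_t = -2 & 3 == 2;  a_t = 2 - 3 == -1 == -5 % 4.  */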
2550
2551static gimple *
2552vect_recog_divmod_pattern (vec<gimple *> *stmts,
2553 tree *type_in, tree *type_out)
2554{
2555 gimple *last_stmt = stmts->pop ();
2556 tree oprnd0, oprnd1, vectype, itype, cond;
2557 gimple *pattern_stmt, *def_stmt;
2558 enum tree_code rhs_code;
2559 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2560 vec_info *vinfo = stmt_vinfo->vinfo;
2561 optab optab;
2562 tree q;
2563 int dummy_int, prec;
2564 stmt_vec_info def_stmt_vinfo;
2565
2566 if (!is_gimple_assign (last_stmt))
2567 return NULL;
2568
2569 rhs_code = gimple_assign_rhs_code (last_stmt);
2570 switch (rhs_code)
2571 {
2572 case TRUNC_DIV_EXPR:
2573 case TRUNC_MOD_EXPR:
2574 break;
2575 default:
2576 return NULL;
2577 }
2578
2579 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2580 return NULL;
2581
2582 oprnd0 = gimple_assign_rhs1 (last_stmt);
2583 oprnd1 = gimple_assign_rhs2 (last_stmt);
2584 itype = TREE_TYPE (oprnd0);
2585 if (TREE_CODE (oprnd0) != SSA_NAME
2586 || TREE_CODE (oprnd1) != INTEGER_CST
2587 || TREE_CODE (itype) != INTEGER_TYPE
2588 || !type_has_mode_precision_p (itype))
2589 return NULL;
2590
2591 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
2592 vectype = get_vectype_for_scalar_type (itype);
2593 if (vectype == NULL_TREE)
2594 return NULL;
2595
2596 /* If the target can handle vectorized division or modulo natively,
2597 don't attempt to optimize this. */
2598 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2599 if (optab != unknown_optab)
2600 {
2601 machine_mode vec_mode = TYPE_MODE (vectype);
2602 int icode = (int) optab_handler (optab, vec_mode);
2603 if (icode != CODE_FOR_nothing)
2604 return NULL;
2605 }
2606
2607 prec = TYPE_PRECISION (itype);
2608 if (integer_pow2p (oprnd1))
2609 {
2610 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2611 return NULL;
2612
2613 /* Pattern detected. */
2614 if (dump_enabled_p ())
2615 dump_printf_loc (MSG_NOTE, vect_location,
2616 "vect_recog_divmod_pattern: detected:\n");
2617
2618 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2619 build_int_cst (itype, 0));
2620 if (rhs_code == TRUNC_DIV_EXPR)
2621 {
2622 tree var = vect_recog_temp_ssa_var (itype, NULL);
2623 tree shift;
2624 def_stmt
2625 = gimple_build_assign (var, COND_EXPR, cond,
2626 fold_build2 (MINUS_EXPR, itype, oprnd1,
2627 build_int_cst (itype, 1)),
2628 build_int_cst (itype, 0));
2629 new_pattern_def_seq (stmt_vinfo, def_stmt);
2630 var = vect_recog_temp_ssa_var (itype, NULL);
2631 def_stmt
2632 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2633 gimple_assign_lhs (def_stmt));
2634 append_pattern_def_seq (stmt_vinfo, def_stmt);
2635
2636 shift = build_int_cst (itype, tree_log2 (oprnd1));
2637 pattern_stmt
2638 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2639 RSHIFT_EXPR, var, shift);
2640 }
2641 else
2642 {
2643 tree signmask;
2644 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2645 if (compare_tree_int (oprnd1, 2) == 0)
2646 {
2647 signmask = vect_recog_temp_ssa_var (itype, NULL);
2648 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2649 build_int_cst (itype, 1),
2650 build_int_cst (itype, 0));
2651 append_pattern_def_seq (stmt_vinfo, def_stmt);
2652 }
2653 else
2654 {
2655 tree utype
2656 = build_nonstandard_integer_type (prec, 1);
2657 tree vecutype = get_vectype_for_scalar_type (utype);
2658 tree shift
2659 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
2660 - tree_log2 (oprnd1));
2661 tree var = vect_recog_temp_ssa_var (utype, NULL);
2662
2663 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2664 build_int_cst (utype, -1),
2665 build_int_cst (utype, 0));
2666 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2667 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2668 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2669 append_pattern_def_seq (stmt_vinfo, def_stmt);
2670 var = vect_recog_temp_ssa_var (utype, NULL);
2671 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2672 gimple_assign_lhs (def_stmt),
2673 shift);
2674 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2675 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2676 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2677 append_pattern_def_seq (stmt_vinfo, def_stmt);
2678 signmask = vect_recog_temp_ssa_var (itype, NULL);
2679 def_stmt
2680 = gimple_build_assign (signmask, NOP_EXPR, var);
2681 append_pattern_def_seq (stmt_vinfo, def_stmt);
2682 }
2683 def_stmt
2684 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2685 PLUS_EXPR, oprnd0, signmask);
2686 append_pattern_def_seq (stmt_vinfo, def_stmt);
2687 def_stmt
2688 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2689 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2690 fold_build2 (MINUS_EXPR, itype, oprnd1,
2691 build_int_cst (itype, 1)));
2692 append_pattern_def_seq (stmt_vinfo, def_stmt);
2693
2694 pattern_stmt
2695 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2696 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2697 signmask);
2698 }
2699
2700 if (dump_enabled_p ())
2701 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2702 0);
2703
2704 stmts->safe_push (last_stmt);
2705
2706 *type_in = vectype;
2707 *type_out = vectype;
2708 return pattern_stmt;
2709 }
2710
2711 if (prec > HOST_BITS_PER_WIDE_INT
2712 || integer_zerop (oprnd1))
2713 return NULL;
2714
2715 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2716 return NULL;
2717
2718 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2719
2720 if (TYPE_UNSIGNED (itype))
2721 {
2722 unsigned HOST_WIDE_INT mh, ml;
2723 int pre_shift, post_shift;
2724 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2725 & GET_MODE_MASK (itype_mode));
2726 tree t1, t2, t3, t4;
2727
2728 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2729 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2730 return NULL;
2731
2732 /* Find a suitable multiplier and right shift count
2733 instead of multiplying with D. */
2734 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2735
2736      /* If the suggested multiplier needs more than PREC bits, we can do
2737	 better for even divisors, using an initial right shift.  */
2738 if (mh != 0 && (d & 1) == 0)
2739 {
2740 pre_shift = ctz_or_zero (d);
2741 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2742 &ml, &post_shift, &dummy_int);
2743 gcc_assert (!mh);
2744 }
2745 else
2746 pre_shift = 0;
2747
2748 if (mh != 0)
2749 {
2750 if (post_shift - 1 >= prec)
2751 return NULL;
2752
2753 /* t1 = oprnd0 h* ml;
2754 t2 = oprnd0 - t1;
2755 t3 = t2 >> 1;
2756 t4 = t1 + t3;
2757 q = t4 >> (post_shift - 1); */
2758 t1 = vect_recog_temp_ssa_var (itype, NULL);
2759 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2760 build_int_cst (itype, ml));
2761 append_pattern_def_seq (stmt_vinfo, def_stmt);
2762
2763 t2 = vect_recog_temp_ssa_var (itype, NULL);
2764 def_stmt
2765 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2766 append_pattern_def_seq (stmt_vinfo, def_stmt);
2767
2768 t3 = vect_recog_temp_ssa_var (itype, NULL);
2769 def_stmt
2770 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2771 append_pattern_def_seq (stmt_vinfo, def_stmt);
2772
2773 t4 = vect_recog_temp_ssa_var (itype, NULL);
2774 def_stmt
2775 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2776
2777 if (post_shift != 1)
2778 {
2779 append_pattern_def_seq (stmt_vinfo, def_stmt);
2780
2781 q = vect_recog_temp_ssa_var (itype, NULL);
2782 pattern_stmt
2783 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2784 build_int_cst (itype, post_shift - 1));
2785 }
2786 else
2787 {
2788 q = t4;
2789 pattern_stmt = def_stmt;
2790 }
2791 }
2792 else
2793 {
2794 if (pre_shift >= prec || post_shift >= prec)
2795 return NULL;
2796
2797 /* t1 = oprnd0 >> pre_shift;
2798 t2 = t1 h* ml;
2799 q = t2 >> post_shift; */
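	  /* For instance (illustrative only), a 32-bit unsigned division by 3
	     typically ends up with ml == 0xAAAAAAAB, pre_shift == 0 and
	     post_shift == 1, i.e. q = (oprnd0 h* 0xAAAAAAAB) >> 1.  */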
2800 if (pre_shift)
2801 {
2802 t1 = vect_recog_temp_ssa_var (itype, NULL);
2803 def_stmt
2804 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2805 build_int_cst (NULL, pre_shift));
2806 append_pattern_def_seq (stmt_vinfo, def_stmt);
2807 }
2808 else
2809 t1 = oprnd0;
2810
2811 t2 = vect_recog_temp_ssa_var (itype, NULL);
2812 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2813 build_int_cst (itype, ml));
2814
2815 if (post_shift)
2816 {
2817 append_pattern_def_seq (stmt_vinfo, def_stmt);
2818
2819 q = vect_recog_temp_ssa_var (itype, NULL);
2820 def_stmt
2821 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2822 build_int_cst (itype, post_shift));
2823 }
2824 else
2825 q = t2;
2826
2827 pattern_stmt = def_stmt;
2828 }
2829 }
2830 else
2831 {
2832 unsigned HOST_WIDE_INT ml;
2833 int post_shift;
2834 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2835 unsigned HOST_WIDE_INT abs_d;
2836 bool add = false;
2837 tree t1, t2, t3, t4;
2838
2839 /* Give up for -1. */
2840 if (d == -1)
2841 return NULL;
2842
2843 /* Since d might be INT_MIN, we have to cast to
2844 unsigned HOST_WIDE_INT before negating to avoid
2845 undefined signed overflow. */
2846 abs_d = (d >= 0
2847 ? (unsigned HOST_WIDE_INT) d
2848 : - (unsigned HOST_WIDE_INT) d);
2849
2850 /* n rem d = n rem -d */
2851 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2852 {
2853 d = abs_d;
2854 oprnd1 = build_int_cst (itype, abs_d);
2855 }
2856 else if (HOST_BITS_PER_WIDE_INT >= prec
2857 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
2858 /* This case is not handled correctly below. */
2859 return NULL;
2860
2861 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2862 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
2863 {
2864 add = true;
2865 ml |= HOST_WIDE_INT_M1U << (prec - 1);
2866 }
2867 if (post_shift >= prec)
2868 return NULL;
2869
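      /* For instance (illustrative only), a signed 32-bit division by 3
	 typically ends up with ml == 0x55555556, post_shift == 0 and no ADD
	 step, so the quotient below becomes
	   t1 = oprnd0 h* 0x55555556;  q = t1 - (oprnd0 >> 31);  */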
2870 /* t1 = oprnd0 h* ml; */
2871 t1 = vect_recog_temp_ssa_var (itype, NULL);
2872 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2873 build_int_cst (itype, ml));
2874
2875 if (add)
2876 {
2877 /* t2 = t1 + oprnd0; */
2878 append_pattern_def_seq (stmt_vinfo, def_stmt);
2879 t2 = vect_recog_temp_ssa_var (itype, NULL);
2880 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2881 }
2882 else
2883 t2 = t1;
2884
2885 if (post_shift)
2886 {
2887 /* t3 = t2 >> post_shift; */
2888 append_pattern_def_seq (stmt_vinfo, def_stmt);
2889 t3 = vect_recog_temp_ssa_var (itype, NULL);
2890 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2891 build_int_cst (itype, post_shift));
2892 }
2893 else
2894 t3 = t2;
2895
2896 wide_int oprnd0_min, oprnd0_max;
2897 int msb = 1;
2898 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2899 {
2900 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2901 msb = 0;
2902 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2903 msb = -1;
2904 }
2905
2906 if (msb == 0 && d >= 0)
2907 {
2908 /* q = t3; */
2909 q = t3;
2910 pattern_stmt = def_stmt;
2911 }
2912 else
2913 {
2914 /* t4 = oprnd0 >> (prec - 1);
2915 or if we know from VRP that oprnd0 >= 0
2916 t4 = 0;
2917 or if we know from VRP that oprnd0 < 0
2918 t4 = -1; */
2919 append_pattern_def_seq (stmt_vinfo, def_stmt);
2920 t4 = vect_recog_temp_ssa_var (itype, NULL);
2921 if (msb != 1)
2922 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2923 build_int_cst (itype, msb));
2924 else
2925 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2926 build_int_cst (itype, prec - 1));
2927 append_pattern_def_seq (stmt_vinfo, def_stmt);
2928
2929 /* q = t3 - t4; or q = t4 - t3; */
2930 q = vect_recog_temp_ssa_var (itype, NULL);
2931 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2932 d < 0 ? t3 : t4);
2933 }
2934 }
2935
2936 if (rhs_code == TRUNC_MOD_EXPR)
2937 {
2938 tree r, t1;
2939
2940 /* We divided. Now finish by:
2941 t1 = q * oprnd1;
2942 r = oprnd0 - t1; */
2943 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2944
2945 t1 = vect_recog_temp_ssa_var (itype, NULL);
2946 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
2947 append_pattern_def_seq (stmt_vinfo, def_stmt);
2948
2949 r = vect_recog_temp_ssa_var (itype, NULL);
2950 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
2951 }
2952
2953 /* Pattern detected. */
2954 if (dump_enabled_p ())
2955 {
2956 dump_printf_loc (MSG_NOTE, vect_location,
2957 "vect_recog_divmod_pattern: detected: ");
2958 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2959 }
2960
2961 stmts->safe_push (last_stmt);
2962
2963 *type_in = vectype;
2964 *type_out = vectype;
2965 return pattern_stmt;
2966}
2967
2968/* Function vect_recog_mixed_size_cond_pattern
2969
2970 Try to find the following pattern:
2971
2972 type x_t, y_t;
2973 TYPE a_T, b_T, c_T;
2974 loop:
2975 S1 a_T = x_t CMP y_t ? b_T : c_T;
2976
2977 where type 'TYPE' is an integral type which has different size
2978 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2979 than 'type', the constants need to fit into an integer type
2980 with the same width as 'type') or results of conversion from 'type'.
2981
2982 Input:
2983
2984 * LAST_STMT: A stmt from which the pattern search begins.
2985
2986 Output:
2987
2988 * TYPE_IN: The type of the input arguments to the pattern.
2989
2990 * TYPE_OUT: The type of the output of this pattern.
2991
2992 * Return value: A new stmt that will be used to replace the pattern.
2993 Additionally a def_stmt is added.
2994
2995 a_it = x_t CMP y_t ? b_it : c_it;
2996 a_T = (TYPE) a_it; */
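/* For illustration, with short x_t, y_t and int a_T = x_t < y_t ? 5 : -1,
   both the comparison and the selection are performed in a short-sized
   intermediate a_it, followed by a widening conversion of a_it to int.  */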
2997
2998static gimple *
2999vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
3000 tree *type_out)
3001{
3002 gimple *last_stmt = (*stmts)[0];
3003 tree cond_expr, then_clause, else_clause;
3004 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
3005 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3006 gimple *pattern_stmt, *def_stmt;
3007 vec_info *vinfo = stmt_vinfo->vinfo;
3008 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3009 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3010 bool promotion;
3011 tree comp_scalar_type;
3012
3013 if (!is_gimple_assign (last_stmt)
3014 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3015 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3016 return NULL;
3017
3018 cond_expr = gimple_assign_rhs1 (last_stmt);
3019 then_clause = gimple_assign_rhs2 (last_stmt);
3020 else_clause = gimple_assign_rhs3 (last_stmt);
3021
3022 if (!COMPARISON_CLASS_P (cond_expr))
3023 return NULL;
3024
3025 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3026 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
3027 if (comp_vectype == NULL_TREE)
3028 return NULL;
3029
3030 type = gimple_expr_type (last_stmt);
3031 if (types_compatible_p (type, comp_scalar_type)
3032 || ((TREE_CODE (then_clause) != INTEGER_CST
3033 || TREE_CODE (else_clause) != INTEGER_CST)
3034 && !INTEGRAL_TYPE_P (comp_scalar_type))
3035 || !INTEGRAL_TYPE_P (type))
3036 return NULL;
3037
3038 if ((TREE_CODE (then_clause) != INTEGER_CST
3039 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
3040 &def_stmt0, &promotion))
3041 || (TREE_CODE (else_clause) != INTEGER_CST
3042 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
3043 &def_stmt1, &promotion)))
3044 return NULL;
3045
3046 if (orig_type0 && orig_type1
3047 && !types_compatible_p (orig_type0, orig_type1))
3048 return NULL;
3049
3050 if (orig_type0)
3051 {
3052 if (!types_compatible_p (orig_type0, comp_scalar_type))
3053 return NULL;
3054 then_clause = gimple_assign_rhs1 (def_stmt0);
3055 itype = orig_type0;
3056 }
3057
3058 if (orig_type1)
3059 {
3060 if (!types_compatible_p (orig_type1, comp_scalar_type))
3061 return NULL;
3062 else_clause = gimple_assign_rhs1 (def_stmt1);
3063 itype = orig_type1;
3064 }
3065
3066
3067 HOST_WIDE_INT cmp_mode_size
3068 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3069
3070 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3071 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3072 return NULL;
3073
3074 vectype = get_vectype_for_scalar_type (type);
3075 if (vectype == NULL_TREE)
3076 return NULL;
3077
3078 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3079 return NULL;
3080
3081 if (itype == NULL_TREE)
3082 itype = build_nonstandard_integer_type (cmp_mode_size,
3083 TYPE_UNSIGNED (type));
3084
3085 if (itype == NULL_TREE
3086 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3087 return NULL;
3088
3089 vecitype = get_vectype_for_scalar_type (itype);
3090 if (vecitype == NULL_TREE)
3091 return NULL;
3092
3093 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3094 return NULL;
3095
3096 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3097 {
3098 if ((TREE_CODE (then_clause) == INTEGER_CST
3099 && !int_fits_type_p (then_clause, itype))
3100 || (TREE_CODE (else_clause) == INTEGER_CST
3101 && !int_fits_type_p (else_clause, itype)))
3102 return NULL;
3103 }
3104
3105 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3106 COND_EXPR, unshare_expr (cond_expr),
3107 fold_convert (itype, then_clause),
3108 fold_convert (itype, else_clause));
3109 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3110 NOP_EXPR, gimple_assign_lhs (def_stmt));
3111
3112 new_pattern_def_seq (stmt_vinfo, def_stmt);
3113 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
3114 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3115 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
3116 *type_in = vecitype;
3117 *type_out = vectype;
3118
3119 if (dump_enabled_p ())
3120 dump_printf_loc (MSG_NOTE, vect_location,
3121 "vect_recog_mixed_size_cond_pattern: detected:\n");
3122
3123 return pattern_stmt;
3124}
3125
3126
3127/* Helper function of vect_recog_bool_pattern. Called recursively, return
3128 true if bool VAR can and should be optimized that way. Assume it shouldn't
3129 in case it's a result of a comparison which can be directly vectorized into
3130 a vector comparison. Fills in STMTS with all stmts visited during the
3131 walk. */
3132
3133static bool
3134check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3135{
3136 gimple *def_stmt;
3137 enum vect_def_type dt;
3138 tree rhs1;
3139 enum tree_code rhs_code;
3140
3141 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3142 return false;
3143
3144 if (dt != vect_internal_def)
3145 return false;
3146
3147 if (!is_gimple_assign (def_stmt))
3148 return false;
3149
3150 if (stmts.contains (def_stmt))
3151 return true;
3152
3153 rhs1 = gimple_assign_rhs1 (def_stmt);
3154 rhs_code = gimple_assign_rhs_code (def_stmt);
3155 switch (rhs_code)
3156 {
3157 case SSA_NAME:
3158 if (! check_bool_pattern (rhs1, vinfo, stmts))
3159 return false;
3160 break;
3161
3162 CASE_CONVERT:
3163 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3164 return false;
3165 if (! check_bool_pattern (rhs1, vinfo, stmts))
3166 return false;
3167 break;
3168
3169 case BIT_NOT_EXPR:
3170 if (! check_bool_pattern (rhs1, vinfo, stmts))
3171 return false;
3172 break;
3173
3174 case BIT_AND_EXPR:
3175 case BIT_IOR_EXPR:
3176 case BIT_XOR_EXPR:
3177 if (! check_bool_pattern (rhs1, vinfo, stmts)
3178 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3179 return false;
3180 break;
3181
3182 default:
3183 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3184 {
3185 tree vecitype, comp_vectype;
3186
3187 /* If the comparison can throw, then is_gimple_condexpr will be
3188 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3189 if (stmt_could_throw_p (def_stmt))
3190 return false;
3191
3192 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3193 if (comp_vectype == NULL_TREE)
3194 return false;
3195
3196 tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3197 if (mask_type
3198 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3199 return false;
3200
3201 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3202 {
3203 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3204 tree itype
3205 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3206 vecitype = get_vectype_for_scalar_type (itype);
3207 if (vecitype == NULL_TREE)
3208 return false;
3209 }
3210 else
3211 vecitype = comp_vectype;
3212 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3213 return false;
3214 }
3215 else
3216 return false;
3217 break;
3218 }
3219
3220 bool res = stmts.add (def_stmt);
3221 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3222 gcc_assert (!res);
3223
3224 return true;
3225}
3226
3227
3228/* Helper function of adjust_bool_pattern.  Add a cast of VAR to TYPE,
3229   appending the new cast stmt to STMT_INFO's pattern def sequence, and
3230   return the result of the cast.  */
3231
3232static tree
3233adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3234{
3235 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3236 NOP_EXPR, var);
3237 stmt_vec_info patt_vinfo = new_stmt_vec_info (cast_stmt, stmt_info->vinfo);
3238 set_vinfo_for_stmt (cast_stmt, patt_vinfo);
3239 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (type);
3240 append_pattern_def_seq (stmt_info, cast_stmt);
3241 return gimple_assign_lhs (cast_stmt);
3242}
3243
3244/* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3245 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3246 type, OUT_TYPE is the desired final integer type of the whole pattern.
3247 STMT_INFO is the info of the pattern root and is where pattern stmts should
3248 be associated with. DEFS is a map of pattern defs. */
3249
3250static void
3251adjust_bool_pattern (tree var, tree out_type,
3252 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3253{
3254 gimple *stmt = SSA_NAME_DEF_STMT (var);
3255 enum tree_code rhs_code, def_rhs_code;
3256 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3257 location_t loc;
3258 gimple *pattern_stmt, *def_stmt;
3259 tree trueval = NULL_TREE;
3260
3261 rhs1 = gimple_assign_rhs1 (stmt);
3262 rhs2 = gimple_assign_rhs2 (stmt);
3263 rhs_code = gimple_assign_rhs_code (stmt);
3264 loc = gimple_location (stmt);
3265 switch (rhs_code)
3266 {
3267 case SSA_NAME:
3268 CASE_CONVERT:
3269 irhs1 = *defs.get (rhs1);
3270 itype = TREE_TYPE (irhs1);
3271 pattern_stmt
3272 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3273 SSA_NAME, irhs1);
3274 break;
3275
3276 case BIT_NOT_EXPR:
3277 irhs1 = *defs.get (rhs1);
3278 itype = TREE_TYPE (irhs1);
3279 pattern_stmt
3280 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3281 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3282 break;
3283
3284 case BIT_AND_EXPR:
3285 /* Try to optimize x = y & (a < b ? 1 : 0); into
3286 x = (a < b ? y : 0);
3287
3288 E.g. for:
3289 bool a_b, b_b, c_b;
3290 TYPE d_T;
3291
3292 S1 a_b = x1 CMP1 y1;
3293 S2 b_b = x2 CMP2 y2;
3294 S3 c_b = a_b & b_b;
3295 S4 d_T = (TYPE) c_b;
3296
3297 we would normally emit:
3298
3299 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3300 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3301 S3' c_T = a_T & b_T;
3302 S4' d_T = c_T;
3303
3304 but we can save one stmt by using the
3305 result of one of the COND_EXPRs in the other COND_EXPR and leave
3306 BIT_AND_EXPR stmt out:
3307
3308 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3309 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3310	 S4'  d_T = c_T;
3311
3312	 At least when VEC_COND_EXPR is implemented using masks,
3313	 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3314	 computes the comparison mask and ANDs it, in one case with an
3315	 all-ones vector, in the other case with a vector register.
3316 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3317 often more expensive. */
3318 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3319 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3320 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3321 {
3322 irhs1 = *defs.get (rhs1);
3323 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3324 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3325 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3326 {
3327 rhs_code = def_rhs_code;
3328 rhs1 = def_rhs1;
3329 rhs2 = gimple_assign_rhs2 (def_stmt);
3330 trueval = irhs1;
3331 goto do_compare;
3332 }
3333 else
3334 irhs2 = *defs.get (rhs2);
3335 goto and_ior_xor;
3336 }
3337 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3338 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3339 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3340 {
3341 irhs2 = *defs.get (rhs2);
3342 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3343 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3344 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3345 {
3346 rhs_code = def_rhs_code;
3347 rhs1 = def_rhs1;
3348 rhs2 = gimple_assign_rhs2 (def_stmt);
3349 trueval = irhs2;
3350 goto do_compare;
3351 }
3352 else
3353 irhs1 = *defs.get (rhs1);
3354 goto and_ior_xor;
3355 }
3356 /* FALLTHRU */
3357 case BIT_IOR_EXPR:
3358 case BIT_XOR_EXPR:
3359 irhs1 = *defs.get (rhs1);
3360 irhs2 = *defs.get (rhs2);
3361 and_ior_xor:
3362 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3363 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3364 {
3365 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3366 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3367 int out_prec = TYPE_PRECISION (out_type);
3368 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3369 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3370 stmt_info);
3371 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3372 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3373 stmt_info);
3374 else
3375 {
3376 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3377 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3378 }
3379 }
3380 itype = TREE_TYPE (irhs1);
3381 pattern_stmt
3382 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3383 rhs_code, irhs1, irhs2);
3384 break;
3385
3386 default:
3387 do_compare:
3388 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3389 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3390 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3391 || (TYPE_PRECISION (TREE_TYPE (rhs1))
3392 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3393 {
3394 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3395 itype
3396 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3397 }
3398 else
3399 itype = TREE_TYPE (rhs1);
3400 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3401 if (trueval == NULL_TREE)
3402 trueval = build_int_cst (itype, 1);
3403 else
3404 gcc_checking_assert (useless_type_conversion_p (itype,
3405 TREE_TYPE (trueval)));
3406 pattern_stmt
3407 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3408 COND_EXPR, cond_expr, trueval,
3409 build_int_cst (itype, 0));
3410 break;
3411 }
3412
3413 gimple_set_location (pattern_stmt, loc);
3414 /* ??? Why does vect_mark_pattern_stmts set the vector type on all
3415 pattern def seq stmts instead of just letting auto-detection do
3416 its work? */
3417 stmt_vec_info patt_vinfo = new_stmt_vec_info (pattern_stmt, stmt_info->vinfo);
3418 set_vinfo_for_stmt (pattern_stmt, patt_vinfo);
3419 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (itype);
3420 append_pattern_def_seq (stmt_info, pattern_stmt);
3421 defs.put (var, gimple_assign_lhs (pattern_stmt));
3422}
3423
3424/* Comparison function to qsort a vector of gimple stmts by UID.  */
3425
3426static int
3427sort_after_uid (const void *p1, const void *p2)
3428{
3429 const gimple *stmt1 = *(const gimple * const *)p1;
3430 const gimple *stmt2 = *(const gimple * const *)p2;
3431 return gimple_uid (stmt1) - gimple_uid (stmt2);
3432}
3433
3434/* Create pattern stmts for all stmts participating in the bool pattern
3435 specified by BOOL_STMT_SET and its root STMT with the desired type
3436 OUT_TYPE. Return the def of the pattern root. */
3437
3438static tree
3439adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3440 tree out_type, gimple *stmt)
3441{
3442 /* Gather original stmts in the bool pattern in their order of appearance
3443 in the IL. */
3444 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3445 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3446 i != bool_stmt_set.end (); ++i)
3447 bool_stmts.quick_push (*i);
3448 bool_stmts.qsort (sort_after_uid);
3449
3450 /* Now process them in that order, producing pattern stmts. */
3451 hash_map <tree, tree> defs;
3452 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3453 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3454 out_type, vinfo_for_stmt (stmt), defs);
3455
3456 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3457 gimple *pattern_stmt
3458 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt)));
3459 return gimple_assign_lhs (pattern_stmt);
3460}
3461
3462/* Helper for search_type_for_mask. */
3463
3464static tree
3465search_type_for_mask_1 (tree var, vec_info *vinfo,
3466 hash_map<gimple *, tree> &cache)
3467{
3468 gimple *def_stmt;
3469 enum vect_def_type dt;
3470 tree rhs1;
3471 enum tree_code rhs_code;
3472 tree res = NULL_TREE, res2;
3473
3474 if (TREE_CODE (var) != SSA_NAME)
3475 return NULL_TREE;
3476
3477 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3478 return NULL_TREE;
3479
3480 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3481 return NULL_TREE;
3482
3483 if (dt != vect_internal_def)
3484 return NULL_TREE;
3485
3486 if (!is_gimple_assign (def_stmt))
3487 return NULL_TREE;
3488
3489 tree *c = cache.get (def_stmt);
3490 if (c)
3491 return *c;
3492
3493 rhs_code = gimple_assign_rhs_code (def_stmt);
3494 rhs1 = gimple_assign_rhs1 (def_stmt);
3495
3496 switch (rhs_code)
3497 {
3498 case SSA_NAME:
3499 case BIT_NOT_EXPR:
3500 CASE_CONVERT:
3501 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3502 break;
3503
3504 case BIT_AND_EXPR:
3505 case BIT_IOR_EXPR:
3506 case BIT_XOR_EXPR:
3507 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3508 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3509 cache);
3510 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3511 res = res2;
3512 break;
3513
3514 default:
3515 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3516 {
3517 tree comp_vectype, mask_type;
3518
3519 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3520 {
3521 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3522 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3523 vinfo, cache);
3524 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3525 res = res2;
3526 break;
3527 }
3528
3529 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3530 if (comp_vectype == NULL_TREE)
3531 {
3532 res = NULL_TREE;
3533 break;
3534 }
3535
3536 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3537 if (!mask_type
3538 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3539 {
3540 res = NULL_TREE;
3541 break;
3542 }
3543
3544 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3545 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3546 {
3547 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3548 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3549 }
3550 else
3551 res = TREE_TYPE (rhs1);
3552 }
3553 }
3554
3555 cache.put (def_stmt, res);
3556 return res;
3557}
3558
3559/* Return the proper type for converting bool VAR into
3560   an integer value, or NULL_TREE if no such type exists.
3561   The type is chosen so that the converted value has the
3562   same number of elements as VAR's vector type.  */
3563
3564static tree
3565search_type_for_mask (tree var, vec_info *vinfo)
3566{
3567 hash_map<gimple *, tree> cache;
3568 return search_type_for_mask_1 (var, vinfo, cache);
3569}
3570
3571/* Function vect_recog_bool_pattern
3572
3573 Try to find pattern like following:
3574
3575 bool a_b, b_b, c_b, d_b, e_b;
3576 TYPE f_T;
3577 loop:
3578 S1 a_b = x1 CMP1 y1;
3579 S2 b_b = x2 CMP2 y2;
3580 S3 c_b = a_b & b_b;
3581 S4 d_b = x3 CMP3 y3;
3582 S5 e_b = c_b | d_b;
3583 S6 f_T = (TYPE) e_b;
3584
3585 where type 'TYPE' is an integral type. Or a similar pattern
3586 ending in
3587
3588 S6 f_Y = e_b ? r_Y : s_Y;
3589
3590 as results from if-conversion of a complex condition.
3591
3592 Input:
3593
3594 * LAST_STMT: A stmt at the end from which the pattern
3595 		 search begins, i.e. a cast of a bool to
3596 an integer type.
3597
3598 Output:
3599
3600 * TYPE_IN: The type of the input arguments to the pattern.
3601
3602 * TYPE_OUT: The type of the output of this pattern.
3603
3604 * Return value: A new stmt that will be used to replace the pattern.
3605
3606    Assuming the size of TYPE is the same as the size of all comparisons
3607    (otherwise some casts would be added where needed), for the above
3608    sequence we create the related pattern stmts:
3609 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3610 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3611 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3612 S5' e_T = c_T | d_T;
3613 S6' f_T = e_T;
3614
3615 Instead of the above S3' we could emit:
3616 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3617 S3' c_T = a_T | b_T;
3618 but the above is more efficient. */
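
/* For the COND_EXPR form above, a rough sketch of the result (assuming
   the same S1'-S5' stmts are created for the boolean operands) is:

     S6'  f_Y = e_T != 0 ? r_Y : s_Y;

   i.e. the boolean condition is replaced by a comparison of the
   generated integer value against zero.  */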
3619
3620static gimple *
3621vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
3622 tree *type_out)
3623{
3624 gimple *last_stmt = stmts->pop ();
3625 enum tree_code rhs_code;
3626 tree var, lhs, rhs, vectype;
3627 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3628 stmt_vec_info new_stmt_info;
3629 vec_info *vinfo = stmt_vinfo->vinfo;
3630 gimple *pattern_stmt;
3631
3632 if (!is_gimple_assign (last_stmt))
3633 return NULL;
3634
3635 var = gimple_assign_rhs1 (last_stmt);
3636 lhs = gimple_assign_lhs (last_stmt);
3637
3638 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3639 return NULL;
3640
3641 hash_set<gimple *> bool_stmts;
3642
3643 rhs_code = gimple_assign_rhs_code (last_stmt);
3644 if (CONVERT_EXPR_CODE_P (rhs_code))
3645 {
3646 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3647 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3648 return NULL;
3649 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3650 if (vectype == NULL_TREE)
3651 return NULL;
3652
3653 if (check_bool_pattern (var, vinfo, bool_stmts))
3654 {
3655 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), last_stmt);
3656 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3657 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3658 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3659 else
3660 pattern_stmt
3661 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3662 }
3663 else
3664 {
3665 tree type = search_type_for_mask (var, vinfo);
3666 tree cst0, cst1, tmp;
3667
3668 if (!type)
3669 return NULL;
3670
3671 	  /* We may use the condition directly with a narrowed type, which
3672 	     avoids emitting multiple COND_EXPRs followed by result packing
3673 	     and instead performs a single COND_EXPR on the packed mask.
3674 	     In the widening case it is better to emit the COND_EXPR first
3675 	     and then extract the results.  */
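	  /* Illustrative sketch (the names below are made up): if the
	     comparisons feeding VAR are on 32-bit ints but LHS has an
	     8-bit type, the modes differ, so we keep the wider 32-bit
	     TYPE, emit

	       tmp_T = var ? 1 : 0;

	     in that type, and add a separate conversion of tmp_T to the
	     8-bit result type below.  */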
3676 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3677 type = TREE_TYPE (lhs);
3678
3679 cst0 = build_int_cst (type, 0);
3680 cst1 = build_int_cst (type, 1);
3681 tmp = vect_recog_temp_ssa_var (type, NULL);
3682 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3683
3684 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3685 {
3686 tree new_vectype = get_vectype_for_scalar_type (type);
3687 new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3688 set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
3689 STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
3690 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
3691
3692 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3693 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3694 }
3695 }
3696
3697 *type_out = vectype;
3698 *type_in = vectype;
3699 stmts->safe_push (last_stmt);
3700 if (dump_enabled_p ())
3701 dump_printf_loc (MSG_NOTE, vect_location,
3702 "vect_recog_bool_pattern: detected:\n");
3703
3704 return pattern_stmt;
3705 }
3706 else if (rhs_code == COND_EXPR
3707 && TREE_CODE (var) == SSA_NAME)
3708 {
3709 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3710 if (vectype == NULL_TREE)
3711 return NULL;
3712
3713 /* Build a scalar type for the boolean result that when
3714 vectorized matches the vector type of the result in
3715 size and number of elements. */
3716 unsigned prec
3717 = wi::udiv_trunc (wi::to_wide (TYPE_SIZE (vectype)),
3718 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
3719 tree type
3720 = build_nonstandard_integer_type (prec,
3721 TYPE_UNSIGNED (TREE_TYPE (var)));
3722 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3723 return NULL;
3724
3725 if (!check_bool_pattern (var, vinfo, bool_stmts))
3726 return NULL;
3727
3728 rhs = adjust_bool_stmts (bool_stmts, type, last_stmt);
3729
3730 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3731 pattern_stmt
3732 = gimple_build_assign (lhs, COND_EXPR,
3733 build2 (NE_EXPR, boolean_type_node,
3734 rhs, build_int_cst (type, 0)),
3735 gimple_assign_rhs2 (last_stmt),
3736 gimple_assign_rhs3 (last_stmt));
3737 *type_out = vectype;
3738 *type_in = vectype;
3739 stmts->safe_push (last_stmt);
3740 if (dump_enabled_p ())
3741 dump_printf_loc (MSG_NOTE, vect_location,
3742 "vect_recog_bool_pattern: detected:\n");
3743
3744 return pattern_stmt;
3745 }
3746 else if (rhs_code == SSA_NAME
3747 && STMT_VINFO_DATA_REF (stmt_vinfo))
3748 {
3749 stmt_vec_info pattern_stmt_info;
3750 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3751 gcc_assert (vectype != NULL_TREE);
3752 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3753 return NULL;
3754
3755 if (check_bool_pattern (var, vinfo, bool_stmts))
3756 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), last_stmt);
3757 else
3758 {
3759 tree type = search_type_for_mask (var, vinfo);
3760 tree cst0, cst1, new_vectype;
3761
3762 if (!type)
3763 return NULL;
3764
3765 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3766 type = TREE_TYPE (vectype);
3767
3768 cst0 = build_int_cst (type, 0);
3769 cst1 = build_int_cst (type, 1);
3770 new_vectype = get_vectype_for_scalar_type (type);
3771
3772 rhs = vect_recog_temp_ssa_var (type, NULL);
3773 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3774
3775 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3776 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3777 STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
3778 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3779 }
3780
3781 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3782 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3783 {
3784 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3785 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3786 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3787 rhs = rhs2;
3788 }
3789 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3790 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3791 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3792 STMT_VINFO_DATA_REF (pattern_stmt_info)
3793 = STMT_VINFO_DATA_REF (stmt_vinfo);
3794 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3795 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3796 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3797 *type_out = vectype;
3798 *type_in = vectype;
3799 stmts->safe_push (last_stmt);
3800 if (dump_enabled_p ())
3801 dump_printf_loc (MSG_NOTE, vect_location,
3802 "vect_recog_bool_pattern: detected:\n");
3803 return pattern_stmt;
3804 }
3805 else
3806 return NULL;
3807}
3808
3809
3810 /* A helper for vect_recog_mask_conversion_pattern.  Build a
3811    conversion of MASK to a type suitable for masking VECTYPE.
3812    The built statement gets the required vectype and is appended
3813    to the pattern def sequence of STMT_VINFO.
3814 
3815    Return the converted mask.  */
3816
3817static tree
3818build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
3819 vec_info *vinfo)
3820{
3821 gimple *stmt;
3822 tree masktype, tmp;
3823 stmt_vec_info new_stmt_info;
3824
3825 masktype = build_same_sized_truth_vector_type (vectype);
3826 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3827 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3828 new_stmt_info = new_stmt_vec_info (stmt, vinfo);
3829 set_vinfo_for_stmt (stmt, new_stmt_info);
3830 STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
3831 append_pattern_def_seq (stmt_vinfo, stmt);
3832
3833 return tmp;
3834}
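
/* An illustrative sketch of what the helper above emits (assuming
   VECTYPE is a vector of four 32-bit ints): MASKTYPE becomes the
   same-sized boolean vector type with four elements, and the statement
   appended to the pattern def sequence is a plain conversion

     tmp = (bool-for-MASKTYPE) mask;

   whose stmt_vec_info records MASKTYPE as its vectype.  */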
3835
3836
3837/* Function vect_recog_mask_conversion_pattern
3838
3839 Try to find statements which require boolean type
3840    conversion.  Additional conversion statements are
3841 added to handle such cases. For example:
3842
3843 bool m_1, m_2, m_3;
3844 int i_4, i_5;
3845 double d_6, d_7;
3846 char c_1, c_2, c_3;
3847
3848 S1 m_1 = i_4 > i_5;
3849 S2 m_2 = d_6 < d_7;
3850 S3 m_3 = m_1 & m_2;
3851 S4 c_1 = m_3 ? c_2 : c_3;
3852
3853 Will be transformed into:
3854
3855 S1 m_1 = i_4 > i_5;
3856 S2 m_2 = d_6 < d_7;
3857 	 S3'' m_2' = (_Bool[bitsize=32])m_2;
3858 	 S3'  m_3' = m_1 & m_2';
3859 	 S4'' m_3'' = (_Bool[bitsize=8])m_3';
3860 S4' c_1' = m_3'' ? c_2 : c_3; */
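
/* The same kind of conversion is applied to the mask operand of
   IFN_MASK_LOAD and IFN_MASK_STORE calls.  An illustrative sketch
   (argument names are schematic; assuming the mask comes from a
   comparison of doubles while the loaded values are 32-bit ints, so
   the natural mask vector types have different numbers of elements):

     m_2  = d_6 < d_7;
     m_2' = (_Bool[bitsize=32]) m_2;
     i_4  = MASK_LOAD (addr, align, m_2');  */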
3861
3862static gimple *
3863vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
3864 tree *type_out)
3865{
3866 gimple *last_stmt = stmts->pop ();
3867 enum tree_code rhs_code;
3868 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3869 tree vectype1, vectype2;
3870 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3871 stmt_vec_info pattern_stmt_info;
3872 vec_info *vinfo = stmt_vinfo->vinfo;
3873
3874   /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion.  */
3875 if (is_gimple_call (last_stmt)
3876 && gimple_call_internal_p (last_stmt)
3877 && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
3878 || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
3879 {
3880 gcall *pattern_stmt;
3881 bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);
3882
3883 if (load)
3884 {
3885 lhs = gimple_call_lhs (last_stmt);
3886 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3887 }
3888 else
3889 {
3890 rhs2 = gimple_call_arg (last_stmt, 3);
3891 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
3892 }
3893
3894 rhs1 = gimple_call_arg (last_stmt, 2);
3895 rhs1_type = search_type_for_mask (rhs1, vinfo);
3896 if (!rhs1_type)
3897 return NULL;
3898 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3899
3900 if (!vectype1 || !vectype2
3901 || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2))
3902 return NULL;
3903
3904 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3905
3906 if (load)
3907 {
3908 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3909 pattern_stmt
3910 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
3911 gimple_call_arg (last_stmt, 0),
3912 gimple_call_arg (last_stmt, 1),
3913 tmp);
3914 gimple_call_set_lhs (pattern_stmt, lhs);
3915 }
3916 else
3917 pattern_stmt
3918 = gimple_build_call_internal (IFN_MASK_STORE, 4,
3919 gimple_call_arg (last_stmt, 0),
3920 gimple_call_arg (last_stmt, 1),
3921 tmp,
3922 gimple_call_arg (last_stmt, 3));
3923
3924 gimple_call_set_nothrow (pattern_stmt, true);
3925
3926 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3927 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3928 STMT_VINFO_DATA_REF (pattern_stmt_info)
3929 = STMT_VINFO_DATA_REF (stmt_vinfo);
3930 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3931 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3932 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3933
3934 *type_out = vectype1;
3935 *type_in = vectype1;
3936 stmts->safe_push (last_stmt);
3937 if (dump_enabled_p ())
3938 dump_printf_loc (MSG_NOTE, vect_location,
3939 "vect_recog_mask_conversion_pattern: detected:\n");
3940
3941 return pattern_stmt;
3942 }
3943
3944 if (!is_gimple_assign (last_stmt))
3945 return NULL;
3946
3947 gimple *pattern_stmt;
3948 lhs = gimple_assign_lhs (last_stmt);
3949 rhs1 = gimple_assign_rhs1 (last_stmt);
3950 rhs_code = gimple_assign_rhs_code (last_stmt);
3951
3952 /* Check for cond expression requiring mask conversion. */
3953 if (rhs_code == COND_EXPR)
3954 {
3955 /* vect_recog_mixed_size_cond_pattern could apply.
3956 Do nothing then. */
3957 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
3958 return NULL;
3959
3960 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3961
3962 if (TREE_CODE (rhs1) == SSA_NAME)
3963 {
3964 rhs1_type = search_type_for_mask (rhs1, vinfo);
3965 if (!rhs1_type)
3966 return NULL;
3967 }
3968 else if (COMPARISON_CLASS_P (rhs1))
3969 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
3970 else
3971 return NULL;
3972
3973 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3974
3975 if (!vectype1 || !vectype2
3976 || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2))
3977 return NULL;
3978
3979       /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
3980 	 in place; we can handle it in vectorizable_condition.  This avoids
3981 	 unnecessary promotion stmts and an increased vectorization factor.  */
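      /* An illustrative case (assuming 32-bit ints and 64-bit doubles):
	 in

	   d_1 = i_4 < i_5 ? d_2 : d_3;

	 where i_4 and i_5 are loop-invariant, the int operands can be
	 promoted to the 64-bit element type of the double vector and the
	 COND_EXPR left for vectorizable_condition to handle.  */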
3982 if (COMPARISON_CLASS_P (rhs1)
3983 && INTEGRAL_TYPE_P (rhs1_type)
3984 && TYPE_VECTOR_SUBPARTS (vectype1) < TYPE_VECTOR_SUBPARTS (vectype2))
3985 {
3986 gimple *dummy;
3987 enum vect_def_type dt;
3988 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), stmt_vinfo->vinfo,
3989 &dummy, &dt)
3990 && dt == vect_external_def
3991 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), stmt_vinfo->vinfo,
3992 &dummy, &dt)
3993 && (dt == vect_external_def
3994 || dt == vect_constant_def))
3995 {
3996 tree wide_scalar_type = build_nonstandard_integer_type
3997 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
3998 TYPE_UNSIGNED (rhs1_type));
3999 tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
4000 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4001 return NULL;
4002 }
4003 }
4004
4005 /* If rhs1 is a comparison we need to move it into a
4006 separate statement. */
4007 if (TREE_CODE (rhs1) != SSA_NAME)
4008 {
4009 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4010 pattern_stmt = gimple_build_assign (tmp, rhs1);
4011 rhs1 = tmp;
4012
4013 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4014 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4015 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
4016 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
4017 }
4018
4019 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4020
4021 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4022 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4023 gimple_assign_rhs2 (last_stmt),
4024 gimple_assign_rhs3 (last_stmt));
4025
4026 *type_out = vectype1;
4027 *type_in = vectype1;
4028 stmts->safe_push (last_stmt);
4029 if (dump_enabled_p ())
4030 dump_printf_loc (MSG_NOTE, vect_location,
4031 "vect_recog_mask_conversion_pattern: detected:\n");
4032
4033 return pattern_stmt;
4034 }
4035
4036 /* Now check for binary boolean operations requiring conversion for
4037      one of the operands.  */
4038 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4039 return NULL;
4040
4041 if (rhs_code != BIT_IOR_EXPR
4042 && rhs_code != BIT_XOR_EXPR
4043 && rhs_code != BIT_AND_EXPR
4044 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4045 return NULL;
4046
4047 rhs2 = gimple_assign_rhs2 (last_stmt);
4048
4049 rhs1_type = search_type_for_mask (rhs1, vinfo);
4050 rhs2_type = search_type_for_mask (rhs2, vinfo);
4051
4052 if (!rhs1_type || !rhs2_type
4053 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4054 return NULL;
4055
4056 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4057 {
4058 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4059 if (!vectype1)
4060 return NULL;
4061 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
4062 }
4063 else
4064 {
4065 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4066 if (!vectype1)
4067 return NULL;
4068 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4069 }
4070
4071 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs),