/* Loop Vectorization
   Copyright (C) 2003-2024 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com> and
   Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
21 | |
22 | #define INCLUDE_ALGORITHM |
23 | #include "config.h" |
24 | #include "system.h" |
25 | #include "coretypes.h" |
26 | #include "backend.h" |
27 | #include "target.h" |
28 | #include "rtl.h" |
29 | #include "tree.h" |
30 | #include "gimple.h" |
31 | #include "cfghooks.h" |
32 | #include "tree-pass.h" |
33 | #include "ssa.h" |
34 | #include "optabs-tree.h" |
35 | #include "memmodel.h" |
36 | #include "optabs.h" |
37 | #include "diagnostic-core.h" |
38 | #include "fold-const.h" |
39 | #include "stor-layout.h" |
40 | #include "cfganal.h" |
41 | #include "gimplify.h" |
42 | #include "gimple-iterator.h" |
43 | #include "gimplify-me.h" |
44 | #include "tree-ssa-loop-ivopts.h" |
45 | #include "tree-ssa-loop-manip.h" |
46 | #include "tree-ssa-loop-niter.h" |
47 | #include "tree-ssa-loop.h" |
48 | #include "cfgloop.h" |
49 | #include "tree-scalar-evolution.h" |
50 | #include "tree-vectorizer.h" |
51 | #include "gimple-fold.h" |
52 | #include "cgraph.h" |
53 | #include "tree-cfg.h" |
54 | #include "tree-if-conv.h" |
55 | #include "internal-fn.h" |
56 | #include "tree-vector-builder.h" |
57 | #include "vec-perm-indices.h" |
58 | #include "tree-eh.h" |
59 | #include "case-cfn-macros.h" |
60 | #include "langhooks.h" |
61 | |
/* Loop Vectorization Pass.

   This pass tries to vectorize loops.

   For example, the vectorizer transforms the following simple loop:

	short a[N]; short b[N]; short c[N]; int i;

	for (i=0; i<N; i++){
	  a[i] = b[i] + c[i];
	}

   as if it was manually vectorized by rewriting the source code into:

	typedef int __attribute__((mode(V8HI))) v8hi;
	short a[N]; short b[N]; short c[N]; int i;
	v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
	v8hi va, vb, vc;

	for (i=0; i<N/8; i++){
	  vb = pb[i];
	  vc = pc[i];
	  va = vb + vc;
	  pa[i] = va;
	}

	The main entry to this pass is vectorize_loops(), in which
   the vectorizer applies a set of analyses on a given set of loops,
   followed by the actual vectorization transformation for the loops that
   had successfully passed the analysis phase.
	Throughout this pass we make a distinction between two types of
   data: scalars (which are represented by SSA_NAMES), and memory references
   ("data-refs").  These two types of data require different handling both
   during analysis and transformation.  The types of data-refs that the
   vectorizer currently supports are ARRAY_REFS which base is an array DECL
   (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
   accesses are required to have a simple (consecutive) access pattern.

   Analysis phase:
   ===============
	The driver for the analysis phase is vect_analyze_loop().
   It applies a set of analyses, some of which rely on the scalar evolution
   analyzer (scev) developed by Sebastian Pop.

	During the analysis phase the vectorizer records some information
   per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
   loop, as well as general information about the loop as a whole, which is
   recorded in a "loop_vec_info" struct attached to each loop.

   Transformation phase:
   =====================
	The loop transformation phase scans all the stmts in the loop, and
   creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
   the loop that needs to be vectorized.  It inserts the vector code sequence
   just before the scalar stmt S, and records a pointer to the vector code
   in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
   attached to S).  This pointer will be used for the vectorization of following
   stmts which use the def of stmt S.  Stmt S is removed if it writes to memory;
   otherwise, we rely on dead code elimination for removing it.

	For example, say stmt S1 was vectorized into stmt VS1:

   VS1: vb = px[i];
   S1:	b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
   S2:	a = b;

   To vectorize stmt S2, the vectorizer first finds the stmt that defines
   the operand 'b' (S1), and gets the relevant vector def 'vb' from the
   vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
   resulting sequence would be:

   VS1: vb = px[i];
   S1:	b = x[i];	STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
   VS2: va = vb;
   S2:	a = b;		STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2

	Operands that are not SSA_NAMEs, are data-refs that appear in
   load/store operations (like 'x[i]' in S1), and are handled differently.

   Target modeling:
   =================
	Currently the only target specific information that is used is the
   size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
   Targets that can support different sizes of vectors, for now will need
   to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".  More
   flexibility will be added in the future.

	Since we only vectorize operations which vector form can be
   expressed using existing tree codes, to verify that an operation is
   supported, the vectorizer checks the relevant optab at the relevant
   machine_mode (e.g, optab_handler (add_optab, V8HImode)).  If
   the value found is CODE_FOR_nothing, then there's no target support, and
   we can't vectorize the stmt.

   For additional information on this project see:
   http://gcc.gnu.org/projects/tree-ssa/vectorization.html
*/
159 | |
160 | static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *, |
161 | unsigned *); |
162 | static stmt_vec_info vect_is_simple_reduction (loop_vec_info, stmt_vec_info, |
163 | bool *, bool *, bool); |
164 | |
165 | /* Subroutine of vect_determine_vf_for_stmt that handles only one |
166 | statement. VECTYPE_MAYBE_SET_P is true if STMT_VINFO_VECTYPE |
167 | may already be set for general statements (not just data refs). */ |
168 | |
169 | static opt_result |
170 | vect_determine_vf_for_stmt_1 (vec_info *vinfo, stmt_vec_info stmt_info, |
171 | bool vectype_maybe_set_p, |
172 | poly_uint64 *vf) |
173 | { |
174 | gimple *stmt = stmt_info->stmt; |
175 | |
176 | if ((!STMT_VINFO_RELEVANT_P (stmt_info) |
177 | && !STMT_VINFO_LIVE_P (stmt_info)) |
178 | || gimple_clobber_p (s: stmt)) |
179 | { |
180 | if (dump_enabled_p ()) |
181 | dump_printf_loc (MSG_NOTE, vect_location, "skip.\n" ); |
182 | return opt_result::success (); |
183 | } |
184 | |
185 | tree stmt_vectype, nunits_vectype; |
186 | opt_result res = vect_get_vector_types_for_stmt (vinfo, stmt_info, |
187 | &stmt_vectype, |
188 | &nunits_vectype); |
189 | if (!res) |
190 | return res; |
191 | |
192 | if (stmt_vectype) |
193 | { |
194 | if (STMT_VINFO_VECTYPE (stmt_info)) |
195 | /* The only case when a vectype had been already set is for stmts |
196 | that contain a data ref, or for "pattern-stmts" (stmts generated |
197 | by the vectorizer to represent/replace a certain idiom). */ |
198 | gcc_assert ((STMT_VINFO_DATA_REF (stmt_info) |
199 | || vectype_maybe_set_p) |
200 | && STMT_VINFO_VECTYPE (stmt_info) == stmt_vectype); |
201 | else |
202 | STMT_VINFO_VECTYPE (stmt_info) = stmt_vectype; |
203 | } |
204 | |
205 | if (nunits_vectype) |
206 | vect_update_max_nunits (max_nunits: vf, vectype: nunits_vectype); |
207 | |
208 | return opt_result::success (); |
209 | } |
210 | |
211 | /* Subroutine of vect_determine_vectorization_factor. Set the vector |
212 | types of STMT_INFO and all attached pattern statements and update |
213 | the vectorization factor VF accordingly. Return true on success |
214 | or false if something prevented vectorization. */ |
215 | |
216 | static opt_result |
217 | vect_determine_vf_for_stmt (vec_info *vinfo, |
218 | stmt_vec_info stmt_info, poly_uint64 *vf) |
219 | { |
220 | if (dump_enabled_p ()) |
221 | dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G" , |
222 | stmt_info->stmt); |
223 | opt_result res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, vectype_maybe_set_p: false, vf); |
224 | if (!res) |
225 | return res; |
226 | |
227 | if (STMT_VINFO_IN_PATTERN_P (stmt_info) |
228 | && STMT_VINFO_RELATED_STMT (stmt_info)) |
229 | { |
230 | gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info); |
231 | stmt_info = STMT_VINFO_RELATED_STMT (stmt_info); |
232 | |
233 | /* If a pattern statement has def stmts, analyze them too. */ |
234 | for (gimple_stmt_iterator si = gsi_start (seq&: pattern_def_seq); |
235 | !gsi_end_p (i: si); gsi_next (i: &si)) |
236 | { |
237 | stmt_vec_info def_stmt_info = vinfo->lookup_stmt (gsi_stmt (i: si)); |
238 | if (dump_enabled_p ()) |
239 | dump_printf_loc (MSG_NOTE, vect_location, |
240 | "==> examining pattern def stmt: %G" , |
241 | def_stmt_info->stmt); |
242 | res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info: def_stmt_info, vectype_maybe_set_p: true, vf); |
243 | if (!res) |
244 | return res; |
245 | } |
246 | |
247 | if (dump_enabled_p ()) |
248 | dump_printf_loc (MSG_NOTE, vect_location, |
249 | "==> examining pattern statement: %G" , |
250 | stmt_info->stmt); |
251 | res = vect_determine_vf_for_stmt_1 (vinfo, stmt_info, vectype_maybe_set_p: true, vf); |
252 | if (!res) |
253 | return res; |
254 | } |
255 | |
256 | return opt_result::success (); |
257 | } |
258 | |
259 | /* Function vect_determine_vectorization_factor |
260 | |
261 | Determine the vectorization factor (VF). VF is the number of data elements |
262 | that are operated upon in parallel in a single iteration of the vectorized |
263 | loop. For example, when vectorizing a loop that operates on 4byte elements, |
264 | on a target with vector size (VS) 16byte, the VF is set to 4, since 4 |
265 | elements can fit in a single vector register. |
266 | |
267 | We currently support vectorization of loops in which all types operated upon |
268 | are of the same size. Therefore this function currently sets VF according to |
269 | the size of the types operated upon, and fails if there are multiple sizes |
270 | in the loop. |
271 | |
272 | VF is also the factor by which the loop iterations are strip-mined, e.g.: |
273 | original loop: |
274 | for (i=0; i<N; i++){ |
275 | a[i] = b[i] + c[i]; |
276 | } |
277 | |
278 | vectorized loop: |
279 | for (i=0; i<N; i+=VF){ |
280 | a[i:VF] = b[i:VF] + c[i:VF]; |
281 | } |
282 | */ |
283 | |
284 | static opt_result |
285 | vect_determine_vectorization_factor (loop_vec_info loop_vinfo) |
286 | { |
287 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
288 | basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); |
289 | unsigned nbbs = loop->num_nodes; |
290 | poly_uint64 vectorization_factor = 1; |
291 | tree scalar_type = NULL_TREE; |
292 | gphi *phi; |
293 | tree vectype; |
294 | stmt_vec_info stmt_info; |
295 | unsigned i; |
296 | |
297 | DUMP_VECT_SCOPE ("vect_determine_vectorization_factor" ); |
298 | |
299 | for (i = 0; i < nbbs; i++) |
300 | { |
301 | basic_block bb = bbs[i]; |
302 | |
303 | for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (i: si); |
304 | gsi_next (i: &si)) |
305 | { |
306 | phi = si.phi (); |
307 | stmt_info = loop_vinfo->lookup_stmt (phi); |
308 | if (dump_enabled_p ()) |
309 | dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: %G" , |
310 | (gimple *) phi); |
311 | |
312 | gcc_assert (stmt_info); |
313 | |
314 | if (STMT_VINFO_RELEVANT_P (stmt_info) |
315 | || STMT_VINFO_LIVE_P (stmt_info)) |
316 | { |
317 | gcc_assert (!STMT_VINFO_VECTYPE (stmt_info)); |
318 | scalar_type = TREE_TYPE (PHI_RESULT (phi)); |
319 | |
320 | if (dump_enabled_p ()) |
321 | dump_printf_loc (MSG_NOTE, vect_location, |
322 | "get vectype for scalar type: %T\n" , |
323 | scalar_type); |
324 | |
325 | vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type); |
326 | if (!vectype) |
327 | return opt_result::failure_at (loc: phi, |
328 | fmt: "not vectorized: unsupported " |
329 | "data-type %T\n" , |
330 | scalar_type); |
331 | STMT_VINFO_VECTYPE (stmt_info) = vectype; |
332 | |
333 | if (dump_enabled_p ()) |
334 | dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n" , |
335 | vectype); |
336 | |
337 | if (dump_enabled_p ()) |
338 | { |
339 | dump_printf_loc (MSG_NOTE, vect_location, "nunits = " ); |
340 | dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (node: vectype)); |
341 | dump_printf (MSG_NOTE, "\n" ); |
342 | } |
343 | |
344 | vect_update_max_nunits (max_nunits: &vectorization_factor, vectype); |
345 | } |
346 | } |
347 | |
348 | for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (i: si); |
349 | gsi_next (i: &si)) |
350 | { |
351 | if (is_gimple_debug (gs: gsi_stmt (i: si))) |
352 | continue; |
353 | stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (i: si)); |
354 | opt_result res |
355 | = vect_determine_vf_for_stmt (vinfo: loop_vinfo, |
356 | stmt_info, vf: &vectorization_factor); |
357 | if (!res) |
358 | return res; |
359 | } |
360 | } |
361 | |
362 | /* TODO: Analyze cost. Decide if worth while to vectorize. */ |
363 | if (dump_enabled_p ()) |
364 | { |
365 | dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = " ); |
366 | dump_dec (MSG_NOTE, vectorization_factor); |
367 | dump_printf (MSG_NOTE, "\n" ); |
368 | } |
369 | |
370 | if (known_le (vectorization_factor, 1U)) |
371 | return opt_result::failure_at (loc: vect_location, |
372 | fmt: "not vectorized: unsupported data-type\n" ); |
373 | LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; |
374 | return opt_result::success (); |
375 | } |
376 | |
377 | |
378 | /* Function vect_is_simple_iv_evolution. |
379 | |
380 | FORNOW: A simple evolution of an induction variables in the loop is |
381 | considered a polynomial evolution. */ |
382 | |
383 | static bool |
384 | vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init, |
385 | tree * step) |
386 | { |
387 | tree init_expr; |
388 | tree step_expr; |
389 | tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb); |
390 | basic_block bb; |
391 | |
392 | /* When there is no evolution in this loop, the evolution function |
393 | is not "simple". */ |
394 | if (evolution_part == NULL_TREE) |
395 | return false; |
396 | |
397 | /* When the evolution is a polynomial of degree >= 2 |
398 | the evolution function is not "simple". */ |
399 | if (tree_is_chrec (expr: evolution_part)) |
400 | return false; |
401 | |
402 | step_expr = evolution_part; |
403 | init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb)); |
404 | |
405 | if (dump_enabled_p ()) |
406 | dump_printf_loc (MSG_NOTE, vect_location, "step: %T, init: %T\n" , |
407 | step_expr, init_expr); |
408 | |
409 | *init = init_expr; |
410 | *step = step_expr; |
411 | |
412 | if (TREE_CODE (step_expr) != INTEGER_CST |
413 | && (TREE_CODE (step_expr) != SSA_NAME |
414 | || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr))) |
415 | && flow_bb_inside_loop_p (get_loop (cfun, num: loop_nb), bb)) |
416 | || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr)) |
417 | && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)) |
418 | || !flag_associative_math))) |
419 | && (TREE_CODE (step_expr) != REAL_CST |
420 | || !flag_associative_math)) |
421 | { |
422 | if (dump_enabled_p ()) |
423 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
424 | "step unknown.\n" ); |
425 | return false; |
426 | } |
427 | |
428 | return true; |
429 | } |
430 | |
431 | /* Function vect_is_nonlinear_iv_evolution |
432 | |
433 | Only support nonlinear induction for integer type |
434 | 1. neg |
435 | 2. mul by constant |
436 | 3. lshift/rshift by constant. |
437 | |
438 | For neg induction, return a fake step as integer -1. */ |
439 | static bool |
440 | vect_is_nonlinear_iv_evolution (class loop* loop, stmt_vec_info stmt_info, |
441 | gphi* loop_phi_node, tree *init, tree *step) |
442 | { |
443 | tree init_expr, ev_expr, result, op1, op2; |
444 | gimple* def; |
445 | |
446 | if (gimple_phi_num_args (gs: loop_phi_node) != 2) |
447 | return false; |
448 | |
449 | init_expr = PHI_ARG_DEF_FROM_EDGE (loop_phi_node, loop_preheader_edge (loop)); |
450 | ev_expr = PHI_ARG_DEF_FROM_EDGE (loop_phi_node, loop_latch_edge (loop)); |
451 | |
452 | /* Support nonlinear induction only for integer type. */ |
453 | if (!INTEGRAL_TYPE_P (TREE_TYPE (init_expr))) |
454 | return false; |
455 | |
456 | *init = init_expr; |
457 | result = PHI_RESULT (loop_phi_node); |
458 | |
459 | if (TREE_CODE (ev_expr) != SSA_NAME |
460 | || ((def = SSA_NAME_DEF_STMT (ev_expr)), false) |
461 | || !is_gimple_assign (gs: def)) |
462 | return false; |
463 | |
464 | enum tree_code t_code = gimple_assign_rhs_code (gs: def); |
465 | switch (t_code) |
466 | { |
467 | case NEGATE_EXPR: |
468 | if (gimple_assign_rhs1 (gs: def) != result) |
469 | return false; |
470 | *step = build_int_cst (TREE_TYPE (init_expr), -1); |
471 | STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info) = vect_step_op_neg; |
472 | break; |
473 | |
474 | case RSHIFT_EXPR: |
475 | case LSHIFT_EXPR: |
476 | case MULT_EXPR: |
477 | op1 = gimple_assign_rhs1 (gs: def); |
478 | op2 = gimple_assign_rhs2 (gs: def); |
479 | if (TREE_CODE (op2) != INTEGER_CST |
480 | || op1 != result) |
481 | return false; |
482 | *step = op2; |
483 | if (t_code == LSHIFT_EXPR) |
484 | STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info) = vect_step_op_shl; |
485 | else if (t_code == RSHIFT_EXPR) |
486 | STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info) = vect_step_op_shr; |
487 | /* NEGATE_EXPR and MULT_EXPR are both vect_step_op_mul. */ |
488 | else |
489 | STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info) = vect_step_op_mul; |
490 | break; |
491 | |
492 | default: |
493 | return false; |
494 | } |
495 | |
496 | STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_info) = *init; |
497 | STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info) = *step; |
498 | |
499 | return true; |
500 | } |
501 | |
502 | /* Return true if PHI, described by STMT_INFO, is the inner PHI in |
503 | what we are assuming is a double reduction. For example, given |
504 | a structure like this: |
505 | |
506 | outer1: |
507 | x_1 = PHI <x_4(outer2), ...>; |
508 | ... |
509 | |
510 | inner: |
511 | x_2 = PHI <x_1(outer1), ...>; |
512 | ... |
513 | x_3 = ...; |
514 | ... |
515 | |
516 | outer2: |
517 | x_4 = PHI <x_3(inner)>; |
518 | ... |
519 | |
520 | outer loop analysis would treat x_1 as a double reduction phi and |
521 | this function would then return true for x_2. */ |
522 | |
523 | static bool |
524 | vect_inner_phi_in_double_reduction_p (loop_vec_info loop_vinfo, gphi *phi) |
525 | { |
526 | use_operand_p use_p; |
527 | ssa_op_iter op_iter; |
528 | FOR_EACH_PHI_ARG (use_p, phi, op_iter, SSA_OP_USE) |
529 | if (stmt_vec_info def_info = loop_vinfo->lookup_def (USE_FROM_PTR (use_p))) |
530 | if (STMT_VINFO_DEF_TYPE (def_info) == vect_double_reduction_def) |
531 | return true; |
532 | return false; |
533 | } |
534 | |
535 | /* Returns true if Phi is a first-order recurrence. A first-order |
536 | recurrence is a non-reduction recurrence relation in which the value of |
537 | the recurrence in the current loop iteration equals a value defined in |
538 | the previous iteration. */ |
539 | |
540 | static bool |
541 | vect_phi_first_order_recurrence_p (loop_vec_info loop_vinfo, class loop *loop, |
542 | gphi *phi) |
543 | { |
544 | /* A nested cycle isn't vectorizable as first order recurrence. */ |
545 | if (LOOP_VINFO_LOOP (loop_vinfo) != loop) |
546 | return false; |
547 | |
548 | /* Ensure the loop latch definition is from within the loop. */ |
549 | edge latch = loop_latch_edge (loop); |
550 | tree ldef = PHI_ARG_DEF_FROM_EDGE (phi, latch); |
551 | if (TREE_CODE (ldef) != SSA_NAME |
552 | || SSA_NAME_IS_DEFAULT_DEF (ldef) |
553 | || is_a <gphi *> (SSA_NAME_DEF_STMT (ldef)) |
554 | || !flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (ldef)))) |
555 | return false; |
556 | |
557 | tree def = gimple_phi_result (gs: phi); |
558 | |
559 | /* Ensure every use_stmt of the phi node is dominated by the latch |
560 | definition. */ |
561 | imm_use_iterator imm_iter; |
562 | use_operand_p use_p; |
563 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def) |
564 | if (!is_gimple_debug (USE_STMT (use_p)) |
565 | && (SSA_NAME_DEF_STMT (ldef) == USE_STMT (use_p) |
566 | || !vect_stmt_dominates_stmt_p (SSA_NAME_DEF_STMT (ldef), |
567 | USE_STMT (use_p)))) |
568 | return false; |
569 | |
570 | /* First-order recurrence autovectorization needs shuffle vector. */ |
571 | tree scalar_type = TREE_TYPE (def); |
572 | tree vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type); |
573 | if (!vectype) |
574 | return false; |
575 | |
576 | return true; |
577 | } |
578 | |
579 | /* Function vect_analyze_scalar_cycles_1. |
580 | |
581 | Examine the cross iteration def-use cycles of scalar variables |
582 | in LOOP. LOOP_VINFO represents the loop that is now being |
583 | considered for vectorization (can be LOOP, or an outer-loop |
584 | enclosing LOOP). SLP indicates there will be some subsequent |
585 | slp analyses or not. */ |
586 | |
587 | static void |
588 | vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, class loop *loop, |
589 | bool slp) |
590 | { |
591 | basic_block bb = loop->header; |
592 | tree init, step; |
593 | auto_vec<stmt_vec_info, 64> worklist; |
594 | gphi_iterator gsi; |
595 | bool double_reduc, reduc_chain; |
596 | |
597 | DUMP_VECT_SCOPE ("vect_analyze_scalar_cycles" ); |
598 | |
599 | /* First - identify all inductions. Reduction detection assumes that all the |
600 | inductions have been identified, therefore, this order must not be |
601 | changed. */ |
602 | for (gsi = gsi_start_phis (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi)) |
603 | { |
604 | gphi *phi = gsi.phi (); |
605 | tree access_fn = NULL; |
606 | tree def = PHI_RESULT (phi); |
607 | stmt_vec_info stmt_vinfo = loop_vinfo->lookup_stmt (phi); |
608 | |
609 | if (dump_enabled_p ()) |
610 | dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G" , |
611 | (gimple *) phi); |
612 | |
613 | /* Skip virtual phi's. The data dependences that are associated with |
614 | virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */ |
615 | if (virtual_operand_p (op: def)) |
616 | continue; |
617 | |
618 | STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type; |
619 | |
620 | /* Analyze the evolution function. */ |
621 | access_fn = analyze_scalar_evolution (loop, def); |
622 | if (access_fn) |
623 | { |
624 | STRIP_NOPS (access_fn); |
625 | if (dump_enabled_p ()) |
626 | dump_printf_loc (MSG_NOTE, vect_location, |
627 | "Access function of PHI: %T\n" , access_fn); |
628 | STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo) |
629 | = initial_condition_in_loop_num (access_fn, loop->num); |
630 | STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) |
631 | = evolution_part_in_loop_num (access_fn, loop->num); |
632 | } |
633 | |
634 | if ((!access_fn |
635 | || vect_inner_phi_in_double_reduction_p (loop_vinfo, phi) |
636 | || !vect_is_simple_iv_evolution (loop_nb: loop->num, access_fn, |
637 | init: &init, step: &step) |
638 | || (LOOP_VINFO_LOOP (loop_vinfo) != loop |
639 | && TREE_CODE (step) != INTEGER_CST)) |
640 | /* Only handle nonlinear iv for same loop. */ |
641 | && (LOOP_VINFO_LOOP (loop_vinfo) != loop |
642 | || !vect_is_nonlinear_iv_evolution (loop, stmt_info: stmt_vinfo, |
643 | loop_phi_node: phi, init: &init, step: &step))) |
644 | { |
645 | worklist.safe_push (obj: stmt_vinfo); |
646 | continue; |
647 | } |
648 | |
649 | gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo) |
650 | != NULL_TREE); |
651 | gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE); |
652 | |
653 | if (dump_enabled_p ()) |
654 | dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.\n" ); |
655 | STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def; |
656 | } |
657 | |
658 | |
659 | /* Second - identify all reductions and nested cycles. */ |
660 | while (worklist.length () > 0) |
661 | { |
662 | stmt_vec_info stmt_vinfo = worklist.pop (); |
663 | gphi *phi = as_a <gphi *> (p: stmt_vinfo->stmt); |
664 | tree def = PHI_RESULT (phi); |
665 | |
666 | if (dump_enabled_p ()) |
667 | dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G" , |
668 | (gimple *) phi); |
669 | |
670 | gcc_assert (!virtual_operand_p (def) |
671 | && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type); |
672 | |
673 | stmt_vec_info reduc_stmt_info |
674 | = vect_is_simple_reduction (loop_vinfo, stmt_vinfo, &double_reduc, |
675 | &reduc_chain, slp); |
676 | if (reduc_stmt_info) |
677 | { |
678 | STMT_VINFO_REDUC_DEF (stmt_vinfo) = reduc_stmt_info; |
679 | STMT_VINFO_REDUC_DEF (reduc_stmt_info) = stmt_vinfo; |
680 | if (double_reduc) |
681 | { |
682 | if (dump_enabled_p ()) |
683 | dump_printf_loc (MSG_NOTE, vect_location, |
684 | "Detected double reduction.\n" ); |
685 | |
686 | STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def; |
687 | STMT_VINFO_DEF_TYPE (reduc_stmt_info) = vect_double_reduction_def; |
688 | } |
689 | else |
690 | { |
691 | if (loop != LOOP_VINFO_LOOP (loop_vinfo)) |
692 | { |
693 | if (dump_enabled_p ()) |
694 | dump_printf_loc (MSG_NOTE, vect_location, |
695 | "Detected vectorizable nested cycle.\n" ); |
696 | |
697 | STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle; |
698 | } |
699 | else |
700 | { |
701 | if (dump_enabled_p ()) |
702 | dump_printf_loc (MSG_NOTE, vect_location, |
703 | "Detected reduction.\n" ); |
704 | |
705 | STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def; |
706 | STMT_VINFO_DEF_TYPE (reduc_stmt_info) = vect_reduction_def; |
707 | /* Store the reduction cycles for possible vectorization in |
708 | loop-aware SLP if it was not detected as reduction |
709 | chain. */ |
710 | if (! reduc_chain) |
711 | LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push |
712 | (obj: reduc_stmt_info); |
713 | } |
714 | } |
715 | } |
716 | else if (vect_phi_first_order_recurrence_p (loop_vinfo, loop, phi)) |
717 | STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_first_order_recurrence; |
718 | else |
719 | if (dump_enabled_p ()) |
720 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
721 | "Unknown def-use cycle pattern.\n" ); |
722 | } |
723 | } |
724 | |
725 | |
726 | /* Function vect_analyze_scalar_cycles. |
727 | |
728 | Examine the cross iteration def-use cycles of scalar variables, by |
729 | analyzing the loop-header PHIs of scalar variables. Classify each |
730 | cycle as one of the following: invariant, induction, reduction, unknown. |
731 | We do that for the loop represented by LOOP_VINFO, and also to its |
732 | inner-loop, if exists. |
733 | Examples for scalar cycles: |
734 | |
735 | Example1: reduction: |
736 | |
737 | loop1: |
738 | for (i=0; i<N; i++) |
739 | sum += a[i]; |
740 | |
741 | Example2: induction: |
742 | |
743 | loop2: |
744 | for (i=0; i<N; i++) |
745 | a[i] = i; */ |
746 | |
747 | static void |
748 | vect_analyze_scalar_cycles (loop_vec_info loop_vinfo, bool slp) |
749 | { |
750 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
751 | |
752 | vect_analyze_scalar_cycles_1 (loop_vinfo, loop, slp); |
753 | |
754 | /* When vectorizing an outer-loop, the inner-loop is executed sequentially. |
755 | Reductions in such inner-loop therefore have different properties than |
756 | the reductions in the nest that gets vectorized: |
757 | 1. When vectorized, they are executed in the same order as in the original |
758 | scalar loop, so we can't change the order of computation when |
759 | vectorizing them. |
760 | 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the |
761 | current checks are too strict. */ |
762 | |
763 | if (loop->inner) |
764 | vect_analyze_scalar_cycles_1 (loop_vinfo, loop: loop->inner, slp); |
765 | } |
766 | |
767 | /* Transfer group and reduction information from STMT_INFO to its |
768 | pattern stmt. */ |
769 | |
770 | static void |
771 | vect_fixup_reduc_chain (stmt_vec_info stmt_info) |
772 | { |
773 | stmt_vec_info firstp = STMT_VINFO_RELATED_STMT (stmt_info); |
774 | stmt_vec_info stmtp; |
775 | gcc_assert (!REDUC_GROUP_FIRST_ELEMENT (firstp) |
776 | && REDUC_GROUP_FIRST_ELEMENT (stmt_info)); |
777 | REDUC_GROUP_SIZE (firstp) = REDUC_GROUP_SIZE (stmt_info); |
778 | do |
779 | { |
780 | stmtp = STMT_VINFO_RELATED_STMT (stmt_info); |
781 | gcc_checking_assert (STMT_VINFO_DEF_TYPE (stmtp) |
782 | == STMT_VINFO_DEF_TYPE (stmt_info)); |
783 | REDUC_GROUP_FIRST_ELEMENT (stmtp) = firstp; |
784 | stmt_info = REDUC_GROUP_NEXT_ELEMENT (stmt_info); |
785 | if (stmt_info) |
786 | REDUC_GROUP_NEXT_ELEMENT (stmtp) |
787 | = STMT_VINFO_RELATED_STMT (stmt_info); |
788 | } |
789 | while (stmt_info); |
790 | } |
791 | |
792 | /* Fixup scalar cycles that now have their stmts detected as patterns. */ |
793 | |
794 | static void |
795 | vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo) |
796 | { |
797 | stmt_vec_info first; |
798 | unsigned i; |
799 | |
800 | FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first) |
801 | { |
802 | stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (first); |
803 | while (next) |
804 | { |
805 | if ((STMT_VINFO_IN_PATTERN_P (next) |
806 | != STMT_VINFO_IN_PATTERN_P (first)) |
807 | || STMT_VINFO_REDUC_IDX (vect_stmt_to_vectorize (next)) == -1) |
808 | break; |
809 | next = REDUC_GROUP_NEXT_ELEMENT (next); |
810 | } |
811 | /* If all reduction chain members are well-formed patterns adjust |
812 | the group to group the pattern stmts instead. */ |
813 | if (! next |
814 | && STMT_VINFO_REDUC_IDX (vect_stmt_to_vectorize (first)) != -1) |
815 | { |
816 | if (STMT_VINFO_IN_PATTERN_P (first)) |
817 | { |
818 | vect_fixup_reduc_chain (stmt_info: first); |
819 | LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i] |
820 | = STMT_VINFO_RELATED_STMT (first); |
821 | } |
822 | } |
823 | /* If not all stmt in the chain are patterns or if we failed |
824 | to update STMT_VINFO_REDUC_IDX dissolve the chain and handle |
825 | it as regular reduction instead. */ |
826 | else |
827 | { |
828 | stmt_vec_info vinfo = first; |
829 | stmt_vec_info last = NULL; |
830 | while (vinfo) |
831 | { |
832 | next = REDUC_GROUP_NEXT_ELEMENT (vinfo); |
833 | REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL; |
834 | REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL; |
835 | last = vinfo; |
836 | vinfo = next; |
837 | } |
838 | STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize (first)) |
839 | = vect_internal_def; |
840 | loop_vinfo->reductions.safe_push (obj: vect_stmt_to_vectorize (stmt_info: last)); |
841 | LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).unordered_remove (ix: i); |
842 | --i; |
843 | } |
844 | } |
845 | } |
846 | |
847 | /* Function vect_get_loop_niters. |
848 | |
849 | Determine how many iterations the loop is executed and place it |
850 | in NUMBER_OF_ITERATIONS. Place the number of latch iterations |
851 | in NUMBER_OF_ITERATIONSM1. Place the condition under which the |
852 | niter information holds in ASSUMPTIONS. |
853 | |
854 | Return the loop exit conditions. */ |
855 | |
856 | |
static vec<gcond *>
vect_get_loop_niters (class loop *loop, const_edge main_exit, tree *assumptions,
		      tree *number_of_iterations, tree *number_of_iterationsm1)
{
  auto_vec<edge> exits = get_loop_exit_edges (loop);
  vec<gcond *> conds;
  conds.create (nelems: exits.length ());
  class tree_niter_desc niter_desc;
  tree niter_assumptions, niter, may_be_zero;

  /* Start with pessimistic defaults; they are overwritten below once the
     main exit has been analyzed successfully.  */
  *assumptions = boolean_true_node;
  *number_of_iterationsm1 = chrec_dont_know;
  *number_of_iterations = chrec_dont_know;

  DUMP_VECT_SCOPE ("get_loop_niters" );

  if (exits.is_empty ())
    return conds;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Loop has %d exits.\n" ,
		     exits.length ());

  edge exit;
  unsigned int i;
  FOR_EACH_VEC_ELT (exits, i, exit)
    {
      /* Collect the exit condition of every exit; the niter analysis
	 below is only performed for MAIN_EXIT.  */
      gcond *cond = get_loop_exit_condition (exit);
      if (cond)
	conds.safe_push (obj: cond);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "Analyzing exit %d...\n" , i);

      if (exit != main_exit)
	continue;

      may_be_zero = NULL_TREE;
      if (!number_of_iterations_exit_assumptions (loop, exit, &niter_desc, NULL)
	  || chrec_contains_undetermined (niter_desc.niter))
	continue;

      niter_assumptions = niter_desc.assumptions;
      may_be_zero = niter_desc.may_be_zero;
      niter = niter_desc.niter;

      /* A trivially false MAY_BE_ZERO needs no special handling.  */
      if (may_be_zero && integer_zerop (may_be_zero))
	may_be_zero = NULL_TREE;

      if (may_be_zero)
	{
	  if (COMPARISON_CLASS_P (may_be_zero))
	    {
	      /* Try to combine may_be_zero with assumptions, this can simplify
		 computation of niter expression.  */
	      if (niter_assumptions && !integer_nonzerop (niter_assumptions))
		niter_assumptions = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
						 niter_assumptions,
						 fold_build1 (TRUTH_NOT_EXPR,
							      boolean_type_node,
							      may_be_zero));
	      else
		/* Otherwise fold the zero-iteration case into the niter
		   expression itself: may_be_zero ? 0 : niter.  */
		niter = fold_build3 (COND_EXPR, TREE_TYPE (niter), may_be_zero,
				     build_int_cst (TREE_TYPE (niter), 0),
				     rewrite_to_non_trapping_overflow (niter));

	      may_be_zero = NULL_TREE;
	    }
	  else if (integer_nonzerop (may_be_zero))
	    {
	      /* MAY_BE_ZERO is known true: the loop header runs exactly
		 once, the latch never.  */
	      *number_of_iterationsm1 = build_int_cst (TREE_TYPE (niter), 0);
	      *number_of_iterations = build_int_cst (TREE_TYPE (niter), 1);
	      continue;
	    }
	  else
	    continue;
	}

      /* Loop assumptions are based off the normal exit.  */
      *assumptions = niter_assumptions;
      *number_of_iterationsm1 = niter;

      /* We want the number of loop header executions which is the number
	 of latch executions plus one.
	 ??? For UINT_MAX latch executions this number overflows to zero
	 for loops like do { n++; } while (n != 0);  */
      if (niter && !chrec_contains_undetermined (niter))
	{
	  niter = fold_build2 (PLUS_EXPR, TREE_TYPE (niter),
			       unshare_expr (niter),
			       build_int_cst (TREE_TYPE (niter), 1));
	  if (TREE_CODE (niter) == INTEGER_CST
	      && TREE_CODE (*number_of_iterationsm1) != INTEGER_CST)
	    {
	      /* If we manage to fold niter + 1 into INTEGER_CST even when
		 niter is some complex expression, ensure back
		 *number_of_iterationsm1 is an INTEGER_CST as well.  See
		 PR113210.  */
	      *number_of_iterationsm1
		= fold_build2 (PLUS_EXPR, TREE_TYPE (niter), niter,
			       build_minus_one_cst (TREE_TYPE (niter)));
	    }
	}
      *number_of_iterations = niter;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "All loop exits successfully analyzed.\n" );

  return conds;
}
968 | |
969 | /* Determine the main loop exit for the vectorizer. */ |
970 | |
971 | edge |
972 | vec_init_loop_exit_info (class loop *loop) |
973 | { |
974 | /* Before we begin we must first determine which exit is the main one and |
975 | which are auxilary exits. */ |
976 | auto_vec<edge> exits = get_loop_exit_edges (loop); |
977 | if (exits.length () == 1) |
978 | return exits[0]; |
979 | |
980 | /* If we have multiple exits we only support counting IV at the moment. |
981 | Analyze all exits and return the last one we can analyze. */ |
982 | class tree_niter_desc niter_desc; |
983 | edge candidate = NULL; |
984 | for (edge exit : exits) |
985 | { |
986 | if (!get_loop_exit_condition (exit)) |
987 | continue; |
988 | |
989 | if (number_of_iterations_exit_assumptions (loop, exit, &niter_desc, NULL) |
990 | && !chrec_contains_undetermined (niter_desc.niter)) |
991 | { |
992 | tree may_be_zero = niter_desc.may_be_zero; |
993 | if ((integer_zerop (may_be_zero) |
994 | /* As we are handling may_be_zero that's not false by |
995 | rewriting niter to may_be_zero ? 0 : niter we require |
996 | an empty latch. */ |
997 | || (single_pred_p (bb: loop->latch) |
998 | && exit->src == single_pred (bb: loop->latch) |
999 | && (integer_nonzerop (may_be_zero) |
1000 | || COMPARISON_CLASS_P (may_be_zero)))) |
1001 | && (!candidate |
1002 | || dominated_by_p (CDI_DOMINATORS, exit->src, |
1003 | candidate->src))) |
1004 | candidate = exit; |
1005 | } |
1006 | } |
1007 | |
1008 | return candidate; |
1009 | } |
1010 | |
1011 | /* Function bb_in_loop_p |
1012 | |
1013 | Used as predicate for dfs order traversal of the loop bbs. */ |
1014 | |
1015 | static bool |
1016 | bb_in_loop_p (const_basic_block bb, const void *data) |
1017 | { |
1018 | const class loop *const loop = (const class loop *)data; |
1019 | if (flow_bb_inside_loop_p (loop, bb)) |
1020 | return true; |
1021 | return false; |
1022 | } |
1023 | |
1024 | |
1025 | /* Create and initialize a new loop_vec_info struct for LOOP_IN, as well as |
1026 | stmt_vec_info structs for all the stmts in LOOP_IN. */ |
1027 | |
_loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
  : vec_info (vec_info::loop, shared),
    loop (loop_in),
    bbs (XCNEWVEC (basic_block, loop->num_nodes)),
    num_itersm1 (NULL_TREE),
    num_iters (NULL_TREE),
    num_iters_unchanged (NULL_TREE),
    num_iters_assumptions (NULL_TREE),
    vector_costs (nullptr),
    scalar_costs (nullptr),
    th (0),
    versioning_threshold (0),
    vectorization_factor (0),
    main_loop_edge (nullptr),
    skip_main_loop_edge (nullptr),
    skip_this_loop_edge (nullptr),
    reusable_accumulators (),
    suggested_unroll_factor (1),
    max_vectorization_factor (0),
    mask_skip_niters (NULL_TREE),
    rgroup_compare_type (NULL_TREE),
    simd_if_cond (NULL_TREE),
    partial_vector_style (vect_partial_vectors_none),
    unaligned_dr (NULL),
    peeling_for_alignment (0),
    ptr_mask (0),
    ivexpr_map (NULL),
    scan_map (NULL),
    slp_unrolling_factor (1),
    inner_loop_cost_factor (param_vect_inner_loop_cost_factor),
    vectorizable (false),
    can_use_partial_vectors_p (param_vect_partial_vector_usage != 0),
    using_partial_vectors_p (false),
    using_decrementing_iv_p (false),
    using_select_vl_p (false),
    epil_using_partial_vectors_p (false),
    partial_load_store_bias (0),
    peeling_for_gaps (false),
    peeling_for_niter (false),
    early_breaks (false),
    no_data_dependencies (false),
    has_mask_store (false),
    scalar_loop_scaling (profile_probability::uninitialized ()),
    scalar_loop (NULL),
    orig_loop_info (NULL),
    vec_loop_iv_exit (NULL),
    vec_epilogue_loop_iv_exit (NULL),
    scalar_loop_iv_exit (NULL)
{
  /* CHECKME: We want to visit all BBs before their successors (except for
     latch blocks, for which this assertion wouldn't hold).  In the simple
     case of the loop forms we allow, a dfs order of the BBs would be the
     same as reversed postorder traversal, so we are safe.  */

  unsigned int nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
					  bbs, loop->num_nodes, loop);
  gcc_assert (nbbs == loop->num_nodes);

  /* Create stmt_vec_infos for every PHI and statement in the loop and
     reset their UIDs.  */
  for (unsigned int i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator si;

      for (si = gsi_start_phis (bb); !gsi_end_p (i: si); gsi_next (i: &si))
	{
	  gimple *phi = gsi_stmt (i: si);
	  gimple_set_uid (g: phi, uid: 0);
	  add_stmt (phi);
	}

      for (si = gsi_start_bb (bb); !gsi_end_p (i: si); gsi_next (i: &si))
	{
	  gimple *stmt = gsi_stmt (i: si);
	  gimple_set_uid (g: stmt, uid: 0);
	  if (is_gimple_debug (gs: stmt))
	    continue;
	  add_stmt (stmt);
	  /* If .GOMP_SIMD_LANE call for the current loop has 3 arguments, the
	     third argument is the #pragma omp simd if (x) condition, when 0,
	     loop shouldn't be vectorized, when non-zero constant, it should
	     be vectorized normally, otherwise versioned with vectorized loop
	     done if the condition is non-zero at runtime.  */
	  if (loop_in->simduid
	      && is_gimple_call (gs: stmt)
	      && gimple_call_internal_p (gs: stmt)
	      && gimple_call_internal_fn (gs: stmt) == IFN_GOMP_SIMD_LANE
	      && gimple_call_num_args (gs: stmt) >= 3
	      && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
	      && (loop_in->simduid
		  == SSA_NAME_VAR (gimple_call_arg (stmt, 0))))
	    {
	      tree arg = gimple_call_arg (gs: stmt, index: 2);
	      if (integer_zerop (arg) || TREE_CODE (arg) == SSA_NAME)
		simd_if_cond = arg;
	      else
		gcc_assert (integer_nonzerop (arg));
	    }
	}
    }

  epilogue_vinfos.create (nelems: 6);
}
1130 | |
1131 | /* Free all levels of rgroup CONTROLS. */ |
1132 | |
1133 | void |
1134 | release_vec_loop_controls (vec<rgroup_controls> *controls) |
1135 | { |
1136 | rgroup_controls *rgc; |
1137 | unsigned int i; |
1138 | FOR_EACH_VEC_ELT (*controls, i, rgc) |
1139 | rgc->controls.release (); |
1140 | controls->release (); |
1141 | } |
1142 | |
1143 | /* Free all memory used by the _loop_vec_info, as well as all the |
1144 | stmt_vec_info structs of all the stmts in the loop. */ |
1145 | |
_loop_vec_info::~_loop_vec_info ()
{
  /* The BB array was allocated with XCNEWVEC in the constructor.  */
  free (ptr: bbs);

  release_vec_loop_controls (controls: &masks.rgc_vec);
  release_vec_loop_controls (controls: &lens);
  delete ivexpr_map;
  delete scan_map;
  epilogue_vinfos.release ();
  delete scalar_costs;
  delete vector_costs;

  /* When we release an epilogue vinfo that we do not intend to use
     avoid clearing AUX of the main loop which should continue to
     point to the main loop vinfo since otherwise we'll leak that.  */
  if (loop->aux == this)
    loop->aux = NULL;
}
1164 | |
1165 | /* Return an invariant or register for EXPR and emit necessary |
1166 | computations in the LOOP_VINFO loop preheader. */ |
1167 | |
1168 | tree |
1169 | cse_and_gimplify_to_preheader (loop_vec_info loop_vinfo, tree expr) |
1170 | { |
1171 | if (is_gimple_reg (expr) |
1172 | || is_gimple_min_invariant (expr)) |
1173 | return expr; |
1174 | |
1175 | if (! loop_vinfo->ivexpr_map) |
1176 | loop_vinfo->ivexpr_map = new hash_map<tree_operand_hash, tree>; |
1177 | tree &cached = loop_vinfo->ivexpr_map->get_or_insert (k: expr); |
1178 | if (! cached) |
1179 | { |
1180 | gimple_seq stmts = NULL; |
1181 | cached = force_gimple_operand (unshare_expr (expr), |
1182 | &stmts, true, NULL_TREE); |
1183 | if (stmts) |
1184 | { |
1185 | edge e = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); |
1186 | gsi_insert_seq_on_edge_immediate (e, stmts); |
1187 | } |
1188 | } |
1189 | return cached; |
1190 | } |
1191 | |
1192 | /* Return true if we can use CMP_TYPE as the comparison type to produce |
1193 | all masks required to mask LOOP_VINFO. */ |
1194 | |
1195 | static bool |
1196 | can_produce_all_loop_masks_p (loop_vec_info loop_vinfo, tree cmp_type) |
1197 | { |
1198 | rgroup_controls *rgm; |
1199 | unsigned int i; |
1200 | FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo).rgc_vec, i, rgm) |
1201 | if (rgm->type != NULL_TREE |
1202 | && !direct_internal_fn_supported_p (fn: IFN_WHILE_ULT, |
1203 | type0: cmp_type, type1: rgm->type, |
1204 | opt_type: OPTIMIZE_FOR_SPEED)) |
1205 | return false; |
1206 | return true; |
1207 | } |
1208 | |
1209 | /* Calculate the maximum number of scalars per iteration for every |
1210 | rgroup in LOOP_VINFO. */ |
1211 | |
1212 | static unsigned int |
1213 | vect_get_max_nscalars_per_iter (loop_vec_info loop_vinfo) |
1214 | { |
1215 | unsigned int res = 1; |
1216 | unsigned int i; |
1217 | rgroup_controls *rgm; |
1218 | FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo).rgc_vec, i, rgm) |
1219 | res = MAX (res, rgm->max_nscalars_per_iter); |
1220 | return res; |
1221 | } |
1222 | |
1223 | /* Calculate the minimum precision necessary to represent: |
1224 | |
1225 | MAX_NITERS * FACTOR |
1226 | |
1227 | as an unsigned integer, where MAX_NITERS is the maximum number of |
1228 | loop header iterations for the original scalar form of LOOP_VINFO. */ |
1229 | |
1230 | static unsigned |
1231 | vect_min_prec_for_max_niters (loop_vec_info loop_vinfo, unsigned int factor) |
1232 | { |
1233 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
1234 | |
1235 | /* Get the maximum number of iterations that is representable |
1236 | in the counter type. */ |
1237 | tree ni_type = TREE_TYPE (LOOP_VINFO_NITERSM1 (loop_vinfo)); |
1238 | widest_int max_ni = wi::to_widest (TYPE_MAX_VALUE (ni_type)) + 1; |
1239 | |
1240 | /* Get a more refined estimate for the number of iterations. */ |
1241 | widest_int max_back_edges; |
1242 | if (max_loop_iterations (loop, &max_back_edges)) |
1243 | max_ni = wi::smin (x: max_ni, y: max_back_edges + 1); |
1244 | |
1245 | /* Work out how many bits we need to represent the limit. */ |
1246 | return wi::min_precision (x: max_ni * factor, sgn: UNSIGNED); |
1247 | } |
1248 | |
1249 | /* True if the loop needs peeling or partial vectors when vectorized. */ |
1250 | |
static bool
vect_need_peeling_or_partial_vectors_p (loop_vec_info loop_vinfo)
{
  unsigned HOST_WIDE_INT const_vf;
  HOST_WIDE_INT max_niter
    = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));

  /* For an epilogue loop fall back to the threshold of the loop it
     belongs to.  */
  unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
  if (!th && LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo))
    th = LOOP_VINFO_COST_MODEL_THRESHOLD (LOOP_VINFO_ORIG_LOOP_INFO
					  (loop_vinfo));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
    {
      /* Work out the (constant) number of iterations that need to be
	 peeled for reasons other than niters.  */
      unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
      if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
	peel_niter += 1;
      /* Peeling or partial vectors are needed iff the remaining
	 iterations are not a multiple of the vectorization factor.  */
      if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter,
		       LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
	return true;
    }
  else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
	   /* ??? When peeling for gaps but not alignment, we could
	      try to check whether the (variable) niters is known to be
	      VF * N + 1.  That's something of a niche case though.  */
	   || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
	   || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (const_value: &const_vf)
	   /* Variable niters: we need an epilogue unless niters is known
	      to be a multiple of the (power-of-two) VF via its trailing
	      zero bits.  */
	   || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
		< (unsigned) exact_log2 (x: const_vf))
	       /* In case of versioning, check if the maximum number of
		  iterations is greater than th.  If they are identical,
		  the epilogue is unnecessary.  */
	       && (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
		   || ((unsigned HOST_WIDE_INT) max_niter
		       /* We'd like to use LOOP_VINFO_VERSIONING_THRESHOLD
			  but that's only computed later based on our result.
			  The following is the most conservative approximation.  */
		       > (std::max (a: (unsigned HOST_WIDE_INT) th,
				    b: const_vf) / const_vf) * const_vf))))
    return true;

  return false;
}
1297 | |
1298 | /* Each statement in LOOP_VINFO can be masked where necessary. Check |
1299 | whether we can actually generate the masks required. Return true if so, |
1300 | storing the type of the scalar IV in LOOP_VINFO_RGROUP_COMPARE_TYPE. */ |
1301 | |
static bool
vect_verify_full_masking (loop_vec_info loop_vinfo)
{
  unsigned int min_ni_width;

  /* Use a normal loop if there are no statements that need masking.
     This only happens in rare degenerate cases: it means that the loop
     has no loads, no stores, and no live-out values.  */
  if (LOOP_VINFO_MASKS (loop_vinfo).is_empty ())
    return false;

  /* Produce the rgroup controls.  */
  for (auto mask : LOOP_VINFO_MASKS (loop_vinfo).mask_set)
    {
      vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
      tree vectype = mask.first;
      unsigned nvectors = mask.second;

      /* The rgroup vector is indexed by the number of vectors minus one.  */
      if (masks->rgc_vec.length () < nvectors)
	masks->rgc_vec.safe_grow_cleared (len: nvectors, exact: true);
      rgroup_controls *rgm = &(*masks).rgc_vec[nvectors - 1];
      /* The number of scalars per iteration and the number of vectors are
	 both compile-time constants.  */
      unsigned int nscalars_per_iter
	= exact_div (a: nvectors * TYPE_VECTOR_SUBPARTS (node: vectype),
		     LOOP_VINFO_VECT_FACTOR (loop_vinfo)).to_constant ();

      /* Remember the widest mask requirement seen for this rgroup.  */
      if (rgm->max_nscalars_per_iter < nscalars_per_iter)
	{
	  rgm->max_nscalars_per_iter = nscalars_per_iter;
	  rgm->type = truth_type_for (vectype);
	  rgm->factor = 1;
	}
    }

  unsigned int max_nscalars_per_iter
    = vect_get_max_nscalars_per_iter (loop_vinfo);

  /* Work out how many bits we need to represent the limit.  */
  min_ni_width
    = vect_min_prec_for_max_niters (loop_vinfo, factor: max_nscalars_per_iter);

  /* Find a scalar mode for which WHILE_ULT is supported.  */
  opt_scalar_int_mode cmp_mode_iter;
  tree cmp_type = NULL_TREE;
  tree iv_type = NULL_TREE;
  widest_int iv_limit = vect_iv_limit_for_partial_vectors (loop_vinfo);
  unsigned int iv_precision = UINT_MAX;

  if (iv_limit != -1)
    iv_precision = wi::min_precision (x: iv_limit * max_nscalars_per_iter,
				      sgn: UNSIGNED);

  /* Iterate over integer modes from narrowest to widest.  */
  FOR_EACH_MODE_IN_CLASS (cmp_mode_iter, MODE_INT)
    {
      unsigned int cmp_bits = GET_MODE_BITSIZE (mode: cmp_mode_iter.require ());
      if (cmp_bits >= min_ni_width
	  && targetm.scalar_mode_supported_p (cmp_mode_iter.require ()))
	{
	  tree this_type = build_nonstandard_integer_type (cmp_bits, true);
	  if (this_type
	      && can_produce_all_loop_masks_p (loop_vinfo, cmp_type: this_type))
	    {
	      /* Although we could stop as soon as we find a valid mode,
		 there are at least two reasons why that's not always the
		 best choice:

		 - An IV that's Pmode or wider is more likely to be reusable
		   in address calculations than an IV that's narrower than
		   Pmode.

		 - Doing the comparison in IV_PRECISION or wider allows
		   a natural 0-based IV, whereas using a narrower comparison
		   type requires mitigations against wrap-around.

		 Conversely, if the IV limit is variable, doing the comparison
		 in a wider type than the original type can introduce
		 unnecessary extensions, so picking the widest valid mode
		 is not always a good choice either.

		 Here we prefer the first IV type that's Pmode or wider,
		 and the first comparison type that's IV_PRECISION or wider.
		 (The comparison type must be no wider than the IV type,
		 to avoid extensions in the vector loop.)

		 ??? We might want to try continuing beyond Pmode for ILP32
		 targets if CMP_BITS < IV_PRECISION.  */
	      iv_type = this_type;
	      if (!cmp_type || iv_precision > TYPE_PRECISION (cmp_type))
		cmp_type = this_type;
	      if (cmp_bits >= GET_MODE_BITSIZE (Pmode))
		break;
	    }
	}
    }

  /* No usable comparison type: full masking via WHILE_ULT is not
     possible; drop the rgroup controls built above.  */
  if (!cmp_type)
    {
      LOOP_VINFO_MASKS (loop_vinfo).rgc_vec.release ();
      return false;
    }

  LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo) = cmp_type;
  LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo) = iv_type;
  LOOP_VINFO_PARTIAL_VECTORS_STYLE (loop_vinfo) = vect_partial_vectors_while_ult;
  return true;
}
1409 | |
1410 | /* Each statement in LOOP_VINFO can be masked where necessary. Check |
1411 | whether we can actually generate AVX512 style masks. Return true if so, |
1412 | storing the type of the scalar IV in LOOP_VINFO_RGROUP_IV_TYPE. */ |
1413 | |
static bool
vect_verify_full_masking_avx512 (loop_vec_info loop_vinfo)
{
  /* Produce differently organized rgc_vec and differently check
     we can produce masks.  */

  /* Use a normal loop if there are no statements that need masking.
     This only happens in rare degenerate cases: it means that the loop
     has no loads, no stores, and no live-out values.  */
  if (LOOP_VINFO_MASKS (loop_vinfo).is_empty ())
    return false;

  /* For the decrementing IV we need to represent all values in
     [0, niter + niter_skip] where niter_skip is the elements we
     skip in the first iteration for prologue peeling.  */
  tree iv_type = NULL_TREE;
  widest_int iv_limit = vect_iv_limit_for_partial_vectors (loop_vinfo);
  unsigned int iv_precision = UINT_MAX;
  if (iv_limit != -1)
    iv_precision = wi::min_precision (x: iv_limit, sgn: UNSIGNED);

  /* First compute the type for the IV we use to track the remaining
     scalar iterations.  */
  opt_scalar_int_mode cmp_mode_iter;
  FOR_EACH_MODE_IN_CLASS (cmp_mode_iter, MODE_INT)
    {
      unsigned int cmp_bits = GET_MODE_BITSIZE (mode: cmp_mode_iter.require ());
      if (cmp_bits >= iv_precision
	  && targetm.scalar_mode_supported_p (cmp_mode_iter.require ()))
	{
	  iv_type = build_nonstandard_integer_type (cmp_bits, true);
	  if (iv_type)
	    break;
	}
    }
  if (!iv_type)
    return false;

  /* Produce the rgroup controls.  */
  for (auto const &mask : LOOP_VINFO_MASKS (loop_vinfo).mask_set)
    {
      vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
      tree vectype = mask.first;
      unsigned nvectors = mask.second;

      /* The number of scalars per iteration and the number of vectors are
	 both compile-time constants.  */
      unsigned int nscalars_per_iter
	= exact_div (a: nvectors * TYPE_VECTOR_SUBPARTS (node: vectype),
		     LOOP_VINFO_VECT_FACTOR (loop_vinfo)).to_constant ();

      /* We index the rgroup_controls vector with nscalars_per_iter
	 which we keep constant and instead have a varying nvectors,
	 remembering the vector mask with the fewest nV.  */
      if (masks->rgc_vec.length () < nscalars_per_iter)
	masks->rgc_vec.safe_grow_cleared (len: nscalars_per_iter, exact: true);
      rgroup_controls *rgm = &(*masks).rgc_vec[nscalars_per_iter - 1];

      if (!rgm->type || rgm->factor > nvectors)
	{
	  rgm->type = truth_type_for (vectype);
	  rgm->compare_type = NULL_TREE;
	  rgm->max_nscalars_per_iter = nscalars_per_iter;
	  rgm->factor = nvectors;
	  rgm->bias_adjusted_ctrl = NULL_TREE;
	}
    }

  /* There is no fixed compare type we are going to use but we have to
     be able to get at one for each mask group.  */
  unsigned int min_ni_width
    = wi::min_precision (x: vect_max_vf (loop_vinfo), sgn: UNSIGNED);

  bool ok = true;
  for (auto &rgc : LOOP_VINFO_MASKS (loop_vinfo).rgc_vec)
    {
      tree mask_type = rgc.type;
      if (!mask_type)
	continue;

      /* For now vect_get_loop_mask only supports integer mode masks
	 when we need to split it.  */
      if (GET_MODE_CLASS (TYPE_MODE (mask_type)) != MODE_INT
	  || TYPE_PRECISION (TREE_TYPE (mask_type)) != 1)
	{
	  ok = false;
	  break;
	}

      /* If iv_type is usable as compare type use that - we can elide the
	 saturation in that case.   */
      if (TYPE_PRECISION (iv_type) >= min_ni_width)
	{
	  tree cmp_vectype
	    = build_vector_type (iv_type, TYPE_VECTOR_SUBPARTS (node: mask_type));
	  if (expand_vec_cmp_expr_p (cmp_vectype, mask_type, LT_EXPR))
	    rgc.compare_type = cmp_vectype;
	}
      /* Otherwise search for the narrowest integer mode whose vector
	 compare against MASK_TYPE is supported.  */
      if (!rgc.compare_type)
	FOR_EACH_MODE_IN_CLASS (cmp_mode_iter, MODE_INT)
	  {
	    unsigned int cmp_bits = GET_MODE_BITSIZE (mode: cmp_mode_iter.require ());
	    if (cmp_bits >= min_ni_width
		&& targetm.scalar_mode_supported_p (cmp_mode_iter.require ()))
	      {
		tree cmp_type = build_nonstandard_integer_type (cmp_bits, true);
		if (!cmp_type)
		  continue;

		/* Check whether we can produce the mask with cmp_type.  */
		tree cmp_vectype
		  = build_vector_type (cmp_type, TYPE_VECTOR_SUBPARTS (node: mask_type));
		if (expand_vec_cmp_expr_p (cmp_vectype, mask_type, LT_EXPR))
		  {
		    rgc.compare_type = cmp_vectype;
		    break;
		  }
	      }
	  }
      if (!rgc.compare_type)
	{
	  ok = false;
	  break;
	}
    }
  /* On failure release the rgroup controls built above.  */
  if (!ok)
    {
      release_vec_loop_controls (controls: &LOOP_VINFO_MASKS (loop_vinfo).rgc_vec);
      return false;
    }

  /* The compare type varies per mask group, so mark the global one
     invalid on purpose.  */
  LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo) = error_mark_node;
  LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo) = iv_type;
  LOOP_VINFO_PARTIAL_VECTORS_STYLE (loop_vinfo) = vect_partial_vectors_avx512;
  return true;
}
1550 | |
/* Check whether we can use vector access with length based on precision
   comparison.  So far, to keep it simple, we only allow the case that the
   precision of the target supported length is larger than the precision
   required by loop niters.  */
1555 | |
static bool
vect_verify_loop_lens (loop_vec_info loop_vinfo)
{
  /* Nothing to do when no length-controlled accesses were recorded.  */
  if (LOOP_VINFO_LENS (loop_vinfo).is_empty ())
    return false;

  /* The target must support length-controlled loads and stores for the
     chosen vector mode.  */
  machine_mode len_load_mode, len_store_mode;
  if (!get_len_load_store_mode (loop_vinfo->vector_mode, true)
      .exists (mode: &len_load_mode))
    return false;
  if (!get_len_load_store_mode (loop_vinfo->vector_mode, false)
      .exists (mode: &len_store_mode))
    return false;

  signed char partial_load_bias = internal_len_load_store_bias
    (ifn: IFN_LEN_LOAD, len_load_mode);

  signed char partial_store_bias = internal_len_load_store_bias
    (ifn: IFN_LEN_STORE, len_store_mode);

  gcc_assert (partial_load_bias == partial_store_bias);

  if (partial_load_bias == VECT_PARTIAL_BIAS_UNSUPPORTED)
    return false;

  /* If the backend requires a bias of -1 for LEN_LOAD, we must not emit
     len_loads with a length of zero.  In order to avoid that we prohibit
     more than one loop length here.  */
  if (partial_load_bias == -1
      && LOOP_VINFO_LENS (loop_vinfo).length () > 1)
    return false;

  LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo) = partial_load_bias;

  unsigned int max_nitems_per_iter = 1;
  unsigned int i;
  rgroup_controls *rgl;
  /* Find the maximum number of items per iteration for every rgroup.  */
  FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), i, rgl)
    {
      unsigned nitems_per_iter = rgl->max_nscalars_per_iter * rgl->factor;
      max_nitems_per_iter = MAX (max_nitems_per_iter, nitems_per_iter);
    }

  /* Work out how many bits we need to represent the length limit.  */
  unsigned int min_ni_prec
    = vect_min_prec_for_max_niters (loop_vinfo, factor: max_nitems_per_iter);

  /* Now use the maximum of below precisions for one suitable IV type:
     - the IV's natural precision
     - the precision needed to hold: the maximum number of scalar
       iterations multiplied by the scale factor (min_ni_prec above)
     - the Pmode precision

     If min_ni_prec is less than the precision of the current niters,
     we prefer to still use the niters type.  Prefer to use Pmode and
     wider IV to avoid narrow conversions.  */

  unsigned int ni_prec
    = TYPE_PRECISION (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)));
  min_ni_prec = MAX (min_ni_prec, ni_prec);
  min_ni_prec = MAX (min_ni_prec, GET_MODE_BITSIZE (Pmode));

  /* Search for the narrowest integer mode of at least MIN_NI_PREC bits.  */
  tree iv_type = NULL_TREE;
  opt_scalar_int_mode tmode_iter;
  FOR_EACH_MODE_IN_CLASS (tmode_iter, MODE_INT)
    {
      scalar_mode tmode = tmode_iter.require ();
      unsigned int tbits = GET_MODE_BITSIZE (mode: tmode);

      /* ??? Do we really want to construct one IV whose precision exceeds
	 BITS_PER_WORD?  */
      if (tbits > BITS_PER_WORD)
	break;

      /* Find the first available standard integral type.  */
      if (tbits >= min_ni_prec && targetm.scalar_mode_supported_p (tmode))
	{
	  iv_type = build_nonstandard_integer_type (tbits, true);
	  break;
	}
    }

  if (!iv_type)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't vectorize with length-based partial vectors"
			 " because there is no suitable iv type.\n" );
      return false;
    }

  LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo) = iv_type;
  LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo) = iv_type;
  LOOP_VINFO_PARTIAL_VECTORS_STYLE (loop_vinfo) = vect_partial_vectors_len;

  return true;
}
1654 | |
1655 | /* Calculate the cost of one scalar iteration of the loop. */ |
static void
vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes, factor;
  int innerloop_iters, i;

  DUMP_VECT_SCOPE ("vect_compute_single_scalar_iteration_cost" );

  /* Gather costs for statements in the scalar loop.  */

  /* FORNOW.  */
  innerloop_iters = 1;
  if (loop->inner)
    innerloop_iters = LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo);

  for (i = 0; i < nbbs; i++)
    {
      gimple_stmt_iterator si;
      basic_block bb = bbs[i];

      /* Statements in the inner loop execute more often per outer-loop
	 iteration; scale their cost accordingly.  */
      if (bb->loop_father == loop->inner)
	factor = innerloop_iters;
      else
	factor = 1;

      for (si = gsi_start_bb (bb); !gsi_end_p (i: si); gsi_next (i: &si))
	{
	  gimple *stmt = gsi_stmt (i: si);
	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);

	  if (!is_gimple_assign (gs: stmt) && !is_gimple_call (gs: stmt))
	    continue;

	  /* Skip stmts that are not vectorized inside the loop.  */
	  stmt_vec_info vstmt_info = vect_stmt_to_vectorize (stmt_info);
	  if (!STMT_VINFO_RELEVANT_P (vstmt_info)
	      && (!STMT_VINFO_LIVE_P (vstmt_info)
		  || !VECTORIZABLE_CYCLE_DEF
			(STMT_VINFO_DEF_TYPE (vstmt_info))))
	    continue;

	  /* Classify the statement as a load, store or generic stmt.  */
	  vect_cost_for_stmt kind;
	  if (STMT_VINFO_DATA_REF (stmt_info))
	    {
	      if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
		kind = scalar_load;
	      else
		kind = scalar_store;
	    }
	  else if (vect_nop_conversion_p (stmt_info))
	    continue;
	  else
	    kind = scalar_stmt;

	  /* We are using vect_prologue here to avoid scaling twice
	     by the inner loop factor.  */
	  record_stmt_cost (body_cost_vec: &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
			    count: factor, kind, stmt_info, misalign: 0, where: vect_prologue);
	}
    }

  /* Now accumulate cost.  */
  loop_vinfo->scalar_costs = init_cost (vinfo: loop_vinfo, costing_for_scalar: true);
  add_stmt_costs (costs: loop_vinfo->scalar_costs,
		  cost_vec: &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo));
  loop_vinfo->scalar_costs->finish_cost (scalar_costs: nullptr);
}
1725 | |
/* Function vect_analyze_loop_form.

   Verify that certain CFG restrictions hold, including:
   - the loop has a pre-header
   - the loop has a single entry
   - nested loops can have only a single exit.
   - the loop exit condition is simple enough
   - the number of iterations can be analyzed, i.e, a countable loop.  The
     niter could be analyzed under some assumptions.

   On success the exit edge, the exit conditions, the niter expressions
   and (for outer loops) the inner loop's exit condition are recorded
   in *INFO.  */

opt_result
vect_analyze_loop_form (class loop *loop, vect_loop_form_info *info)
{
  DUMP_VECT_SCOPE ("vect_analyze_loop_form" );

  /* Pick the exit to use as the main IV exit; fail if none can be
     determined (e.g. for some multi-exit loops).  */
  edge exit_e = vec_init_loop_exit_info (loop);
  if (!exit_e)
    return opt_result::failure_at (loc: vect_location,
				   fmt: "not vectorized:"
				   " could not determine main exit from"
				   " loop with multiple exits.\n" );
  info->loop_exit = exit_e;
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "using as main loop exit: %d -> %d [AUX: %p]\n" ,
		     exit_e->src->index, exit_e->dest->index, exit_e->aux);

  /* Check if we have any control flow that doesn't leave the loop.  */
  class loop *v_loop = loop->inner ? loop->inner : loop;
  basic_block *bbs = get_loop_body (v_loop);
  for (unsigned i = 0; i < v_loop->num_nodes; i++)
    if (EDGE_COUNT (bbs[i]->succs) != 1
	&& (EDGE_COUNT (bbs[i]->succs) != 2
	    || !loop_exits_from_bb_p (bbs[i]->loop_father, bbs[i])))
      {
	free (ptr: bbs);
	return opt_result::failure_at (loc: vect_location,
				       fmt: "not vectorized:"
				       " unsupported control flow in loop.\n" );
      }
  free (ptr: bbs);

  /* Different restrictions apply when we are considering an inner-most loop,
     vs. an outer (nested) loop.
     (FORNOW. May want to relax some of these restrictions in the future).  */

  info->inner_loop_cond = NULL;
  if (!loop->inner)
    {
      /* Inner-most loop.  */

      if (empty_block_p (loop->header))
	return opt_result::failure_at (loc: vect_location,
				       fmt: "not vectorized: empty loop.\n" );
    }
  else
    {
      class loop *innerloop = loop->inner;
      edge entryedge;

      /* Nested loop. We currently require that the loop is doubly-nested,
	 contains a single inner loop with a single exit to the block
	 with the single exit condition in the outer loop.
	 Vectorizable outer-loops look like this:

			(pre-header)
			   |
			  header <---+
			   |         |
			  inner-loop |
			   |         |
			  tail ------+
			   |
			(exit-bb)

	 The inner-loop also has the properties expected of inner-most loops
	 as described above.  */

      if ((loop->inner)->inner || (loop->inner)->next)
	return opt_result::failure_at (loc: vect_location,
				       fmt: "not vectorized:"
				       " multiple nested loops.\n" );

      entryedge = loop_preheader_edge (innerloop);
      if (entryedge->src != loop->header
	  || !single_exit (innerloop)
	  || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
	return opt_result::failure_at (loc: vect_location,
				       fmt: "not vectorized:"
				       " unsupported outerloop form.\n" );

      /* Analyze the inner-loop (recursive call on LOOP->inner).  */
      vect_loop_form_info inner;
      opt_result res = vect_analyze_loop_form (loop: loop->inner, info: &inner);
      if (!res)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: Bad inner loop.\n" );
	  return res;
	}

      /* Don't support analyzing niter under assumptions for inner
	 loop.  */
      if (!integer_onep (inner.assumptions))
	return opt_result::failure_at (loc: vect_location,
				       fmt: "not vectorized: Bad inner loop.\n" );

      if (!expr_invariant_in_loop_p (loop, inner.number_of_iterations))
	return opt_result::failure_at (loc: vect_location,
				       fmt: "not vectorized: inner-loop count not"
				       " invariant.\n" );

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Considering outer-loop vectorization.\n" );
      info->inner_loop_cond = inner.conds[0];
    }

  /* A single loop entry (besides the latch) is required.  */
  if (EDGE_COUNT (loop->header->preds) != 2)
    return opt_result::failure_at (loc: vect_location,
				   fmt: "not vectorized:"
				   " too many incoming edges.\n" );

  /* We assume that the latch is empty.  */
  if (!empty_block_p (loop->latch)
      || !gimple_seq_empty_p (s: phi_nodes (bb: loop->latch)))
    return opt_result::failure_at (loc: vect_location,
				   fmt: "not vectorized: latch block not empty.\n" );

  /* Make sure there is no abnormal exit.  */
  auto_vec<edge> exits = get_loop_exit_edges (loop);
  for (edge e : exits)
    {
      if (e->flags & EDGE_ABNORMAL)
	return opt_result::failure_at (loc: vect_location,
				       fmt: "not vectorized:"
				       " abnormal loop exit edge.\n" );
    }

  /* Compute the niter expressions and collect all exit conditions.  */
  info->conds
    = vect_get_loop_niters (loop, main_exit: exit_e, assumptions: &info->assumptions,
			    number_of_iterations: &info->number_of_iterations,
			    number_of_iterationsm1: &info->number_of_iterationsm1);
  if (info->conds.is_empty ())
    return opt_result::failure_at
      (loc: vect_location,
       fmt: "not vectorized: complicated exit condition.\n" );

  /* Determine what the primary and alternate exit conds are.
     The condition belonging to the main exit is moved to conds[0].  */
  for (unsigned i = 0; i < info->conds.length (); i++)
    {
      gcond *cond = info->conds[i];
      if (exit_e->src == gimple_bb (g: cond))
	std::swap (a&: info->conds[0], b&: info->conds[i]);
    }

  if (integer_zerop (info->assumptions)
      || !info->number_of_iterations
      || chrec_contains_undetermined (info->number_of_iterations))
    return opt_result::failure_at
      (loc: info->conds[0],
       fmt: "not vectorized: number of iterations cannot be computed.\n" );

  if (integer_zerop (info->number_of_iterations))
    return opt_result::failure_at
      (loc: info->conds[0],
       fmt: "not vectorized: number of iterations = 0.\n" );

  /* A symbolic (non-constant) iteration count is fine; just dump it.  */
  if (!(tree_fits_shwi_p (info->number_of_iterations)
	&& tree_to_shwi (info->number_of_iterations) > 0))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Symbolic number of iterations is " );
	  dump_generic_expr (MSG_NOTE, TDF_DETAILS, info->number_of_iterations);
	  dump_printf (MSG_NOTE, "\n" );
	}
    }

  return opt_result::success ();
}
1909 | |
/* Create a loop_vec_info for LOOP with SHARED and the
   vect_analyze_loop_form result INFO.  MAIN_LOOP_INFO is non-NULL when
   this is for an epilogue loop, in which case niter assumptions are not
   re-recorded (they were already versioned for on the main loop).  */

loop_vec_info
vect_create_loop_vinfo (class loop *loop, vec_info_shared *shared,
			const vect_loop_form_info *info,
			loop_vec_info main_loop_info)
{
  loop_vec_info loop_vinfo = new _loop_vec_info (loop, shared);
  LOOP_VINFO_NITERSM1 (loop_vinfo) = info->number_of_iterationsm1;
  LOOP_VINFO_NITERS (loop_vinfo) = info->number_of_iterations;
  LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = info->number_of_iterations;
  LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = main_loop_info;
  /* Also record the assumptions for versioning.  */
  if (!integer_onep (info->assumptions) && !main_loop_info)
    LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo) = info->assumptions;

  /* Tag every exit condition statement collected by loop-form
     analysis.  */
  for (gcond *cond : info->conds)
    {
      stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (cond);
      STMT_VINFO_TYPE (loop_cond_info) = loop_exit_ctrl_vec_info_type;
      /* Mark the statement as a condition.  */
      STMT_VINFO_DEF_TYPE (loop_cond_info) = vect_condition_def;
    }

  /* conds[0] is the main IV condition; the rest are alternate exits.  */
  for (unsigned i = 1; i < info->conds.length (); i ++)
    LOOP_VINFO_LOOP_CONDS (loop_vinfo).safe_push (obj: info->conds[i]);
  LOOP_VINFO_LOOP_IV_COND (loop_vinfo) = info->conds[0];

  LOOP_VINFO_IV_EXIT (loop_vinfo) = info->loop_exit;

  /* Check to see if we're vectorizing multiple exits.  */
  LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
    = !LOOP_VINFO_LOOP_CONDS (loop_vinfo).is_empty ();

  if (info->inner_loop_cond)
    {
      stmt_vec_info inner_loop_cond_info
	= loop_vinfo->lookup_stmt (info->inner_loop_cond);
      STMT_VINFO_TYPE (inner_loop_cond_info) = loop_exit_ctrl_vec_info_type;
      /* If we have an estimate on the number of iterations of the inner
	 loop use that to limit the scale for costing, otherwise use
	 --param vect-inner-loop-cost-factor literally.  */
      widest_int nit;
      if (estimated_stmt_executions (loop->inner, &nit))
	LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo)
	  = wi::smin (x: nit, param_vect_inner_loop_cost_factor).to_uhwi ();
    }

  return loop_vinfo;
}
1961 | |
1962 | |
1963 | |
/* Scan the loop stmts and dependent on whether there are any (non-)SLP
   statements update the vectorization factor.

   If every relevant statement is pure SLP, the vectorization factor
   becomes the SLP unrolling factor; otherwise it becomes the least
   common multiple of the current factor and the SLP unrolling
   factor.  */

static void
vect_update_vf_for_slp (loop_vec_info loop_vinfo)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  poly_uint64 vectorization_factor;
  int i;

  DUMP_VECT_SCOPE ("vect_update_vf_for_slp" );

  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  gcc_assert (known_ne (vectorization_factor, 0U));

  /* If all the stmts in the loop can be SLPed, we perform only SLP, and
     vectorization factor of the loop is the unrolling factor required by
     the SLP instances.  If that unrolling factor is 1, we say, that we
     perform pure SLP on loop - cross iteration parallelism is not
     exploited.  */
  bool only_slp_in_loop = true;
  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      /* Check the PHIs of this block.  */
      for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (i: si);
	   gsi_next (i: &si))
	{
	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (si.phi ());
	  if (!stmt_info)
	    continue;
	  if ((STMT_VINFO_RELEVANT_P (stmt_info)
	       || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
	      && !PURE_SLP_STMT (stmt_info))
	    /* STMT needs both SLP and loop-based vectorization.  */
	    only_slp_in_loop = false;
	}
      /* Check the non-PHI statements of this block.  */
      for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (i: si);
	   gsi_next (i: &si))
	{
	  if (is_gimple_debug (gs: gsi_stmt (i: si)))
	    continue;
	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (i: si));
	  stmt_info = vect_stmt_to_vectorize (stmt_info);
	  if ((STMT_VINFO_RELEVANT_P (stmt_info)
	       || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
	      && !PURE_SLP_STMT (stmt_info))
	    /* STMT needs both SLP and loop-based vectorization.  */
	    only_slp_in_loop = false;
	}
    }

  if (only_slp_in_loop)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Loop contains only SLP stmts\n" );
      vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
    }
  else
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Loop contains SLP and non-SLP stmts\n" );
      /* Both the vectorization factor and unroll factor have the form
	 GET_MODE_SIZE (loop_vinfo->vector_mode) * X for some rational X,
	 so they must have a common multiple.  */
      vectorization_factor
	= force_common_multiple (a: vectorization_factor,
				 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
    }

  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "Updating vectorization factor to " );
      dump_dec (MSG_NOTE, vectorization_factor);
      dump_printf (MSG_NOTE, ".\n" );
    }
}
2046 | |
2047 | /* Return true if STMT_INFO describes a double reduction phi and if |
2048 | the other phi in the reduction is also relevant for vectorization. |
2049 | This rejects cases such as: |
2050 | |
2051 | outer1: |
2052 | x_1 = PHI <x_3(outer2), ...>; |
2053 | ... |
2054 | |
2055 | inner: |
2056 | x_2 = ...; |
2057 | ... |
2058 | |
2059 | outer2: |
2060 | x_3 = PHI <x_2(inner)>; |
2061 | |
2062 | if nothing in x_2 or elsewhere makes x_1 relevant. */ |
2063 | |
2064 | static bool |
2065 | vect_active_double_reduction_p (stmt_vec_info stmt_info) |
2066 | { |
2067 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def) |
2068 | return false; |
2069 | |
2070 | return STMT_VINFO_RELEVANT_P (STMT_VINFO_REDUC_DEF (stmt_info)); |
2071 | } |
2072 | |
/* Function vect_analyze_loop_operations.

   Scan the loop stmts and make sure they are all vectorizable.
   On success the per-statement costs collected during analysis have
   been added to loop_vinfo->vector_costs.  Fails (besides for
   unvectorizable statements) when nothing in the loop needs
   vectorizing at all.  */

static opt_result
vect_analyze_loop_operations (loop_vec_info loop_vinfo)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  int i;
  stmt_vec_info stmt_info;
  bool need_to_vectorize = false;
  bool ok;

  DUMP_VECT_SCOPE ("vect_analyze_loop_operations" );

  auto_vec<stmt_info_for_cost> cost_vec;

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (i: si);
	   gsi_next (i: &si))
	{
	  gphi *phi = si.phi ();
	  ok = true;

	  stmt_info = loop_vinfo->lookup_stmt (phi);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "examining phi: %G" ,
			     (gimple *) phi);
	  /* Virtual (memory SSA) PHIs need no vectorization.  */
	  if (virtual_operand_p (op: gimple_phi_result (gs: phi)))
	    continue;

	  /* Inner-loop loop-closed exit phi in outer-loop vectorization
	     (i.e., a phi in the tail of the outer-loop).  */
	  if (! is_loop_header_bb_p (bb))
	    {
	      /* FORNOW: we currently don't support the case that these phis
		 are not used in the outerloop (unless it is double reduction,
		 i.e., this phi is vect_reduction_def), cause this case
		 requires to actually do something here.  */
	      if (STMT_VINFO_LIVE_P (stmt_info)
		  && !vect_active_double_reduction_p (stmt_info))
		return opt_result::failure_at (loc: phi,
					       fmt: "Unsupported loop-closed phi"
					       " in outer-loop.\n" );

	      /* If PHI is used in the outer loop, we check that its operand
		 is defined in the inner loop.  */
	      if (STMT_VINFO_RELEVANT_P (stmt_info))
		{
		  tree phi_op;

		  if (gimple_phi_num_args (gs: phi) != 1)
		    return opt_result::failure_at (loc: phi, fmt: "unsupported phi" );

		  phi_op = PHI_ARG_DEF (phi, 0);
		  stmt_vec_info op_def_info = loop_vinfo->lookup_def (phi_op);
		  if (!op_def_info)
		    return opt_result::failure_at (loc: phi, fmt: "unsupported phi\n" );

		  if (STMT_VINFO_RELEVANT (op_def_info) != vect_used_in_outer
		      && (STMT_VINFO_RELEVANT (op_def_info)
			  != vect_used_in_outer_by_reduction))
		    return opt_result::failure_at (loc: phi, fmt: "unsupported phi\n" );

		  if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def
		       || (STMT_VINFO_DEF_TYPE (stmt_info)
			   == vect_double_reduction_def))
		      && !vectorizable_lc_phi (loop_vinfo,
					       stmt_info, NULL, NULL))
		    return opt_result::failure_at (loc: phi, fmt: "unsupported phi\n" );
		}

	      continue;
	    }

	  gcc_assert (stmt_info);

	  if ((STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
	       || STMT_VINFO_LIVE_P (stmt_info))
	      && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def
	      && STMT_VINFO_DEF_TYPE (stmt_info) != vect_first_order_recurrence)
	    /* A scalar-dependence cycle that we don't support.  */
	    return opt_result::failure_at (loc: phi,
					   fmt: "not vectorized:"
					   " scalar dependence cycle.\n" );

	  /* Dispatch on the def type: induction, reduction variants,
	     or first-order recurrence.  Pure SLP PHIs are analyzed
	     separately (see below).  */
	  if (STMT_VINFO_RELEVANT_P (stmt_info))
	    {
	      need_to_vectorize = true;
	      if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
		  && ! PURE_SLP_STMT (stmt_info))
		ok = vectorizable_induction (loop_vinfo,
					     stmt_info, NULL, NULL,
					     &cost_vec);
	      else if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
			|| (STMT_VINFO_DEF_TYPE (stmt_info)
			    == vect_double_reduction_def)
			|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
		       && ! PURE_SLP_STMT (stmt_info))
		ok = vectorizable_reduction (loop_vinfo,
					     stmt_info, NULL, NULL, &cost_vec);
	      else if ((STMT_VINFO_DEF_TYPE (stmt_info)
			== vect_first_order_recurrence)
		       && ! PURE_SLP_STMT (stmt_info))
		ok = vectorizable_recurr (loop_vinfo, stmt_info, NULL, NULL,
					  &cost_vec);
	    }

	  /* SLP PHIs are tested by vect_slp_analyze_node_operations.  */
	  if (ok
	      && STMT_VINFO_LIVE_P (stmt_info)
	      && !PURE_SLP_STMT (stmt_info))
	    ok = vectorizable_live_operation (loop_vinfo, stmt_info, NULL, NULL,
					      -1, false, &cost_vec);

	  if (!ok)
	    return opt_result::failure_at (loc: phi,
					   fmt: "not vectorized: relevant phi not "
					   "supported: %G" ,
					   static_cast <gimple *> (phi));
	}

      /* Analyze all non-PHI statements of the block.  */
      for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (i: si);
	   gsi_next (i: &si))
	{
	  gimple *stmt = gsi_stmt (i: si);
	  if (!gimple_clobber_p (s: stmt)
	      && !is_gimple_debug (gs: stmt))
	    {
	      opt_result res
		= vect_analyze_stmt (loop_vinfo,
				     loop_vinfo->lookup_stmt (stmt),
				     &need_to_vectorize,
				     NULL, NULL, &cost_vec);
	      if (!res)
		return res;
	    }
	}
    } /* bbs */

  add_stmt_costs (costs: loop_vinfo->vector_costs, cost_vec: &cost_vec);

  /* All operations in the loop are either irrelevant (deal with loop
     control, or dead), or only used outside the loop and can be moved
     out of the loop (e.g. invariants, inductions).  The loop can be
     optimized away by scalar optimizations.  We're better off not
     touching this loop.  */
  if (!need_to_vectorize)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "All the computation can be taken out of the loop.\n" );
      return opt_result::failure_at
	(loc: vect_location,
	 fmt: "not vectorized: redundant loop. no profit to vectorize.\n" );
    }

  return opt_result::success ();
}
2237 | |
2238 | /* Return true if we know that the iteration count is smaller than the |
2239 | vectorization factor. Return false if it isn't, or if we can't be sure |
2240 | either way. */ |
2241 | |
2242 | static bool |
2243 | vect_known_niters_smaller_than_vf (loop_vec_info loop_vinfo) |
2244 | { |
2245 | unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo); |
2246 | |
2247 | HOST_WIDE_INT max_niter; |
2248 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) |
2249 | max_niter = LOOP_VINFO_INT_NITERS (loop_vinfo); |
2250 | else |
2251 | max_niter = max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo)); |
2252 | |
2253 | if (max_niter != -1 && (unsigned HOST_WIDE_INT) max_niter < assumed_vf) |
2254 | return true; |
2255 | |
2256 | return false; |
2257 | } |
2258 | |
/* Analyze the cost of the loop described by LOOP_VINFO.  Decide if it
   is worthwhile to vectorize.  Return 1 if definitely yes, 0 if
   definitely no, or -1 if it's worth retrying.

   *SUGGESTED_UNROLL_FACTOR may be set by the target's cost model via
   vect_estimate_min_profitable_iters.  */

static int
vect_analyze_loop_costing (loop_vec_info loop_vinfo,
			   unsigned *suggested_unroll_factor)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);

  /* Only loops that can handle partially-populated vectors can have iteration
     counts less than the vectorization factor.  */
  if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
      && vect_known_niters_smaller_than_vf (loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: iteration count smaller than "
			 "vectorization factor.\n" );
      return 0;
    }

  /* If we know the number of iterations we can do better, for the
     epilogue we can also decide whether the main loop leaves us
     with enough iterations, prefering a smaller vector epilog then
     also possibly used for the case we skip the vector loop.  */
  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
    {
      widest_int scalar_niters
	= wi::to_widest (LOOP_VINFO_NITERSM1 (loop_vinfo)) + 1;
      if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
	{
	  /* For an epilogue, compute how many scalar iterations remain
	     after the main vector loop (and any prologue peeling).  */
	  loop_vec_info orig_loop_vinfo
	    = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
	  unsigned lowest_vf
	    = constant_lower_bound (LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo));
	  int prolog_peeling = 0;
	  if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
	    prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo);
	  if (prolog_peeling >= 0
	      && known_eq (LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo),
			   lowest_vf))
	    {
	      unsigned gap
		= LOOP_VINFO_PEELING_FOR_GAPS (orig_loop_vinfo) ? 1 : 0;
	      scalar_niters = ((scalar_niters - gap - prolog_peeling)
			       % lowest_vf + gap);
	    }
	}
      /* Reject vectorizing for a single scalar iteration, even if
	 we could in principle implement that using partial vectors.  */
      unsigned peeling_gap = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
      if (scalar_niters <= peeling_gap + 1)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: loop only has a single "
			     "scalar iteration.\n" );
	  return 0;
	}

      if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
	{
	  /* Check that the loop processes at least one full vector.  */
	  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
	  if (known_lt (scalar_niters, vf))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "loop does not have enough iterations "
				 "to support vectorization.\n" );
	      return 0;
	    }

	  /* If we need to peel an extra epilogue iteration to handle data
	     accesses with gaps, check that there are enough scalar iterations
	     available.

	     The check above is redundant with this one when peeling for gaps,
	     but the distinction is useful for diagnostics.  */
	  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
	      && known_le (scalar_niters, vf))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "loop does not have enough iterations "
				 "to support peeling for gaps.\n" );
	      return 0;
	    }
	}
    }

  /* If using the "very cheap" model. reject cases in which we'd keep
     a copy of the scalar code (even if we might be able to vectorize it).  */
  if (loop_cost_model (loop) == VECT_COST_MODEL_VERY_CHEAP
      && (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
	  || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
	  || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "some scalar iterations would need to be peeled\n" );
      return 0;
    }

  /* Ask the (target) cost model for the profitability thresholds.  */
  int min_profitable_iters, min_profitable_estimate;
  vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
				      &min_profitable_estimate,
				      suggested_unroll_factor);

  if (min_profitable_iters < 0)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: vectorization not profitable.\n" );
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: vector version will never be "
			 "profitable.\n" );
      return -1;
    }

  int min_scalar_loop_bound = (param_min_vect_loop_bound
			       * assumed_vf);

  /* Use the cost model only if it is more conservative than user specified
     threshold.  */
  unsigned int th = (unsigned) MAX (min_scalar_loop_bound,
				    min_profitable_iters);

  LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = th;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) < th)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: vectorization not profitable.\n" );
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "not vectorized: iteration count smaller than user "
			 "specified loop bound parameter or minimum profitable "
			 "iterations (whichever is more conservative).\n" );
      return 0;
    }

  /* The static profitablity threshold min_profitable_estimate includes
     the cost of having to check at runtime whether the scalar loop
     should be used instead.  If it turns out that we don't need or want
     such a check, the threshold we should use for the static estimate
     is simply the point at which the vector loop becomes more profitable
     than the scalar loop.  */
  if (min_profitable_estimate > min_profitable_iters
      && !LOOP_REQUIRES_VERSIONING (loop_vinfo)
      && !LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
      && !LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
      && !vect_apply_runtime_profitability_check_p (loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "no need for a runtime"
			 " choice between the scalar and vector loops\n" );
      min_profitable_estimate = min_profitable_iters;
    }

  /* If the vector loop needs multiple iterations to be beneficial then
     things are probably too close to call, and the conservative thing
     would be to stick with the scalar code.  */
  if (loop_cost_model (loop) == VECT_COST_MODEL_VERY_CHEAP
      && min_profitable_estimate > (int) vect_vf_for_cost (loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "one iteration of the vector loop would be"
			 " more expensive than the equivalent number of"
			 " iterations of the scalar loop\n" );
      return 0;
    }

  HOST_WIDE_INT estimated_niter;

  /* If we are vectorizing an epilogue then we know the maximum number of
     scalar iterations it will cover is at least one lower than the
     vectorization factor of the main loop.  */
  if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
    estimated_niter
      = vect_vf_for_cost (LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)) - 1;
  else
    {
      estimated_niter = estimated_stmt_executions_int (loop);
      if (estimated_niter == -1)
	estimated_niter = likely_max_stmt_executions_int (loop);
    }
  if (estimated_niter != -1
      && ((unsigned HOST_WIDE_INT) estimated_niter
	  < MAX (th, (unsigned) min_profitable_estimate)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: estimated iteration count too "
			 "small.\n" );
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "not vectorized: estimated iteration count smaller "
			 "than specified loop bound parameter or minimum "
			 "profitable iterations (whichever is more "
			 "conservative).\n" );
      return -1;
    }

  return 1;
}
2471 | |
/* Collect the data references of all statements in the body blocks BBS
   of LOOP into DATAREFS, counting the analyzed statements in *N_STMTS.
   Returns an opt_result failure when a statement's data reference
   cannot be analyzed (except for calls with usable simd clones in
   safelen loops, which are skipped) or when the number of datarefs
   exceeds --param loop-max-datarefs-for-datadeps.  */
static opt_result
vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs,
			   vec<data_reference_p> *datarefs,
			   unsigned int *n_stmts)
{
  *n_stmts = 0;
  for (unsigned i = 0; i < loop->num_nodes; i++)
    for (gimple_stmt_iterator gsi = gsi_start_bb (bb: bbs[i]);
	 !gsi_end_p (i: gsi); gsi_next (i: &gsi))
      {
	gimple *stmt = gsi_stmt (i: gsi);
	if (is_gimple_debug (gs: stmt))
	  continue;
	++(*n_stmts);
	opt_result res = vect_find_stmt_data_reference (loop, stmt, datarefs,
							NULL, 0);
	if (!res)
	  {
	    /* In a loop annotated with safelen, a call whose target has
	       simd clones may still be vectorizable even though we could
	       not analyze its data references, so don't fail for it.  */
	    if (is_gimple_call (gs: stmt) && loop->safelen)
	      {
		tree fndecl = gimple_call_fndecl (gs: stmt), op;
		/* For IFN_MASK_CALL the real callee is the first
		   argument.  */
		if (fndecl == NULL_TREE
		    && gimple_call_internal_p (gs: stmt, fn: IFN_MASK_CALL))
		  {
		    fndecl = gimple_call_arg (gs: stmt, index: 0);
		    gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
		    fndecl = TREE_OPERAND (fndecl, 0);
		    gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
		  }
		if (fndecl != NULL_TREE)
		  {
		    cgraph_node *node = cgraph_node::get (decl: fndecl);
		    if (node != NULL && node->simd_clones != NULL)
		      {
			/* Look for an argument (or the lhs) that is itself
			   a memory reference; only then must the call's
			   datarefs really be analyzable.  */
			unsigned int j, n = gimple_call_num_args (gs: stmt);
			for (j = 0; j < n; j++)
			  {
			    op = gimple_call_arg (gs: stmt, index: j);
			    if (DECL_P (op)
				|| (REFERENCE_CLASS_P (op)
				    && get_base_address (t: op)))
			      break;
			  }
			op = gimple_call_lhs (gs: stmt);
			/* Ignore #pragma omp declare simd functions
			   if they don't have data references in the
			   call stmt itself.  */
			if (j == n
			    && !(op
				 && (DECL_P (op)
				     || (REFERENCE_CLASS_P (op)
					 && get_base_address (t: op)))))
			  continue;
		      }
		  }
	      }
	    return res;
	  }
	/* If dependence analysis will give up due to the limit on the
	   number of datarefs stop here and fail fatally.  */
	if (datarefs->length ()
	    > (unsigned)param_loop_max_datarefs_for_datadeps)
	  return opt_result::failure_at (loc: stmt, fmt: "exceeded param "
					 "loop-max-datarefs-for-datadeps\n" );
      }
  return opt_result::success ();
}
2539 | |
2540 | /* Look for SLP-only access groups and turn each individual access into its own |
2541 | group. */ |
2542 | static void |
2543 | vect_dissolve_slp_only_groups (loop_vec_info loop_vinfo) |
2544 | { |
2545 | unsigned int i; |
2546 | struct data_reference *dr; |
2547 | |
2548 | DUMP_VECT_SCOPE ("vect_dissolve_slp_only_groups" ); |
2549 | |
2550 | vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); |
2551 | FOR_EACH_VEC_ELT (datarefs, i, dr) |
2552 | { |
2553 | gcc_assert (DR_REF (dr)); |
2554 | stmt_vec_info stmt_info |
2555 | = vect_stmt_to_vectorize (stmt_info: loop_vinfo->lookup_stmt (DR_STMT (dr))); |
2556 | |
2557 | /* Check if the load is a part of an interleaving chain. */ |
2558 | if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) |
2559 | { |
2560 | stmt_vec_info first_element = DR_GROUP_FIRST_ELEMENT (stmt_info); |
2561 | dr_vec_info *dr_info = STMT_VINFO_DR_INFO (first_element); |
2562 | unsigned int group_size = DR_GROUP_SIZE (first_element); |
2563 | |
2564 | /* Check if SLP-only groups. */ |
2565 | if (!STMT_SLP_TYPE (stmt_info) |
2566 | && STMT_VINFO_SLP_VECT_ONLY (first_element)) |
2567 | { |
2568 | /* Dissolve the group. */ |
2569 | STMT_VINFO_SLP_VECT_ONLY (first_element) = false; |
2570 | |
2571 | stmt_vec_info vinfo = first_element; |
2572 | while (vinfo) |
2573 | { |
2574 | stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (vinfo); |
2575 | DR_GROUP_FIRST_ELEMENT (vinfo) = vinfo; |
2576 | DR_GROUP_NEXT_ELEMENT (vinfo) = NULL; |
2577 | DR_GROUP_SIZE (vinfo) = 1; |
2578 | if (STMT_VINFO_STRIDED_P (first_element) |
2579 | /* We cannot handle stores with gaps. */ |
2580 | || DR_IS_WRITE (dr_info->dr)) |
2581 | { |
2582 | STMT_VINFO_STRIDED_P (vinfo) = true; |
2583 | DR_GROUP_GAP (vinfo) = 0; |
2584 | } |
2585 | else |
2586 | DR_GROUP_GAP (vinfo) = group_size - 1; |
2587 | /* Duplicate and adjust alignment info, it needs to |
2588 | be present on each group leader, see dr_misalignment. */ |
2589 | if (vinfo != first_element) |
2590 | { |
2591 | dr_vec_info *dr_info2 = STMT_VINFO_DR_INFO (vinfo); |
2592 | dr_info2->target_alignment = dr_info->target_alignment; |
2593 | int misalignment = dr_info->misalignment; |
2594 | if (misalignment != DR_MISALIGNMENT_UNKNOWN) |
2595 | { |
2596 | HOST_WIDE_INT diff |
2597 | = (TREE_INT_CST_LOW (DR_INIT (dr_info2->dr)) |
2598 | - TREE_INT_CST_LOW (DR_INIT (dr_info->dr))); |
2599 | unsigned HOST_WIDE_INT align_c |
2600 | = dr_info->target_alignment.to_constant (); |
2601 | misalignment = (misalignment + diff) % align_c; |
2602 | } |
2603 | dr_info2->misalignment = misalignment; |
2604 | } |
2605 | vinfo = next; |
2606 | } |
2607 | } |
2608 | } |
2609 | } |
2610 | } |
2611 | |
/* Determine if operating on full vectors for LOOP_VINFO might leave
   some scalar iterations still to do.  If so, decide how we should
   handle those scalar iterations.  The possibilities are:

   (1) Make LOOP_VINFO operate on partial vectors instead of full vectors.
       In this case:

	 LOOP_VINFO_USING_PARTIAL_VECTORS_P == true
	 LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == false
	 LOOP_VINFO_PEELING_FOR_NITER == false

   (2) Make LOOP_VINFO operate on full vectors and use an epilogue loop
       to handle the remaining scalar iterations.  In this case:

	 LOOP_VINFO_USING_PARTIAL_VECTORS_P == false
	 LOOP_VINFO_PEELING_FOR_NITER == true

       There are two choices:

       (2a) Consider vectorizing the epilogue loop at the same VF as the
	    main loop, but using partial vectors instead of full vectors.
	    In this case:

	      LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == true

       (2b) Consider vectorizing the epilogue loop at lower VFs only.
	    In this case:

	      LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == false
 */

opt_result
vect_determine_partial_vectors_and_peeling (loop_vec_info loop_vinfo)
{
  /* Determine whether there would be any scalar iterations left over.  */
  bool need_peeling_or_partial_vectors_p
    = vect_need_peeling_or_partial_vectors_p (loop_vinfo);

  /* Decide whether to vectorize the loop with partial vectors.  */
  LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = false;
  LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo) = false;
  if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
      && need_peeling_or_partial_vectors_p)
    {
      /* For partial-vector-usage=1, try to push the handling of partial
	 vectors to the epilogue, with the main loop continuing to operate
	 on full vectors.

	 If we are unrolling we also do not want to use partial vectors.  This
	 is to avoid the overhead of generating multiple masks and also to
	 avoid having to execute entire iterations of FALSE masked instructions
	 when dealing with one or less full iterations.

	 ??? We could then end up failing to use partial vectors if we
	 decide to peel iterations into a prologue, and if the main loop
	 then ends up processing fewer than VF iterations.  */
      if ((param_vect_partial_vector_usage == 1
	   || loop_vinfo->suggested_unroll_factor > 1)
	  && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
	  && !vect_known_niters_smaller_than_vf (loop_vinfo))
	LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo) = true;
      else
	LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = true;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "operating on %s vectors%s.\n",
		     LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
		     ? "partial" : "full",
		     LOOP_VINFO_EPILOGUE_P (loop_vinfo)
		     ? " for epilogue loop" : "");

  LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
    = (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
       && need_peeling_or_partial_vectors_p);

  /* LOOP_VINFO_USING_SELECT_VL_P is set to true before loop vectorization
     analysis, when we don't yet know whether the loop will be vectorized
     using partial vectors (for more details see tree-vect-loop-manip.cc).

     However, the SELECT_VL vectorization style should only be applied to
     partially-vectorized loops, since SELECT_VL is the GIMPLE IR that
     calculates the number of elements to be processed in each iteration.

     So after loop vectorization analysis, clear LOOP_VINFO_USING_SELECT_VL_P
     if the loop is not using partial vectors.  */
  if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
    LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) = false;

  return opt_result::success ();
}
2704 | |
/* Function vect_analyze_loop_2.

   Apply a set of analyses on the loop specified by LOOP_VINFO; the
   different analyses record information in members of LOOP_VINFO.
   FATAL indicates whether some analysis met a fatal error.  If the
   pointer SUGGESTED_UNROLL_FACTOR is non-NULL, it is intended to be
   filled with the suggested unroll factor that is worked out, while a
   NULL pointer indicates that we are applying a previously suggested
   unroll factor.  SLP_DONE_FOR_SUGGESTED_UF holds the SLP decision made
   when the suggested unroll factor was worked out.  */
2715 | static opt_result |
2716 | vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, |
2717 | unsigned *suggested_unroll_factor, |
2718 | bool& slp_done_for_suggested_uf) |
2719 | { |
2720 | opt_result ok = opt_result::success (); |
2721 | int res; |
2722 | unsigned int max_vf = MAX_VECTORIZATION_FACTOR; |
2723 | poly_uint64 min_vf = 2; |
2724 | loop_vec_info orig_loop_vinfo = NULL; |
2725 | |
2726 | /* If we are dealing with an epilogue then orig_loop_vinfo points to the |
2727 | loop_vec_info of the first vectorized loop. */ |
2728 | if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)) |
2729 | orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo); |
2730 | else |
2731 | orig_loop_vinfo = loop_vinfo; |
2732 | gcc_assert (orig_loop_vinfo); |
2733 | |
2734 | /* The first group of checks is independent of the vector size. */ |
2735 | fatal = true; |
2736 | |
2737 | if (LOOP_VINFO_SIMD_IF_COND (loop_vinfo) |
2738 | && integer_zerop (LOOP_VINFO_SIMD_IF_COND (loop_vinfo))) |
2739 | return opt_result::failure_at (loc: vect_location, |
2740 | fmt: "not vectorized: simd if(0)\n" ); |
2741 | |
2742 | /* Find all data references in the loop (which correspond to vdefs/vuses) |
2743 | and analyze their evolution in the loop. */ |
2744 | |
2745 | loop_p loop = LOOP_VINFO_LOOP (loop_vinfo); |
2746 | |
2747 | /* Gather the data references and count stmts in the loop. */ |
2748 | if (!LOOP_VINFO_DATAREFS (loop_vinfo).exists ()) |
2749 | { |
2750 | opt_result res |
2751 | = vect_get_datarefs_in_loop (loop, LOOP_VINFO_BBS (loop_vinfo), |
2752 | datarefs: &LOOP_VINFO_DATAREFS (loop_vinfo), |
2753 | n_stmts: &LOOP_VINFO_N_STMTS (loop_vinfo)); |
2754 | if (!res) |
2755 | { |
2756 | if (dump_enabled_p ()) |
2757 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2758 | "not vectorized: loop contains function " |
2759 | "calls or data references that cannot " |
2760 | "be analyzed\n" ); |
2761 | return res; |
2762 | } |
2763 | loop_vinfo->shared->save_datarefs (); |
2764 | } |
2765 | else |
2766 | loop_vinfo->shared->check_datarefs (); |
2767 | |
2768 | /* Analyze the data references and also adjust the minimal |
2769 | vectorization factor according to the loads and stores. */ |
2770 | |
2771 | ok = vect_analyze_data_refs (loop_vinfo, &min_vf, &fatal); |
2772 | if (!ok) |
2773 | { |
2774 | if (dump_enabled_p ()) |
2775 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2776 | "bad data references.\n" ); |
2777 | return ok; |
2778 | } |
2779 | |
2780 | /* Check if we are applying unroll factor now. */ |
2781 | bool applying_suggested_uf = loop_vinfo->suggested_unroll_factor > 1; |
2782 | gcc_assert (!applying_suggested_uf || !suggested_unroll_factor); |
2783 | |
2784 | /* If the slp decision is false when suggested unroll factor is worked |
2785 | out, and we are applying suggested unroll factor, we can simply skip |
2786 | all slp related analyses this time. */ |
2787 | bool slp = !applying_suggested_uf || slp_done_for_suggested_uf; |
2788 | |
2789 | /* Classify all cross-iteration scalar data-flow cycles. |
2790 | Cross-iteration cycles caused by virtual phis are analyzed separately. */ |
2791 | vect_analyze_scalar_cycles (loop_vinfo, slp); |
2792 | |
2793 | vect_pattern_recog (loop_vinfo); |
2794 | |
2795 | vect_fixup_scalar_cycles_with_patterns (loop_vinfo); |
2796 | |
2797 | /* Analyze the access patterns of the data-refs in the loop (consecutive, |
2798 | complex, etc.). FORNOW: Only handle consecutive access pattern. */ |
2799 | |
2800 | ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL); |
2801 | if (!ok) |
2802 | { |
2803 | if (dump_enabled_p ()) |
2804 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2805 | "bad data access.\n" ); |
2806 | return ok; |
2807 | } |
2808 | |
2809 | /* Data-flow analysis to detect stmts that do not need to be vectorized. */ |
2810 | |
2811 | ok = vect_mark_stmts_to_be_vectorized (loop_vinfo, &fatal); |
2812 | if (!ok) |
2813 | { |
2814 | if (dump_enabled_p ()) |
2815 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2816 | "unexpected pattern.\n" ); |
2817 | return ok; |
2818 | } |
2819 | |
2820 | /* While the rest of the analysis below depends on it in some way. */ |
2821 | fatal = false; |
2822 | |
2823 | /* Analyze data dependences between the data-refs in the loop |
2824 | and adjust the maximum vectorization factor according to |
2825 | the dependences. |
2826 | FORNOW: fail at the first data dependence that we encounter. */ |
2827 | |
2828 | ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf); |
2829 | if (!ok) |
2830 | { |
2831 | if (dump_enabled_p ()) |
2832 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2833 | "bad data dependence.\n" ); |
2834 | return ok; |
2835 | } |
2836 | if (max_vf != MAX_VECTORIZATION_FACTOR |
2837 | && maybe_lt (a: max_vf, b: min_vf)) |
2838 | return opt_result::failure_at (loc: vect_location, fmt: "bad data dependence.\n" ); |
2839 | LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo) = max_vf; |
2840 | |
2841 | ok = vect_determine_vectorization_factor (loop_vinfo); |
2842 | if (!ok) |
2843 | { |
2844 | if (dump_enabled_p ()) |
2845 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2846 | "can't determine vectorization factor.\n" ); |
2847 | return ok; |
2848 | } |
2849 | |
2850 | /* Compute the scalar iteration cost. */ |
2851 | vect_compute_single_scalar_iteration_cost (loop_vinfo); |
2852 | |
2853 | poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
2854 | |
2855 | if (slp) |
2856 | { |
2857 | /* Check the SLP opportunities in the loop, analyze and build |
2858 | SLP trees. */ |
2859 | ok = vect_analyze_slp (loop_vinfo, LOOP_VINFO_N_STMTS (loop_vinfo)); |
2860 | if (!ok) |
2861 | return ok; |
2862 | |
2863 | /* If there are any SLP instances mark them as pure_slp. */ |
2864 | slp = vect_make_slp_decision (loop_vinfo); |
2865 | if (slp) |
2866 | { |
2867 | /* Find stmts that need to be both vectorized and SLPed. */ |
2868 | vect_detect_hybrid_slp (loop_vinfo); |
2869 | |
2870 | /* Update the vectorization factor based on the SLP decision. */ |
2871 | vect_update_vf_for_slp (loop_vinfo); |
2872 | |
2873 | /* Optimize the SLP graph with the vectorization factor fixed. */ |
2874 | vect_optimize_slp (loop_vinfo); |
2875 | |
2876 | /* Gather the loads reachable from the SLP graph entries. */ |
2877 | vect_gather_slp_loads (loop_vinfo); |
2878 | } |
2879 | } |
2880 | |
2881 | bool saved_can_use_partial_vectors_p |
2882 | = LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo); |
2883 | |
2884 | /* We don't expect to have to roll back to anything other than an empty |
2885 | set of rgroups. */ |
2886 | gcc_assert (LOOP_VINFO_MASKS (loop_vinfo).is_empty ()); |
2887 | |
2888 | /* This is the point where we can re-start analysis with SLP forced off. */ |
2889 | start_over: |
2890 | |
  /* Apply the suggested unrolling factor, this was determined by the backend
     during finish_cost the first time we ran the analysis for this
     vector mode.  */
2894 | if (applying_suggested_uf) |
2895 | LOOP_VINFO_VECT_FACTOR (loop_vinfo) *= loop_vinfo->suggested_unroll_factor; |
2896 | |
2897 | /* Now the vectorization factor is final. */ |
2898 | poly_uint64 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
2899 | gcc_assert (known_ne (vectorization_factor, 0U)); |
2900 | |
2901 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ()) |
2902 | { |
2903 | dump_printf_loc (MSG_NOTE, vect_location, |
2904 | "vectorization_factor = " ); |
2905 | dump_dec (MSG_NOTE, vectorization_factor); |
2906 | dump_printf (MSG_NOTE, ", niters = %wd\n" , |
2907 | LOOP_VINFO_INT_NITERS (loop_vinfo)); |
2908 | } |
2909 | |
2910 | if (max_vf != MAX_VECTORIZATION_FACTOR |
2911 | && maybe_lt (a: max_vf, LOOP_VINFO_VECT_FACTOR (loop_vinfo))) |
2912 | return opt_result::failure_at (loc: vect_location, fmt: "bad data dependence.\n" ); |
2913 | |
2914 | loop_vinfo->vector_costs = init_cost (vinfo: loop_vinfo, costing_for_scalar: false); |
2915 | |
2916 | /* Analyze the alignment of the data-refs in the loop. |
2917 | Fail if a data reference is found that cannot be vectorized. */ |
2918 | |
2919 | ok = vect_analyze_data_refs_alignment (loop_vinfo); |
2920 | if (!ok) |
2921 | { |
2922 | if (dump_enabled_p ()) |
2923 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2924 | "bad data alignment.\n" ); |
2925 | return ok; |
2926 | } |
2927 | |
2928 | /* Prune the list of ddrs to be tested at run-time by versioning for alias. |
2929 | It is important to call pruning after vect_analyze_data_ref_accesses, |
2930 | since we use grouping information gathered by interleaving analysis. */ |
2931 | ok = vect_prune_runtime_alias_test_list (loop_vinfo); |
2932 | if (!ok) |
2933 | return ok; |
2934 | |
2935 | /* Do not invoke vect_enhance_data_refs_alignment for epilogue |
2936 | vectorization, since we do not want to add extra peeling or |
2937 | add versioning for alignment. */ |
2938 | if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)) |
2939 | /* This pass will decide on using loop versioning and/or loop peeling in |
2940 | order to enhance the alignment of data references in the loop. */ |
2941 | ok = vect_enhance_data_refs_alignment (loop_vinfo); |
2942 | if (!ok) |
2943 | return ok; |
2944 | |
2945 | if (slp) |
2946 | { |
2947 | /* Analyze operations in the SLP instances. Note this may |
2948 | remove unsupported SLP instances which makes the above |
2949 | SLP kind detection invalid. */ |
2950 | unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length (); |
2951 | vect_slp_analyze_operations (loop_vinfo); |
2952 | if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size) |
2953 | { |
2954 | ok = opt_result::failure_at (loc: vect_location, |
2955 | fmt: "unsupported SLP instances\n" ); |
2956 | goto again; |
2957 | } |
2958 | |
2959 | /* Check whether any load in ALL SLP instances is possibly permuted. */ |
2960 | slp_tree load_node, slp_root; |
2961 | unsigned i, x; |
2962 | slp_instance instance; |
2963 | bool can_use_lanes = true; |
2964 | FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), x, instance) |
2965 | { |
2966 | slp_root = SLP_INSTANCE_TREE (instance); |
2967 | int group_size = SLP_TREE_LANES (slp_root); |
2968 | tree vectype = SLP_TREE_VECTYPE (slp_root); |
2969 | bool loads_permuted = false; |
2970 | FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node) |
2971 | { |
2972 | if (!SLP_TREE_LOAD_PERMUTATION (load_node).exists ()) |
2973 | continue; |
2974 | unsigned j; |
2975 | stmt_vec_info load_info; |
2976 | FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info) |
2977 | if (SLP_TREE_LOAD_PERMUTATION (load_node)[j] != j) |
2978 | { |
2979 | loads_permuted = true; |
2980 | break; |
2981 | } |
2982 | } |
2983 | |
2984 | /* If the loads and stores can be handled with load/store-lane |
2985 | instructions record it and move on to the next instance. */ |
2986 | if (loads_permuted |
2987 | && SLP_INSTANCE_KIND (instance) == slp_inst_kind_store |
2988 | && vect_store_lanes_supported (vectype, group_size, false) |
2989 | != IFN_LAST) |
2990 | { |
2991 | FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node) |
2992 | if (STMT_VINFO_GROUPED_ACCESS |
2993 | (SLP_TREE_REPRESENTATIVE (load_node))) |
2994 | { |
2995 | stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT |
2996 | (SLP_TREE_REPRESENTATIVE (load_node)); |
2997 | /* Use SLP for strided accesses (or if we can't |
2998 | load-lanes). */ |
2999 | if (STMT_VINFO_STRIDED_P (stmt_vinfo) |
3000 | || vect_load_lanes_supported |
3001 | (STMT_VINFO_VECTYPE (stmt_vinfo), |
3002 | DR_GROUP_SIZE (stmt_vinfo), false) == IFN_LAST) |
3003 | break; |
3004 | } |
3005 | |
3006 | can_use_lanes |
3007 | = can_use_lanes && i == SLP_INSTANCE_LOADS (instance).length (); |
3008 | |
3009 | if (can_use_lanes && dump_enabled_p ()) |
3010 | dump_printf_loc (MSG_NOTE, vect_location, |
3011 | "SLP instance %p can use load/store-lanes\n" , |
3012 | (void *) instance); |
3013 | } |
3014 | else |
3015 | { |
3016 | can_use_lanes = false; |
3017 | break; |
3018 | } |
3019 | } |
3020 | |
3021 | /* If all SLP instances can use load/store-lanes abort SLP and try again |
3022 | with SLP disabled. */ |
3023 | if (can_use_lanes) |
3024 | { |
3025 | ok = opt_result::failure_at (loc: vect_location, |
3026 | fmt: "Built SLP cancelled: can use " |
3027 | "load/store-lanes\n" ); |
3028 | if (dump_enabled_p ()) |
3029 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3030 | "Built SLP cancelled: all SLP instances support " |
3031 | "load/store-lanes\n" ); |
3032 | goto again; |
3033 | } |
3034 | } |
3035 | |
3036 | /* Dissolve SLP-only groups. */ |
3037 | vect_dissolve_slp_only_groups (loop_vinfo); |
3038 | |
3039 | /* Scan all the remaining operations in the loop that are not subject |
3040 | to SLP and make sure they are vectorizable. */ |
3041 | ok = vect_analyze_loop_operations (loop_vinfo); |
3042 | if (!ok) |
3043 | { |
3044 | if (dump_enabled_p ()) |
3045 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3046 | "bad operation or unsupported loop bound.\n" ); |
3047 | return ok; |
3048 | } |
3049 | |
3050 | /* For now, we don't expect to mix both masking and length approaches for one |
3051 | loop, disable it if both are recorded. */ |
3052 | if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) |
3053 | && !LOOP_VINFO_MASKS (loop_vinfo).is_empty () |
3054 | && !LOOP_VINFO_LENS (loop_vinfo).is_empty ()) |
3055 | { |
3056 | if (dump_enabled_p ()) |
3057 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3058 | "can't vectorize a loop with partial vectors" |
3059 | " because we don't expect to mix different" |
3060 | " approaches with partial vectors for the" |
3061 | " same loop.\n" ); |
3062 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; |
3063 | } |
3064 | |
3065 | /* If we still have the option of using partial vectors, |
3066 | check whether we can generate the necessary loop controls. */ |
3067 | if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) |
3068 | { |
3069 | if (!LOOP_VINFO_MASKS (loop_vinfo).is_empty ()) |
3070 | { |
3071 | if (!vect_verify_full_masking (loop_vinfo) |
3072 | && !vect_verify_full_masking_avx512 (loop_vinfo)) |
3073 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; |
3074 | } |
3075 | else /* !LOOP_VINFO_LENS (loop_vinfo).is_empty () */ |
3076 | if (!vect_verify_loop_lens (loop_vinfo)) |
3077 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; |
3078 | } |
3079 | |
3080 | /* If we're vectorizing a loop that uses length "controls" and |
3081 | can iterate more than once, we apply decrementing IV approach |
3082 | in loop control. */ |
3083 | if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) |
3084 | && LOOP_VINFO_PARTIAL_VECTORS_STYLE (loop_vinfo) == vect_partial_vectors_len |
3085 | && LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo) == 0 |
3086 | && !(LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) |
3087 | && known_le (LOOP_VINFO_INT_NITERS (loop_vinfo), |
3088 | LOOP_VINFO_VECT_FACTOR (loop_vinfo)))) |
3089 | LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo) = true; |
3090 | |
  /* If a loop uses length controls and has a decrementing loop control IV,
     we will normally pass that IV through a MIN_EXPR to calculate the
     basis for the length controls.  E.g. in a loop that processes one
3094 | element per scalar iteration, the number of elements would be |
3095 | MIN_EXPR <N, VF>, where N is the number of scalar iterations left. |
3096 | |
3097 | This MIN_EXPR approach allows us to use pointer IVs with an invariant |
3098 | step, since only the final iteration of the vector loop can have |
3099 | inactive lanes. |
3100 | |
3101 | However, some targets have a dedicated instruction for calculating the |
3102 | preferred length, given the total number of elements that still need to |
3103 | be processed. This is encapsulated in the SELECT_VL internal function. |
3104 | |
3105 | If the target supports SELECT_VL, we can use it instead of MIN_EXPR |
3106 | to determine the basis for the length controls. However, unlike the |
3107 | MIN_EXPR calculation, the SELECT_VL calculation can decide to make |
3108 | lanes inactive in any iteration of the vector loop, not just the last |
3109 | iteration. This SELECT_VL approach therefore requires us to use pointer |
3110 | IVs with variable steps. |
3111 | |
3112 | Once we've decided how many elements should be processed by one |
3113 | iteration of the vector loop, we need to populate the rgroup controls. |
3114 | If a loop has multiple rgroups, we need to make sure that those rgroups |
3115 | "line up" (that is, they must be consistent about which elements are |
3116 | active and which aren't). This is done by vect_adjust_loop_lens_control. |
3117 | |
3118 | In principle, it would be possible to use vect_adjust_loop_lens_control |
3119 | on either the result of a MIN_EXPR or the result of a SELECT_VL. |
3120 | However: |
3121 | |
3122 | (1) In practice, it only makes sense to use SELECT_VL when a vector |
3123 | operation will be controlled directly by the result. It is not |
3124 | worth using SELECT_VL if it would only be the input to other |
3125 | calculations. |
3126 | |
3127 | (2) If we use SELECT_VL for an rgroup that has N controls, each associated |
3128 | pointer IV will need N updates by a variable amount (N-1 updates |
3129 | within the iteration and 1 update to move to the next iteration). |
3130 | |
3131 | Because of this, we prefer to use the MIN_EXPR approach whenever there |
3132 | is more than one length control. |
3133 | |
3134 | In addition, SELECT_VL always operates to a granularity of 1 unit. |
3135 | If we wanted to use it to control an SLP operation on N consecutive |
3136 | elements, we would need to make the SELECT_VL inputs measure scalar |
3137 | iterations (rather than elements) and then multiply the SELECT_VL |
3138 | result by N. But using SELECT_VL this way is inefficient because |
3139 | of (1) above. |
3140 | |
3141 | 2. We don't apply SELECT_VL on single-rgroup when both (1) and (2) are |
3142 | satisfied: |
3143 | |
3144 | (1). LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) is true. |
3145 | (2). LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant () is true. |
3146 | |
3147 | Since SELECT_VL (variable step) will make SCEV analysis failed and then |
3148 | we will fail to gain benefits of following unroll optimizations. We prefer |
3149 | using the MIN_EXPR approach in this situation. */ |
3150 | if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)) |
3151 | { |
3152 | tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo); |
3153 | if (direct_internal_fn_supported_p (IFN_SELECT_VL, iv_type, |
3154 | OPTIMIZE_FOR_SPEED) |
3155 | && LOOP_VINFO_LENS (loop_vinfo).length () == 1 |
3156 | && LOOP_VINFO_LENS (loop_vinfo)[0].factor == 1 && !slp |
3157 | && (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) |
3158 | || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ())) |
3159 | LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) = true; |
3160 | } |
3161 | |
3162 | /* Decide whether this loop_vinfo should use partial vectors or peeling, |
3163 | assuming that the loop will be used as a main loop. We will redo |
3164 | this analysis later if we instead decide to use the loop as an |
3165 | epilogue loop. */ |
3166 | ok = vect_determine_partial_vectors_and_peeling (loop_vinfo); |
3167 | if (!ok) |
3168 | return ok; |
3169 | |
3170 | /* If we're vectorizing an epilogue loop, the vectorized loop either needs |
3171 | to be able to handle fewer than VF scalars, or needs to have a lower VF |
3172 | than the main loop. */ |
3173 | if (LOOP_VINFO_EPILOGUE_P (loop_vinfo) |
3174 | && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) |
3175 | { |
3176 | poly_uint64 unscaled_vf |
3177 | = exact_div (LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo), |
3178 | b: orig_loop_vinfo->suggested_unroll_factor); |
3179 | if (maybe_ge (LOOP_VINFO_VECT_FACTOR (loop_vinfo), unscaled_vf)) |
3180 | return opt_result::failure_at (loc: vect_location, |
3181 | fmt: "Vectorization factor too high for" |
3182 | " epilogue loop.\n" ); |
3183 | } |
3184 | |
3185 | /* Check the costings of the loop make vectorizing worthwhile. */ |
3186 | res = vect_analyze_loop_costing (loop_vinfo, suggested_unroll_factor); |
3187 | if (res < 0) |
3188 | { |
3189 | ok = opt_result::failure_at (loc: vect_location, |
3190 | fmt: "Loop costings may not be worthwhile.\n" ); |
3191 | goto again; |
3192 | } |
3193 | if (!res) |
3194 | return opt_result::failure_at (loc: vect_location, |
3195 | fmt: "Loop costings not worthwhile.\n" ); |
3196 | |
3197 | /* If an epilogue loop is required make sure we can create one. */ |
3198 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) |
3199 | || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) |
3200 | || LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) |
3201 | { |
3202 | if (dump_enabled_p ()) |
3203 | dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n" ); |
3204 | if (!vect_can_advance_ivs_p (loop_vinfo) |
3205 | || !slpeel_can_duplicate_loop_p (loop, |
3206 | LOOP_VINFO_IV_EXIT (loop_vinfo), |
3207 | LOOP_VINFO_IV_EXIT (loop_vinfo))) |
3208 | { |
3209 | ok = opt_result::failure_at (loc: vect_location, |
3210 | fmt: "not vectorized: can't create required " |
3211 | "epilog loop\n" ); |
3212 | goto again; |
3213 | } |
3214 | } |
3215 | |
3216 | /* During peeling, we need to check if number of loop iterations is |
3217 | enough for both peeled prolog loop and vector loop. This check |
3218 | can be merged along with threshold check of loop versioning, so |
3219 | increase threshold for this case if necessary. |
3220 | |
3221 | If we are analyzing an epilogue we still want to check what its |
3222 | versioning threshold would be. If we decide to vectorize the epilogues we |
3223 | will want to use the lowest versioning threshold of all epilogues and main |
3224 | loop. This will enable us to enter a vectorized epilogue even when |
3225 | versioning the loop. We can't simply check whether the epilogue requires |
3226 | versioning though since we may have skipped some versioning checks when |
3227 | analyzing the epilogue. For instance, checks for alias versioning will be |
3228 | skipped when dealing with epilogues as we assume we already checked them |
3229 | for the main loop. So instead we always check the 'orig_loop_vinfo'. */ |
3230 | if (LOOP_REQUIRES_VERSIONING (orig_loop_vinfo)) |
3231 | { |
3232 | poly_uint64 niters_th = 0; |
3233 | unsigned int th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo); |
3234 | |
3235 | if (!vect_use_loop_mask_for_alignment_p (loop_vinfo)) |
3236 | { |
3237 | /* Niters for peeled prolog loop. */ |
3238 | if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0) |
3239 | { |
3240 | dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo); |
3241 | tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt); |
3242 | niters_th += TYPE_VECTOR_SUBPARTS (node: vectype) - 1; |
3243 | } |
3244 | else |
3245 | niters_th += LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); |
3246 | } |
3247 | |
3248 | /* Niters for at least one iteration of vectorized loop. */ |
3249 | if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) |
3250 | niters_th += LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
3251 | /* One additional iteration because of peeling for gap. */ |
3252 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) |
3253 | niters_th += 1; |
3254 | |
3255 | /* Use the same condition as vect_transform_loop to decide when to use |
3256 | the cost to determine a versioning threshold. */ |
3257 | if (vect_apply_runtime_profitability_check_p (loop_vinfo) |
3258 | && ordered_p (a: th, b: niters_th)) |
3259 | niters_th = ordered_max (a: poly_uint64 (th), b: niters_th); |
3260 | |
3261 | LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = niters_th; |
3262 | } |
3263 | |
3264 | gcc_assert (known_eq (vectorization_factor, |
3265 | LOOP_VINFO_VECT_FACTOR (loop_vinfo))); |
3266 | |
3267 | slp_done_for_suggested_uf = slp; |
3268 | |
3269 | /* Ok to vectorize! */ |
3270 | LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1; |
3271 | return opt_result::success (); |
3272 | |
3273 | again: |
3274 | /* Ensure that "ok" is false (with an opt_problem if dumping is enabled). */ |
3275 | gcc_assert (!ok); |
3276 | |
3277 | /* Try again with SLP forced off but if we didn't do any SLP there is |
3278 | no point in re-trying. */ |
3279 | if (!slp) |
3280 | return ok; |
3281 | |
3282 | /* If the slp decision is true when suggested unroll factor is worked |
3283 | out, and we are applying suggested unroll factor, we don't need to |
3284 | re-try any more. */ |
3285 | if (applying_suggested_uf && slp_done_for_suggested_uf) |
3286 | return ok; |
3287 | |
3288 | /* If there are reduction chains re-trying will fail anyway. */ |
3289 | if (! LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).is_empty ()) |
3290 | return ok; |
3291 | |
3292 | /* Likewise if the grouped loads or stores in the SLP cannot be handled |
3293 | via interleaving or lane instructions. */ |
3294 | slp_instance instance; |
3295 | slp_tree node; |
3296 | unsigned i, j; |
3297 | FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance) |
3298 | { |
3299 | stmt_vec_info vinfo; |
3300 | vinfo = SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0]; |
3301 | if (! STMT_VINFO_GROUPED_ACCESS (vinfo)) |
3302 | continue; |
3303 | vinfo = DR_GROUP_FIRST_ELEMENT (vinfo); |
3304 | unsigned int size = DR_GROUP_SIZE (vinfo); |
3305 | tree vectype = STMT_VINFO_VECTYPE (vinfo); |
3306 | if (vect_store_lanes_supported (vectype, size, false) == IFN_LAST |
3307 | && ! known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U) |
3308 | && ! vect_grouped_store_supported (vectype, size)) |
3309 | return opt_result::failure_at (loc: vinfo->stmt, |
3310 | fmt: "unsupported grouped store\n" ); |
3311 | FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, node) |
3312 | { |
3313 | vinfo = SLP_TREE_REPRESENTATIVE (node); |
3314 | if (STMT_VINFO_GROUPED_ACCESS (vinfo)) |
3315 | { |
3316 | vinfo = DR_GROUP_FIRST_ELEMENT (vinfo); |
3317 | bool single_element_p = !DR_GROUP_NEXT_ELEMENT (vinfo); |
3318 | size = DR_GROUP_SIZE (vinfo); |
3319 | vectype = STMT_VINFO_VECTYPE (vinfo); |
3320 | if (vect_load_lanes_supported (vectype, size, false) == IFN_LAST |
3321 | && ! vect_grouped_load_supported (vectype, single_element_p, |
3322 | size)) |
3323 | return opt_result::failure_at (loc: vinfo->stmt, |
3324 | fmt: "unsupported grouped load\n" ); |
3325 | } |
3326 | } |
3327 | } |
3328 | |
3329 | if (dump_enabled_p ()) |
3330 | dump_printf_loc (MSG_NOTE, vect_location, |
3331 | "re-trying with SLP disabled\n" ); |
3332 | |
3333 | /* Roll back state appropriately. No SLP this time. */ |
3334 | slp = false; |
3335 | /* Restore vectorization factor as it were without SLP. */ |
3336 | LOOP_VINFO_VECT_FACTOR (loop_vinfo) = saved_vectorization_factor; |
3337 | /* Free the SLP instances. */ |
3338 | FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), j, instance) |
3339 | vect_free_slp_instance (instance); |
3340 | LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release (); |
3341 | /* Reset SLP type to loop_vect on all stmts. */ |
3342 | for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i) |
3343 | { |
3344 | basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i]; |
3345 | for (gimple_stmt_iterator si = gsi_start_phis (bb); |
3346 | !gsi_end_p (i: si); gsi_next (i: &si)) |
3347 | { |
3348 | stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (i: si)); |
3349 | STMT_SLP_TYPE (stmt_info) = loop_vect; |
3350 | if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def |
3351 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) |
3352 | { |
3353 | /* vectorizable_reduction adjusts reduction stmt def-types, |
3354 | restore them to that of the PHI. */ |
3355 | STMT_VINFO_DEF_TYPE (STMT_VINFO_REDUC_DEF (stmt_info)) |
3356 | = STMT_VINFO_DEF_TYPE (stmt_info); |
3357 | STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize |
3358 | (STMT_VINFO_REDUC_DEF (stmt_info))) |
3359 | = STMT_VINFO_DEF_TYPE (stmt_info); |
3360 | } |
3361 | } |
3362 | for (gimple_stmt_iterator si = gsi_start_bb (bb); |
3363 | !gsi_end_p (i: si); gsi_next (i: &si)) |
3364 | { |
3365 | if (is_gimple_debug (gs: gsi_stmt (i: si))) |
3366 | continue; |
3367 | stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (i: si)); |
3368 | STMT_SLP_TYPE (stmt_info) = loop_vect; |
3369 | if (STMT_VINFO_IN_PATTERN_P (stmt_info)) |
3370 | { |
3371 | stmt_vec_info pattern_stmt_info |
3372 | = STMT_VINFO_RELATED_STMT (stmt_info); |
3373 | if (STMT_VINFO_SLP_VECT_ONLY_PATTERN (pattern_stmt_info)) |
3374 | STMT_VINFO_IN_PATTERN_P (stmt_info) = false; |
3375 | |
3376 | gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info); |
3377 | STMT_SLP_TYPE (pattern_stmt_info) = loop_vect; |
3378 | for (gimple_stmt_iterator pi = gsi_start (seq&: pattern_def_seq); |
3379 | !gsi_end_p (i: pi); gsi_next (i: &pi)) |
3380 | STMT_SLP_TYPE (loop_vinfo->lookup_stmt (gsi_stmt (pi))) |
3381 | = loop_vect; |
3382 | } |
3383 | } |
3384 | } |
3385 | /* Free optimized alias test DDRS. */ |
3386 | LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).truncate (size: 0); |
3387 | LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release (); |
3388 | LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release (); |
3389 | /* Reset target cost data. */ |
3390 | delete loop_vinfo->vector_costs; |
3391 | loop_vinfo->vector_costs = nullptr; |
3392 | /* Reset accumulated rgroup information. */ |
3393 | LOOP_VINFO_MASKS (loop_vinfo).mask_set.empty (); |
3394 | release_vec_loop_controls (controls: &LOOP_VINFO_MASKS (loop_vinfo).rgc_vec); |
3395 | release_vec_loop_controls (controls: &LOOP_VINFO_LENS (loop_vinfo)); |
3396 | /* Reset assorted flags. */ |
3397 | LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false; |
3398 | LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false; |
3399 | LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = 0; |
3400 | LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = 0; |
3401 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) |
3402 | = saved_can_use_partial_vectors_p; |
3403 | LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = false; |
3404 | |
3405 | goto start_over; |
3406 | } |
3407 | |
3408 | /* Return true if vectorizing a loop using NEW_LOOP_VINFO appears |
3409 | to be better than vectorizing it using OLD_LOOP_VINFO. Assume that |
3410 | OLD_LOOP_VINFO is better unless something specifically indicates |
3411 | otherwise. |
3412 | |
3413 | Note that this deliberately isn't a partial order. */ |
3414 | |
3415 | static bool |
3416 | vect_better_loop_vinfo_p (loop_vec_info new_loop_vinfo, |
3417 | loop_vec_info old_loop_vinfo) |
3418 | { |
3419 | struct loop *loop = LOOP_VINFO_LOOP (new_loop_vinfo); |
3420 | gcc_assert (LOOP_VINFO_LOOP (old_loop_vinfo) == loop); |
3421 | |
3422 | poly_int64 new_vf = LOOP_VINFO_VECT_FACTOR (new_loop_vinfo); |
3423 | poly_int64 old_vf = LOOP_VINFO_VECT_FACTOR (old_loop_vinfo); |
3424 | |
3425 | /* Always prefer a VF of loop->simdlen over any other VF. */ |
3426 | if (loop->simdlen) |
3427 | { |
3428 | bool new_simdlen_p = known_eq (new_vf, loop->simdlen); |
3429 | bool old_simdlen_p = known_eq (old_vf, loop->simdlen); |
3430 | if (new_simdlen_p != old_simdlen_p) |
3431 | return new_simdlen_p; |
3432 | } |
3433 | |
3434 | const auto *old_costs = old_loop_vinfo->vector_costs; |
3435 | const auto *new_costs = new_loop_vinfo->vector_costs; |
3436 | if (loop_vec_info main_loop = LOOP_VINFO_ORIG_LOOP_INFO (old_loop_vinfo)) |
3437 | return new_costs->better_epilogue_loop_than_p (other: old_costs, main_loop); |
3438 | |
3439 | return new_costs->better_main_loop_than_p (other: old_costs); |
3440 | } |
3441 | |
3442 | /* Decide whether to replace OLD_LOOP_VINFO with NEW_LOOP_VINFO. Return |
3443 | true if we should. */ |
3444 | |
3445 | static bool |
3446 | vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo, |
3447 | loop_vec_info old_loop_vinfo) |
3448 | { |
3449 | if (!vect_better_loop_vinfo_p (new_loop_vinfo, old_loop_vinfo)) |
3450 | return false; |
3451 | |
3452 | if (dump_enabled_p ()) |
3453 | dump_printf_loc (MSG_NOTE, vect_location, |
3454 | "***** Preferring vector mode %s to vector mode %s\n" , |
3455 | GET_MODE_NAME (new_loop_vinfo->vector_mode), |
3456 | GET_MODE_NAME (old_loop_vinfo->vector_mode)); |
3457 | return true; |
3458 | } |
3459 | |
/* Analyze LOOP with VECTOR_MODES[MODE_I] and as epilogue if MAIN_LOOP_VINFO is
   not NULL.  Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance
   MODE_I to the next mode useful to analyze.
   Return the loop_vinfo on success and wrapped null on failure.

   FATAL is set when the failure is such that trying further vector
   modes cannot succeed either; on success it is left untouched.  */

static opt_loop_vec_info
vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
		     const vect_loop_form_info *loop_form_info,
		     loop_vec_info main_loop_vinfo,
		     const vector_modes &vector_modes, unsigned &mode_i,
		     machine_mode &autodetected_vector_mode,
		     bool &fatal)
{
  loop_vec_info loop_vinfo
    = vect_create_loop_vinfo (loop, shared, info: loop_form_info, main_loop_info: main_loop_vinfo);

  machine_mode vector_mode = vector_modes[mode_i];
  loop_vinfo->vector_mode = vector_mode;
  unsigned int suggested_unroll_factor = 1;
  bool slp_done_for_suggested_uf = false;

  /* Run the main analysis.  */
  opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal,
					suggested_unroll_factor: &suggested_unroll_factor,
					slp_done_for_suggested_uf);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "***** Analysis %s with vector mode %s\n" ,
		     res ? "succeeded" : " failed" ,
		     GET_MODE_NAME (loop_vinfo->vector_mode));

  /* If the first analysis succeeded and suggested unrolling, redo the
     whole analysis with the suggested unroll factor applied; keep the
     unrolled variant only if that second analysis also succeeds.
     Unrolling is only attempted for main loops, not epilogues.  */
  if (res && !main_loop_vinfo && suggested_unroll_factor > 1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "***** Re-trying analysis for unrolling"
			 " with unroll factor %d and slp %s.\n" ,
			 suggested_unroll_factor,
			 slp_done_for_suggested_uf ? "on" : "off" );
      loop_vec_info unroll_vinfo
	= vect_create_loop_vinfo (loop, shared, info: loop_form_info, main_loop_info: main_loop_vinfo);
      unroll_vinfo->vector_mode = vector_mode;
      unroll_vinfo->suggested_unroll_factor = suggested_unroll_factor;
      opt_result new_res = vect_analyze_loop_2 (loop_vinfo: unroll_vinfo, fatal, NULL,
						slp_done_for_suggested_uf);
      if (new_res)
	{
	  delete loop_vinfo;
	  loop_vinfo = unroll_vinfo;
	}
      else
	delete unroll_vinfo;
    }

  /* Remember the autodetected vector mode.  */
  if (vector_mode == VOIDmode)
    autodetected_vector_mode = loop_vinfo->vector_mode;

  /* Advance mode_i, first skipping modes that would result in the
     same analysis result.  */
  while (mode_i + 1 < vector_modes.length ()
	 && vect_chooses_same_modes_p (loop_vinfo,
				       vector_modes[mode_i + 1]))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "***** The result for vector mode %s would"
			 " be the same\n" ,
			 GET_MODE_NAME (vector_modes[mode_i + 1]));
      mode_i += 1;
    }
  /* Also skip the next mode when it and the autodetected mode are
     related_vector_mode images of each other (same element types,
     corresponding vector sizes), since analyzing it would just repeat
     the autodetected analysis.  */
  if (mode_i + 1 < vector_modes.length ()
      && VECTOR_MODE_P (autodetected_vector_mode)
      && (related_vector_mode (vector_modes[mode_i + 1],
			       GET_MODE_INNER (autodetected_vector_mode))
	  == autodetected_vector_mode)
      && (related_vector_mode (autodetected_vector_mode,
			       GET_MODE_INNER (vector_modes[mode_i + 1]))
	  == vector_modes[mode_i + 1]))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "***** Skipping vector mode %s, which would"
			 " repeat the analysis for %s\n" ,
			 GET_MODE_NAME (vector_modes[mode_i + 1]),
			 GET_MODE_NAME (autodetected_vector_mode));
      mode_i += 1;
    }
  mode_i++;

  if (!res)
    {
      delete loop_vinfo;
      if (fatal)
	gcc_checking_assert (main_loop_vinfo == NULL);
      return opt_loop_vec_info::propagate_failure (other: res);
    }

  return opt_loop_vec_info::success (ptr: loop_vinfo);
}
3560 | |
/* Function vect_analyze_loop.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.

   This is the top-level driver: it checks the loop form, then tries
   the target's vector modes in order (or compares their costs) to pick
   a main-loop mode, and finally analyzes candidate modes for a
   vectorized epilogue.  */
opt_loop_vec_info
vect_analyze_loop (class loop *loop, vec_info_shared *shared)
{
  DUMP_VECT_SCOPE ("analyze_loop_nest" );

  if (loop_outer (loop)
      && loop_vec_info_for_loop (loop: loop_outer (loop))
      && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
    return opt_loop_vec_info::failure_at (loc: vect_location,
					  fmt: "outer-loop already vectorized.\n" );

  if (!find_loop_nest (loop, &shared->loop_nest))
    return opt_loop_vec_info::failure_at
      (loc: vect_location,
       fmt: "not vectorized: loop nest containing two or more consecutive inner"
       " loops cannot be vectorized\n" );

  /* Analyze the loop form.  */
  vect_loop_form_info loop_form_info;
  opt_result res = vect_analyze_loop_form (loop, info: &loop_form_info);
  if (!res)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bad loop form.\n" );
      return opt_loop_vec_info::propagate_failure (other: res);
    }
  if (!integer_onep (loop_form_info.assumptions))
    {
      /* We consider to vectorize this loop by versioning it under
	 some assumptions.  In order to do this, we need to clear
	 existing information computed by scev and niter analyzer.  */
      scev_reset_htab ();
      free_numbers_of_iterations_estimates (loop);
      /* Also set flag for this loop so that following scev and niter
	 analysis are done under the assumptions.  */
      loop_constraint_set (loop, LOOP_C_FINITE);
    }
  else
    /* Clear the existing niter information to make sure the nonwrapping flag
       will be calculated and set appropriately.  */
    free_numbers_of_iterations_estimates (loop);

  auto_vector_modes vector_modes;
  /* Autodetect first vector size we try.  */
  vector_modes.safe_push (VOIDmode);
  unsigned int autovec_flags
    = targetm.vectorize.autovectorize_vector_modes (&vector_modes,
						    loop->simdlen != 0);
  /* Only compare costs across modes when the target asks for it and a
     cost model is in effect at all.  */
  bool pick_lowest_cost_p = ((autovec_flags & VECT_COMPARE_COSTS)
			     && !unlimited_cost_model (loop));
  machine_mode autodetected_vector_mode = VOIDmode;
  opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL);
  unsigned int mode_i = 0;
  unsigned HOST_WIDE_INT simdlen = loop->simdlen;

  /* Keep track of the VF for each mode.  Initialize all to 0 which indicates
     a mode has not been analyzed.  */
  auto_vec<poly_uint64, 8> cached_vf_per_mode;
  for (unsigned i = 0; i < vector_modes.length (); ++i)
    cached_vf_per_mode.safe_push (obj: 0);

  /* First determine the main loop vectorization mode, either the first
     one that works, starting with auto-detecting the vector mode and then
     following the targets order of preference, or the one with the
     lowest cost if pick_lowest_cost_p.  */
  while (1)
    {
      bool fatal;
      unsigned int last_mode_i = mode_i;
      /* Set cached VF to -1 prior to analysis, which indicates a mode has
	 failed.  */
      cached_vf_per_mode[last_mode_i] = -1;
      opt_loop_vec_info loop_vinfo
	= vect_analyze_loop_1 (loop, shared, loop_form_info: &loop_form_info,
			       NULL, vector_modes, mode_i,
			       autodetected_vector_mode, fatal);
      if (fatal)
	break;

      if (loop_vinfo)
	{
	  /* Analysis has been successful so update the VF value.  The
	     VF should always be a multiple of unroll_factor and we want to
	     capture the original VF here.  */
	  cached_vf_per_mode[last_mode_i]
	    = exact_div (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
			 b: loop_vinfo->suggested_unroll_factor);
	  /* Once we hit the desired simdlen for the first time,
	     discard any previous attempts.  */
	  if (simdlen
	      && known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), simdlen))
	    {
	      delete first_loop_vinfo;
	      first_loop_vinfo = opt_loop_vec_info::success (NULL);
	      simdlen = 0;
	    }
	  else if (pick_lowest_cost_p
		   && first_loop_vinfo
		   && vect_joust_loop_vinfos (new_loop_vinfo: loop_vinfo, old_loop_vinfo: first_loop_vinfo))
	    {
	      /* Pick loop_vinfo over first_loop_vinfo.  */
	      delete first_loop_vinfo;
	      first_loop_vinfo = opt_loop_vec_info::success (NULL);
	    }
	  /* Either keep this candidate (becoming the current best) or
	     discard it because the previous best was preferred.  */
	  if (first_loop_vinfo == NULL)
	    first_loop_vinfo = loop_vinfo;
	  else
	    {
	      delete loop_vinfo;
	      loop_vinfo = opt_loop_vec_info::success (NULL);
	    }

	  /* Commit to first_loop_vinfo if we have no reason to try
	     alternatives.  */
	  if (!simdlen && !pick_lowest_cost_p)
	    break;
	}
      if (mode_i == vector_modes.length ()
	  || autodetected_vector_mode == VOIDmode)
	break;

      /* Try the next biggest vector size.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "***** Re-trying analysis with vector mode %s\n" ,
			 GET_MODE_NAME (vector_modes[mode_i]));
    }
  if (!first_loop_vinfo)
    return opt_loop_vec_info::propagate_failure (other: res);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "***** Choosing vector mode %s\n" ,
		     GET_MODE_NAME (first_loop_vinfo->vector_mode));

  /* Only vectorize epilogues if PARAM_VECT_EPILOGUES_NOMASK is
     enabled, SIMDUID is not set, it is the innermost loop and we have
     either already found the loop's SIMDLEN or there was no SIMDLEN to
     begin with.
     TODO: Enable epilogue vectorization for loops with SIMDUID set.  */
  bool vect_epilogues = (!simdlen
			 && loop->inner == NULL
			 && param_vect_epilogues_nomask
			 && LOOP_VINFO_PEELING_FOR_NITER (first_loop_vinfo)
			 /* No code motion support for multiple epilogues so for now
			    not supported when multiple exits.  */
			 && !LOOP_VINFO_EARLY_BREAKS (first_loop_vinfo)
			 && !loop->simduid);
  if (!vect_epilogues)
    return first_loop_vinfo;

  /* Now analyze first_loop_vinfo for epilogue vectorization.  */
  poly_uint64 lowest_th = LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo);

  /* For epilogues start the analysis from the first mode.  The motivation
     behind starting from the beginning comes from cases where the VECTOR_MODES
     array may contain length-agnostic and length-specific modes.  Their
     ordering is not guaranteed, so we could end up picking a mode for the main
     loop that is after the epilogue's optimal mode.  */
  vector_modes[0] = autodetected_vector_mode;
  mode_i = 0;

  bool supports_partial_vectors =
    partial_vectors_supported_p () && param_vect_partial_vector_usage != 0;
  poly_uint64 first_vinfo_vf = LOOP_VINFO_VECT_FACTOR (first_loop_vinfo);

  while (1)
    {
      /* If the target does not support partial vectors we can shorten the
	 number of modes to analyze for the epilogue as we know we can't pick a
	 mode that would lead to a VF at least as big as the
	 FIRST_VINFO_VF.  */
      if (!supports_partial_vectors
	  && maybe_ge (cached_vf_per_mode[mode_i], first_vinfo_vf))
	{
	  mode_i++;
	  if (mode_i == vector_modes.length ())
	    break;
	  continue;
	}

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "***** Re-trying epilogue analysis with vector "
			 "mode %s\n" , GET_MODE_NAME (vector_modes[mode_i]));

      bool fatal;
      opt_loop_vec_info loop_vinfo
	= vect_analyze_loop_1 (loop, shared, loop_form_info: &loop_form_info,
			       main_loop_vinfo: first_loop_vinfo,
			       vector_modes, mode_i,
			       autodetected_vector_mode, fatal);
      if (fatal)
	break;

      if (loop_vinfo)
	{
	  if (pick_lowest_cost_p)
	    {
	      /* Keep trying to roll back vectorization attempts while the
		 loop_vec_infos they produced were worse than this one.  */
	      vec<loop_vec_info> &vinfos = first_loop_vinfo->epilogue_vinfos;
	      while (!vinfos.is_empty ()
		     && vect_joust_loop_vinfos (new_loop_vinfo: loop_vinfo, old_loop_vinfo: vinfos.last ()))
		{
		  gcc_assert (vect_epilogues);
		  delete vinfos.pop ();
		}
	    }
	  /* For now only allow one epilogue loop.  */
	  if (first_loop_vinfo->epilogue_vinfos.is_empty ())
	    {
	      first_loop_vinfo->epilogue_vinfos.safe_push (obj: loop_vinfo);
	      poly_uint64 th = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
	      gcc_assert (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
			  || maybe_ne (lowest_th, 0U));
	      /* Keep track of the known smallest versioning
		 threshold.  */
	      if (ordered_p (a: lowest_th, b: th))
		lowest_th = ordered_min (a: lowest_th, b: th);
	    }
	  else
	    {
	      delete loop_vinfo;
	      loop_vinfo = opt_loop_vec_info::success (NULL);
	    }

	  /* For now only allow one epilogue loop, but allow
	     pick_lowest_cost_p to replace it, so commit to the
	     first epilogue if we have no reason to try alternatives.  */
	  if (!pick_lowest_cost_p)
	    break;
	}

      if (mode_i == vector_modes.length ())
	break;

    }

  if (!first_loop_vinfo->epilogue_vinfos.is_empty ())
    {
      LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo) = lowest_th;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "***** Choosing epilogue vector mode %s\n" ,
			 GET_MODE_NAME
			 (first_loop_vinfo->epilogue_vinfos[0]->vector_mode));
    }

  return first_loop_vinfo;
}
3818 | |
3819 | /* Return true if there is an in-order reduction function for CODE, storing |
3820 | it in *REDUC_FN if so. */ |
3821 | |
3822 | static bool |
3823 | fold_left_reduction_fn (code_helper code, internal_fn *reduc_fn) |
3824 | { |
3825 | /* We support MINUS_EXPR by negating the operand. This also preserves an |
3826 | initial -0.0 since -0.0 - 0.0 (neutral op for MINUS_EXPR) == -0.0 + |
3827 | (-0.0) = -0.0. */ |
3828 | if (code == PLUS_EXPR || code == MINUS_EXPR) |
3829 | { |
3830 | *reduc_fn = IFN_FOLD_LEFT_PLUS; |
3831 | return true; |
3832 | } |
3833 | return false; |
3834 | } |
3835 | |
3836 | /* Function reduction_fn_for_scalar_code |
3837 | |
3838 | Input: |
3839 | CODE - tree_code of a reduction operations. |
3840 | |
3841 | Output: |
3842 | REDUC_FN - the corresponding internal function to be used to reduce the |
3843 | vector of partial results into a single scalar result, or IFN_LAST |
3844 | if the operation is a supported reduction operation, but does not have |
3845 | such an internal function. |
3846 | |
3847 | Return FALSE if CODE currently cannot be vectorized as reduction. */ |
3848 | |
3849 | bool |
3850 | reduction_fn_for_scalar_code (code_helper code, internal_fn *reduc_fn) |
3851 | { |
3852 | if (code.is_tree_code ()) |
3853 | switch (tree_code (code)) |
3854 | { |
3855 | case MAX_EXPR: |
3856 | *reduc_fn = IFN_REDUC_MAX; |
3857 | return true; |
3858 | |
3859 | case MIN_EXPR: |
3860 | *reduc_fn = IFN_REDUC_MIN; |
3861 | return true; |
3862 | |
3863 | case PLUS_EXPR: |
3864 | *reduc_fn = IFN_REDUC_PLUS; |
3865 | return true; |
3866 | |
3867 | case BIT_AND_EXPR: |
3868 | *reduc_fn = IFN_REDUC_AND; |
3869 | return true; |
3870 | |
3871 | case BIT_IOR_EXPR: |
3872 | *reduc_fn = IFN_REDUC_IOR; |
3873 | return true; |
3874 | |
3875 | case BIT_XOR_EXPR: |
3876 | *reduc_fn = IFN_REDUC_XOR; |
3877 | return true; |
3878 | |
3879 | case MULT_EXPR: |
3880 | case MINUS_EXPR: |
3881 | *reduc_fn = IFN_LAST; |
3882 | return true; |
3883 | |
3884 | default: |
3885 | return false; |
3886 | } |
3887 | else |
3888 | switch (combined_fn (code)) |
3889 | { |
3890 | CASE_CFN_FMAX: |
3891 | *reduc_fn = IFN_REDUC_FMAX; |
3892 | return true; |
3893 | |
3894 | CASE_CFN_FMIN: |
3895 | *reduc_fn = IFN_REDUC_FMIN; |
3896 | return true; |
3897 | |
3898 | default: |
3899 | return false; |
3900 | } |
3901 | } |
3902 | |
3903 | /* If there is a neutral value X such that a reduction would not be affected |
3904 | by the introduction of additional X elements, return that X, otherwise |
3905 | return null. CODE is the code of the reduction and SCALAR_TYPE is type |
3906 | of the scalar elements. If the reduction has just a single initial value |
3907 | then INITIAL_VALUE is that value, otherwise it is null. |
3908 | If AS_INITIAL is TRUE the value is supposed to be used as initial value. |
3909 | In that case no signed zero is returned. */ |
3910 | |
3911 | tree |
3912 | neutral_op_for_reduction (tree scalar_type, code_helper code, |
3913 | tree initial_value, bool as_initial) |
3914 | { |
3915 | if (code.is_tree_code ()) |
3916 | switch (tree_code (code)) |
3917 | { |
3918 | case DOT_PROD_EXPR: |
3919 | case SAD_EXPR: |
3920 | case MINUS_EXPR: |
3921 | case BIT_IOR_EXPR: |
3922 | case BIT_XOR_EXPR: |
3923 | return build_zero_cst (scalar_type); |
3924 | case WIDEN_SUM_EXPR: |
3925 | case PLUS_EXPR: |
3926 | if (!as_initial && HONOR_SIGNED_ZEROS (scalar_type)) |
3927 | return build_real (scalar_type, dconstm0); |
3928 | else |
3929 | return build_zero_cst (scalar_type); |
3930 | |
3931 | case MULT_EXPR: |
3932 | return build_one_cst (scalar_type); |
3933 | |
3934 | case BIT_AND_EXPR: |
3935 | return build_all_ones_cst (scalar_type); |
3936 | |
3937 | case MAX_EXPR: |
3938 | case MIN_EXPR: |
3939 | return initial_value; |
3940 | |
3941 | default: |
3942 | return NULL_TREE; |
3943 | } |
3944 | else |
3945 | switch (combined_fn (code)) |
3946 | { |
3947 | CASE_CFN_FMIN: |
3948 | CASE_CFN_FMAX: |
3949 | return initial_value; |
3950 | |
3951 | default: |
3952 | return NULL_TREE; |
3953 | } |
3954 | } |
3955 | |
/* Error reporting helper for vect_is_simple_reduction below.  GIMPLE statement
   STMT is printed with a message MSG.  MSG_TYPE selects the dump category
   (e.g. MSG_NOTE or MSG_MISSED_OPTIMIZATION) the text is emitted under.  */

static void
report_vect_op (dump_flags_t msg_type, gimple *stmt, const char *msg)
{
  dump_printf_loc (msg_type, vect_location, "%s%G" , msg, stmt);
}
3964 | |
3965 | /* Return true if we need an in-order reduction for operation CODE |
3966 | on type TYPE. NEED_WRAPPING_INTEGRAL_OVERFLOW is true if integer |
3967 | overflow must wrap. */ |
3968 | |
3969 | bool |
3970 | needs_fold_left_reduction_p (tree type, code_helper code) |
3971 | { |
3972 | /* CHECKME: check for !flag_finite_math_only too? */ |
3973 | if (SCALAR_FLOAT_TYPE_P (type)) |
3974 | { |
3975 | if (code.is_tree_code ()) |
3976 | switch (tree_code (code)) |
3977 | { |
3978 | case MIN_EXPR: |
3979 | case MAX_EXPR: |
3980 | return false; |
3981 | |
3982 | default: |
3983 | return !flag_associative_math; |
3984 | } |
3985 | else |
3986 | switch (combined_fn (code)) |
3987 | { |
3988 | CASE_CFN_FMIN: |
3989 | CASE_CFN_FMAX: |
3990 | return false; |
3991 | |
3992 | default: |
3993 | return !flag_associative_math; |
3994 | } |
3995 | } |
3996 | |
3997 | if (INTEGRAL_TYPE_P (type)) |
3998 | return (!code.is_tree_code () |
3999 | || !operation_no_trapping_overflow (type, tree_code (code))); |
4000 | |
4001 | if (SAT_FIXED_POINT_TYPE_P (type)) |
4002 | return true; |
4003 | |
4004 | return false; |
4005 | } |
4006 | |
4007 | /* Return true if the reduction PHI in LOOP with latch arg LOOP_ARG and |
4008 | has a handled computation expression. Store the main reduction |
4009 | operation in *CODE. */ |
4010 | |
static bool
check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi,
		      tree loop_arg, code_helper *code,
		      vec<std::pair<ssa_op_iter, use_operand_p> > &path)
{
  auto_bitmap visited;
  /* The walk below terminates successfully once it reaches the PHI
     result again, i.e. once the use-def cycle is closed.  */
  tree lookfor = PHI_RESULT (phi);
  ssa_op_iter curri;
  /* Position the PHI use iterator on the argument LOOP_ARG.  */
  use_operand_p curr = op_iter_init_phiuse (ptr: &curri, phi, SSA_OP_USE);
  while (USE_FROM_PTR (curr) != loop_arg)
    curr = op_iter_next_use (ptr: &curri);
  /* Mark the iterator as exhausted so that backtracking over this initial
     element does not explore the other PHI arguments.  */
  curri.i = curri.numops;
  do
    {
      path.safe_push (obj: std::make_pair (x&: curri, y&: curr));
      tree use = USE_FROM_PTR (curr);
      if (use == lookfor)
	break;
      gimple *def = SSA_NAME_DEF_STMT (use);
      if (gimple_nop_p (g: def)
	  || ! flow_bb_inside_loop_p (loop, gimple_bb (g: def)))
	{
pop:
	  /* Dead end (definition not inside the loop): backtrack by
	     popping path elements until an unvisited alternative use
	     is found.  */
	  do
	    {
	      std::pair<ssa_op_iter, use_operand_p> x = path.pop ();
	      curri = x.first;
	      curr = x.second;
	      do
		curr = op_iter_next_use (ptr: &curri);
	      /* Skip already visited or non-SSA operands (from iterating
	         over PHI args).  */
	      while (curr != NULL_USE_OPERAND_P
		     && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
			 || ! bitmap_set_bit (visited,
					      SSA_NAME_VERSION
						(USE_FROM_PTR (curr)))));
	    }
	  while (curr == NULL_USE_OPERAND_P && ! path.is_empty ());
	  if (curr == NULL_USE_OPERAND_P)
	    break;
	}
      else
	{
	  /* Descend into DEF and continue the walk at its first
	     not-yet-visited SSA use operand.  */
	  if (gimple_code (g: def) == GIMPLE_PHI)
	    curr = op_iter_init_phiuse (ptr: &curri, phi: as_a <gphi *>(p: def), SSA_OP_USE);
	  else
	    curr = op_iter_init_use (ptr: &curri, stmt: def, SSA_OP_USE);
	  while (curr != NULL_USE_OPERAND_P
		 && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
		     || ! bitmap_set_bit (visited,
					  SSA_NAME_VERSION
					    (USE_FROM_PTR (curr)))))
	    curr = op_iter_next_use (ptr: &curri);
	  if (curr == NULL_USE_OPERAND_P)
	    goto pop;
	}
    }
  while (1);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      dump_printf_loc (MSG_NOTE, loc, "reduction path: " );
      unsigned i;
      std::pair<ssa_op_iter, use_operand_p> *x;
      FOR_EACH_VEC_ELT (path, i, x)
	dump_printf (MSG_NOTE, "%T " , USE_FROM_PTR (x->second));
      dump_printf (MSG_NOTE, "\n" );
    }

  /* Check whether the reduction path detected is valid.  */
  bool fail = path.length () == 0;
  /* Whether the reduction value gets negated each iteration (through a
     MINUS_EXPR / IFN_COND_SUB second operand); such cycles are rejected
     at the end.  */
  bool neg = false;
  /* TYPE_SIGN of the first non-conversion operation on the path; all
     MIN/MAX operations have to agree on it.  */
  int sign = -1;
  *code = ERROR_MARK;
  for (unsigned i = 1; i < path.length (); ++i)
    {
      gimple *use_stmt = USE_STMT (path[i].second);
      gimple_match_op op;
      if (!gimple_extract_op (use_stmt, &op))
	{
	  fail = true;
	  break;
	}
      /* Compute OPI, the operand index of the use within USE_STMT.  */
      unsigned int opi = op.num_ops;
      if (gassign *assign = dyn_cast<gassign *> (p: use_stmt))
	{
	  /* The following make sure we can compute the operand index
	     easily plus it mostly disallows chaining via COND_EXPR condition
	     operands.  */
	  for (opi = 0; opi < op.num_ops; ++opi)
	    if (gimple_assign_rhs1_ptr (gs: assign) + opi == path[i].second->use)
	      break;
	}
      else if (gcall *call = dyn_cast<gcall *> (p: use_stmt))
	{
	  for (opi = 0; opi < op.num_ops; ++opi)
	    if (gimple_call_arg_ptr (gs: call, index: opi) == path[i].second->use)
	      break;
	}
      if (opi == op.num_ops)
	{
	  fail = true;
	  break;
	}
      op.code = canonicalize_code (op.code, op.type);
      if (op.code == MINUS_EXPR)
	{
	  op.code = PLUS_EXPR;
	  /* Track whether we negate the reduction value each iteration.  */
	  if (op.ops[1] == op.ops[opi])
	    neg = ! neg;
	}
      else if (op.code == IFN_COND_SUB)
	{
	  op.code = IFN_COND_ADD;
	  /* Track whether we negate the reduction value each iteration.  */
	  if (op.ops[2] == op.ops[opi])
	    neg = ! neg;
	}
      /* A no-op conversion is transparent; anything else has to use the
	 same operation code throughout the whole path.  */
      if (CONVERT_EXPR_CODE_P (op.code)
	  && tree_nop_conversion_p (op.type, TREE_TYPE (op.ops[0])))
	;
      else if (*code == ERROR_MARK)
	{
	  *code = op.code;
	  sign = TYPE_SIGN (op.type);
	}
      else if (op.code != *code)
	{
	  fail = true;
	  break;
	}
      else if ((op.code == MIN_EXPR
		|| op.code == MAX_EXPR)
	       && sign != TYPE_SIGN (op.type))
	{
	  fail = true;
	  break;
	}
      /* Check there's only a single stmt the op is used on.  For the
	 not value-changing tail and the last stmt allow out-of-loop uses.
	 ??? We could relax this and handle arbitrary live stmts by
	 forcing a scalar epilogue for example.  */
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      gimple *op_use_stmt;
      unsigned cnt = 0;
      bool cond_fn_p = op.code.is_internal_fn ()
	&& (conditional_internal_fn_code (internal_fn (op.code))
	    != ERROR_MARK);

      FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op.ops[opi])
	{
	  /* In case of a COND_OP (mask, op1, op2, op1) reduction we might have
	     op1 twice (once as definition, once as else) in the same operation.
	     Allow this.  */
	  if (cond_fn_p && op_use_stmt == use_stmt)
	    {
	      gcall *call = as_a<gcall *> (p: use_stmt);
	      unsigned else_pos
		= internal_fn_else_index (internal_fn (op.code));

	      for (unsigned int j = 0; j < gimple_call_num_args (gs: call); ++j)
		{
		  if (j == else_pos)
		    continue;
		  if (gimple_call_arg (gs: call, index: j) == op.ops[opi])
		    cnt++;
		}
	    }
	  else if (!is_gimple_debug (gs: op_use_stmt)
		   && (*code != ERROR_MARK
		       || flow_bb_inside_loop_p (loop,
						 gimple_bb (g: op_use_stmt))))
	    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
	      cnt++;
	}

      if (cnt != 1)
	{
	  fail = true;
	  break;
	}
    }
  /* Success requires a non-empty path, no per-iteration negation and a
     recognized (non-ERROR_MARK) reduction operation.  */
  return ! fail && ! neg && *code != ERROR_MARK;
}
4197 | |
4198 | bool |
4199 | check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi, |
4200 | tree loop_arg, enum tree_code code) |
4201 | { |
4202 | auto_vec<std::pair<ssa_op_iter, use_operand_p> > path; |
4203 | code_helper code_; |
4204 | return (check_reduction_path (loc, loop, phi, loop_arg, code: &code_, path) |
4205 | && code_ == code); |
4206 | } |
4207 | |
4208 | |
4209 | |
4210 | /* Function vect_is_simple_reduction |
4211 | |
4212 | (1) Detect a cross-iteration def-use cycle that represents a simple |
4213 | reduction computation. We look for the following pattern: |
4214 | |
4215 | loop_header: |
4216 | a1 = phi < a0, a2 > |
4217 | a3 = ... |
4218 | a2 = operation (a3, a1) |
4219 | |
4220 | or |
4221 | |
4222 | a3 = ... |
4223 | loop_header: |
4224 | a1 = phi < a0, a2 > |
4225 | a2 = operation (a3, a1) |
4226 | |
4227 | such that: |
4228 | 1. operation is commutative and associative and it is safe to |
4229 | change the order of the computation |
4230 | 2. no uses for a2 in the loop (a2 is used out of the loop) |
4231 | 3. no uses of a1 in the loop besides the reduction operation |
4232 | 4. no uses of a1 outside the loop. |
4233 | |
4234 | Conditions 1,4 are tested here. |
4235 | Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized. |
4236 | |
4237 | (2) Detect a cross-iteration def-use cycle in nested loops, i.e., |
4238 | nested cycles. |
4239 | |
4240 | (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double |
4241 | reductions: |
4242 | |
4243 | a1 = phi < a0, a2 > |
4244 | inner loop (def of a3) |
4245 | a2 = phi < a3 > |
4246 | |
   (4) Detect condition expressions, i.e.:
4248 | for (int i = 0; i < N; i++) |
4249 | if (a[i] < val) |
4250 | ret_val = a[i]; |
4251 | |
4252 | */ |
4253 | |
static stmt_vec_info
vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
			  bool *double_reduc, bool *reduc_chain_p, bool slp)
{
  gphi *phi = as_a <gphi *> (p: phi_info->stmt);
  gimple *phi_use_stmt = NULL;
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  *double_reduc = false;
  *reduc_chain_p = false;
  STMT_VINFO_REDUC_TYPE (phi_info) = TREE_CODE_REDUCTION;

  tree phi_name = PHI_RESULT (phi);
  /* ??? If there are no uses of the PHI result the inner loop reduction
     won't be detected as possibly double-reduction by vectorizable_reduction
     because that tries to walk the PHI arg from the preheader edge which
     can be constant.  See PR60382.  */
  if (has_zero_uses (var: phi_name))
    return NULL;
  class loop *loop = (gimple_bb (g: phi))->loop_father;
  /* Count the in-loop uses of the PHI result; more than one use inside
     the loop disqualifies the cycle further below.  */
  unsigned nphi_def_loop_uses = 0;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, phi_name)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (gs: use_stmt))
	continue;

      if (!flow_bb_inside_loop_p (loop, gimple_bb (g: use_stmt)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "intermediate value used outside loop.\n" );

	  return NULL;
	}

      /* In case of a COND_OP (mask, op1, op2, op1) reduction we might have
	 op1 twice (once as definition, once as else) in the same operation.
	 Only count it as one.  */
      if (use_stmt != phi_use_stmt)
	{
	  nphi_def_loop_uses++;
	  phi_use_stmt = use_stmt;
	}
    }

  /* The latch value of the cycle has to be defined by an SSA name
     inside the loop.  */
  tree latch_def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
  if (TREE_CODE (latch_def) != SSA_NAME)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "reduction: not ssa_name: %T\n" , latch_def);
      return NULL;
    }

  stmt_vec_info def_stmt_info = loop_info->lookup_def (latch_def);
  if (!def_stmt_info
      || !flow_bb_inside_loop_p (loop, gimple_bb (g: def_stmt_info->stmt)))
    return NULL;

  /* Whether PHI sits in a loop nested inside the loop being vectorized.  */
  bool nested_in_vect_loop
    = flow_loop_nested_p (LOOP_VINFO_LOOP (loop_info), loop);
  unsigned nlatch_def_loop_uses = 0;
  auto_vec<gphi *, 3> lcphis;
  bool inner_loop_of_double_reduc = false;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, latch_def)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (gs: use_stmt))
	continue;
      if (flow_bb_inside_loop_p (loop, gimple_bb (g: use_stmt)))
	nlatch_def_loop_uses++;
      else
	{
	  /* We can have more than one loop-closed PHI.  */
	  lcphis.safe_push (obj: as_a <gphi *> (p: use_stmt));
	  if (nested_in_vect_loop
	      && (STMT_VINFO_DEF_TYPE (loop_info->lookup_stmt (use_stmt))
		  == vect_double_reduction_def))
	    inner_loop_of_double_reduc = true;
	}
    }

  /* If we are vectorizing an inner reduction we are executing that
     in the original order only in case we are not dealing with a
     double reduction.  */
  if (nested_in_vect_loop && !inner_loop_of_double_reduc)
    {
      if (dump_enabled_p ())
	report_vect_op (msg_type: MSG_NOTE, stmt: def_stmt_info->stmt,
			msg: "detected nested cycle: " );
      return def_stmt_info;
    }

  /* When the inner loop of a double reduction ends up with more than
     one loop-closed PHI we have failed to classify alternate such
     PHIs as double reduction, leading to wrong code.  See PR103237.  */
  if (inner_loop_of_double_reduc && lcphis.length () != 1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "unhandle double reduction\n" );
      return NULL;
    }

  /* If this isn't a nested cycle or if the nested cycle reduction value
     is used outside of the inner loop we cannot handle uses of the reduction
     value.  */
  if (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "reduction used in loop.\n" );
      return NULL;
    }

  /* If DEF_STMT is a phi node itself, we expect it to have a single argument
     defined in the inner loop.  */
  if (gphi *def_stmt = dyn_cast <gphi *> (p: def_stmt_info->stmt))
    {
      tree op1 = PHI_ARG_DEF (def_stmt, 0);
      if (gimple_phi_num_args (gs: def_stmt) != 1
	  || TREE_CODE (op1) != SSA_NAME)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unsupported phi node definition.\n" );

	  return NULL;
	}

      /* Verify there is an inner cycle composed of the PHI phi_use_stmt
	 and the latch definition op1.  */
      gimple *def1 = SSA_NAME_DEF_STMT (op1);
      if (gimple_bb (g: def1)
	  && flow_bb_inside_loop_p (loop, gimple_bb (g: def_stmt))
	  && loop->inner
	  && flow_bb_inside_loop_p (loop->inner, gimple_bb (g: def1))
	  && (is_gimple_assign (gs: def1) || is_gimple_call (gs: def1))
	  && is_a <gphi *> (p: phi_use_stmt)
	  && flow_bb_inside_loop_p (loop->inner, gimple_bb (g: phi_use_stmt))
	  && (op1 == PHI_ARG_DEF_FROM_EDGE (phi_use_stmt,
					    loop_latch_edge (loop->inner))))
	{
	  if (dump_enabled_p ())
	    report_vect_op (msg_type: MSG_NOTE, stmt: def_stmt,
			    msg: "detected double reduction: " );

	  *double_reduc = true;
	  return def_stmt_info;
	}

      return NULL;
    }

  /* Look for the expression computing latch_def from the loop PHI result.  */
  auto_vec<std::pair<ssa_op_iter, use_operand_p> > path;
  code_helper code;
  if (check_reduction_path (loc: vect_location, loop, phi, loop_arg: latch_def, code: &code,
			    path))
    {
      STMT_VINFO_REDUC_CODE (phi_info) = code;
      if (code == COND_EXPR && !nested_in_vect_loop)
	STMT_VINFO_REDUC_TYPE (phi_info) = COND_REDUCTION;

      /* Fill in STMT_VINFO_REDUC_IDX and gather stmts for an SLP
	 reduction chain for which the additional restriction is that
	 all operations in the chain are the same.  */
      auto_vec<stmt_vec_info, 8> reduc_chain;
      unsigned i;
      bool is_slp_reduc = !nested_in_vect_loop && code != COND_EXPR;
      /* Walk the path from the last stmt towards the PHI; path[0] is the
	 PHI latch use itself and is not visited.  */
      for (i = path.length () - 1; i >= 1; --i)
	{
	  gimple *stmt = USE_STMT (path[i].second);
	  stmt_vec_info stmt_info = loop_info->lookup_stmt (stmt);
	  gimple_match_op op;
	  if (!gimple_extract_op (stmt, &op))
	    gcc_unreachable ();
	  /* The reduction operand index is the offset of the use within
	     the stmt's operands.  */
	  if (gassign *assign = dyn_cast<gassign *> (p: stmt))
	    STMT_VINFO_REDUC_IDX (stmt_info)
	      = path[i].second->use - gimple_assign_rhs1_ptr (gs: assign);
	  else
	    {
	      gcall *call = as_a<gcall *> (p: stmt);
	      STMT_VINFO_REDUC_IDX (stmt_info)
		= path[i].second->use - gimple_call_arg_ptr (gs: call, index: 0);
	    }
	  bool leading_conversion = (CONVERT_EXPR_CODE_P (op.code)
				     && (i == 1 || i == path.length () - 1));
	  if ((op.code != code && !leading_conversion)
	      /* We can only handle the final value in epilogue
		 generation for reduction chains.  */
	      || (i != 1 && !has_single_use (var: gimple_get_lhs (stmt))))
	    is_slp_reduc = false;
	  /* For reduction chains we support trailing/leading
	     conversions.  We do not store those in the actual chain.  */
	  if (leading_conversion)
	    continue;
	  reduc_chain.safe_push (obj: stmt_info);
	}
      if (slp && is_slp_reduc && reduc_chain.length () > 1)
	{
	  /* Link the chain stmts via the REDUC_GROUP fields.  */
	  for (unsigned i = 0; i < reduc_chain.length () - 1; ++i)
	    {
	      REDUC_GROUP_FIRST_ELEMENT (reduc_chain[i]) = reduc_chain[0];
	      REDUC_GROUP_NEXT_ELEMENT (reduc_chain[i]) = reduc_chain[i+1];
	    }
	  REDUC_GROUP_FIRST_ELEMENT (reduc_chain.last ()) = reduc_chain[0];
	  REDUC_GROUP_NEXT_ELEMENT (reduc_chain.last ()) = NULL;

	  /* Save the chain for further analysis in SLP detection.  */
	  LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (obj: reduc_chain[0]);
	  REDUC_GROUP_SIZE (reduc_chain[0]) = reduc_chain.length ();

	  *reduc_chain_p = true;
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "reduction: detected reduction chain\n" );
	}
      else if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduction: detected reduction\n" );

      return def_stmt_info;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "reduction: unknown pattern\n" );

  return NULL;
}
4487 | |
4488 | /* Estimate the number of peeled epilogue iterations for LOOP_VINFO. |
4489 | PEEL_ITERS_PROLOGUE is the number of peeled prologue iterations, |
4490 | or -1 if not known. */ |
4491 | |
4492 | static int |
4493 | vect_get_peel_iters_epilogue (loop_vec_info loop_vinfo, int peel_iters_prologue) |
4494 | { |
4495 | int assumed_vf = vect_vf_for_cost (loop_vinfo); |
4496 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) || peel_iters_prologue == -1) |
4497 | { |
4498 | if (dump_enabled_p ()) |
4499 | dump_printf_loc (MSG_NOTE, vect_location, |
4500 | "cost model: epilogue peel iters set to vf/2 " |
4501 | "because loop iterations are unknown .\n" ); |
4502 | return assumed_vf / 2; |
4503 | } |
4504 | else |
4505 | { |
4506 | int niters = LOOP_VINFO_INT_NITERS (loop_vinfo); |
4507 | peel_iters_prologue = MIN (niters, peel_iters_prologue); |
4508 | int peel_iters_epilogue = (niters - peel_iters_prologue) % assumed_vf; |
4509 | /* If we need to peel for gaps, but no peeling is required, we have to |
4510 | peel VF iterations. */ |
4511 | if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !peel_iters_epilogue) |
4512 | peel_iters_epilogue = assumed_vf; |
4513 | return peel_iters_epilogue; |
4514 | } |
4515 | } |
4516 | |
4517 | /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */ |
4518 | int |
4519 | vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue, |
4520 | int *peel_iters_epilogue, |
4521 | stmt_vector_for_cost *scalar_cost_vec, |
4522 | stmt_vector_for_cost *prologue_cost_vec, |
4523 | stmt_vector_for_cost *epilogue_cost_vec) |
4524 | { |
4525 | int retval = 0; |
4526 | |
4527 | *peel_iters_epilogue |
4528 | = vect_get_peel_iters_epilogue (loop_vinfo, peel_iters_prologue); |
4529 | |
4530 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) |
4531 | { |
4532 | /* If peeled iterations are known but number of scalar loop |
4533 | iterations are unknown, count a taken branch per peeled loop. */ |
4534 | if (peel_iters_prologue > 0) |
4535 | retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken, |
4536 | vect_prologue); |
4537 | if (*peel_iters_epilogue > 0) |
4538 | retval += record_stmt_cost (epilogue_cost_vec, 1, cond_branch_taken, |
4539 | vect_epilogue); |
4540 | } |
4541 | |
4542 | stmt_info_for_cost *si; |
4543 | int j; |
4544 | if (peel_iters_prologue) |
4545 | FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si) |
4546 | retval += record_stmt_cost (body_cost_vec: prologue_cost_vec, |
4547 | count: si->count * peel_iters_prologue, |
4548 | kind: si->kind, stmt_info: si->stmt_info, misalign: si->misalign, |
4549 | where: vect_prologue); |
4550 | if (*peel_iters_epilogue) |
4551 | FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si) |
4552 | retval += record_stmt_cost (body_cost_vec: epilogue_cost_vec, |
4553 | count: si->count * *peel_iters_epilogue, |
4554 | kind: si->kind, stmt_info: si->stmt_info, misalign: si->misalign, |
4555 | where: vect_epilogue); |
4556 | |
4557 | return retval; |
4558 | } |
4559 | |
4560 | /* Function vect_estimate_min_profitable_iters |
4561 | |
4562 | Return the number of iterations required for the vector version of the |
4563 | loop to be profitable relative to the cost of the scalar version of the |
4564 | loop. |
4565 | |
4566 | *RET_MIN_PROFITABLE_NITERS is a cost model profitability threshold |
4567 | of iterations for vectorization. -1 value means loop vectorization |
4568 | is not profitable. This returned value may be used for dynamic |
4569 | profitability check. |
4570 | |
4571 | *RET_MIN_PROFITABLE_ESTIMATE is a profitability threshold to be used |
4572 | for static check against estimated number of iterations. */ |
4573 | |
4574 | static void |
4575 | vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, |
4576 | int *ret_min_profitable_niters, |
4577 | int *ret_min_profitable_estimate, |
4578 | unsigned *suggested_unroll_factor) |
4579 | { |
4580 | int min_profitable_iters; |
4581 | int min_profitable_estimate; |
4582 | int peel_iters_prologue; |
4583 | int peel_iters_epilogue; |
4584 | unsigned vec_inside_cost = 0; |
4585 | int vec_outside_cost = 0; |
4586 | unsigned vec_prologue_cost = 0; |
4587 | unsigned vec_epilogue_cost = 0; |
4588 | int scalar_single_iter_cost = 0; |
4589 | int scalar_outside_cost = 0; |
4590 | int assumed_vf = vect_vf_for_cost (loop_vinfo); |
4591 | int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); |
4592 | vector_costs *target_cost_data = loop_vinfo->vector_costs; |
4593 | |
4594 | /* Cost model disabled. */ |
4595 | if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))) |
4596 | { |
4597 | if (dump_enabled_p ()) |
4598 | dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n" ); |
4599 | *ret_min_profitable_niters = 0; |
4600 | *ret_min_profitable_estimate = 0; |
4601 | return; |
4602 | } |
4603 | |
4604 | /* Requires loop versioning tests to handle misalignment. */ |
4605 | if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)) |
4606 | { |
4607 | /* FIXME: Make cost depend on complexity of individual check. */ |
4608 | unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length (); |
4609 | (void) add_stmt_cost (costs: target_cost_data, count: len, kind: scalar_stmt, where: vect_prologue); |
4610 | if (dump_enabled_p ()) |
4611 | dump_printf (MSG_NOTE, |
4612 | "cost model: Adding cost of checks for loop " |
4613 | "versioning to treat misalignment.\n" ); |
4614 | } |
4615 | |
4616 | /* Requires loop versioning with alias checks. */ |
4617 | if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)) |
4618 | { |
4619 | /* FIXME: Make cost depend on complexity of individual check. */ |
4620 | unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length (); |
4621 | (void) add_stmt_cost (costs: target_cost_data, count: len, kind: scalar_stmt, where: vect_prologue); |
4622 | len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).length (); |
4623 | if (len) |
4624 | /* Count LEN - 1 ANDs and LEN comparisons. */ |
4625 | (void) add_stmt_cost (costs: target_cost_data, count: len * 2 - 1, |
4626 | kind: scalar_stmt, where: vect_prologue); |
4627 | len = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).length (); |
4628 | if (len) |
4629 | { |
4630 | /* Count LEN - 1 ANDs and LEN comparisons. */ |
4631 | unsigned int nstmts = len * 2 - 1; |
4632 | /* +1 for each bias that needs adding. */ |
4633 | for (unsigned int i = 0; i < len; ++i) |
4634 | if (!LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)[i].unsigned_p) |
4635 | nstmts += 1; |
4636 | (void) add_stmt_cost (costs: target_cost_data, count: nstmts, |
4637 | kind: scalar_stmt, where: vect_prologue); |
4638 | } |
4639 | if (dump_enabled_p ()) |
4640 | dump_printf (MSG_NOTE, |
4641 | "cost model: Adding cost of checks for loop " |
4642 | "versioning aliasing.\n" ); |
4643 | } |
4644 | |
4645 | /* Requires loop versioning with niter checks. */ |
4646 | if (LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo)) |
4647 | { |
4648 | /* FIXME: Make cost depend on complexity of individual check. */ |
4649 | (void) add_stmt_cost (costs: target_cost_data, count: 1, kind: vector_stmt, |
4650 | NULL, NULL, NULL_TREE, misalign: 0, where: vect_prologue); |
4651 | if (dump_enabled_p ()) |
4652 | dump_printf (MSG_NOTE, |
4653 | "cost model: Adding cost of checks for loop " |
4654 | "versioning niters.\n" ); |
4655 | } |
4656 | |
4657 | if (LOOP_REQUIRES_VERSIONING (loop_vinfo)) |
4658 | (void) add_stmt_cost (costs: target_cost_data, count: 1, kind: cond_branch_taken, |
4659 | where: vect_prologue); |
4660 | |
4661 | /* Count statements in scalar loop. Using this as scalar cost for a single |
4662 | iteration for now. |
4663 | |
4664 | TODO: Add outer loop support. |
4665 | |
4666 | TODO: Consider assigning different costs to different scalar |
4667 | statements. */ |
4668 | |
4669 | scalar_single_iter_cost = loop_vinfo->scalar_costs->total_cost (); |
4670 | |
4671 | /* Add additional cost for the peeled instructions in prologue and epilogue |
4672 | loop. (For fully-masked loops there will be no peeling.) |
4673 | |
4674 | FORNOW: If we don't know the value of peel_iters for prologue or epilogue |
4675 | at compile-time - we assume it's vf/2 (the worst would be vf-1). |
4676 | |
4677 | TODO: Build an expression that represents peel_iters for prologue and |
4678 | epilogue to be used in a run-time test. */ |
4679 | |
4680 | bool prologue_need_br_taken_cost = false; |
4681 | bool prologue_need_br_not_taken_cost = false; |
4682 | |
4683 | /* Calculate peel_iters_prologue. */ |
4684 | if (vect_use_loop_mask_for_alignment_p (loop_vinfo)) |
4685 | peel_iters_prologue = 0; |
4686 | else if (npeel < 0) |
4687 | { |
4688 | peel_iters_prologue = assumed_vf / 2; |
4689 | if (dump_enabled_p ()) |
4690 | dump_printf (MSG_NOTE, "cost model: " |
4691 | "prologue peel iters set to vf/2.\n" ); |
4692 | |
4693 | /* If peeled iterations are unknown, count a taken branch and a not taken |
4694 | branch per peeled loop. Even if scalar loop iterations are known, |
4695 | vector iterations are not known since peeled prologue iterations are |
4696 | not known. Hence guards remain the same. */ |
4697 | prologue_need_br_taken_cost = true; |
4698 | prologue_need_br_not_taken_cost = true; |
4699 | } |
4700 | else |
4701 | { |
4702 | peel_iters_prologue = npeel; |
4703 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && peel_iters_prologue > 0) |
4704 | /* If peeled iterations are known but number of scalar loop |
4705 | iterations are unknown, count a taken branch per peeled loop. */ |
4706 | prologue_need_br_taken_cost = true; |
4707 | } |
4708 | |
4709 | bool epilogue_need_br_taken_cost = false; |
4710 | bool epilogue_need_br_not_taken_cost = false; |
4711 | |
4712 | /* Calculate peel_iters_epilogue. */ |
4713 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) |
4714 | /* We need to peel exactly one iteration for gaps. */ |
4715 | peel_iters_epilogue = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? 1 : 0; |
4716 | else if (npeel < 0) |
4717 | { |
4718 | /* If peeling for alignment is unknown, loop bound of main loop |
4719 | becomes unknown. */ |
4720 | peel_iters_epilogue = assumed_vf / 2; |
4721 | if (dump_enabled_p ()) |
4722 | dump_printf (MSG_NOTE, "cost model: " |
4723 | "epilogue peel iters set to vf/2 because " |
4724 | "peeling for alignment is unknown.\n" ); |
4725 | |
4726 | /* See the same reason above in peel_iters_prologue calculation. */ |
4727 | epilogue_need_br_taken_cost = true; |
4728 | epilogue_need_br_not_taken_cost = true; |
4729 | } |
4730 | else |
4731 | { |
4732 | peel_iters_epilogue = vect_get_peel_iters_epilogue (loop_vinfo, peel_iters_prologue: npeel); |
4733 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && peel_iters_epilogue > 0) |
4734 | /* If peeled iterations are known but number of scalar loop |
4735 | iterations are unknown, count a taken branch per peeled loop. */ |
4736 | epilogue_need_br_taken_cost = true; |
4737 | } |
4738 | |
4739 | stmt_info_for_cost *si; |
4740 | int j; |
4741 | /* Add costs associated with peel_iters_prologue. */ |
4742 | if (peel_iters_prologue) |
4743 | FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si) |
4744 | { |
4745 | (void) add_stmt_cost (costs: target_cost_data, |
4746 | count: si->count * peel_iters_prologue, kind: si->kind, |
4747 | stmt_info: si->stmt_info, node: si->node, vectype: si->vectype, |
4748 | misalign: si->misalign, where: vect_prologue); |
4749 | } |
4750 | |
4751 | /* Add costs associated with peel_iters_epilogue. */ |
4752 | if (peel_iters_epilogue) |
4753 | FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si) |
4754 | { |
4755 | (void) add_stmt_cost (costs: target_cost_data, |
4756 | count: si->count * peel_iters_epilogue, kind: si->kind, |
4757 | stmt_info: si->stmt_info, node: si->node, vectype: si->vectype, |
4758 | misalign: si->misalign, where: vect_epilogue); |
4759 | } |
4760 | |
4761 | /* Add possible cond_branch_taken/cond_branch_not_taken cost. */ |
4762 | |
4763 | if (prologue_need_br_taken_cost) |
4764 | (void) add_stmt_cost (costs: target_cost_data, count: 1, kind: cond_branch_taken, |
4765 | where: vect_prologue); |
4766 | |
4767 | if (prologue_need_br_not_taken_cost) |
4768 | (void) add_stmt_cost (costs: target_cost_data, count: 1, |
4769 | kind: cond_branch_not_taken, where: vect_prologue); |
4770 | |
4771 | if (epilogue_need_br_taken_cost) |
4772 | (void) add_stmt_cost (costs: target_cost_data, count: 1, kind: cond_branch_taken, |
4773 | where: vect_epilogue); |
4774 | |
4775 | if (epilogue_need_br_not_taken_cost) |
4776 | (void) add_stmt_cost (costs: target_cost_data, count: 1, |
4777 | kind: cond_branch_not_taken, where: vect_epilogue); |
4778 | |
4779 | /* Take care of special costs for rgroup controls of partial vectors. */ |
4780 | if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) |
4781 | && (LOOP_VINFO_PARTIAL_VECTORS_STYLE (loop_vinfo) |
4782 | == vect_partial_vectors_avx512)) |
4783 | { |
4784 | /* Calculate how many masks we need to generate. */ |
4785 | unsigned int num_masks = 0; |
4786 | bool need_saturation = false; |
4787 | for (auto rgm : LOOP_VINFO_MASKS (loop_vinfo).rgc_vec) |
4788 | if (rgm.type) |
4789 | { |
4790 | unsigned nvectors = rgm.factor; |
4791 | num_masks += nvectors; |
4792 | if (TYPE_PRECISION (TREE_TYPE (rgm.compare_type)) |
4793 | < TYPE_PRECISION (LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo))) |
4794 | need_saturation = true; |
4795 | } |
4796 | |
4797 | /* ??? The target isn't able to identify the costs below as |
4798 | producing masks so it cannot penaltize cases where we'd run |
4799 | out of mask registers for example. */ |
4800 | |
4801 | /* ??? We are also failing to account for smaller vector masks |
4802 | we generate by splitting larger masks in vect_get_loop_mask. */ |
4803 | |
4804 | /* In the worst case, we need to generate each mask in the prologue |
4805 | and in the loop body. We need one splat per group and one |
4806 | compare per mask. |
4807 | |
4808 | Sometimes the prologue mask will fold to a constant, |
4809 | so the actual prologue cost might be smaller. However, it's |
4810 | simpler and safer to use the worst-case cost; if this ends up |
4811 | being the tie-breaker between vectorizing or not, then it's |
4812 | probably better not to vectorize. */ |
4813 | (void) add_stmt_cost (costs: target_cost_data, |
4814 | count: num_masks |
4815 | + LOOP_VINFO_MASKS (loop_vinfo).rgc_vec.length (), |
4816 | kind: vector_stmt, NULL, NULL, NULL_TREE, misalign: 0, |
4817 | where: vect_prologue); |
4818 | (void) add_stmt_cost (costs: target_cost_data, |
4819 | count: num_masks |
4820 | + LOOP_VINFO_MASKS (loop_vinfo).rgc_vec.length (), |
4821 | kind: vector_stmt, NULL, NULL, NULL_TREE, misalign: 0, where: vect_body); |
4822 | |
4823 | /* When we need saturation we need it both in the prologue and |
4824 | the epilogue. */ |
4825 | if (need_saturation) |
4826 | { |
4827 | (void) add_stmt_cost (costs: target_cost_data, count: 1, kind: scalar_stmt, |
4828 | NULL, NULL, NULL_TREE, misalign: 0, where: vect_prologue); |
4829 | (void) add_stmt_cost (costs: target_cost_data, count: 1, kind: scalar_stmt, |
4830 | NULL, NULL, NULL_TREE, misalign: 0, where: vect_body); |
4831 | } |
4832 | } |
4833 | else if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) |
4834 | && (LOOP_VINFO_PARTIAL_VECTORS_STYLE (loop_vinfo) |
4835 | == vect_partial_vectors_while_ult)) |
4836 | { |
4837 | /* Calculate how many masks we need to generate. */ |
4838 | unsigned int num_masks = 0; |
4839 | rgroup_controls *rgm; |
4840 | unsigned int num_vectors_m1; |
4841 | FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo).rgc_vec, |
4842 | num_vectors_m1, rgm) |
4843 | if (rgm->type) |
4844 | num_masks += num_vectors_m1 + 1; |
4845 | gcc_assert (num_masks > 0); |
4846 | |
4847 | /* In the worst case, we need to generate each mask in the prologue |
4848 | and in the loop body. One of the loop body mask instructions |
4849 | replaces the comparison in the scalar loop, and since we don't |
4850 | count the scalar comparison against the scalar body, we shouldn't |
4851 | count that vector instruction against the vector body either. |
4852 | |
4853 | Sometimes we can use unpacks instead of generating prologue |
4854 | masks and sometimes the prologue mask will fold to a constant, |
4855 | so the actual prologue cost might be smaller. However, it's |
4856 | simpler and safer to use the worst-case cost; if this ends up |
4857 | being the tie-breaker between vectorizing or not, then it's |
4858 | probably better not to vectorize. */ |
4859 | (void) add_stmt_cost (costs: target_cost_data, count: num_masks, |
4860 | kind: vector_stmt, NULL, NULL, NULL_TREE, misalign: 0, |
4861 | where: vect_prologue); |
4862 | (void) add_stmt_cost (costs: target_cost_data, count: num_masks - 1, |
4863 | kind: vector_stmt, NULL, NULL, NULL_TREE, misalign: 0, |
4864 | where: vect_body); |
4865 | } |
4866 | else if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)) |
4867 | { |
4868 | /* Referring to the functions vect_set_loop_condition_partial_vectors |
4869 | and vect_set_loop_controls_directly, we need to generate each |
4870 | length in the prologue and in the loop body if required. Although |
4871 | there are some possible optimizations, we consider the worst case |
4872 | here. */ |
4873 | |
4874 | bool niters_known_p = LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo); |
4875 | signed char partial_load_store_bias |
4876 | = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); |
4877 | bool need_iterate_p |
4878 | = (!LOOP_VINFO_EPILOGUE_P (loop_vinfo) |
4879 | && !vect_known_niters_smaller_than_vf (loop_vinfo)); |
4880 | |
4881 | /* Calculate how many statements to be added. */ |
4882 | unsigned int prologue_stmts = 0; |
4883 | unsigned int body_stmts = 0; |
4884 | |
4885 | rgroup_controls *rgc; |
4886 | unsigned int num_vectors_m1; |
4887 | FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc) |
4888 | if (rgc->type) |
4889 | { |
4890 | /* May need one SHIFT for nitems_total computation. */ |
4891 | unsigned nitems = rgc->max_nscalars_per_iter * rgc->factor; |
4892 | if (nitems != 1 && !niters_known_p) |
4893 | prologue_stmts += 1; |
4894 | |
4895 | /* May need one MAX and one MINUS for wrap around. */ |
4896 | if (vect_rgroup_iv_might_wrap_p (loop_vinfo, rgc)) |
4897 | prologue_stmts += 2; |
4898 | |
4899 | /* Need one MAX and one MINUS for each batch limit excepting for |
4900 | the 1st one. */ |
4901 | prologue_stmts += num_vectors_m1 * 2; |
4902 | |
4903 | unsigned int num_vectors = num_vectors_m1 + 1; |
4904 | |
4905 | /* Need to set up lengths in prologue, only one MIN required |
4906 | for each since start index is zero. */ |
4907 | prologue_stmts += num_vectors; |
4908 | |
4909 | /* If we have a non-zero partial load bias, we need one PLUS |
4910 | to adjust the load length. */ |
4911 | if (partial_load_store_bias != 0) |
4912 | body_stmts += 1; |
4913 | |
4914 | unsigned int length_update_cost = 0; |
	      if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
		/* For the decrement-IV style, each length only needs a single
		   SELECT_VL or MIN at the start of the iteration to compute
		   the number of elements to be processed in the current
		   iteration.  */
		length_update_cost = 1;
	      else
		/* For the increment-IV style, each length may need two MINs
		   and one MINUS to update the lengths in the body for the
		   next iteration.  */
		length_update_cost = 3;
4924 | |
4925 | if (need_iterate_p) |
4926 | body_stmts += length_update_cost * num_vectors; |
4927 | } |
4928 | |
4929 | (void) add_stmt_cost (costs: target_cost_data, count: prologue_stmts, |
4930 | kind: scalar_stmt, where: vect_prologue); |
4931 | (void) add_stmt_cost (costs: target_cost_data, count: body_stmts, |
4932 | kind: scalar_stmt, where: vect_body); |
4933 | } |
4934 | |
4935 | /* FORNOW: The scalar outside cost is incremented in one of the |
4936 | following ways: |
4937 | |
4938 | 1. The vectorizer checks for alignment and aliasing and generates |
4939 | a condition that allows dynamic vectorization. A cost model |
4940 | check is ANDED with the versioning condition. Hence scalar code |
4941 | path now has the added cost of the versioning check. |
4942 | |
4943 | if (cost > th & versioning_check) |
4944 | jmp to vector code |
4945 | |
4946 | Hence run-time scalar is incremented by not-taken branch cost. |
4947 | |
4948 | 2. The vectorizer then checks if a prologue is required. If the |
4949 | cost model check was not done before during versioning, it has to |
4950 | be done before the prologue check. |
4951 | |
4952 | if (cost <= th) |
4953 | prologue = scalar_iters |
4954 | if (prologue == 0) |
4955 | jmp to vector code |
4956 | else |
4957 | execute prologue |
4958 | if (prologue == num_iters) |
4959 | go to exit |
4960 | |
4961 | Hence the run-time scalar cost is incremented by a taken branch, |
4962 | plus a not-taken branch, plus a taken branch cost. |
4963 | |
4964 | 3. The vectorizer then checks if an epilogue is required. If the |
4965 | cost model check was not done before during prologue check, it |
4966 | has to be done with the epilogue check. |
4967 | |
4968 | if (prologue == 0) |
4969 | jmp to vector code |
4970 | else |
4971 | execute prologue |
4972 | if (prologue == num_iters) |
4973 | go to exit |
4974 | vector code: |
4975 | if ((cost <= th) | (scalar_iters-prologue-epilogue == 0)) |
4976 | jmp to epilogue |
4977 | |
4978 | Hence the run-time scalar cost should be incremented by 2 taken |
4979 | branches. |
4980 | |
4981 | TODO: The back end may reorder the BBS's differently and reverse |
4982 | conditions/branch directions. Change the estimates below to |
4983 | something more reasonable. */ |
4984 | |
4985 | /* If the number of iterations is known and we do not do versioning, we can |
4986 | decide whether to vectorize at compile time. Hence the scalar version |
4987 | do not carry cost model guard costs. */ |
4988 | if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) |
4989 | || LOOP_REQUIRES_VERSIONING (loop_vinfo)) |
4990 | { |
4991 | /* Cost model check occurs at versioning. */ |
4992 | if (LOOP_REQUIRES_VERSIONING (loop_vinfo)) |
4993 | scalar_outside_cost += vect_get_stmt_cost (type_of_cost: cond_branch_not_taken); |
4994 | else |
4995 | { |
4996 | /* Cost model check occurs at prologue generation. */ |
4997 | if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0) |
4998 | scalar_outside_cost += 2 * vect_get_stmt_cost (type_of_cost: cond_branch_taken) |
4999 | + vect_get_stmt_cost (type_of_cost: cond_branch_not_taken); |
5000 | /* Cost model check occurs at epilogue generation. */ |
5001 | else |
5002 | scalar_outside_cost += 2 * vect_get_stmt_cost (type_of_cost: cond_branch_taken); |
5003 | } |
5004 | } |
5005 | |
5006 | /* Complete the target-specific cost calculations. */ |
5007 | finish_cost (costs: loop_vinfo->vector_costs, scalar_costs: loop_vinfo->scalar_costs, |
5008 | prologue_cost: &vec_prologue_cost, body_cost: &vec_inside_cost, epilogue_cost: &vec_epilogue_cost, |
5009 | suggested_unroll_factor); |
5010 | |
5011 | if (suggested_unroll_factor && *suggested_unroll_factor > 1 |
5012 | && LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo) != MAX_VECTORIZATION_FACTOR |
5013 | && !known_le (LOOP_VINFO_VECT_FACTOR (loop_vinfo) * |
5014 | *suggested_unroll_factor, |
5015 | LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo))) |
5016 | { |
5017 | if (dump_enabled_p ()) |
5018 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
5019 | "can't unroll as unrolled vectorization factor larger" |
5020 | " than maximum vectorization factor: " |
5021 | HOST_WIDE_INT_PRINT_UNSIGNED "\n" , |
5022 | LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo)); |
5023 | *suggested_unroll_factor = 1; |
5024 | } |
5025 | |
5026 | vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost); |
5027 | |
5028 | if (dump_enabled_p ()) |
5029 | { |
5030 | dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n" ); |
5031 | dump_printf (MSG_NOTE, " Vector inside of loop cost: %d\n" , |
5032 | vec_inside_cost); |
5033 | dump_printf (MSG_NOTE, " Vector prologue cost: %d\n" , |
5034 | vec_prologue_cost); |
5035 | dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n" , |
5036 | vec_epilogue_cost); |
5037 | dump_printf (MSG_NOTE, " Scalar iteration cost: %d\n" , |
5038 | scalar_single_iter_cost); |
5039 | dump_printf (MSG_NOTE, " Scalar outside cost: %d\n" , |
5040 | scalar_outside_cost); |
5041 | dump_printf (MSG_NOTE, " Vector outside cost: %d\n" , |
5042 | vec_outside_cost); |
5043 | dump_printf (MSG_NOTE, " prologue iterations: %d\n" , |
5044 | peel_iters_prologue); |
5045 | dump_printf (MSG_NOTE, " epilogue iterations: %d\n" , |
5046 | peel_iters_epilogue); |
5047 | } |
5048 | |
5049 | /* Calculate number of iterations required to make the vector version |
5050 | profitable, relative to the loop bodies only. The following condition |
5051 | must hold true: |
5052 | SIC * niters + SOC > VIC * ((niters - NPEEL) / VF) + VOC |
5053 | where |
5054 | SIC = scalar iteration cost, VIC = vector iteration cost, |
5055 | VOC = vector outside cost, VF = vectorization factor, |
5056 | NPEEL = prologue iterations + epilogue iterations, |
5057 | SOC = scalar outside cost for run time cost model check. */ |
5058 | |
5059 | int saving_per_viter = (scalar_single_iter_cost * assumed_vf |
5060 | - vec_inside_cost); |
5061 | if (saving_per_viter <= 0) |
5062 | { |
5063 | if (LOOP_VINFO_LOOP (loop_vinfo)->force_vectorize) |
5064 | warning_at (vect_location.get_location_t (), OPT_Wopenmp_simd, |
5065 | "vectorization did not happen for a simd loop" ); |
5066 | |
5067 | if (dump_enabled_p ()) |
5068 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
5069 | "cost model: the vector iteration cost = %d " |
5070 | "divided by the scalar iteration cost = %d " |
5071 | "is greater or equal to the vectorization factor = %d" |
5072 | ".\n" , |
5073 | vec_inside_cost, scalar_single_iter_cost, assumed_vf); |
5074 | *ret_min_profitable_niters = -1; |
5075 | *ret_min_profitable_estimate = -1; |
5076 | return; |
5077 | } |
5078 | |
5079 | /* ??? The "if" arm is written to handle all cases; see below for what |
5080 | we would do for !LOOP_VINFO_USING_PARTIAL_VECTORS_P. */ |
5081 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) |
5082 | { |
5083 | /* Rewriting the condition above in terms of the number of |
5084 | vector iterations (vniters) rather than the number of |
5085 | scalar iterations (niters) gives: |
5086 | |
5087 | SIC * (vniters * VF + NPEEL) + SOC > VIC * vniters + VOC |
5088 | |
5089 | <==> vniters * (SIC * VF - VIC) > VOC - SIC * NPEEL - SOC |
5090 | |
5091 | For integer N, X and Y when X > 0: |
5092 | |
5093 | N * X > Y <==> N >= (Y /[floor] X) + 1. */ |
5094 | int outside_overhead = (vec_outside_cost |
5095 | - scalar_single_iter_cost * peel_iters_prologue |
5096 | - scalar_single_iter_cost * peel_iters_epilogue |
5097 | - scalar_outside_cost); |
5098 | /* We're only interested in cases that require at least one |
5099 | vector iteration. */ |
5100 | int min_vec_niters = 1; |
5101 | if (outside_overhead > 0) |
5102 | min_vec_niters = outside_overhead / saving_per_viter + 1; |
5103 | |
5104 | if (dump_enabled_p ()) |
5105 | dump_printf (MSG_NOTE, " Minimum number of vector iterations: %d\n" , |
5106 | min_vec_niters); |
5107 | |
5108 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) |
5109 | { |
5110 | /* Now that we know the minimum number of vector iterations, |
5111 | find the minimum niters for which the scalar cost is larger: |
5112 | |
5113 | SIC * niters > VIC * vniters + VOC - SOC |
5114 | |
5115 | We know that the minimum niters is no more than |
5116 | vniters * VF + NPEEL, but it might be (and often is) less |
5117 | than that if a partial vector iteration is cheaper than the |
5118 | equivalent scalar code. */ |
5119 | int threshold = (vec_inside_cost * min_vec_niters |
5120 | + vec_outside_cost |
5121 | - scalar_outside_cost); |
5122 | if (threshold <= 0) |
5123 | min_profitable_iters = 1; |
5124 | else |
5125 | min_profitable_iters = threshold / scalar_single_iter_cost + 1; |
5126 | } |
5127 | else |
5128 | /* Convert the number of vector iterations into a number of |
5129 | scalar iterations. */ |
5130 | min_profitable_iters = (min_vec_niters * assumed_vf |
5131 | + peel_iters_prologue |
5132 | + peel_iters_epilogue); |
5133 | } |
5134 | else |
5135 | { |
5136 | min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) |
5137 | * assumed_vf |
5138 | - vec_inside_cost * peel_iters_prologue |
5139 | - vec_inside_cost * peel_iters_epilogue); |
5140 | if (min_profitable_iters <= 0) |
5141 | min_profitable_iters = 0; |
5142 | else |
5143 | { |
5144 | min_profitable_iters /= saving_per_viter; |
5145 | |
5146 | if ((scalar_single_iter_cost * assumed_vf * min_profitable_iters) |
5147 | <= (((int) vec_inside_cost * min_profitable_iters) |
5148 | + (((int) vec_outside_cost - scalar_outside_cost) |
5149 | * assumed_vf))) |
5150 | min_profitable_iters++; |
5151 | } |
5152 | } |
5153 | |
5154 | if (dump_enabled_p ()) |
5155 | dump_printf (MSG_NOTE, |
5156 | " Calculated minimum iters for profitability: %d\n" , |
5157 | min_profitable_iters); |
5158 | |
5159 | if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) |
5160 | && min_profitable_iters < (assumed_vf + peel_iters_prologue)) |
5161 | /* We want the vectorized loop to execute at least once. */ |
5162 | min_profitable_iters = assumed_vf + peel_iters_prologue; |
5163 | else if (min_profitable_iters < peel_iters_prologue) |
5164 | /* For LOOP_VINFO_USING_PARTIAL_VECTORS_P, we need to ensure the |
5165 | vectorized loop executes at least once. */ |
5166 | min_profitable_iters = peel_iters_prologue; |
5167 | |
5168 | if (dump_enabled_p ()) |
5169 | dump_printf_loc (MSG_NOTE, vect_location, |
5170 | " Runtime profitability threshold = %d\n" , |
5171 | min_profitable_iters); |
5172 | |
5173 | *ret_min_profitable_niters = min_profitable_iters; |
5174 | |
5175 | /* Calculate number of iterations required to make the vector version |
5176 | profitable, relative to the loop bodies only. |
5177 | |
5178 | Non-vectorized variant is SIC * niters and it must win over vector |
5179 | variant on the expected loop trip count. The following condition must hold true: |
5180 | SIC * niters > VIC * ((niters - NPEEL) / VF) + VOC + SOC */ |
5181 | |
5182 | if (vec_outside_cost <= 0) |
5183 | min_profitable_estimate = 0; |
5184 | /* ??? This "else if" arm is written to handle all cases; see below for |
5185 | what we would do for !LOOP_VINFO_USING_PARTIAL_VECTORS_P. */ |
5186 | else if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) |
5187 | { |
5188 | /* This is a repeat of the code above, but with + SOC rather |
5189 | than - SOC. */ |
5190 | int outside_overhead = (vec_outside_cost |
5191 | - scalar_single_iter_cost * peel_iters_prologue |
5192 | - scalar_single_iter_cost * peel_iters_epilogue |
5193 | + scalar_outside_cost); |
5194 | int min_vec_niters = 1; |
5195 | if (outside_overhead > 0) |
5196 | min_vec_niters = outside_overhead / saving_per_viter + 1; |
5197 | |
5198 | if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) |
5199 | { |
5200 | int threshold = (vec_inside_cost * min_vec_niters |
5201 | + vec_outside_cost |
5202 | + scalar_outside_cost); |
5203 | min_profitable_estimate = threshold / scalar_single_iter_cost + 1; |
5204 | } |
5205 | else |
5206 | min_profitable_estimate = (min_vec_niters * assumed_vf |
5207 | + peel_iters_prologue |
5208 | + peel_iters_epilogue); |
5209 | } |
5210 | else |
5211 | { |
5212 | min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) |
5213 | * assumed_vf |
5214 | - vec_inside_cost * peel_iters_prologue |
5215 | - vec_inside_cost * peel_iters_epilogue) |
5216 | / ((scalar_single_iter_cost * assumed_vf) |
5217 | - vec_inside_cost); |
5218 | } |
5219 | min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters); |
5220 | if (dump_enabled_p ()) |
5221 | dump_printf_loc (MSG_NOTE, vect_location, |
5222 | " Static estimate profitability threshold = %d\n" , |
5223 | min_profitable_estimate); |
5224 | |
5225 | *ret_min_profitable_estimate = min_profitable_estimate; |
5226 | } |
5227 | |
5228 | /* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET |
5229 | vector elements (not bits) for a vector with NELT elements. */ |
5230 | static void |
5231 | calc_vec_perm_mask_for_shift (unsigned int offset, unsigned int nelt, |
5232 | vec_perm_builder *sel) |
5233 | { |
5234 | /* The encoding is a single stepped pattern. Any wrap-around is handled |
5235 | by vec_perm_indices. */ |
5236 | sel->new_vector (full_nelts: nelt, npatterns: 1, nelts_per_pattern: 3); |
5237 | for (unsigned int i = 0; i < 3; i++) |
5238 | sel->quick_push (obj: i + offset); |
5239 | } |
5240 | |
5241 | /* Checks whether the target supports whole-vector shifts for vectors of mode |
5242 | MODE. This is the case if _either_ the platform handles vec_shr_optab, _or_ |
5243 | it supports vec_perm_const with masks for all necessary shift amounts. */ |
5244 | static bool |
5245 | have_whole_vector_shift (machine_mode mode) |
5246 | { |
5247 | if (optab_handler (op: vec_shr_optab, mode) != CODE_FOR_nothing) |
5248 | return true; |
5249 | |
5250 | /* Variable-length vectors should be handled via the optab. */ |
5251 | unsigned int nelt; |
5252 | if (!GET_MODE_NUNITS (mode).is_constant (const_value: &nelt)) |
5253 | return false; |
5254 | |
5255 | vec_perm_builder sel; |
5256 | vec_perm_indices indices; |
5257 | for (unsigned int i = nelt / 2; i >= 1; i /= 2) |
5258 | { |
5259 | calc_vec_perm_mask_for_shift (offset: i, nelt, sel: &sel); |
5260 | indices.new_vector (sel, 2, nelt); |
5261 | if (!can_vec_perm_const_p (mode, mode, indices, false)) |
5262 | return false; |
5263 | } |
5264 | return true; |
5265 | } |
5266 | |
5267 | /* Return true if (a) STMT_INFO is a DOT_PROD_EXPR reduction whose |
5268 | multiplication operands have differing signs and (b) we intend |
5269 | to emulate the operation using a series of signed DOT_PROD_EXPRs. |
5270 | See vect_emulate_mixed_dot_prod for the actual sequence used. */ |
5271 | |
5272 | static bool |
5273 | vect_is_emulated_mixed_dot_prod (loop_vec_info loop_vinfo, |
5274 | stmt_vec_info stmt_info) |
5275 | { |
5276 | gassign *assign = dyn_cast<gassign *> (p: stmt_info->stmt); |
5277 | if (!assign || gimple_assign_rhs_code (gs: assign) != DOT_PROD_EXPR) |
5278 | return false; |
5279 | |
5280 | tree rhs1 = gimple_assign_rhs1 (gs: assign); |
5281 | tree rhs2 = gimple_assign_rhs2 (gs: assign); |
5282 | if (TYPE_SIGN (TREE_TYPE (rhs1)) == TYPE_SIGN (TREE_TYPE (rhs2))) |
5283 | return false; |
5284 | |
5285 | stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); |
5286 | gcc_assert (reduc_info->is_reduc_info); |
5287 | return !directly_supported_p (DOT_PROD_EXPR, |
5288 | STMT_VINFO_REDUC_VECTYPE_IN (reduc_info), |
5289 | optab_vector_mixed_sign); |
5290 | } |
5291 | |
5292 | /* TODO: Close dependency between vect_model_*_cost and vectorizable_* |
5293 | functions. Design better to avoid maintenance issues. */ |
5294 | |
/* Function vect_model_reduction_cost.

   Models cost for a reduction operation, including the vector ops
   generated within the strip-mine loop in some cases, the initial
   definition before the loop, and the epilogue code that must be generated.

   LOOP_VINFO describes the loop being vectorized (may be null when
   costing outside a loop context); STMT_INFO is the reduction statement;
   REDUC_FN is the internal reduction function (IFN_LAST if none);
   REDUCTION_TYPE selects the reduction scheme; NCOPIES is the number of
   vector copies needed; costs are recorded into COST_VEC.  */

static void
vect_model_reduction_cost (loop_vec_info loop_vinfo,
			   stmt_vec_info stmt_info, internal_fn reduc_fn,
			   vect_reduction_type reduction_type,
			   int ncopies, stmt_vector_for_cost *cost_vec)
{
  int prologue_cost = 0, epilogue_cost = 0, inside_cost = 0;
  tree vectype;
  machine_mode mode;
  class loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Condition reductions generate two reductions in the loop.  */
  if (reduction_type == COND_REDUCTION)
    ncopies *= 2;

  vectype = STMT_VINFO_VECTYPE (stmt_info);
  mode = TYPE_MODE (vectype);
  /* Cost against the original scalar statement, not a pattern stmt.  */
  stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);

  gimple_match_op op;
  if (!gimple_extract_op (orig_stmt_info->stmt, &op))
    gcc_unreachable ();

  bool emulated_mixed_dot_prod
    = vect_is_emulated_mixed_dot_prod (loop_vinfo, stmt_info);
  if (reduction_type == EXTRACT_LAST_REDUCTION)
    /* No extra instructions are needed in the prologue.  The loop body
       operations are costed in vectorizable_condition.  */
    inside_cost = 0;
  else if (reduction_type == FOLD_LEFT_REDUCTION)
    {
      /* No extra instructions needed in the prologue.  */
      prologue_cost = 0;

      if (reduc_fn != IFN_LAST)
	/* Count one reduction-like operation per vector.  */
	inside_cost = record_stmt_cost (body_cost_vec: cost_vec, count: ncopies, kind: vec_to_scalar,
					stmt_info, misalign: 0, where: vect_body);
      else
	{
	  /* No direct fold-left support: use NELEMENTS extracts and
	     NELEMENTS scalar ops.  */
	  unsigned int nelements = ncopies * vect_nunits_for_cost (vec_type: vectype);
	  inside_cost = record_stmt_cost (body_cost_vec: cost_vec, count: nelements,
					  kind: vec_to_scalar, stmt_info, misalign: 0,
					  where: vect_body);
	  inside_cost += record_stmt_cost (body_cost_vec: cost_vec, count: nelements,
					   kind: scalar_stmt, stmt_info, misalign: 0,
					   where: vect_body);
	}
    }
  else
    {
      /* Add in the cost of the initial definitions.  */
      int prologue_stmts;
      if (reduction_type == COND_REDUCTION)
	/* For cond reductions we have four vectors: initial index, step,
	   initial result of the data reduction, initial value of the index
	   reduction.  */
	prologue_stmts = 4;
      else if (emulated_mixed_dot_prod)
	/* We need the initial reduction value and two invariants:
	   one that contains the minimum signed value and one that
	   contains half of its negative.  */
	prologue_stmts = 3;
      else
	/* Just the initial reduction value.  */
	prologue_stmts = 1;
      prologue_cost += record_stmt_cost (body_cost_vec: cost_vec, count: prologue_stmts,
					 kind: scalar_to_vec, stmt_info, misalign: 0,
					 where: vect_prologue);
    }

  /* Determine cost of epilogue code.

     We have a reduction operator that will reduce the vector in one statement.
     Also requires scalar extract.  */

  /* Reductions nested inside an outer loop need no epilogue code here.  */
  if (!loop || !nested_in_vect_loop_p (loop, stmt_info: orig_stmt_info))
    {
      if (reduc_fn != IFN_LAST)
	{
	  if (reduction_type == COND_REDUCTION)
	    {
	      /* An EQ stmt and a COND_EXPR stmt.  */
	      epilogue_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 2,
						 kind: vector_stmt, stmt_info, misalign: 0,
						 where: vect_epilogue);
	      /* Reduction of the max index and a reduction of the found
		 values.  */
	      epilogue_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 2,
						 kind: vec_to_scalar, stmt_info, misalign: 0,
						 where: vect_epilogue);
	      /* A broadcast of the max value.  */
	      epilogue_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1,
						 kind: scalar_to_vec, stmt_info, misalign: 0,
						 where: vect_epilogue);
	    }
	  else
	    {
	      /* One reduction operation plus the scalar extract.  */
	      epilogue_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: vector_stmt,
						 stmt_info, misalign: 0, where: vect_epilogue);
	      epilogue_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1,
						 kind: vec_to_scalar, stmt_info, misalign: 0,
						 where: vect_epilogue);
	    }
	}
      else if (reduction_type == COND_REDUCTION)
	{
	  /* No direct support: reduce element by element in scalar code.  */
	  unsigned estimated_nunits = vect_nunits_for_cost (vec_type: vectype);
	  /* Extraction of scalar elements.  */
	  epilogue_cost += record_stmt_cost (body_cost_vec: cost_vec,
					     count: 2 * estimated_nunits,
					     kind: vec_to_scalar, stmt_info, misalign: 0,
					     where: vect_epilogue);
	  /* Scalar max reductions via COND_EXPR / MAX_EXPR.  */
	  epilogue_cost += record_stmt_cost (body_cost_vec: cost_vec,
					     count: 2 * estimated_nunits - 3,
					     kind: scalar_stmt, stmt_info, misalign: 0,
					     where: vect_epilogue);
	}
      else if (reduction_type == EXTRACT_LAST_REDUCTION
	       || reduction_type == FOLD_LEFT_REDUCTION)
	/* No extra instructions needed in the epilogue.  */
	;
      else
	{
	  int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
	  tree bitsize = TYPE_SIZE (op.type);
	  int element_bitsize = tree_to_uhwi (bitsize);
	  int nelements = vec_size_in_bits / element_bitsize;

	  /* COND_EXPR reductions are costed as if reduced via MAX_EXPR.  */
	  if (op.code == COND_EXPR)
	    op.code = MAX_EXPR;

	  /* We have a whole vector shift available.  */
	  if (VECTOR_MODE_P (mode)
	      && directly_supported_p (op.code, vectype)
	      && have_whole_vector_shift (mode))
	    {
	      /* Final reduction via vector shifts and the reduction operator.
		 Also requires scalar extract.  */
	      epilogue_cost += record_stmt_cost (body_cost_vec: cost_vec,
						 count: exact_log2 (x: nelements) * 2,
						 kind: vector_stmt, stmt_info, misalign: 0,
						 where: vect_epilogue);
	      epilogue_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1,
						 kind: vec_to_scalar, stmt_info, misalign: 0,
						 where: vect_epilogue);
	    }
	  else
	    /* Use extracts and reduction op for final reduction.  For N
	       elements, we have N extracts and N-1 reduction ops.  */
	    epilogue_cost += record_stmt_cost (body_cost_vec: cost_vec,
					       count: nelements + nelements - 1,
					       kind: vector_stmt, stmt_info, misalign: 0,
					       where: vect_epilogue);
	}
    }

  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
		 "vect_model_reduction_cost: inside_cost = %d, "
		 "prologue_cost = %d, epilogue_cost = %d .\n" , inside_cost,
		 prologue_cost, epilogue_cost);
}
5468 | |
5469 | /* SEQ is a sequence of instructions that initialize the reduction |
5470 | described by REDUC_INFO. Emit them in the appropriate place. */ |
5471 | |
5472 | static void |
5473 | vect_emit_reduction_init_stmts (loop_vec_info loop_vinfo, |
5474 | stmt_vec_info reduc_info, gimple *seq) |
5475 | { |
5476 | if (reduc_info->reused_accumulator) |
5477 | { |
5478 | /* When reusing an accumulator from the main loop, we only need |
5479 | initialization instructions if the main loop can be skipped. |
5480 | In that case, emit the initialization instructions at the end |
5481 | of the guard block that does the skip. */ |
5482 | edge skip_edge = loop_vinfo->skip_main_loop_edge; |
5483 | gcc_assert (skip_edge); |
5484 | gimple_stmt_iterator gsi = gsi_last_bb (bb: skip_edge->src); |
5485 | gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); |
5486 | } |
5487 | else |
5488 | { |
5489 | /* The normal case: emit the initialization instructions on the |
5490 | preheader edge. */ |
5491 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
5492 | gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), seq); |
5493 | } |
5494 | } |
5495 | |
/* Function get_initial_def_for_reduction

   Input:
   REDUC_INFO - the info_for_reduction
   INIT_VAL - the initial value of the reduction variable
   NEUTRAL_OP - a value that has no effect on the reduction, as per
		neutral_op_for_reduction

   Output:
   Return a vector variable, initialized according to the operation that
	STMT_VINFO performs. This vector will be used as the initial value
	of the vector of partial results.

   The value we need is a vector in which element 0 has value INIT_VAL
   and every other element has value NEUTRAL_OP.  Any statements needed
   to build this vector are emitted via vect_emit_reduction_init_stmts.  */

static tree
get_initial_def_for_reduction (loop_vec_info loop_vinfo,
			       stmt_vec_info reduc_info,
			       tree init_val, tree neutral_op)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree scalar_type = TREE_TYPE (init_val);
  tree vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type);
  tree init_def;
  gimple_seq stmts = NULL;

  gcc_assert (vectype);

  /* Reductions are only supported on pointer, integer and float
     scalar types.  */
  gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
	      || SCALAR_FLOAT_TYPE_P (scalar_type));

  /* The reduction statement must belong to the loop being vectorized
     (or to a loop nested inside it).  */
  gcc_assert (nested_in_vect_loop_p (loop, reduc_info)
	      || loop == (gimple_bb (reduc_info->stmt))->loop_father);

  if (operand_equal_p (init_val, neutral_op))
    {
      /* If both elements are equal then the vector described above is
	 just a splat.  */
      neutral_op = gimple_convert (seq: &stmts, TREE_TYPE (vectype), op: neutral_op);
      init_def = gimple_build_vector_from_val (seq: &stmts, type: vectype, op: neutral_op);
    }
  else
    {
      /* Convert both values to the vector element type first.  */
      neutral_op = gimple_convert (seq: &stmts, TREE_TYPE (vectype), op: neutral_op);
      init_val = gimple_convert (seq: &stmts, TREE_TYPE (vectype), op: init_val);
      if (!TYPE_VECTOR_SUBPARTS (node: vectype).is_constant ())
	{
	  /* Construct a splat of NEUTRAL_OP and insert INIT_VAL into
	     element 0.  VEC_SHL_INSERT shifts the splat up by one lane
	     and inserts INIT_VAL at the low end, which works for
	     variable-length vectors.  */
	  init_def = gimple_build_vector_from_val (seq: &stmts, type: vectype,
						   op: neutral_op);
	  init_def = gimple_build (seq: &stmts, fn: CFN_VEC_SHL_INSERT,
				   type: vectype, args: init_def, args: init_val);
	}
      else
	{
	  /* Build {INIT_VAL, NEUTRAL_OP, NEUTRAL_OP, ...}.  A two-element
	     stepped pattern is enough; the builder repeats NEUTRAL_OP to
	     fill the remaining lanes.  */
	  tree_vector_builder elts (vectype, 1, 2);
	  elts.quick_push (obj: init_val);
	  elts.quick_push (obj: neutral_op);
	  init_def = gimple_build_vector (seq: &stmts, builder: &elts);
	}
    }

  /* Emit any statements the construction needed in the right place.  */
  if (stmts)
    vect_emit_reduction_init_stmts (loop_vinfo, reduc_info, seq: stmts);
  return init_def;
}
5565 | |
5566 | /* Get at the initial defs for the reduction PHIs for REDUC_INFO, |
5567 | which performs a reduction involving GROUP_SIZE scalar statements. |
5568 | NUMBER_OF_VECTORS is the number of vector defs to create. If NEUTRAL_OP |
5569 | is nonnull, introducing extra elements of that value will not change the |
5570 | result. */ |
5571 | |
static void
get_initial_defs_for_reduction (loop_vec_info loop_vinfo,
				stmt_vec_info reduc_info,
				vec<tree> *vec_oprnds,
				unsigned int number_of_vectors,
				unsigned int group_size, tree neutral_op)
{
  vec<tree> &initial_values = reduc_info->reduc_initial_values;
  unsigned HOST_WIDE_INT nunits;
  unsigned j, number_of_places_left_in_vector;
  tree vector_type = STMT_VINFO_VECTYPE (reduc_info);
  unsigned int i;

  /* Without a neutral element we need one initial value per lane.  */
  gcc_assert (group_size == initial_values.length () || neutral_op);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors. It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector). The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
     will be 2).

     If REDUC_GROUP_SIZE > NUNITS, the scalars will be split into several
     vectors containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}. */

  /* For variable-length vectors treat GROUP_SIZE elements as one unit of
     work; such vectors are built via the VEC_SHL_INSERT or
     duplicate_and_interleave paths below rather than directly from ELTS.  */
  if (!TYPE_VECTOR_SUBPARTS (node: vector_type).is_constant (const_value: &nunits))
    nunits = group_size;

  /* Fill ELTS one scalar at a time and emit a vector def whenever it
     becomes full.  */
  number_of_places_left_in_vector = nunits;
  bool constant_p = true;
  tree_vector_builder elts (vector_type, nunits, 1);
  elts.quick_grow (len: nunits);
  gimple_seq ctor_seq = NULL;
  for (j = 0; j < nunits * number_of_vectors; ++j)
    {
      tree op;
      i = j % group_size;

      /* Get the def before the loop. In reduction chain we have only
	 one initial value. Else we have as many as PHIs in the group. */
      if (i >= initial_values.length () || (j > i && neutral_op))
	op = neutral_op;
      else
	op = initial_values[i];

      /* Create 'vect_ = {op0,op1,...,opn}'. */
      number_of_places_left_in_vector--;
      elts[nunits - number_of_places_left_in_vector - 1] = op;
      if (!CONSTANT_CLASS_P (op))
	constant_p = false;

      if (number_of_places_left_in_vector == 0)
	{
	  /* ELTS is full: emit one vector def using whichever of the
	     three strategies below applies.  */
	  tree init;
	  if (constant_p && !neutral_op
	      ? multiple_p (a: TYPE_VECTOR_SUBPARTS (node: vector_type), b: nunits)
	      : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
	    /* Build the vector directly from ELTS. */
	    init = gimple_build_vector (seq: &ctor_seq, builder: &elts);
	  else if (neutral_op)
	    {
	      /* Build a vector of the neutral value and shift the
		 other elements into place. */
	      init = gimple_build_vector_from_val (seq: &ctor_seq, type: vector_type,
						   op: neutral_op);
	      int k = nunits;
	      /* Trailing neutral elements are already in place after the
		 splat; skip them.  */
	      while (k > 0 && elts[k - 1] == neutral_op)
		k -= 1;
	      while (k > 0)
		{
		  k -= 1;
		  init = gimple_build (seq: &ctor_seq, fn: CFN_VEC_SHL_INSERT,
				       type: vector_type, args: init, args: elts[k]);
		}
	    }
	  else
	    {
	      /* First time round, duplicate ELTS to fill the
		 required number of vectors. */
	      duplicate_and_interleave (loop_vinfo, &ctor_seq, vector_type,
					elts, number_of_vectors, *vec_oprnds);
	      break;
	    }
	  vec_oprnds->quick_push (obj: init);

	  /* Start collecting elements for the next vector.  */
	  number_of_places_left_in_vector = nunits;
	  elts.new_vector (type: vector_type, npatterns: nunits, nelts_per_pattern: 1);
	  elts.quick_grow (len: nunits);
	  constant_p = true;
	}
    }
  /* Emit any statements accumulated while building the initializers.  */
  if (ctor_seq != NULL)
    vect_emit_reduction_init_stmts (loop_vinfo, reduc_info, seq: ctor_seq);
}
5672 | |
5673 | /* For a statement STMT_INFO taking part in a reduction operation return |
5674 | the stmt_vec_info the meta information is stored on. */ |
5675 | |
5676 | stmt_vec_info |
5677 | info_for_reduction (vec_info *vinfo, stmt_vec_info stmt_info) |
5678 | { |
5679 | stmt_info = vect_orig_stmt (stmt_info); |
5680 | gcc_assert (STMT_VINFO_REDUC_DEF (stmt_info)); |
5681 | if (!is_a <gphi *> (p: stmt_info->stmt) |
5682 | || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))) |
5683 | stmt_info = STMT_VINFO_REDUC_DEF (stmt_info); |
5684 | gphi *phi = as_a <gphi *> (p: stmt_info->stmt); |
5685 | if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) |
5686 | { |
5687 | if (gimple_phi_num_args (gs: phi) == 1) |
5688 | stmt_info = STMT_VINFO_REDUC_DEF (stmt_info); |
5689 | } |
5690 | else if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) |
5691 | { |
5692 | stmt_vec_info info = vinfo->lookup_def (vect_phi_initial_value (phi)); |
5693 | if (info && STMT_VINFO_DEF_TYPE (info) == vect_double_reduction_def) |
5694 | stmt_info = info; |
5695 | } |
5696 | return stmt_info; |
5697 | } |
5698 | |
5699 | /* See if LOOP_VINFO is an epilogue loop whose main loop had a reduction that |
5700 | REDUC_INFO can build on. Adjust REDUC_INFO and return true if so, otherwise |
5701 | return false. */ |
5702 | |
static bool
vect_find_reusable_accumulator (loop_vec_info loop_vinfo,
				stmt_vec_info reduc_info)
{
  loop_vec_info main_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
  if (!main_loop_vinfo)
    /* LOOP_VINFO is not an epilogue loop.  */
    return false;

  /* Accumulator reuse is only implemented for plain tree-code
     reductions.  */
  if (STMT_VINFO_REDUC_TYPE (reduc_info) != TREE_CODE_REDUCTION)
    return false;

  /* MAIN_LOOP_RESULTS[I] collects the value the main loop contributes to
     reduction PHI I; INITIAL_VALUES[I] the value used when the main loop
     is skipped.  */
  unsigned int num_phis = reduc_info->reduc_initial_values.length ();
  auto_vec<tree, 16> main_loop_results (num_phis);
  auto_vec<tree, 16> initial_values (num_phis);
  if (edge main_loop_edge = loop_vinfo->main_loop_edge)
    {
      /* The epilogue loop can be entered either from the main loop or
	 from an earlier guard block.  */
      edge skip_edge = loop_vinfo->skip_main_loop_edge;
      for (tree incoming_value : reduc_info->reduc_initial_values)
	{
	  /* Look for:

	       INCOMING_VALUE = phi<MAIN_LOOP_RESULT(main loop),
				    INITIAL_VALUE(guard block)>.  */
	  gcc_assert (TREE_CODE (incoming_value) == SSA_NAME);

	  gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (incoming_value));
	  gcc_assert (gimple_bb (phi) == main_loop_edge->dest);

	  tree from_main_loop = PHI_ARG_DEF_FROM_EDGE (phi, main_loop_edge);
	  tree from_skip = PHI_ARG_DEF_FROM_EDGE (phi, skip_edge);

	  main_loop_results.quick_push (obj: from_main_loop);
	  initial_values.quick_push (obj: from_skip);
	}
    }
  else
    /* The main loop dominates the epilogue loop.  */
    main_loop_results.splice (src: reduc_info->reduc_initial_values);

  /* See if the main loop has the kind of accumulator we need.
     The lookup is keyed on the first result; all scalar results must
     then match element-wise.  */
  vect_reusable_accumulator *accumulator
    = main_loop_vinfo->reusable_accumulators.get (k: main_loop_results[0]);
  if (!accumulator
      || num_phis != accumulator->reduc_info->reduc_scalar_results.length ()
      || !std::equal (first1: main_loop_results.begin (), last1: main_loop_results.end (),
		      first2: accumulator->reduc_info->reduc_scalar_results.begin ()))
    return false;

  /* Handle the case where we can reduce wider vectors to narrower ones.
     M itself is not needed beyond proving that the constant multiple
     exists.  */
  tree vectype = STMT_VINFO_VECTYPE (reduc_info);
  tree old_vectype = TREE_TYPE (accumulator->reduc_input);
  unsigned HOST_WIDE_INT m;
  if (!constant_multiple_p (a: TYPE_VECTOR_SUBPARTS (node: old_vectype),
			    b: TYPE_VECTOR_SUBPARTS (node: vectype), multiple: &m))
    return false;
  /* Check the intermediate vector types and operations are available.
     Each step halves the element count until VECTYPE's is reached.  */
  tree prev_vectype = old_vectype;
  poly_uint64 intermediate_nunits = TYPE_VECTOR_SUBPARTS (node: old_vectype);
  while (known_gt (intermediate_nunits, TYPE_VECTOR_SUBPARTS (vectype)))
    {
      intermediate_nunits = exact_div (a: intermediate_nunits, b: 2);
      tree intermediate_vectype = get_related_vectype_for_scalar_type
	(TYPE_MODE (vectype), TREE_TYPE (vectype), intermediate_nunits);
      if (!intermediate_vectype
	  || !directly_supported_p (STMT_VINFO_REDUC_CODE (reduc_info),
				    intermediate_vectype)
	  || !can_vec_extract (TYPE_MODE (prev_vectype),
			       TYPE_MODE (intermediate_vectype)))
	return false;
      prev_vectype = intermediate_vectype;
    }

  /* Non-SLP reductions might apply an adjustment after the reduction
     operation, in order to simplify the initialization of the accumulator.
     If the epilogue loop carries on from where the main loop left off,
     it should apply the same adjustment to the final reduction result.

     If the epilogue loop can also be entered directly (rather than via
     the main loop), we need to be able to handle that case in the same way,
     with the same adjustment. (In principle we could add a PHI node
     to select the correct adjustment, but in practice that shouldn't be
     necessary.) */
  tree main_adjustment
    = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (accumulator->reduc_info);
  if (loop_vinfo->main_loop_edge && main_adjustment)
    {
      gcc_assert (num_phis == 1);
      tree initial_value = initial_values[0];
      /* Check that we can use INITIAL_VALUE as the adjustment and
	 initialize the accumulator with a neutral value instead. */
      if (!operand_equal_p (initial_value, main_adjustment))
	return false;
      code_helper code = STMT_VINFO_REDUC_CODE (reduc_info);
      initial_values[0] = neutral_op_for_reduction (TREE_TYPE (initial_value),
						    code, initial_value);
    }
  /* Commit: carry the main loop's adjustment over and restart the
     epilogue's reduction from the (possibly neutralized) initial
     values.  */
  STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info) = main_adjustment;
  reduc_info->reduc_initial_values.truncate (size: 0);
  reduc_info->reduc_initial_values.splice (src: initial_values);
  reduc_info->reused_accumulator = accumulator;
  return true;
}
5807 | |
5808 | /* Reduce the vector VEC_DEF down to VECTYPE with reduction operation |
5809 | CODE emitting stmts before GSI. Returns a vector def of VECTYPE. */ |
5810 | |
static tree
vect_create_partial_epilog (tree vec_def, tree vectype, code_helper code,
			    gimple_seq *seq)
{
  /* Both vector lengths must be compile-time constants here.  */
  unsigned nunits = TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec_def)).to_constant ();
  unsigned nunits1 = TYPE_VECTOR_SUBPARTS (node: vectype).to_constant ();
  tree stype = TREE_TYPE (vectype);
  tree new_temp = vec_def;
  /* Repeatedly split the current vector into two halves and combine
     them with CODE until we reach VECTYPE's length.  */
  while (nunits > nunits1)
    {
      nunits /= 2;
      tree vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype),
							   stype, nunits);
      /* Size in bits of one half; also the offset of the upper half.  */
      unsigned int bitsize = tree_to_uhwi (TYPE_SIZE (vectype1));

      /* The target has to make sure we support lowpart/highpart
	 extraction, either via direct vector extract or through
	 an integer mode punning. */
      tree dst1, dst2;
      gimple *epilog_stmt;
      if (convert_optab_handler (op: vec_extract_optab,
				 TYPE_MODE (TREE_TYPE (new_temp)),
				 TYPE_MODE (vectype1))
	  != CODE_FOR_nothing)
	{
	  /* Extract sub-vectors directly once vec_extract becomes
	     a conversion optab. */
	  dst1 = make_ssa_name (var: vectype1);
	  epilog_stmt
	    = gimple_build_assign (dst1, BIT_FIELD_REF,
				   build3 (BIT_FIELD_REF, vectype1,
					   new_temp, TYPE_SIZE (vectype1),
					   bitsize_int (0)));
	  gimple_seq_add_stmt_without_update (seq, epilog_stmt);
	  dst2 = make_ssa_name (var: vectype1);
	  epilog_stmt
	    = gimple_build_assign (dst2, BIT_FIELD_REF,
				   build3 (BIT_FIELD_REF, vectype1,
					   new_temp, TYPE_SIZE (vectype1),
					   bitsize_int (bitsize)));
	  gimple_seq_add_stmt_without_update (seq, epilog_stmt);
	}
      else
	{
	  /* Extract via punning to appropriately sized integer mode
	     vector.  View the input as a two-element vector of
	     half-sized integers, extract each element, and pun each
	     back to the half-sized vector type. */
	  tree eltype = build_nonstandard_integer_type (bitsize, 1);
	  tree etype = build_vector_type (eltype, 2);
	  gcc_assert (convert_optab_handler (vec_extract_optab,
					     TYPE_MODE (etype),
					     TYPE_MODE (eltype))
		      != CODE_FOR_nothing);
	  tree tem = make_ssa_name (var: etype);
	  epilog_stmt = gimple_build_assign (tem, VIEW_CONVERT_EXPR,
					     build1 (VIEW_CONVERT_EXPR,
						     etype, new_temp));
	  gimple_seq_add_stmt_without_update (seq, epilog_stmt);
	  new_temp = tem;
	  /* Lower half.  */
	  tem = make_ssa_name (var: eltype);
	  epilog_stmt
	    = gimple_build_assign (tem, BIT_FIELD_REF,
				   build3 (BIT_FIELD_REF, eltype,
					   new_temp, TYPE_SIZE (eltype),
					   bitsize_int (0)));
	  gimple_seq_add_stmt_without_update (seq, epilog_stmt);
	  dst1 = make_ssa_name (var: vectype1);
	  epilog_stmt = gimple_build_assign (dst1, VIEW_CONVERT_EXPR,
					     build1 (VIEW_CONVERT_EXPR,
						     vectype1, tem));
	  gimple_seq_add_stmt_without_update (seq, epilog_stmt);
	  /* Upper half.  */
	  tem = make_ssa_name (var: eltype);
	  epilog_stmt
	    = gimple_build_assign (tem, BIT_FIELD_REF,
				   build3 (BIT_FIELD_REF, eltype,
					   new_temp, TYPE_SIZE (eltype),
					   bitsize_int (bitsize)));
	  gimple_seq_add_stmt_without_update (seq, epilog_stmt);
	  dst2 = make_ssa_name (var: vectype1);
	  epilog_stmt = gimple_build_assign (dst2, VIEW_CONVERT_EXPR,
					     build1 (VIEW_CONVERT_EXPR,
						     vectype1, tem));
	  gimple_seq_add_stmt_without_update (seq, epilog_stmt);
	}

      /* Combine the two halves with the reduction operation.  */
      new_temp = gimple_build (seq, code, type: vectype1, ops: dst1, ops: dst2);
    }

  return new_temp;
}
5900 | |
5901 | /* Function vect_create_epilog_for_reduction |
5902 | |
5903 | Create code at the loop-epilog to finalize the result of a reduction |
5904 | computation. |
5905 | |
5906 | STMT_INFO is the scalar reduction stmt that is being vectorized. |
5907 | SLP_NODE is an SLP node containing a group of reduction statements. The |
5908 | first one in this group is STMT_INFO. |
5909 | SLP_NODE_INSTANCE is the SLP node instance containing SLP_NODE |
5910 | REDUC_INDEX says which rhs operand of the STMT_INFO is the reduction phi |
5911 | (counting from 0) |
5912 | LOOP_EXIT is the edge to update in the merge block. In the case of a single |
5913 | exit this edge is always the main loop exit. |
5914 | |
5915 | This function: |
5916 | 1. Completes the reduction def-use cycles. |
5917 | 2. "Reduces" each vector of partial results VECT_DEFS into a single result, |
5918 | by calling the function specified by REDUC_FN if available, or by |
5919 | other means (whole-vector shifts or a scalar loop). |
5920 | The function also creates a new phi node at the loop exit to preserve |
5921 | loop-closed form, as illustrated below. |
5922 | |
5923 | The flow at the entry to this function: |
5924 | |
5925 | loop: |
5926 | vec_def = phi <vec_init, null> # REDUCTION_PHI |
5927 | VECT_DEF = vector_stmt # vectorized form of STMT_INFO |
5928 | s_loop = scalar_stmt # (scalar) STMT_INFO |
5929 | loop_exit: |
5930 | s_out0 = phi <s_loop> # (scalar) EXIT_PHI |
5931 | use <s_out0> |
5932 | use <s_out0> |
5933 | |
5934 | The above is transformed by this function into: |
5935 | |
5936 | loop: |
5937 | vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI |
5938 | VECT_DEF = vector_stmt # vectorized form of STMT_INFO |
5939 | s_loop = scalar_stmt # (scalar) STMT_INFO |
5940 | loop_exit: |
5941 | s_out0 = phi <s_loop> # (scalar) EXIT_PHI |
5942 | v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI |
5943 | v_out2 = reduce <v_out1> |
5944 | s_out3 = extract_field <v_out2, 0> |
5945 | s_out4 = adjust_result <s_out3> |
5946 | use <s_out4> |
5947 | use <s_out4> |
5948 | */ |
5949 | |
5950 | static void |
5951 | vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, |
5952 | stmt_vec_info stmt_info, |
5953 | slp_tree slp_node, |
5954 | slp_instance slp_node_instance, |
5955 | edge loop_exit) |
5956 | { |
5957 | stmt_vec_info reduc_info = info_for_reduction (vinfo: loop_vinfo, stmt_info); |
5958 | gcc_assert (reduc_info->is_reduc_info); |
5959 | /* For double reductions we need to get at the inner loop reduction |
5960 | stmt which has the meta info attached. Our stmt_info is that of the |
5961 | loop-closed PHI of the inner loop which we remember as |
5962 | def for the reduction PHI generation. */ |
5963 | bool double_reduc = false; |
5964 | stmt_vec_info rdef_info = stmt_info; |
5965 | if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) |
5966 | { |
5967 | gcc_assert (!slp_node); |
5968 | double_reduc = true; |
5969 | stmt_info = loop_vinfo->lookup_def (gimple_phi_arg_def |
5970 | (gs: stmt_info->stmt, index: 0)); |
5971 | stmt_info = vect_stmt_to_vectorize (stmt_info); |
5972 | } |
5973 | code_helper code = STMT_VINFO_REDUC_CODE (reduc_info); |
5974 | internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info); |
5975 | tree vectype; |
5976 | machine_mode mode; |
5977 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL; |
5978 | basic_block exit_bb; |
5979 | tree scalar_dest; |
5980 | tree scalar_type; |
5981 | gimple *new_phi = NULL, *phi = NULL; |
5982 | gimple_stmt_iterator exit_gsi; |
5983 | tree new_temp = NULL_TREE, new_name, new_scalar_dest; |
5984 | gimple *epilog_stmt = NULL; |
5985 | gimple *exit_phi; |
5986 | tree bitsize; |
5987 | tree def; |
5988 | tree orig_name, scalar_result; |
5989 | imm_use_iterator imm_iter, phi_imm_iter; |
5990 | use_operand_p use_p, phi_use_p; |
5991 | gimple *use_stmt; |
5992 | auto_vec<tree> reduc_inputs; |
5993 | int j, i; |
5994 | vec<tree> &scalar_results = reduc_info->reduc_scalar_results; |
5995 | unsigned int group_size = 1, k; |
5996 | /* SLP reduction without reduction chain, e.g., |
5997 | # a1 = phi <a2, a0> |
5998 | # b1 = phi <b2, b0> |
5999 | a2 = operation (a1) |
6000 | b2 = operation (b1) */ |
6001 | bool slp_reduc = (slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)); |
6002 | bool direct_slp_reduc; |
6003 | tree induction_index = NULL_TREE; |
6004 | |
6005 | if (slp_node) |
6006 | group_size = SLP_TREE_LANES (slp_node); |
6007 | |
6008 | if (nested_in_vect_loop_p (loop, stmt_info)) |
6009 | { |
6010 | outer_loop = loop; |
6011 | loop = loop->inner; |
6012 | gcc_assert (!slp_node && double_reduc); |
6013 | } |
6014 | |
6015 | vectype = STMT_VINFO_REDUC_VECTYPE (reduc_info); |
6016 | gcc_assert (vectype); |
6017 | mode = TYPE_MODE (vectype); |
6018 | |
6019 | tree induc_val = NULL_TREE; |
6020 | tree adjustment_def = NULL; |
6021 | if (slp_node) |
6022 | ; |
6023 | else |
6024 | { |
6025 | /* Optimize: for induction condition reduction, if we can't use zero |
6026 | for induc_val, use initial_def. */ |
6027 | if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) |
6028 | induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info); |
6029 | else if (double_reduc) |
6030 | ; |
6031 | else |
6032 | adjustment_def = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info); |
6033 | } |
6034 | |
6035 | stmt_vec_info single_live_out_stmt[] = { stmt_info }; |
6036 | array_slice<const stmt_vec_info> live_out_stmts = single_live_out_stmt; |
6037 | if (slp_reduc) |
6038 | /* All statements produce live-out values. */ |
6039 | live_out_stmts = SLP_TREE_SCALAR_STMTS (slp_node); |
6040 | |
6041 | unsigned vec_num; |
6042 | int ncopies; |
6043 | if (slp_node) |
6044 | { |
6045 | vec_num = SLP_TREE_VEC_DEFS (slp_node_instance->reduc_phis).length (); |
6046 | ncopies = 1; |
6047 | } |
6048 | else |
6049 | { |
6050 | vec_num = 1; |
6051 | ncopies = STMT_VINFO_VEC_STMTS (reduc_info).length (); |
6052 | } |
6053 | |
6054 | /* For cond reductions we want to create a new vector (INDEX_COND_EXPR) |
6055 | which is updated with the current index of the loop for every match of |
6056 | the original loop's cond_expr (VEC_STMT). This results in a vector |
6057 | containing the last time the condition passed for that vector lane. |
6058 | The first match will be a 1 to allow 0 to be used for non-matching |
6059 | indexes. If there are no matches at all then the vector will be all |
6060 | zeroes. |
6061 | |
6062 | PR92772: This algorithm is broken for architectures that support |
6063 | masked vectors, but do not provide fold_extract_last. */ |
6064 | if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION) |
6065 | { |
6066 | auto_vec<std::pair<tree, bool>, 2> ccompares; |
6067 | stmt_vec_info cond_info = STMT_VINFO_REDUC_DEF (reduc_info); |
6068 | cond_info = vect_stmt_to_vectorize (stmt_info: cond_info); |
6069 | while (cond_info != reduc_info) |
6070 | { |
6071 | if (gimple_assign_rhs_code (gs: cond_info->stmt) == COND_EXPR) |
6072 | { |
6073 | gimple *vec_stmt = STMT_VINFO_VEC_STMTS (cond_info)[0]; |
6074 | gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR); |
6075 | ccompares.safe_push |
6076 | (obj: std::make_pair (x: unshare_expr (gimple_assign_rhs1 (gs: vec_stmt)), |
6077 | STMT_VINFO_REDUC_IDX (cond_info) == 2)); |
6078 | } |
6079 | cond_info |
6080 | = loop_vinfo->lookup_def (gimple_op (gs: cond_info->stmt, |
6081 | i: 1 + STMT_VINFO_REDUC_IDX |
6082 | (cond_info))); |
6083 | cond_info = vect_stmt_to_vectorize (stmt_info: cond_info); |
6084 | } |
6085 | gcc_assert (ccompares.length () != 0); |
6086 | |
6087 | tree indx_before_incr, indx_after_incr; |
6088 | poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (node: vectype); |
6089 | int scalar_precision |
6090 | = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (vectype))); |
6091 | tree cr_index_scalar_type = make_unsigned_type (scalar_precision); |
6092 | tree cr_index_vector_type = get_related_vectype_for_scalar_type |
6093 | (TYPE_MODE (vectype), cr_index_scalar_type, |
6094 | TYPE_VECTOR_SUBPARTS (node: vectype)); |
6095 | |
6096 | /* First we create a simple vector induction variable which starts |
6097 | with the values {1,2,3,...} (SERIES_VECT) and increments by the |
6098 | vector size (STEP). */ |
6099 | |
6100 | /* Create a {1,2,3,...} vector. */ |
6101 | tree series_vect = build_index_vector (cr_index_vector_type, 1, 1); |
6102 | |
6103 | /* Create a vector of the step value. */ |
6104 | tree step = build_int_cst (cr_index_scalar_type, nunits_out); |
6105 | tree vec_step = build_vector_from_val (cr_index_vector_type, step); |
6106 | |
6107 | /* Create an induction variable. */ |
6108 | gimple_stmt_iterator incr_gsi; |
6109 | bool insert_after; |
6110 | vect_iv_increment_position (loop_exit, &incr_gsi, &insert_after); |
6111 | create_iv (series_vect, PLUS_EXPR, vec_step, NULL_TREE, loop, &incr_gsi, |
6112 | insert_after, &indx_before_incr, &indx_after_incr); |
6113 | |
6114 | /* Next create a new phi node vector (NEW_PHI_TREE) which starts |
6115 | filled with zeros (VEC_ZERO). */ |
6116 | |
6117 | /* Create a vector of 0s. */ |
6118 | tree zero = build_zero_cst (cr_index_scalar_type); |
6119 | tree vec_zero = build_vector_from_val (cr_index_vector_type, zero); |
6120 | |
6121 | /* Create a vector phi node. */ |
6122 | tree new_phi_tree = make_ssa_name (var: cr_index_vector_type); |
6123 | new_phi = create_phi_node (new_phi_tree, loop->header); |
6124 | add_phi_arg (as_a <gphi *> (p: new_phi), vec_zero, |
6125 | loop_preheader_edge (loop), UNKNOWN_LOCATION); |
6126 | |
6127 | /* Now take the condition from the loops original cond_exprs |
6128 | and produce a new cond_exprs (INDEX_COND_EXPR) which for |
6129 | every match uses values from the induction variable |
6130 | (INDEX_BEFORE_INCR) otherwise uses values from the phi node |
6131 | (NEW_PHI_TREE). |
6132 | Finally, we update the phi (NEW_PHI_TREE) to take the value of |
6133 | the new cond_expr (INDEX_COND_EXPR). */ |
6134 | gimple_seq stmts = NULL; |
6135 | for (int i = ccompares.length () - 1; i != -1; --i) |
6136 | { |
6137 | tree ccompare = ccompares[i].first; |
6138 | if (ccompares[i].second) |
6139 | new_phi_tree = gimple_build (seq: &stmts, code: VEC_COND_EXPR, |
6140 | type: cr_index_vector_type, |
6141 | ops: ccompare, |
6142 | ops: indx_before_incr, ops: new_phi_tree); |
6143 | else |
6144 | new_phi_tree = gimple_build (seq: &stmts, code: VEC_COND_EXPR, |
6145 | type: cr_index_vector_type, |
6146 | ops: ccompare, |
6147 | ops: new_phi_tree, ops: indx_before_incr); |
6148 | } |
6149 | gsi_insert_seq_before (&incr_gsi, stmts, GSI_SAME_STMT); |
6150 | |
6151 | /* Update the phi with the vec cond. */ |
6152 | induction_index = new_phi_tree; |
6153 | add_phi_arg (as_a <gphi *> (p: new_phi), induction_index, |
6154 | loop_latch_edge (loop), UNKNOWN_LOCATION); |
6155 | } |
6156 | |
6157 | /* 2. Create epilog code. |
6158 | The reduction epilog code operates across the elements of the vector |
6159 | of partial results computed by the vectorized loop. |
6160 | The reduction epilog code consists of: |
6161 | |
6162 | step 1: compute the scalar result in a vector (v_out2) |
6163 | step 2: extract the scalar result (s_out3) from the vector (v_out2) |
6164 | step 3: adjust the scalar result (s_out3) if needed. |
6165 | |
     Step 1 can be accomplished using one of the following three schemes:
6167 | (scheme 1) using reduc_fn, if available. |
6168 | (scheme 2) using whole-vector shifts, if available. |
6169 | (scheme 3) using a scalar loop. In this case steps 1+2 above are |
6170 | combined. |
6171 | |
6172 | The overall epilog code looks like this: |
6173 | |
6174 | s_out0 = phi <s_loop> # original EXIT_PHI |
6175 | v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI |
6176 | v_out2 = reduce <v_out1> # step 1 |
6177 | s_out3 = extract_field <v_out2, 0> # step 2 |
6178 | s_out4 = adjust_result <s_out3> # step 3 |
6179 | |
6180 | (step 3 is optional, and steps 1 and 2 may be combined). |
6181 | Lastly, the uses of s_out0 are replaced by s_out4. */ |
6182 | |
6183 | |
6184 | /* 2.1 Create new loop-exit-phis to preserve loop-closed form: |
6185 | v_out1 = phi <VECT_DEF> |
6186 | Store them in NEW_PHIS. */ |
6187 | if (double_reduc) |
6188 | loop = outer_loop; |
6189 | /* We need to reduce values in all exits. */ |
6190 | exit_bb = loop_exit->dest; |
6191 | exit_gsi = gsi_after_labels (bb: exit_bb); |
6192 | reduc_inputs.create (nelems: slp_node ? vec_num : ncopies); |
6193 | for (unsigned i = 0; i < vec_num; i++) |
6194 | { |
6195 | gimple_seq stmts = NULL; |
6196 | if (slp_node) |
6197 | def = vect_get_slp_vect_def (slp_node, i); |
6198 | else |
6199 | def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[0]); |
6200 | for (j = 0; j < ncopies; j++) |
6201 | { |
6202 | tree new_def = copy_ssa_name (var: def); |
6203 | phi = create_phi_node (new_def, exit_bb); |
6204 | if (j) |
6205 | def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[j]); |
6206 | if (LOOP_VINFO_IV_EXIT (loop_vinfo) == loop_exit) |
6207 | SET_PHI_ARG_DEF (phi, loop_exit->dest_idx, def); |
6208 | else |
6209 | { |
6210 | for (unsigned k = 0; k < gimple_phi_num_args (gs: phi); k++) |
6211 | SET_PHI_ARG_DEF (phi, k, def); |
6212 | } |
6213 | new_def = gimple_convert (seq: &stmts, type: vectype, op: new_def); |
6214 | reduc_inputs.quick_push (obj: new_def); |
6215 | } |
6216 | gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); |
6217 | } |
6218 | |
6219 | /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3 |
6220 | (i.e. when reduc_fn is not available) and in the final adjustment |
6221 | code (if needed). Also get the original scalar reduction variable as |
6222 | defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it |
6223 | represents a reduction pattern), the tree-code and scalar-def are |
6224 | taken from the original stmt that the pattern-stmt (STMT) replaces. |
6225 | Otherwise (it is a regular reduction) - the tree-code and scalar-def |
6226 | are taken from STMT. */ |
6227 | |
6228 | stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info); |
6229 | if (orig_stmt_info != stmt_info) |
6230 | { |
6231 | /* Reduction pattern */ |
6232 | gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info)); |
6233 | gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt_info); |
6234 | } |
6235 | |
6236 | scalar_dest = gimple_get_lhs (orig_stmt_info->stmt); |
6237 | scalar_type = TREE_TYPE (scalar_dest); |
6238 | scalar_results.truncate (size: 0); |
6239 | scalar_results.reserve_exact (nelems: group_size); |
6240 | new_scalar_dest = vect_create_destination_var (scalar_dest, NULL); |
6241 | bitsize = TYPE_SIZE (scalar_type); |
6242 | |
6243 | /* True if we should implement SLP_REDUC using native reduction operations |
6244 | instead of scalar operations. */ |
6245 | direct_slp_reduc = (reduc_fn != IFN_LAST |
6246 | && slp_reduc |
6247 | && !TYPE_VECTOR_SUBPARTS (node: vectype).is_constant ()); |
6248 | |
6249 | /* In case of reduction chain, e.g., |
6250 | # a1 = phi <a3, a0> |
6251 | a2 = operation (a1) |
6252 | a3 = operation (a2), |
6253 | |
6254 | we may end up with more than one vector result. Here we reduce them |
6255 | to one vector. |
6256 | |
6257 | The same is true for a SLP reduction, e.g., |
6258 | # a1 = phi <a2, a0> |
6259 | # b1 = phi <b2, b0> |
6260 | a2 = operation (a1) |
       b2 = operation (b1),
6262 | |
6263 | where we can end up with more than one vector as well. We can |
6264 | easily accumulate vectors when the number of vector elements is |
6265 | a multiple of the SLP group size. |
6266 | |
6267 | The same is true if we couldn't use a single defuse cycle. */ |
6268 | if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) |
6269 | || direct_slp_reduc |
6270 | || (slp_reduc |
6271 | && constant_multiple_p (a: TYPE_VECTOR_SUBPARTS (node: vectype), b: group_size)) |
6272 | || ncopies > 1) |
6273 | { |
6274 | gimple_seq stmts = NULL; |
6275 | tree single_input = reduc_inputs[0]; |
6276 | for (k = 1; k < reduc_inputs.length (); k++) |
6277 | single_input = gimple_build (seq: &stmts, code, type: vectype, |
6278 | ops: single_input, ops: reduc_inputs[k]); |
6279 | gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); |
6280 | |
6281 | reduc_inputs.truncate (size: 0); |
6282 | reduc_inputs.safe_push (obj: single_input); |
6283 | } |
6284 | |
6285 | tree orig_reduc_input = reduc_inputs[0]; |
6286 | |
6287 | /* If this loop is an epilogue loop that can be skipped after the |
6288 | main loop, we can only share a reduction operation between the |
6289 | main loop and the epilogue if we put it at the target of the |
6290 | skip edge. |
6291 | |
6292 | We can still reuse accumulators if this check fails. Doing so has |
6293 | the minor(?) benefit of making the epilogue loop's scalar result |
6294 | independent of the main loop's scalar result. */ |
6295 | bool unify_with_main_loop_p = false; |
6296 | if (reduc_info->reused_accumulator |
6297 | && loop_vinfo->skip_this_loop_edge |
6298 | && single_succ_p (bb: exit_bb) |
6299 | && single_succ (bb: exit_bb) == loop_vinfo->skip_this_loop_edge->dest) |
6300 | { |
6301 | unify_with_main_loop_p = true; |
6302 | |
6303 | basic_block reduc_block = loop_vinfo->skip_this_loop_edge->dest; |
6304 | reduc_inputs[0] = make_ssa_name (var: vectype); |
6305 | gphi *new_phi = create_phi_node (reduc_inputs[0], reduc_block); |
6306 | add_phi_arg (new_phi, orig_reduc_input, single_succ_edge (bb: exit_bb), |
6307 | UNKNOWN_LOCATION); |
6308 | add_phi_arg (new_phi, reduc_info->reused_accumulator->reduc_input, |
6309 | loop_vinfo->skip_this_loop_edge, UNKNOWN_LOCATION); |
6310 | exit_gsi = gsi_after_labels (bb: reduc_block); |
6311 | } |
6312 | |
6313 | /* Shouldn't be used beyond this point. */ |
6314 | exit_bb = nullptr; |
6315 | |
6316 | if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION |
6317 | && reduc_fn != IFN_LAST) |
6318 | { |
6319 | /* For condition reductions, we have a vector (REDUC_INPUTS 0) containing |
6320 | various data values where the condition matched and another vector |
6321 | (INDUCTION_INDEX) containing all the indexes of those matches. We |
6322 | need to extract the last matching index (which will be the index with |
6323 | highest value) and use this to index into the data vector. |
6324 | For the case where there were no matches, the data vector will contain |
6325 | all default values and the index vector will be all zeros. */ |
6326 | |
6327 | /* Get various versions of the type of the vector of indexes. */ |
6328 | tree index_vec_type = TREE_TYPE (induction_index); |
6329 | gcc_checking_assert (TYPE_UNSIGNED (index_vec_type)); |
6330 | tree index_scalar_type = TREE_TYPE (index_vec_type); |
6331 | tree index_vec_cmp_type = truth_type_for (index_vec_type); |
6332 | |
6333 | /* Get an unsigned integer version of the type of the data vector. */ |
6334 | int scalar_precision |
6335 | = GET_MODE_PRECISION (SCALAR_TYPE_MODE (scalar_type)); |
6336 | tree scalar_type_unsigned = make_unsigned_type (scalar_precision); |
6337 | tree vectype_unsigned = get_same_sized_vectype (scalar_type_unsigned, |
6338 | vectype); |
6339 | |
6340 | /* First we need to create a vector (ZERO_VEC) of zeros and another |
6341 | vector (MAX_INDEX_VEC) filled with the last matching index, which we |
6342 | can create using a MAX reduction and then expanding. |
6343 | In the case where the loop never made any matches, the max index will |
6344 | be zero. */ |
6345 | |
6346 | /* Vector of {0, 0, 0,...}. */ |
6347 | tree zero_vec = build_zero_cst (vectype); |
6348 | |
6349 | /* Find maximum value from the vector of found indexes. */ |
6350 | tree max_index = make_ssa_name (var: index_scalar_type); |
6351 | gcall *max_index_stmt = gimple_build_call_internal (IFN_REDUC_MAX, |
6352 | 1, induction_index); |
6353 | gimple_call_set_lhs (gs: max_index_stmt, lhs: max_index); |
6354 | gsi_insert_before (&exit_gsi, max_index_stmt, GSI_SAME_STMT); |
6355 | |
6356 | /* Vector of {max_index, max_index, max_index,...}. */ |
6357 | tree max_index_vec = make_ssa_name (var: index_vec_type); |
6358 | tree max_index_vec_rhs = build_vector_from_val (index_vec_type, |
6359 | max_index); |
6360 | gimple *max_index_vec_stmt = gimple_build_assign (max_index_vec, |
6361 | max_index_vec_rhs); |
6362 | gsi_insert_before (&exit_gsi, max_index_vec_stmt, GSI_SAME_STMT); |
6363 | |
6364 | /* Next we compare the new vector (MAX_INDEX_VEC) full of max indexes |
6365 | with the vector (INDUCTION_INDEX) of found indexes, choosing values |
6366 | from the data vector (REDUC_INPUTS 0) for matches, 0 (ZERO_VEC) |
6367 | otherwise. Only one value should match, resulting in a vector |
6368 | (VEC_COND) with one data value and the rest zeros. |
6369 | In the case where the loop never made any matches, every index will |
6370 | match, resulting in a vector with all data values (which will all be |
6371 | the default value). */ |
6372 | |
6373 | /* Compare the max index vector to the vector of found indexes to find |
6374 | the position of the max value. */ |
6375 | tree vec_compare = make_ssa_name (var: index_vec_cmp_type); |
6376 | gimple *vec_compare_stmt = gimple_build_assign (vec_compare, EQ_EXPR, |
6377 | induction_index, |
6378 | max_index_vec); |
6379 | gsi_insert_before (&exit_gsi, vec_compare_stmt, GSI_SAME_STMT); |
6380 | |
6381 | /* Use the compare to choose either values from the data vector or |
6382 | zero. */ |
6383 | tree vec_cond = make_ssa_name (var: vectype); |
6384 | gimple *vec_cond_stmt = gimple_build_assign (vec_cond, VEC_COND_EXPR, |
6385 | vec_compare, |
6386 | reduc_inputs[0], |
6387 | zero_vec); |
6388 | gsi_insert_before (&exit_gsi, vec_cond_stmt, GSI_SAME_STMT); |
6389 | |
6390 | /* Finally we need to extract the data value from the vector (VEC_COND) |
6391 | into a scalar (MATCHED_DATA_REDUC). Logically we want to do a OR |
6392 | reduction, but because this doesn't exist, we can use a MAX reduction |
6393 | instead. The data value might be signed or a float so we need to cast |
6394 | it first. |
6395 | In the case where the loop never made any matches, the data values are |
6396 | all identical, and so will reduce down correctly. */ |
6397 | |
6398 | /* Make the matched data values unsigned. */ |
6399 | tree vec_cond_cast = make_ssa_name (var: vectype_unsigned); |
6400 | tree vec_cond_cast_rhs = build1 (VIEW_CONVERT_EXPR, vectype_unsigned, |
6401 | vec_cond); |
6402 | gimple *vec_cond_cast_stmt = gimple_build_assign (vec_cond_cast, |
6403 | VIEW_CONVERT_EXPR, |
6404 | vec_cond_cast_rhs); |
6405 | gsi_insert_before (&exit_gsi, vec_cond_cast_stmt, GSI_SAME_STMT); |
6406 | |
6407 | /* Reduce down to a scalar value. */ |
6408 | tree data_reduc = make_ssa_name (var: scalar_type_unsigned); |
6409 | gcall *data_reduc_stmt = gimple_build_call_internal (IFN_REDUC_MAX, |
6410 | 1, vec_cond_cast); |
6411 | gimple_call_set_lhs (gs: data_reduc_stmt, lhs: data_reduc); |
6412 | gsi_insert_before (&exit_gsi, data_reduc_stmt, GSI_SAME_STMT); |
6413 | |
6414 | /* Convert the reduced value back to the result type and set as the |
6415 | result. */ |
6416 | gimple_seq stmts = NULL; |
6417 | new_temp = gimple_build (seq: &stmts, code: VIEW_CONVERT_EXPR, type: scalar_type, |
6418 | ops: data_reduc); |
6419 | gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); |
6420 | scalar_results.safe_push (obj: new_temp); |
6421 | } |
6422 | else if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION |
6423 | && reduc_fn == IFN_LAST) |
6424 | { |
6425 | /* Condition reduction without supported IFN_REDUC_MAX. Generate |
6426 | idx = 0; |
6427 | idx_val = induction_index[0]; |
6428 | val = data_reduc[0]; |
6429 | for (idx = 0, val = init, i = 0; i < nelts; ++i) |
6430 | if (induction_index[i] > idx_val) |
6431 | val = data_reduc[i], idx_val = induction_index[i]; |
6432 | return val; */ |
6433 | |
6434 | tree data_eltype = TREE_TYPE (vectype); |
6435 | tree idx_eltype = TREE_TYPE (TREE_TYPE (induction_index)); |
6436 | unsigned HOST_WIDE_INT el_size = tree_to_uhwi (TYPE_SIZE (idx_eltype)); |
6437 | poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (TREE_TYPE (induction_index)); |
6438 | /* Enforced by vectorizable_reduction, which ensures we have target |
6439 | support before allowing a conditional reduction on variable-length |
6440 | vectors. */ |
6441 | unsigned HOST_WIDE_INT v_size = el_size * nunits.to_constant (); |
6442 | tree idx_val = NULL_TREE, val = NULL_TREE; |
6443 | for (unsigned HOST_WIDE_INT off = 0; off < v_size; off += el_size) |
6444 | { |
6445 | tree old_idx_val = idx_val; |
6446 | tree old_val = val; |
6447 | idx_val = make_ssa_name (var: idx_eltype); |
6448 | epilog_stmt = gimple_build_assign (idx_val, BIT_FIELD_REF, |
6449 | build3 (BIT_FIELD_REF, idx_eltype, |
6450 | induction_index, |
6451 | bitsize_int (el_size), |
6452 | bitsize_int (off))); |
6453 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
6454 | val = make_ssa_name (var: data_eltype); |
6455 | epilog_stmt = gimple_build_assign (val, BIT_FIELD_REF, |
6456 | build3 (BIT_FIELD_REF, |
6457 | data_eltype, |
6458 | reduc_inputs[0], |
6459 | bitsize_int (el_size), |
6460 | bitsize_int (off))); |
6461 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
6462 | if (off != 0) |
6463 | { |
6464 | tree new_idx_val = idx_val; |
6465 | if (off != v_size - el_size) |
6466 | { |
6467 | new_idx_val = make_ssa_name (var: idx_eltype); |
6468 | epilog_stmt = gimple_build_assign (new_idx_val, |
6469 | MAX_EXPR, idx_val, |
6470 | old_idx_val); |
6471 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
6472 | } |
6473 | tree cond = make_ssa_name (boolean_type_node); |
6474 | epilog_stmt = gimple_build_assign (cond, GT_EXPR, |
6475 | idx_val, old_idx_val); |
6476 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
6477 | tree new_val = make_ssa_name (var: data_eltype); |
6478 | epilog_stmt = gimple_build_assign (new_val, COND_EXPR, |
6479 | cond, val, old_val); |
6480 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
6481 | idx_val = new_idx_val; |
6482 | val = new_val; |
6483 | } |
6484 | } |
6485 | /* Convert the reduced value back to the result type and set as the |
6486 | result. */ |
6487 | gimple_seq stmts = NULL; |
6488 | val = gimple_convert (seq: &stmts, type: scalar_type, op: val); |
6489 | gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); |
6490 | scalar_results.safe_push (obj: val); |
6491 | } |
6492 | |
6493 | /* 2.3 Create the reduction code, using one of the three schemes described |
6494 | above. In SLP we simply need to extract all the elements from the |
6495 | vector (without reducing them), so we use scalar shifts. */ |
6496 | else if (reduc_fn != IFN_LAST && !slp_reduc) |
6497 | { |
6498 | tree tmp; |
6499 | tree vec_elem_type; |
6500 | |
6501 | /* Case 1: Create: |
6502 | v_out2 = reduc_expr <v_out1> */ |
6503 | |
6504 | if (dump_enabled_p ()) |
6505 | dump_printf_loc (MSG_NOTE, vect_location, |
6506 | "Reduce using direct vector reduction.\n" ); |
6507 | |
6508 | gimple_seq stmts = NULL; |
6509 | vec_elem_type = TREE_TYPE (vectype); |
6510 | new_temp = gimple_build (seq: &stmts, fn: as_combined_fn (fn: reduc_fn), |
6511 | type: vec_elem_type, args: reduc_inputs[0]); |
6512 | new_temp = gimple_convert (seq: &stmts, type: scalar_type, op: new_temp); |
6513 | gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); |
6514 | |
6515 | if ((STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) |
6516 | && induc_val) |
6517 | { |
6518 | /* Earlier we set the initial value to be a vector if induc_val |
6519 | values. Check the result and if it is induc_val then replace |
6520 | with the original initial value, unless induc_val is |
6521 | the same as initial_def already. */ |
6522 | tree zcompare = make_ssa_name (boolean_type_node); |
6523 | epilog_stmt = gimple_build_assign (zcompare, EQ_EXPR, |
6524 | new_temp, induc_val); |
6525 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
6526 | tree initial_def = reduc_info->reduc_initial_values[0]; |
6527 | tmp = make_ssa_name (var: new_scalar_dest); |
6528 | epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare, |
6529 | initial_def, new_temp); |
6530 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
6531 | new_temp = tmp; |
6532 | } |
6533 | |
6534 | scalar_results.safe_push (obj: new_temp); |
6535 | } |
6536 | else if (direct_slp_reduc) |
6537 | { |
6538 | /* Here we create one vector for each of the REDUC_GROUP_SIZE results, |
6539 | with the elements for other SLP statements replaced with the |
6540 | neutral value. We can then do a normal reduction on each vector. */ |
6541 | |
6542 | /* Enforced by vectorizable_reduction. */ |
6543 | gcc_assert (reduc_inputs.length () == 1); |
6544 | gcc_assert (pow2p_hwi (group_size)); |
6545 | |
6546 | gimple_seq seq = NULL; |
6547 | |
6548 | /* Build a vector {0, 1, 2, ...}, with the same number of elements |
6549 | and the same element size as VECTYPE. */ |
6550 | tree index = build_index_vector (vectype, 0, 1); |
6551 | tree index_type = TREE_TYPE (index); |
6552 | tree index_elt_type = TREE_TYPE (index_type); |
6553 | tree mask_type = truth_type_for (index_type); |
6554 | |
6555 | /* Create a vector that, for each element, identifies which of |
6556 | the REDUC_GROUP_SIZE results should use it. */ |
6557 | tree index_mask = build_int_cst (index_elt_type, group_size - 1); |
6558 | index = gimple_build (seq: &seq, code: BIT_AND_EXPR, type: index_type, ops: index, |
6559 | ops: build_vector_from_val (index_type, index_mask)); |
6560 | |
6561 | /* Get a neutral vector value. This is simply a splat of the neutral |
6562 | scalar value if we have one, otherwise the initial scalar value |
6563 | is itself a neutral value. */ |
6564 | tree vector_identity = NULL_TREE; |
6565 | tree neutral_op = NULL_TREE; |
6566 | if (slp_node) |
6567 | { |
6568 | tree initial_value = NULL_TREE; |
6569 | if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)) |
6570 | initial_value = reduc_info->reduc_initial_values[0]; |
6571 | neutral_op = neutral_op_for_reduction (TREE_TYPE (vectype), code, |
6572 | initial_value, as_initial: false); |
6573 | } |
6574 | if (neutral_op) |
6575 | vector_identity = gimple_build_vector_from_val (seq: &seq, type: vectype, |
6576 | op: neutral_op); |
6577 | for (unsigned int i = 0; i < group_size; ++i) |
6578 | { |
6579 | /* If there's no univeral neutral value, we can use the |
6580 | initial scalar value from the original PHI. This is used |
6581 | for MIN and MAX reduction, for example. */ |
6582 | if (!neutral_op) |
6583 | { |
6584 | tree scalar_value = reduc_info->reduc_initial_values[i]; |
6585 | scalar_value = gimple_convert (seq: &seq, TREE_TYPE (vectype), |
6586 | op: scalar_value); |
6587 | vector_identity = gimple_build_vector_from_val (seq: &seq, type: vectype, |
6588 | op: scalar_value); |
6589 | } |
6590 | |
6591 | /* Calculate the equivalent of: |
6592 | |
6593 | sel[j] = (index[j] == i); |
6594 | |
6595 | which selects the elements of REDUC_INPUTS[0] that should |
6596 | be included in the result. */ |
6597 | tree compare_val = build_int_cst (index_elt_type, i); |
6598 | compare_val = build_vector_from_val (index_type, compare_val); |
6599 | tree sel = gimple_build (seq: &seq, code: EQ_EXPR, type: mask_type, |
6600 | ops: index, ops: compare_val); |
6601 | |
6602 | /* Calculate the equivalent of: |
6603 | |
6604 | vec = seq ? reduc_inputs[0] : vector_identity; |
6605 | |
6606 | VEC is now suitable for a full vector reduction. */ |
6607 | tree vec = gimple_build (seq: &seq, code: VEC_COND_EXPR, type: vectype, |
6608 | ops: sel, ops: reduc_inputs[0], ops: vector_identity); |
6609 | |
6610 | /* Do the reduction and convert it to the appropriate type. */ |
6611 | tree scalar = gimple_build (seq: &seq, fn: as_combined_fn (fn: reduc_fn), |
6612 | TREE_TYPE (vectype), args: vec); |
6613 | scalar = gimple_convert (seq: &seq, type: scalar_type, op: scalar); |
6614 | scalar_results.safe_push (obj: scalar); |
6615 | } |
6616 | gsi_insert_seq_before (&exit_gsi, seq, GSI_SAME_STMT); |
6617 | } |
6618 | else |
6619 | { |
6620 | bool reduce_with_shift; |
6621 | tree vec_temp; |
6622 | |
6623 | gcc_assert (slp_reduc || reduc_inputs.length () == 1); |
6624 | |
6625 | /* See if the target wants to do the final (shift) reduction |
6626 | in a vector mode of smaller size and first reduce upper/lower |
6627 | halves against each other. */ |
6628 | enum machine_mode mode1 = mode; |
6629 | tree stype = TREE_TYPE (vectype); |
6630 | unsigned nunits = TYPE_VECTOR_SUBPARTS (node: vectype).to_constant (); |
6631 | unsigned nunits1 = nunits; |
6632 | if ((mode1 = targetm.vectorize.split_reduction (mode)) != mode |
6633 | && reduc_inputs.length () == 1) |
6634 | { |
6635 | nunits1 = GET_MODE_NUNITS (mode: mode1).to_constant (); |
6636 | /* For SLP reductions we have to make sure lanes match up, but |
6637 | since we're doing individual element final reduction reducing |
6638 | vector width here is even more important. |
6639 | ??? We can also separate lanes with permutes, for the common |
6640 | case of power-of-two group-size odd/even extracts would work. */ |
6641 | if (slp_reduc && nunits != nunits1) |
6642 | { |
6643 | nunits1 = least_common_multiple (nunits1, group_size); |
6644 | gcc_assert (exact_log2 (nunits1) != -1 && nunits1 <= nunits); |
6645 | } |
6646 | } |
6647 | if (!slp_reduc |
6648 | && (mode1 = targetm.vectorize.split_reduction (mode)) != mode) |
6649 | nunits1 = GET_MODE_NUNITS (mode: mode1).to_constant (); |
6650 | |
6651 | tree vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype), |
6652 | stype, nunits1); |
6653 | reduce_with_shift = have_whole_vector_shift (mode: mode1); |
6654 | if (!VECTOR_MODE_P (mode1) |
6655 | || !directly_supported_p (code, vectype1)) |
6656 | reduce_with_shift = false; |
6657 | |
6658 | /* First reduce the vector to the desired vector size we should |
6659 | do shift reduction on by combining upper and lower halves. */ |
6660 | gimple_seq stmts = NULL; |
6661 | new_temp = vect_create_partial_epilog (vec_def: reduc_inputs[0], vectype: vectype1, |
6662 | code, seq: &stmts); |
6663 | gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); |
6664 | reduc_inputs[0] = new_temp; |
6665 | |
6666 | if (reduce_with_shift && !slp_reduc) |
6667 | { |
6668 | int element_bitsize = tree_to_uhwi (bitsize); |
6669 | /* Enforced by vectorizable_reduction, which disallows SLP reductions |
6670 | for variable-length vectors and also requires direct target support |
6671 | for loop reductions. */ |
6672 | int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1)); |
6673 | int nelements = vec_size_in_bits / element_bitsize; |
6674 | vec_perm_builder sel; |
6675 | vec_perm_indices indices; |
6676 | |
6677 | int elt_offset; |
6678 | |
6679 | tree zero_vec = build_zero_cst (vectype1); |
6680 | /* Case 2: Create: |
6681 | for (offset = nelements/2; offset >= 1; offset/=2) |
6682 | { |
6683 | Create: va' = vec_shift <va, offset> |
6684 | Create: va = vop <va, va'> |
6685 | } */ |
6686 | |
6687 | tree rhs; |
6688 | |
6689 | if (dump_enabled_p ()) |
6690 | dump_printf_loc (MSG_NOTE, vect_location, |
6691 | "Reduce using vector shifts\n" ); |
6692 | |
6693 | gimple_seq stmts = NULL; |
6694 | new_temp = gimple_convert (seq: &stmts, type: vectype1, op: new_temp); |
6695 | for (elt_offset = nelements / 2; |
6696 | elt_offset >= 1; |
6697 | elt_offset /= 2) |
6698 | { |
6699 | calc_vec_perm_mask_for_shift (offset: elt_offset, nelt: nelements, sel: &sel); |
6700 | indices.new_vector (sel, 2, nelements); |
6701 | tree mask = vect_gen_perm_mask_any (vectype1, indices); |
6702 | new_name = gimple_build (seq: &stmts, code: VEC_PERM_EXPR, type: vectype1, |
6703 | ops: new_temp, ops: zero_vec, ops: mask); |
6704 | new_temp = gimple_build (seq: &stmts, code, |
6705 | type: vectype1, ops: new_name, ops: new_temp); |
6706 | } |
6707 | gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); |
6708 | |
6709 | /* 2.4 Extract the final scalar result. Create: |
6710 | s_out3 = extract_field <v_out2, bitpos> */ |
6711 | |
6712 | if (dump_enabled_p ()) |
6713 | dump_printf_loc (MSG_NOTE, vect_location, |
6714 | "extract scalar result\n" ); |
6715 | |
6716 | rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, |
6717 | bitsize, bitsize_zero_node); |
6718 | epilog_stmt = gimple_build_assign (new_scalar_dest, rhs); |
6719 | new_temp = make_ssa_name (var: new_scalar_dest, stmt: epilog_stmt); |
6720 | gimple_assign_set_lhs (gs: epilog_stmt, lhs: new_temp); |
6721 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
6722 | scalar_results.safe_push (obj: new_temp); |
6723 | } |
6724 | else |
6725 | { |
6726 | /* Case 3: Create: |
6727 | s = extract_field <v_out2, 0> |
6728 | for (offset = element_size; |
6729 | offset < vector_size; |
6730 | offset += element_size;) |
6731 | { |
6732 | Create: s' = extract_field <v_out2, offset> |
6733 | Create: s = op <s, s'> // For non SLP cases |
6734 | } */ |
6735 | |
6736 | if (dump_enabled_p ()) |
6737 | dump_printf_loc (MSG_NOTE, vect_location, |
6738 | "Reduce using scalar code.\n" ); |
6739 | |
6740 | int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1)); |
6741 | int element_bitsize = tree_to_uhwi (bitsize); |
6742 | tree compute_type = TREE_TYPE (vectype); |
6743 | gimple_seq stmts = NULL; |
6744 | FOR_EACH_VEC_ELT (reduc_inputs, i, vec_temp) |
6745 | { |
6746 | int bit_offset; |
6747 | new_temp = gimple_build (seq: &stmts, code: BIT_FIELD_REF, type: compute_type, |
6748 | ops: vec_temp, ops: bitsize, bitsize_zero_node); |
6749 | |
6750 | /* In SLP we don't need to apply reduction operation, so we just |
6751 | collect s' values in SCALAR_RESULTS. */ |
6752 | if (slp_reduc) |
6753 | scalar_results.safe_push (obj: new_temp); |
6754 | |
6755 | for (bit_offset = element_bitsize; |
6756 | bit_offset < vec_size_in_bits; |
6757 | bit_offset += element_bitsize) |
6758 | { |
6759 | tree bitpos = bitsize_int (bit_offset); |
6760 | new_name = gimple_build (seq: &stmts, code: BIT_FIELD_REF, |
6761 | type: compute_type, ops: vec_temp, |
6762 | ops: bitsize, ops: bitpos); |
6763 | if (slp_reduc) |
6764 | { |
6765 | /* In SLP we don't need to apply reduction operation, so |
6766 | we just collect s' values in SCALAR_RESULTS. */ |
6767 | new_temp = new_name; |
6768 | scalar_results.safe_push (obj: new_name); |
6769 | } |
6770 | else |
6771 | new_temp = gimple_build (seq: &stmts, code, type: compute_type, |
6772 | ops: new_name, ops: new_temp); |
6773 | } |
6774 | } |
6775 | |
6776 | /* The only case where we need to reduce scalar results in SLP, is |
6777 | unrolling. If the size of SCALAR_RESULTS is greater than |
6778 | REDUC_GROUP_SIZE, we reduce them combining elements modulo |
6779 | REDUC_GROUP_SIZE. */ |
6780 | if (slp_reduc) |
6781 | { |
6782 | tree res, first_res, new_res; |
6783 | |
6784 | /* Reduce multiple scalar results in case of SLP unrolling. */ |
6785 | for (j = group_size; scalar_results.iterate (ix: j, ptr: &res); |
6786 | j++) |
6787 | { |
6788 | first_res = scalar_results[j % group_size]; |
6789 | new_res = gimple_build (seq: &stmts, code, type: compute_type, |
6790 | ops: first_res, ops: res); |
6791 | scalar_results[j % group_size] = new_res; |
6792 | } |
6793 | scalar_results.truncate (size: group_size); |
6794 | for (k = 0; k < group_size; k++) |
6795 | scalar_results[k] = gimple_convert (seq: &stmts, type: scalar_type, |
6796 | op: scalar_results[k]); |
6797 | } |
6798 | else |
6799 | { |
6800 | /* Not SLP - we have one scalar to keep in SCALAR_RESULTS. */ |
6801 | new_temp = gimple_convert (seq: &stmts, type: scalar_type, op: new_temp); |
6802 | scalar_results.safe_push (obj: new_temp); |
6803 | } |
6804 | |
6805 | gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); |
6806 | } |
6807 | |
6808 | if ((STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) |
6809 | && induc_val) |
6810 | { |
6811 | /* Earlier we set the initial value to be a vector if induc_val |
6812 | values. Check the result and if it is induc_val then replace |
6813 | with the original initial value, unless induc_val is |
6814 | the same as initial_def already. */ |
6815 | tree zcompare = make_ssa_name (boolean_type_node); |
6816 | epilog_stmt = gimple_build_assign (zcompare, EQ_EXPR, new_temp, |
6817 | induc_val); |
6818 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
6819 | tree initial_def = reduc_info->reduc_initial_values[0]; |
6820 | tree tmp = make_ssa_name (var: new_scalar_dest); |
6821 | epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare, |
6822 | initial_def, new_temp); |
6823 | gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
6824 | scalar_results[0] = tmp; |
6825 | } |
6826 | } |
6827 | |
6828 | /* 2.5 Adjust the final result by the initial value of the reduction |
6829 | variable. (When such adjustment is not needed, then |
6830 | 'adjustment_def' is zero). For example, if code is PLUS we create: |
6831 | new_temp = loop_exit_def + adjustment_def */ |
6832 | |
6833 | if (adjustment_def) |
6834 | { |
6835 | gcc_assert (!slp_reduc); |
6836 | gimple_seq stmts = NULL; |
6837 | if (double_reduc) |
6838 | { |
6839 | gcc_assert (VECTOR_TYPE_P (TREE_TYPE (adjustment_def))); |
6840 | adjustment_def = gimple_convert (seq: &stmts, type: vectype, op: adjustment_def); |
6841 | new_temp = gimple_build (seq: &stmts, code, type: vectype, |
6842 | ops: reduc_inputs[0], ops: adjustment_def); |
6843 | } |
6844 | else |
6845 | { |
6846 | new_temp = scalar_results[0]; |
6847 | gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE); |
6848 | adjustment_def = gimple_convert (seq: &stmts, TREE_TYPE (vectype), |
6849 | op: adjustment_def); |
6850 | new_temp = gimple_convert (seq: &stmts, TREE_TYPE (vectype), op: new_temp); |
6851 | new_temp = gimple_build (seq: &stmts, code, TREE_TYPE (vectype), |
6852 | ops: new_temp, ops: adjustment_def); |
6853 | new_temp = gimple_convert (seq: &stmts, type: scalar_type, op: new_temp); |
6854 | } |
6855 | |
6856 | epilog_stmt = gimple_seq_last_stmt (s: stmts); |
6857 | gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); |
6858 | scalar_results[0] = new_temp; |
6859 | } |
6860 | |
6861 | /* Record this operation if it could be reused by the epilogue loop. */ |
6862 | if (STMT_VINFO_REDUC_TYPE (reduc_info) == TREE_CODE_REDUCTION |
6863 | && reduc_inputs.length () == 1) |
6864 | loop_vinfo->reusable_accumulators.put (k: scalar_results[0], |
6865 | v: { .reduc_input: orig_reduc_input, .reduc_info: reduc_info }); |
6866 | |
6867 | if (double_reduc) |
6868 | loop = outer_loop; |
6869 | |
6870 | /* 2.6 Handle the loop-exit phis. Replace the uses of scalar loop-exit |
6871 | phis with new adjusted scalar results, i.e., replace use <s_out0> |
6872 | with use <s_out4>. |
6873 | |
6874 | Transform: |
6875 | loop_exit: |
6876 | s_out0 = phi <s_loop> # (scalar) EXIT_PHI |
6877 | v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI |
6878 | v_out2 = reduce <v_out1> |
6879 | s_out3 = extract_field <v_out2, 0> |
6880 | s_out4 = adjust_result <s_out3> |
6881 | use <s_out0> |
6882 | use <s_out0> |
6883 | |
6884 | into: |
6885 | |
6886 | loop_exit: |
6887 | s_out0 = phi <s_loop> # (scalar) EXIT_PHI |
6888 | v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI |
6889 | v_out2 = reduce <v_out1> |
6890 | s_out3 = extract_field <v_out2, 0> |
6891 | s_out4 = adjust_result <s_out3> |
6892 | use <s_out4> |
6893 | use <s_out4> */ |
6894 | |
6895 | gcc_assert (live_out_stmts.size () == scalar_results.length ()); |
6896 | auto_vec<gimple *> phis; |
6897 | for (k = 0; k < live_out_stmts.size (); k++) |
6898 | { |
6899 | stmt_vec_info scalar_stmt_info = vect_orig_stmt (stmt_info: live_out_stmts[k]); |
6900 | scalar_dest = gimple_get_lhs (scalar_stmt_info->stmt); |
6901 | |
6902 | /* Find the loop-closed-use at the loop exit of the original scalar |
6903 | result. (The reduction result is expected to have two immediate uses, |
6904 | one at the latch block, and one at the loop exit). For double |
6905 | reductions we are looking for exit phis of the outer loop. */ |
6906 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest) |
6907 | { |
6908 | if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))) |
6909 | { |
6910 | if (!is_gimple_debug (USE_STMT (use_p)) |
6911 | && gimple_bb (USE_STMT (use_p)) == loop_exit->dest) |
6912 | phis.safe_push (USE_STMT (use_p)); |
6913 | } |
6914 | else |
6915 | { |
6916 | if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI) |
6917 | { |
6918 | tree phi_res = PHI_RESULT (USE_STMT (use_p)); |
6919 | |
6920 | FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res) |
6921 | { |
6922 | if (!flow_bb_inside_loop_p (loop, |
6923 | gimple_bb (USE_STMT (phi_use_p))) |
6924 | && !is_gimple_debug (USE_STMT (phi_use_p))) |
6925 | phis.safe_push (USE_STMT (phi_use_p)); |
6926 | } |
6927 | } |
6928 | } |
6929 | } |
6930 | |
6931 | FOR_EACH_VEC_ELT (phis, i, exit_phi) |
6932 | { |
6933 | /* Replace the uses: */ |
6934 | orig_name = PHI_RESULT (exit_phi); |
6935 | |
6936 | /* Look for a single use at the target of the skip edge. */ |
6937 | if (unify_with_main_loop_p) |
6938 | { |
6939 | use_operand_p use_p; |
6940 | gimple *user; |
6941 | if (!single_imm_use (var: orig_name, use_p: &use_p, stmt: &user)) |
6942 | gcc_unreachable (); |
6943 | orig_name = gimple_get_lhs (user); |
6944 | } |
6945 | |
6946 | scalar_result = scalar_results[k]; |
6947 | FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name) |
6948 | { |
6949 | FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) |
6950 | SET_USE (use_p, scalar_result); |
6951 | update_stmt (s: use_stmt); |
6952 | } |
6953 | } |
6954 | |
6955 | phis.truncate (size: 0); |
6956 | } |
6957 | } |
6958 | |
6959 | /* Return a vector of type VECTYPE that is equal to the vector select |
6960 | operation "MASK ? VEC : IDENTITY". Insert the select statements |
6961 | before GSI. */ |
6962 | |
6963 | static tree |
6964 | merge_with_identity (gimple_stmt_iterator *gsi, tree mask, tree vectype, |
6965 | tree vec, tree identity) |
6966 | { |
6967 | tree cond = make_temp_ssa_name (type: vectype, NULL, name: "cond" ); |
6968 | gimple *new_stmt = gimple_build_assign (cond, VEC_COND_EXPR, |
6969 | mask, vec, identity); |
6970 | gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); |
6971 | return cond; |
6972 | } |
6973 | |
6974 | /* Successively apply CODE to each element of VECTOR_RHS, in left-to-right |
6975 | order, starting with LHS. Insert the extraction statements before GSI and |
6976 | associate the new scalar SSA names with variable SCALAR_DEST. |
6977 | If MASK is nonzero mask the input and then operate on it unconditionally. |
6978 | Return the SSA name for the result. */ |
6979 | |
6980 | static tree |
6981 | vect_expand_fold_left (gimple_stmt_iterator *gsi, tree scalar_dest, |
6982 | tree_code code, tree lhs, tree vector_rhs, |
6983 | tree mask) |
6984 | { |
6985 | tree vectype = TREE_TYPE (vector_rhs); |
6986 | tree scalar_type = TREE_TYPE (vectype); |
6987 | tree bitsize = TYPE_SIZE (scalar_type); |
6988 | unsigned HOST_WIDE_INT vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype)); |
6989 | unsigned HOST_WIDE_INT element_bitsize = tree_to_uhwi (bitsize); |
6990 | |
6991 | /* Re-create a VEC_COND_EXPR to mask the input here in order to be able |
6992 | to perform an unconditional element-wise reduction of it. */ |
6993 | if (mask) |
6994 | { |
6995 | tree masked_vector_rhs = make_temp_ssa_name (type: vectype, NULL, |
6996 | name: "masked_vector_rhs" ); |
6997 | tree neutral_op = neutral_op_for_reduction (scalar_type, code, NULL_TREE, |
6998 | as_initial: false); |
6999 | tree vector_identity = build_vector_from_val (vectype, neutral_op); |
7000 | gassign *select = gimple_build_assign (masked_vector_rhs, VEC_COND_EXPR, |
7001 | mask, vector_rhs, vector_identity); |
7002 | gsi_insert_before (gsi, select, GSI_SAME_STMT); |
7003 | vector_rhs = masked_vector_rhs; |
7004 | } |
7005 | |
7006 | for (unsigned HOST_WIDE_INT bit_offset = 0; |
7007 | bit_offset < vec_size_in_bits; |
7008 | bit_offset += element_bitsize) |
7009 | { |
7010 | tree bitpos = bitsize_int (bit_offset); |
7011 | tree rhs = build3 (BIT_FIELD_REF, scalar_type, vector_rhs, |
7012 | bitsize, bitpos); |
7013 | |
7014 | gassign *stmt = gimple_build_assign (scalar_dest, rhs); |
7015 | rhs = make_ssa_name (var: scalar_dest, stmt); |
7016 | gimple_assign_set_lhs (gs: stmt, lhs: rhs); |
7017 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); |
7018 | |
7019 | stmt = gimple_build_assign (scalar_dest, code, lhs, rhs); |
7020 | tree new_name = make_ssa_name (var: scalar_dest, stmt); |
7021 | gimple_assign_set_lhs (gs: stmt, lhs: new_name); |
7022 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); |
7023 | lhs = new_name; |
7024 | } |
7025 | return lhs; |
7026 | } |
7027 | |
7028 | /* Get a masked internal function equivalent to REDUC_FN. VECTYPE_IN is the |
7029 | type of the vector input. */ |
7030 | |
7031 | static internal_fn |
7032 | get_masked_reduction_fn (internal_fn reduc_fn, tree vectype_in) |
7033 | { |
7034 | internal_fn mask_reduc_fn; |
7035 | internal_fn mask_len_reduc_fn; |
7036 | |
7037 | switch (reduc_fn) |
7038 | { |
7039 | case IFN_FOLD_LEFT_PLUS: |
7040 | mask_reduc_fn = IFN_MASK_FOLD_LEFT_PLUS; |
7041 | mask_len_reduc_fn = IFN_MASK_LEN_FOLD_LEFT_PLUS; |
7042 | break; |
7043 | |
7044 | default: |
7045 | return IFN_LAST; |
7046 | } |
7047 | |
7048 | if (direct_internal_fn_supported_p (mask_reduc_fn, vectype_in, |
7049 | OPTIMIZE_FOR_SPEED)) |
7050 | return mask_reduc_fn; |
7051 | if (direct_internal_fn_supported_p (mask_len_reduc_fn, vectype_in, |
7052 | OPTIMIZE_FOR_SPEED)) |
7053 | return mask_len_reduc_fn; |
7054 | return IFN_LAST; |
7055 | } |
7056 | |
/* Perform an in-order reduction (FOLD_LEFT_REDUCTION). STMT_INFO is the
   statement that sets the live-out value. REDUC_DEF_STMT is the phi
   statement. CODE is the operation performed by STMT_INFO and OPS are
   its scalar operands. REDUC_INDEX is the index of the operand in
   OPS that is set by REDUC_DEF_STMT. REDUC_FN is the function that
   implements in-order reduction, or IFN_LAST if we should open-code it.
   VECTYPE_IN is the type of the vector input. MASKS specifies the masks
   that should be used to control the operation in a fully-masked loop.
   LENS specifies the control lengths to use in a length-controlled loop.
   Returns true on success; the only failure mode is a conditional
   operation inside an SLP reduction, which is rejected up front.  */

static bool
vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
			       stmt_vec_info stmt_info,
			       gimple_stmt_iterator *gsi,
			       gimple **vec_stmt, slp_tree slp_node,
			       gimple *reduc_def_stmt,
			       code_helper code, internal_fn reduc_fn,
			       tree *ops, int num_ops, tree vectype_in,
			       int reduc_index, vec_loop_masks *masks,
			       vec_loop_lens *lens)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  internal_fn mask_reduc_fn = get_masked_reduction_fn (reduc_fn, vectype_in);

  /* Fold-left reductions are generated one vector at a time, so only a
     single copy (or a single SLP group) is ever handled here.  */
  int ncopies;
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype: vectype_in);

  gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
  gcc_assert (ncopies == 1);

  /* A conditional internal function wraps the scalar operation; extract
     the underlying tree code and remember that its explicit mask operand
     has to be honored below.  */
  bool is_cond_op = false;
  if (!code.is_tree_code ())
    {
      code = conditional_internal_fn_code (internal_fn (code));
      gcc_assert (code != ERROR_MARK);
      is_cond_op = true;
    }

  /* Only binary operations are supported.  */
  gcc_assert (TREE_CODE_LENGTH (tree_code (code)) == binary_op);

  if (slp_node)
    {
      /* Conditional fold-left reductions are not implemented for SLP.  */
      if (is_cond_op)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "fold-left reduction on SLP not supported.\n" );
	  return false;
	}

      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype_out),
			    TYPE_VECTOR_SUBPARTS (vectype_in)));
    }

  /* The operands either come from a binary operation or an IFN_COND operation.
     The former is a gimple assign with binary rhs and the latter is a
     gimple call with four arguments.  */
  gcc_assert (num_ops == 2 || num_ops == 4);
  tree op0, opmask;
  if (!is_cond_op)
    op0 = ops[1 - reduc_index];
  else
    {
      /* For an IFN_COND operation the mask is operand 0 and the data
	 operands start at index 2.  */
      op0 = ops[2 + (1 - reduc_index)];
      opmask = ops[0];
      gcc_assert (!slp_node);
    }

  int group_size = 1;
  stmt_vec_info scalar_dest_def_info;
  auto_vec<tree> vec_oprnds0, vec_opmask;
  if (slp_node)
    {
      auto_vec<vec<tree> > vec_defs (2);
      vect_get_slp_defs (loop_vinfo, slp_node, &vec_defs);
      vec_oprnds0.safe_splice (src: vec_defs[1 - reduc_index]);
      vec_defs[0].release ();
      vec_defs[1].release ();
      group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
      /* The live-out value is set by the last scalar stmt in the group.  */
      scalar_dest_def_info = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
    }
  else
    {
      vect_get_vec_defs_for_operand (vinfo: loop_vinfo, stmt_info, 1,
				     op: op0, &vec_oprnds0);
      scalar_dest_def_info = stmt_info;

      /* For an IFN_COND_OP we also need the vector mask operand.  */
      if (is_cond_op)
	vect_get_vec_defs_for_operand (vinfo: loop_vinfo, stmt_info, 1,
				       op: opmask, &vec_opmask);
    }

  gimple *sdef = vect_orig_stmt (stmt_info: scalar_dest_def_info)->stmt;
  tree scalar_dest = gimple_get_lhs (sdef);
  tree scalar_type = TREE_TYPE (scalar_dest);
  tree reduc_var = gimple_phi_result (gs: reduc_def_stmt);

  int vec_num = vec_oprnds0.length ();
  gcc_assert (vec_num == 1 || slp_node);
  tree vec_elem_type = TREE_TYPE (vectype_out);
  gcc_checking_assert (useless_type_conversion_p (scalar_type, vec_elem_type));

  /* Identity value merged into lanes that the loop mask disables.  Use
     +0.0, or -0.0 when signed zeros are honored so that merging inactive
     lanes cannot change the sign of the accumulated result; the assert
     guards against sign-dependent rounding, where no such identity
     exists.  */
  tree vector_identity = NULL_TREE;
  if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
    {
      vector_identity = build_zero_cst (vectype_out);
      if (!HONOR_SIGNED_ZEROS (vectype_out))
	;
      else
	{
	  gcc_assert (!HONOR_SIGN_DEPENDENT_ROUNDING (vectype_out));
	  vector_identity = const_unop (NEGATE_EXPR, vectype_out,
					vector_identity);
	}
    }

  /* Temporary carrying the running reduction value between the chained
     statements generated below.  */
  tree scalar_dest_var = vect_create_destination_var (scalar_dest, NULL);
  int i;
  tree def0;
  FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
    {
      gimple *new_stmt;
      tree mask = NULL_TREE;
      tree len = NULL_TREE;
      tree bias = NULL_TREE;
      if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
	mask = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num, vectype_in, i);
      else if (is_cond_op)
	mask = vec_opmask[0];
      /* For a length-controlled loop fetch the active length and the
	 target's partial load/store bias; when there is no conditional
	 operation an all-ones mask stands in for the mask operand.  */
      if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
	{
	  len = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num, vectype_in,
				   i, 1);
	  signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
	  bias = build_int_cst (intQI_type_node, biasval);
	  if (!is_cond_op)
	    mask = build_minus_one_cst (truth_type_for (vectype_in));
	}

      /* Handle MINUS by adding the negative.  */
      if (reduc_fn != IFN_LAST && code == MINUS_EXPR)
	{
	  tree negated = make_ssa_name (var: vectype_out);
	  new_stmt = gimple_build_assign (negated, NEGATE_EXPR, def0);
	  gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
	  def0 = negated;
	}

      /* If the target cannot mask the reduction itself, substitute the
	 identity value for inactive lanes before reducing.  */
      if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
	  && mask && mask_reduc_fn == IFN_LAST)
	def0 = merge_with_identity (gsi, mask, vectype: vectype_out, vec: def0,
				    identity: vector_identity);

      /* On the first iteration the input is simply the scalar phi
	 result, and for subsequent iterations it is the output of
	 the preceding operation.  */
      if (reduc_fn != IFN_LAST || (mask && mask_reduc_fn != IFN_LAST))
	{
	  if (mask && len && mask_reduc_fn == IFN_MASK_LEN_FOLD_LEFT_PLUS)
	    new_stmt = gimple_build_call_internal (mask_reduc_fn, 5, reduc_var,
						   def0, mask, len, bias);
	  else if (mask && mask_reduc_fn == IFN_MASK_FOLD_LEFT_PLUS)
	    new_stmt = gimple_build_call_internal (mask_reduc_fn, 3, reduc_var,
						   def0, mask);
	  else
	    new_stmt = gimple_build_call_internal (reduc_fn, 2, reduc_var,
						   def0);
	  /* For chained SLP reductions the output of the previous reduction
	     operation serves as the input of the next. For the final statement
	     the output cannot be a temporary - we reuse the original
	     scalar destination of the last statement.  */
	  if (i != vec_num - 1)
	    {
	      gimple_set_lhs (new_stmt, scalar_dest_var);
	      reduc_var = make_ssa_name (var: scalar_dest_var, stmt: new_stmt);
	      gimple_set_lhs (new_stmt, reduc_var);
	    }
	}
      else
	{
	  /* No target support: expand the reduction into a sequence of
	     scalar extract-and-accumulate statements.  */
	  reduc_var = vect_expand_fold_left (gsi, scalar_dest: scalar_dest_var,
					     code: tree_code (code), lhs: reduc_var, vector_rhs: def0,
					     mask);
	  new_stmt = SSA_NAME_DEF_STMT (reduc_var);
	  /* Remove the statement, so that we can use the same code paths
	     as for statements that we've just created.  */
	  gimple_stmt_iterator tmp_gsi = gsi_for_stmt (new_stmt);
	  gsi_remove (&tmp_gsi, true);
	}

      /* The last generated statement replaces the original scalar stmt;
	 earlier ones are inserted as new statements.  */
      if (i == vec_num - 1)
	{
	  gimple_set_lhs (new_stmt, scalar_dest);
	  vect_finish_replace_stmt (loop_vinfo,
				    scalar_dest_def_info,
				    new_stmt);
	}
      else
	vect_finish_stmt_generation (loop_vinfo,
				     scalar_dest_def_info,
				     new_stmt, gsi);

      if (slp_node)
	slp_node->push_vec_def (def: new_stmt);
      else
	{
	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
	  *vec_stmt = new_stmt;
	}
    }

  return true;
}
7274 | |
7275 | /* Function is_nonwrapping_integer_induction. |
7276 | |
7277 | Check if STMT_VINO (which is part of loop LOOP) both increments and |
7278 | does not cause overflow. */ |
7279 | |
7280 | static bool |
7281 | is_nonwrapping_integer_induction (stmt_vec_info stmt_vinfo, class loop *loop) |
7282 | { |
7283 | gphi *phi = as_a <gphi *> (p: stmt_vinfo->stmt); |
7284 | tree base = STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo); |
7285 | tree step = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo); |
7286 | tree lhs_type = TREE_TYPE (gimple_phi_result (phi)); |
7287 | widest_int ni, max_loop_value, lhs_max; |
7288 | wi::overflow_type overflow = wi::OVF_NONE; |
7289 | |
7290 | /* Make sure the loop is integer based. */ |
7291 | if (TREE_CODE (base) != INTEGER_CST |
7292 | || TREE_CODE (step) != INTEGER_CST) |
7293 | return false; |
7294 | |
7295 | /* Check that the max size of the loop will not wrap. */ |
7296 | |
7297 | if (TYPE_OVERFLOW_UNDEFINED (lhs_type)) |
7298 | return true; |
7299 | |
7300 | if (! max_stmt_executions (loop, &ni)) |
7301 | return false; |
7302 | |
7303 | max_loop_value = wi::mul (x: wi::to_widest (t: step), y: ni, TYPE_SIGN (lhs_type), |
7304 | overflow: &overflow); |
7305 | if (overflow) |
7306 | return false; |
7307 | |
7308 | max_loop_value = wi::add (x: wi::to_widest (t: base), y: max_loop_value, |
7309 | TYPE_SIGN (lhs_type), overflow: &overflow); |
7310 | if (overflow) |
7311 | return false; |
7312 | |
7313 | return (wi::min_precision (x: max_loop_value, TYPE_SIGN (lhs_type)) |
7314 | <= TYPE_PRECISION (lhs_type)); |
7315 | } |
7316 | |
7317 | /* Check if masking can be supported by inserting a conditional expression. |
7318 | CODE is the code for the operation. COND_FN is the conditional internal |
7319 | function, if it exists. VECTYPE_IN is the type of the vector input. */ |
7320 | static bool |
7321 | use_mask_by_cond_expr_p (code_helper code, internal_fn cond_fn, |
7322 | tree vectype_in) |
7323 | { |
7324 | if (cond_fn != IFN_LAST |
7325 | && direct_internal_fn_supported_p (cond_fn, vectype_in, |
7326 | OPTIMIZE_FOR_SPEED)) |
7327 | return false; |
7328 | |
7329 | if (code.is_tree_code ()) |
7330 | switch (tree_code (code)) |
7331 | { |
7332 | case DOT_PROD_EXPR: |
7333 | case SAD_EXPR: |
7334 | return true; |
7335 | |
7336 | default: |
7337 | break; |
7338 | } |
7339 | return false; |
7340 | } |
7341 | |
7342 | /* Insert a conditional expression to enable masked vectorization. CODE is the |
7343 | code for the operation. VOP is the array of operands. MASK is the loop |
7344 | mask. GSI is a statement iterator used to place the new conditional |
7345 | expression. */ |
7346 | static void |
7347 | build_vect_cond_expr (code_helper code, tree vop[3], tree mask, |
7348 | gimple_stmt_iterator *gsi) |
7349 | { |
7350 | switch (tree_code (code)) |
7351 | { |
7352 | case DOT_PROD_EXPR: |
7353 | { |
7354 | tree vectype = TREE_TYPE (vop[1]); |
7355 | tree zero = build_zero_cst (vectype); |
7356 | tree masked_op1 = make_temp_ssa_name (type: vectype, NULL, name: "masked_op1" ); |
7357 | gassign *select = gimple_build_assign (masked_op1, VEC_COND_EXPR, |
7358 | mask, vop[1], zero); |
7359 | gsi_insert_before (gsi, select, GSI_SAME_STMT); |
7360 | vop[1] = masked_op1; |
7361 | break; |
7362 | } |
7363 | |
7364 | case SAD_EXPR: |
7365 | { |
7366 | tree vectype = TREE_TYPE (vop[1]); |
7367 | tree masked_op1 = make_temp_ssa_name (type: vectype, NULL, name: "masked_op1" ); |
7368 | gassign *select = gimple_build_assign (masked_op1, VEC_COND_EXPR, |
7369 | mask, vop[1], vop[0]); |
7370 | gsi_insert_before (gsi, select, GSI_SAME_STMT); |
7371 | vop[1] = masked_op1; |
7372 | break; |
7373 | } |
7374 | |
7375 | default: |
7376 | gcc_unreachable (); |
7377 | } |
7378 | } |
7379 | |
7380 | /* Function vectorizable_reduction. |
7381 | |
7382 | Check if STMT_INFO performs a reduction operation that can be vectorized. |
7383 | If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized |
7384 | stmt to replace it, put it in VEC_STMT, and insert it at GSI. |
7385 | Return true if STMT_INFO is vectorizable in this way. |
7386 | |
7387 | This function also handles reduction idioms (patterns) that have been |
7388 | recognized in advance during vect_pattern_recog. In this case, STMT_INFO |
7389 | may be of this form: |
7390 | X = pattern_expr (arg0, arg1, ..., X) |
7391 | and its STMT_VINFO_RELATED_STMT points to the last stmt in the original |
7392 | sequence that had been detected and replaced by the pattern-stmt |
7393 | (STMT_INFO). |
7394 | |
7395 | This function also handles reduction of condition expressions, for example: |
7396 | for (int i = 0; i < N; i++) |
7397 | if (a[i] < value) |
7398 | last = a[i]; |
7399 | This is handled by vectorising the loop and creating an additional vector |
7400 | containing the loop indexes for which "a[i] < value" was true. In the |
7401 | function epilogue this is reduced to a single max value and then used to |
7402 | index into the vector of results. |
7403 | |
7404 | In some cases of reduction patterns, the type of the reduction variable X is |
7405 | different than the type of the other arguments of STMT_INFO. |
7406 | In such cases, the vectype that is used when transforming STMT_INFO into |
7407 | a vector stmt is different than the vectype that is used to determine the |
7408 | vectorization factor, because it consists of a different number of elements |
7409 | than the actual number of elements that are being operated upon in parallel. |
7410 | |
7411 | For example, consider an accumulation of shorts into an int accumulator. |
7412 | On some targets it's possible to vectorize this pattern operating on 8 |
7413 | shorts at a time (hence, the vectype for purposes of determining the |
7414 | vectorization factor should be V8HI); on the other hand, the vectype that |
7415 | is used to create the vector form is actually V4SI (the type of the result). |
7416 | |
7417 | Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that |
7418 | indicates what is the actual level of parallelism (V8HI in the example), so |
7419 | that the right vectorization factor would be derived. This vectype |
7420 | corresponds to the type of arguments to the reduction stmt, and should *NOT* |
7421 | be used to create the vectorized stmt. The right vectype for the vectorized |
7422 | stmt is obtained from the type of the result X: |
7423 | get_vectype_for_scalar_type (vinfo, TREE_TYPE (X)) |
7424 | |
7425 | This means that, contrary to "regular" reductions (or "regular" stmts in |
7426 | general), the following equation: |
7427 | STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (vinfo, TREE_TYPE (X)) |
7428 | does *NOT* necessarily hold for reduction patterns. */ |
7429 | |
7430 | bool |
7431 | vectorizable_reduction (loop_vec_info loop_vinfo, |
7432 | stmt_vec_info stmt_info, slp_tree slp_node, |
7433 | slp_instance slp_node_instance, |
7434 | stmt_vector_for_cost *cost_vec) |
7435 | { |
7436 | tree vectype_in = NULL_TREE; |
7437 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
7438 | enum vect_def_type cond_reduc_dt = vect_unknown_def_type; |
7439 | stmt_vec_info cond_stmt_vinfo = NULL; |
7440 | int i; |
7441 | int ncopies; |
7442 | bool single_defuse_cycle = false; |
7443 | bool nested_cycle = false; |
7444 | bool double_reduc = false; |
7445 | int vec_num; |
7446 | tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE; |
7447 | tree cond_reduc_val = NULL_TREE; |
7448 | |
7449 | /* Make sure it was already recognized as a reduction computation. */ |
7450 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def |
7451 | && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def |
7452 | && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle) |
7453 | return false; |
7454 | |
7455 | /* The stmt we store reduction analysis meta on. */ |
7456 | stmt_vec_info reduc_info = info_for_reduction (vinfo: loop_vinfo, stmt_info); |
7457 | reduc_info->is_reduc_info = true; |
7458 | |
7459 | if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) |
7460 | { |
7461 | if (is_a <gphi *> (p: stmt_info->stmt)) |
7462 | { |
7463 | if (slp_node) |
7464 | { |
7465 | /* We eventually need to set a vector type on invariant |
7466 | arguments. */ |
7467 | unsigned j; |
7468 | slp_tree child; |
7469 | FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child) |
7470 | if (!vect_maybe_update_slp_op_vectype |
7471 | (child, SLP_TREE_VECTYPE (slp_node))) |
7472 | { |
7473 | if (dump_enabled_p ()) |
7474 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7475 | "incompatible vector types for " |
7476 | "invariants\n" ); |
7477 | return false; |
7478 | } |
7479 | } |
7480 | /* Analysis for double-reduction is done on the outer |
7481 | loop PHI, nested cycles have no further restrictions. */ |
7482 | STMT_VINFO_TYPE (stmt_info) = cycle_phi_info_type; |
7483 | } |
7484 | else |
7485 | STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; |
7486 | return true; |
7487 | } |
7488 | |
7489 | stmt_vec_info orig_stmt_of_analysis = stmt_info; |
7490 | stmt_vec_info phi_info = stmt_info; |
7491 | if (!is_a <gphi *> (p: stmt_info->stmt)) |
7492 | { |
7493 | STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; |
7494 | return true; |
7495 | } |
7496 | if (slp_node) |
7497 | { |
7498 | slp_node_instance->reduc_phis = slp_node; |
7499 | /* ??? We're leaving slp_node to point to the PHIs, we only |
7500 | need it to get at the number of vector stmts which wasn't |
7501 | yet initialized for the instance root. */ |
7502 | } |
7503 | if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) |
7504 | { |
7505 | use_operand_p use_p; |
7506 | gimple *use_stmt; |
7507 | bool res = single_imm_use (var: gimple_phi_result (gs: stmt_info->stmt), |
7508 | use_p: &use_p, stmt: &use_stmt); |
7509 | gcc_assert (res); |
7510 | phi_info = loop_vinfo->lookup_stmt (use_stmt); |
7511 | } |
7512 | |
7513 | /* PHIs should not participate in patterns. */ |
7514 | gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info)); |
7515 | gphi *reduc_def_phi = as_a <gphi *> (p: phi_info->stmt); |
7516 | |
7517 | /* Verify following REDUC_IDX from the latch def leads us back to the PHI |
7518 | and compute the reduction chain length. Discover the real |
7519 | reduction operation stmt on the way (stmt_info and slp_for_stmt_info). */ |
7520 | tree reduc_def |
7521 | = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi, |
7522 | loop_latch_edge |
7523 | (gimple_bb (reduc_def_phi)->loop_father)); |
7524 | unsigned reduc_chain_length = 0; |
7525 | bool only_slp_reduc_chain = true; |
7526 | stmt_info = NULL; |
7527 | slp_tree slp_for_stmt_info = slp_node ? slp_node_instance->root : NULL; |
7528 | while (reduc_def != PHI_RESULT (reduc_def_phi)) |
7529 | { |
7530 | stmt_vec_info def = loop_vinfo->lookup_def (reduc_def); |
7531 | stmt_vec_info vdef = vect_stmt_to_vectorize (stmt_info: def); |
7532 | if (STMT_VINFO_REDUC_IDX (vdef) == -1) |
7533 | { |
7534 | if (dump_enabled_p ()) |
7535 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7536 | "reduction chain broken by patterns.\n" ); |
7537 | return false; |
7538 | } |
7539 | if (!REDUC_GROUP_FIRST_ELEMENT (vdef)) |
7540 | only_slp_reduc_chain = false; |
7541 | /* For epilogue generation live members of the chain need |
7542 | to point back to the PHI via their original stmt for |
7543 | info_for_reduction to work. For SLP we need to look at |
7544 | all lanes here - even though we only will vectorize from |
7545 | the SLP node with live lane zero the other live lanes also |
7546 | need to be identified as part of a reduction to be able |
7547 | to skip code generation for them. */ |
7548 | if (slp_for_stmt_info) |
7549 | { |
7550 | for (auto s : SLP_TREE_SCALAR_STMTS (slp_for_stmt_info)) |
7551 | if (STMT_VINFO_LIVE_P (s)) |
7552 | STMT_VINFO_REDUC_DEF (vect_orig_stmt (s)) = phi_info; |
7553 | } |
7554 | else if (STMT_VINFO_LIVE_P (vdef)) |
7555 | STMT_VINFO_REDUC_DEF (def) = phi_info; |
7556 | gimple_match_op op; |
7557 | if (!gimple_extract_op (vdef->stmt, &op)) |
7558 | { |
7559 | if (dump_enabled_p ()) |
7560 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7561 | "reduction chain includes unsupported" |
7562 | " statement type.\n" ); |
7563 | return false; |
7564 | } |
7565 | if (CONVERT_EXPR_CODE_P (op.code)) |
7566 | { |
7567 | if (!tree_nop_conversion_p (op.type, TREE_TYPE (op.ops[0]))) |
7568 | { |
7569 | if (dump_enabled_p ()) |
7570 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7571 | "conversion in the reduction chain.\n" ); |
7572 | return false; |
7573 | } |
7574 | } |
7575 | else if (!stmt_info) |
7576 | /* First non-conversion stmt. */ |
7577 | stmt_info = vdef; |
7578 | reduc_def = op.ops[STMT_VINFO_REDUC_IDX (vdef)]; |
7579 | reduc_chain_length++; |
7580 | if (!stmt_info && slp_node) |
7581 | slp_for_stmt_info = SLP_TREE_CHILDREN (slp_for_stmt_info)[0]; |
7582 | } |
7583 | /* PHIs should not participate in patterns. */ |
7584 | gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info)); |
7585 | |
7586 | if (nested_in_vect_loop_p (loop, stmt_info)) |
7587 | { |
7588 | loop = loop->inner; |
7589 | nested_cycle = true; |
7590 | } |
7591 | |
7592 | /* STMT_VINFO_REDUC_DEF doesn't point to the first but the last |
7593 | element. */ |
7594 | if (slp_node && REDUC_GROUP_FIRST_ELEMENT (stmt_info)) |
7595 | { |
7596 | gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (stmt_info)); |
7597 | stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info); |
7598 | } |
7599 | if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)) |
7600 | gcc_assert (slp_node |
7601 | && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info); |
7602 | |
7603 | /* 1. Is vectorizable reduction? */ |
7604 | /* Not supportable if the reduction variable is used in the loop, unless |
7605 | it's a reduction chain. */ |
7606 | if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer |
7607 | && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)) |
7608 | return false; |
7609 | |
7610 | /* Reductions that are not used even in an enclosing outer-loop, |
7611 | are expected to be "live" (used out of the loop). */ |
7612 | if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope |
7613 | && !STMT_VINFO_LIVE_P (stmt_info)) |
7614 | return false; |
7615 | |
7616 | /* 2. Has this been recognized as a reduction pattern? |
7617 | |
7618 | Check if STMT represents a pattern that has been recognized |
7619 | in earlier analysis stages. For stmts that represent a pattern, |
7620 | the STMT_VINFO_RELATED_STMT field records the last stmt in |
7621 | the original sequence that constitutes the pattern. */ |
7622 | |
7623 | stmt_vec_info orig_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info); |
7624 | if (orig_stmt_info) |
7625 | { |
7626 | gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info)); |
7627 | gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info)); |
7628 | } |
7629 | |
7630 | /* 3. Check the operands of the operation. The first operands are defined |
7631 | inside the loop body. The last operand is the reduction variable, |
7632 | which is defined by the loop-header-phi. */ |
7633 | |
7634 | tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); |
7635 | STMT_VINFO_REDUC_VECTYPE (reduc_info) = vectype_out; |
7636 | gimple_match_op op; |
7637 | if (!gimple_extract_op (stmt_info->stmt, &op)) |
7638 | gcc_unreachable (); |
7639 | bool lane_reduc_code_p = (op.code == DOT_PROD_EXPR |
7640 | || op.code == WIDEN_SUM_EXPR |
7641 | || op.code == SAD_EXPR); |
7642 | |
7643 | if (!POINTER_TYPE_P (op.type) && !INTEGRAL_TYPE_P (op.type) |
7644 | && !SCALAR_FLOAT_TYPE_P (op.type)) |
7645 | return false; |
7646 | |
7647 | /* Do not try to vectorize bit-precision reductions. */ |
7648 | if (!type_has_mode_precision_p (t: op.type)) |
7649 | return false; |
7650 | |
7651 | /* For lane-reducing ops we're reducing the number of reduction PHIs |
7652 | which means the only use of that may be in the lane-reducing operation. */ |
7653 | if (lane_reduc_code_p |
7654 | && reduc_chain_length != 1 |
7655 | && !only_slp_reduc_chain) |
7656 | { |
7657 | if (dump_enabled_p ()) |
7658 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7659 | "lane-reducing reduction with extra stmts.\n" ); |
7660 | return false; |
7661 | } |
7662 | |
7663 | /* All uses but the last are expected to be defined in the loop. |
7664 | The last use is the reduction variable. In case of nested cycle this |
7665 | assumption is not true: we use reduc_index to record the index of the |
7666 | reduction variable. */ |
7667 | slp_tree *slp_op = XALLOCAVEC (slp_tree, op.num_ops); |
7668 | tree *vectype_op = XALLOCAVEC (tree, op.num_ops); |
7669 | /* We need to skip an extra operand for COND_EXPRs with embedded |
7670 | comparison. */ |
7671 | unsigned opno_adjust = 0; |
7672 | if (op.code == COND_EXPR && COMPARISON_CLASS_P (op.ops[0])) |
7673 | opno_adjust = 1; |
7674 | for (i = 0; i < (int) op.num_ops; i++) |
7675 | { |
7676 | /* The condition of COND_EXPR is checked in vectorizable_condition(). */ |
7677 | if (i == 0 && op.code == COND_EXPR) |
7678 | continue; |
7679 | |
7680 | stmt_vec_info def_stmt_info; |
7681 | enum vect_def_type dt; |
7682 | if (!vect_is_simple_use (loop_vinfo, stmt_info, slp_for_stmt_info, |
7683 | i + opno_adjust, &op.ops[i], &slp_op[i], &dt, |
7684 | &vectype_op[i], &def_stmt_info)) |
7685 | { |
7686 | if (dump_enabled_p ()) |
7687 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7688 | "use not simple.\n" ); |
7689 | return false; |
7690 | } |
7691 | if (i == STMT_VINFO_REDUC_IDX (stmt_info)) |
7692 | continue; |
7693 | |
7694 | /* For an IFN_COND_OP we might hit the reduction definition operand |
7695 | twice (once as definition, once as else). */ |
7696 | if (op.ops[i] == op.ops[STMT_VINFO_REDUC_IDX (stmt_info)]) |
7697 | continue; |
7698 | |
7699 | /* There should be only one cycle def in the stmt, the one |
7700 | leading to reduc_def. */ |
7701 | if (VECTORIZABLE_CYCLE_DEF (dt)) |
7702 | return false; |
7703 | |
7704 | if (!vectype_op[i]) |
7705 | vectype_op[i] |
7706 | = get_vectype_for_scalar_type (loop_vinfo, |
7707 | TREE_TYPE (op.ops[i]), slp_op[i]); |
7708 | |
7709 | /* To properly compute ncopies we are interested in the widest |
7710 | non-reduction input type in case we're looking at a widening |
7711 | accumulation that we later handle in vect_transform_reduction. */ |
7712 | if (lane_reduc_code_p |
7713 | && vectype_op[i] |
7714 | && (!vectype_in |
7715 | || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in))) |
7716 | < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_op[i])))))) |
7717 | vectype_in = vectype_op[i]; |
7718 | |
7719 | /* Record how the non-reduction-def value of COND_EXPR is defined. |
7720 | ??? For a chain of multiple CONDs we'd have to match them up all. */ |
7721 | if (op.code == COND_EXPR && reduc_chain_length == 1) |
7722 | { |
7723 | if (dt == vect_constant_def) |
7724 | { |
7725 | cond_reduc_dt = dt; |
7726 | cond_reduc_val = op.ops[i]; |
7727 | } |
7728 | else if (dt == vect_induction_def |
7729 | && def_stmt_info |
7730 | && is_nonwrapping_integer_induction (stmt_vinfo: def_stmt_info, loop)) |
7731 | { |
7732 | cond_reduc_dt = dt; |
7733 | cond_stmt_vinfo = def_stmt_info; |
7734 | } |
7735 | } |
7736 | } |
7737 | if (!vectype_in) |
7738 | vectype_in = STMT_VINFO_VECTYPE (phi_info); |
7739 | STMT_VINFO_REDUC_VECTYPE_IN (reduc_info) = vectype_in; |
7740 | |
7741 | enum vect_reduction_type v_reduc_type = STMT_VINFO_REDUC_TYPE (phi_info); |
7742 | STMT_VINFO_REDUC_TYPE (reduc_info) = v_reduc_type; |
7743 | /* If we have a condition reduction, see if we can simplify it further. */ |
7744 | if (v_reduc_type == COND_REDUCTION) |
7745 | { |
7746 | if (slp_node) |
7747 | return false; |
7748 | |
7749 | /* When the condition uses the reduction value in the condition, fail. */ |
7750 | if (STMT_VINFO_REDUC_IDX (stmt_info) == 0) |
7751 | { |
7752 | if (dump_enabled_p ()) |
7753 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7754 | "condition depends on previous iteration\n" ); |
7755 | return false; |
7756 | } |
7757 | |
7758 | if (reduc_chain_length == 1 |
7759 | && (direct_internal_fn_supported_p (IFN_FOLD_EXTRACT_LAST, vectype_in, |
7760 | OPTIMIZE_FOR_SPEED) |
7761 | || direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST, |
7762 | vectype_in, |
7763 | OPTIMIZE_FOR_SPEED))) |
7764 | { |
7765 | if (dump_enabled_p ()) |
7766 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7767 | "optimizing condition reduction with" |
7768 | " FOLD_EXTRACT_LAST.\n" ); |
7769 | STMT_VINFO_REDUC_TYPE (reduc_info) = EXTRACT_LAST_REDUCTION; |
7770 | } |
7771 | else if (cond_reduc_dt == vect_induction_def) |
7772 | { |
7773 | tree base |
7774 | = STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (cond_stmt_vinfo); |
7775 | tree step = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (cond_stmt_vinfo); |
7776 | |
7777 | gcc_assert (TREE_CODE (base) == INTEGER_CST |
7778 | && TREE_CODE (step) == INTEGER_CST); |
7779 | cond_reduc_val = NULL_TREE; |
7780 | enum tree_code cond_reduc_op_code = ERROR_MARK; |
7781 | tree res = PHI_RESULT (STMT_VINFO_STMT (cond_stmt_vinfo)); |
7782 | if (!types_compatible_p (TREE_TYPE (res), TREE_TYPE (base))) |
7783 | ; |
7784 | /* Find a suitable value, for MAX_EXPR below base, for MIN_EXPR |
7785 | above base; punt if base is the minimum value of the type for |
7786 | MAX_EXPR or maximum value of the type for MIN_EXPR for now. */ |
7787 | else if (tree_int_cst_sgn (step) == -1) |
7788 | { |
7789 | cond_reduc_op_code = MIN_EXPR; |
7790 | if (tree_int_cst_sgn (base) == -1) |
7791 | cond_reduc_val = build_int_cst (TREE_TYPE (base), 0); |
7792 | else if (tree_int_cst_lt (t1: base, |
7793 | TYPE_MAX_VALUE (TREE_TYPE (base)))) |
7794 | cond_reduc_val |
7795 | = int_const_binop (PLUS_EXPR, base, integer_one_node); |
7796 | } |
7797 | else |
7798 | { |
7799 | cond_reduc_op_code = MAX_EXPR; |
7800 | if (tree_int_cst_sgn (base) == 1) |
7801 | cond_reduc_val = build_int_cst (TREE_TYPE (base), 0); |
7802 | else if (tree_int_cst_lt (TYPE_MIN_VALUE (TREE_TYPE (base)), |
7803 | t2: base)) |
7804 | cond_reduc_val |
7805 | = int_const_binop (MINUS_EXPR, base, integer_one_node); |
7806 | } |
7807 | if (cond_reduc_val) |
7808 | { |
7809 | if (dump_enabled_p ()) |
7810 | dump_printf_loc (MSG_NOTE, vect_location, |
7811 | "condition expression based on " |
7812 | "integer induction.\n" ); |
7813 | STMT_VINFO_REDUC_CODE (reduc_info) = cond_reduc_op_code; |
7814 | STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info) |
7815 | = cond_reduc_val; |
7816 | STMT_VINFO_REDUC_TYPE (reduc_info) = INTEGER_INDUC_COND_REDUCTION; |
7817 | } |
7818 | } |
7819 | else if (cond_reduc_dt == vect_constant_def) |
7820 | { |
7821 | enum vect_def_type cond_initial_dt; |
7822 | tree cond_initial_val = vect_phi_initial_value (phi: reduc_def_phi); |
7823 | vect_is_simple_use (cond_initial_val, loop_vinfo, &cond_initial_dt); |
7824 | if (cond_initial_dt == vect_constant_def |
7825 | && types_compatible_p (TREE_TYPE (cond_initial_val), |
7826 | TREE_TYPE (cond_reduc_val))) |
7827 | { |
7828 | tree e = fold_binary (LE_EXPR, boolean_type_node, |
7829 | cond_initial_val, cond_reduc_val); |
7830 | if (e && (integer_onep (e) || integer_zerop (e))) |
7831 | { |
7832 | if (dump_enabled_p ()) |
7833 | dump_printf_loc (MSG_NOTE, vect_location, |
7834 | "condition expression based on " |
7835 | "compile time constant.\n" ); |
7836 | /* Record reduction code at analysis stage. */ |
7837 | STMT_VINFO_REDUC_CODE (reduc_info) |
7838 | = integer_onep (e) ? MAX_EXPR : MIN_EXPR; |
7839 | STMT_VINFO_REDUC_TYPE (reduc_info) = CONST_COND_REDUCTION; |
7840 | } |
7841 | } |
7842 | } |
7843 | } |
7844 | |
7845 | if (STMT_VINFO_LIVE_P (phi_info)) |
7846 | return false; |
7847 | |
7848 | if (slp_node) |
7849 | ncopies = 1; |
7850 | else |
7851 | ncopies = vect_get_num_copies (loop_vinfo, vectype: vectype_in); |
7852 | |
7853 | gcc_assert (ncopies >= 1); |
7854 | |
7855 | poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (node: vectype_out); |
7856 | |
7857 | if (nested_cycle) |
7858 | { |
7859 | gcc_assert (STMT_VINFO_DEF_TYPE (reduc_info) |
7860 | == vect_double_reduction_def); |
7861 | double_reduc = true; |
7862 | } |
7863 | |
7864 | /* 4.2. Check support for the epilog operation. |
7865 | |
7866 | If STMT represents a reduction pattern, then the type of the |
7867 | reduction variable may be different than the type of the rest |
7868 | of the arguments. For example, consider the case of accumulation |
7869 | of shorts into an int accumulator; The original code: |
7870 | S1: int_a = (int) short_a; |
7871 | orig_stmt-> S2: int_acc = plus <int_a ,int_acc>; |
7872 | |
7873 | was replaced with: |
7874 | STMT: int_acc = widen_sum <short_a, int_acc> |
7875 | |
7876 | This means that: |
7877 | 1. The tree-code that is used to create the vector operation in the |
7878 | epilog code (that reduces the partial results) is not the |
7879 | tree-code of STMT, but is rather the tree-code of the original |
7880 | stmt from the pattern that STMT is replacing. I.e, in the example |
7881 | above we want to use 'widen_sum' in the loop, but 'plus' in the |
7882 | epilog. |
7883 | 2. The type (mode) we use to check available target support |
7884 | for the vector operation to be created in the *epilog*, is |
7885 | determined by the type of the reduction variable (in the example |
7886 | above we'd check this: optab_handler (plus_optab, vect_int_mode])). |
7887 | However the type (mode) we use to check available target support |
7888 | for the vector operation to be created *inside the loop*, is |
7889 | determined by the type of the other arguments to STMT (in the |
7890 | example we'd check this: optab_handler (widen_sum_optab, |
7891 | vect_short_mode)). |
7892 | |
7893 | This is contrary to "regular" reductions, in which the types of all |
7894 | the arguments are the same as the type of the reduction variable. |
7895 | For "regular" reductions we can therefore use the same vector type |
7896 | (and also the same tree-code) when generating the epilog code and |
7897 | when generating the code inside the loop. */ |
7898 | |
7899 | code_helper orig_code = STMT_VINFO_REDUC_CODE (phi_info); |
7900 | |
7901 | /* If conversion might have created a conditional operation like |
7902 | IFN_COND_ADD already. Use the internal code for the following checks. */ |
7903 | if (orig_code.is_internal_fn ()) |
7904 | { |
7905 | tree_code new_code = conditional_internal_fn_code (internal_fn (orig_code)); |
7906 | orig_code = new_code != ERROR_MARK ? new_code : orig_code; |
7907 | } |
7908 | |
7909 | STMT_VINFO_REDUC_CODE (reduc_info) = orig_code; |
7910 | |
7911 | vect_reduction_type reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info); |
7912 | if (reduction_type == TREE_CODE_REDUCTION) |
7913 | { |
7914 | /* Check whether it's ok to change the order of the computation. |
7915 | Generally, when vectorizing a reduction we change the order of the |
7916 | computation. This may change the behavior of the program in some |
7917 | cases, so we need to check that this is ok. One exception is when |
7918 | vectorizing an outer-loop: the inner-loop is executed sequentially, |
7919 | and therefore vectorizing reductions in the inner-loop during |
7920 | outer-loop vectorization is safe. Likewise when we are vectorizing |
7921 | a series of reductions using SLP and the VF is one the reductions |
7922 | are performed in scalar order. */ |
7923 | if (slp_node |
7924 | && !REDUC_GROUP_FIRST_ELEMENT (stmt_info) |
7925 | && known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 1u)) |
7926 | ; |
7927 | else if (needs_fold_left_reduction_p (type: op.type, code: orig_code)) |
7928 | { |
7929 | /* When vectorizing a reduction chain w/o SLP the reduction PHI |
7930 | is not directy used in stmt. */ |
7931 | if (!only_slp_reduc_chain |
7932 | && reduc_chain_length != 1) |
7933 | { |
7934 | if (dump_enabled_p ()) |
7935 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7936 | "in-order reduction chain without SLP.\n" ); |
7937 | return false; |
7938 | } |
7939 | STMT_VINFO_REDUC_TYPE (reduc_info) |
7940 | = reduction_type = FOLD_LEFT_REDUCTION; |
7941 | } |
7942 | else if (!commutative_binary_op_p (orig_code, op.type) |
7943 | || !associative_binary_op_p (orig_code, op.type)) |
7944 | { |
7945 | if (dump_enabled_p ()) |
7946 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7947 | "reduction: not commutative/associative\n" ); |
7948 | return false; |
7949 | } |
7950 | } |
7951 | |
7952 | if ((double_reduc || reduction_type != TREE_CODE_REDUCTION) |
7953 | && ncopies > 1) |
7954 | { |
7955 | if (dump_enabled_p ()) |
7956 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7957 | "multiple types in double reduction or condition " |
7958 | "reduction or fold-left reduction.\n" ); |
7959 | return false; |
7960 | } |
7961 | |
7962 | internal_fn reduc_fn = IFN_LAST; |
7963 | if (reduction_type == TREE_CODE_REDUCTION |
7964 | || reduction_type == FOLD_LEFT_REDUCTION |
7965 | || reduction_type == INTEGER_INDUC_COND_REDUCTION |
7966 | || reduction_type == CONST_COND_REDUCTION) |
7967 | { |
7968 | if (reduction_type == FOLD_LEFT_REDUCTION |
7969 | ? fold_left_reduction_fn (code: orig_code, reduc_fn: &reduc_fn) |
7970 | : reduction_fn_for_scalar_code (code: orig_code, reduc_fn: &reduc_fn)) |
7971 | { |
7972 | if (reduc_fn != IFN_LAST |
7973 | && !direct_internal_fn_supported_p (reduc_fn, vectype_out, |
7974 | OPTIMIZE_FOR_SPEED)) |
7975 | { |
7976 | if (dump_enabled_p ()) |
7977 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7978 | "reduc op not supported by target.\n" ); |
7979 | |
7980 | reduc_fn = IFN_LAST; |
7981 | } |
7982 | } |
7983 | else |
7984 | { |
7985 | if (!nested_cycle || double_reduc) |
7986 | { |
7987 | if (dump_enabled_p ()) |
7988 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7989 | "no reduc code for scalar code.\n" ); |
7990 | |
7991 | return false; |
7992 | } |
7993 | } |
7994 | } |
7995 | else if (reduction_type == COND_REDUCTION) |
7996 | { |
7997 | int scalar_precision |
7998 | = GET_MODE_PRECISION (SCALAR_TYPE_MODE (op.type)); |
7999 | cr_index_scalar_type = make_unsigned_type (scalar_precision); |
8000 | cr_index_vector_type = get_same_sized_vectype (cr_index_scalar_type, |
8001 | vectype_out); |
8002 | |
8003 | if (direct_internal_fn_supported_p (IFN_REDUC_MAX, cr_index_vector_type, |
8004 | OPTIMIZE_FOR_SPEED)) |
8005 | reduc_fn = IFN_REDUC_MAX; |
8006 | } |
8007 | STMT_VINFO_REDUC_FN (reduc_info) = reduc_fn; |
8008 | |
8009 | if (reduction_type != EXTRACT_LAST_REDUCTION |
8010 | && (!nested_cycle || double_reduc) |
8011 | && reduc_fn == IFN_LAST |
8012 | && !nunits_out.is_constant ()) |
8013 | { |
8014 | if (dump_enabled_p ()) |
8015 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8016 | "missing target support for reduction on" |
8017 | " variable-length vectors.\n" ); |
8018 | return false; |
8019 | } |
8020 | |
8021 | /* For SLP reductions, see if there is a neutral value we can use. */ |
8022 | tree neutral_op = NULL_TREE; |
8023 | if (slp_node) |
8024 | { |
8025 | tree initial_value = NULL_TREE; |
8026 | if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) != NULL) |
8027 | initial_value = vect_phi_initial_value (phi: reduc_def_phi); |
8028 | neutral_op = neutral_op_for_reduction (TREE_TYPE (vectype_out), |
8029 | code: orig_code, initial_value); |
8030 | } |
8031 | |
8032 | if (double_reduc && reduction_type == FOLD_LEFT_REDUCTION) |
8033 | { |
8034 | /* We can't support in-order reductions of code such as this: |
8035 | |
8036 | for (int i = 0; i < n1; ++i) |
8037 | for (int j = 0; j < n2; ++j) |
8038 | l += a[j]; |
8039 | |
8040 | since GCC effectively transforms the loop when vectorizing: |
8041 | |
8042 | for (int i = 0; i < n1 / VF; ++i) |
8043 | for (int j = 0; j < n2; ++j) |
8044 | for (int k = 0; k < VF; ++k) |
8045 | l += a[j]; |
8046 | |
8047 | which is a reassociation of the original operation. */ |
8048 | if (dump_enabled_p ()) |
8049 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8050 | "in-order double reduction not supported.\n" ); |
8051 | |
8052 | return false; |
8053 | } |
8054 | |
8055 | if (reduction_type == FOLD_LEFT_REDUCTION |
8056 | && slp_node |
8057 | && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)) |
8058 | { |
8059 | /* We cannot use in-order reductions in this case because there is |
8060 | an implicit reassociation of the operations involved. */ |
8061 | if (dump_enabled_p ()) |
8062 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8063 | "in-order unchained SLP reductions not supported.\n" ); |
8064 | return false; |
8065 | } |
8066 | |
8067 | /* For double reductions, and for SLP reductions with a neutral value, |
8068 | we construct a variable-length initial vector by loading a vector |
8069 | full of the neutral value and then shift-and-inserting the start |
8070 | values into the low-numbered elements. */ |
8071 | if ((double_reduc || neutral_op) |
8072 | && !nunits_out.is_constant () |
8073 | && !direct_internal_fn_supported_p (IFN_VEC_SHL_INSERT, |
8074 | vectype_out, OPTIMIZE_FOR_SPEED)) |
8075 | { |
8076 | if (dump_enabled_p ()) |
8077 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8078 | "reduction on variable-length vectors requires" |
8079 | " target support for a vector-shift-and-insert" |
8080 | " operation.\n" ); |
8081 | return false; |
8082 | } |
8083 | |
8084 | /* Check extra constraints for variable-length unchained SLP reductions. */ |
8085 | if (slp_node |
8086 | && !REDUC_GROUP_FIRST_ELEMENT (stmt_info) |
8087 | && !nunits_out.is_constant ()) |
8088 | { |
8089 | /* We checked above that we could build the initial vector when |
8090 | there's a neutral element value. Check here for the case in |
8091 | which each SLP statement has its own initial value and in which |
8092 | that value needs to be repeated for every instance of the |
8093 | statement within the initial vector. */ |
8094 | unsigned int group_size = SLP_TREE_LANES (slp_node); |
8095 | if (!neutral_op |
8096 | && !can_duplicate_and_interleave_p (loop_vinfo, group_size, |
8097 | TREE_TYPE (vectype_out))) |
8098 | { |
8099 | if (dump_enabled_p ()) |
8100 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8101 | "unsupported form of SLP reduction for" |
8102 | " variable-length vectors: cannot build" |
8103 | " initial vector.\n" ); |
8104 | return false; |
8105 | } |
8106 | /* The epilogue code relies on the number of elements being a multiple |
8107 | of the group size. The duplicate-and-interleave approach to setting |
8108 | up the initial vector does too. */ |
8109 | if (!multiple_p (a: nunits_out, b: group_size)) |
8110 | { |
8111 | if (dump_enabled_p ()) |
8112 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8113 | "unsupported form of SLP reduction for" |
8114 | " variable-length vectors: the vector size" |
8115 | " is not a multiple of the number of results.\n" ); |
8116 | return false; |
8117 | } |
8118 | } |
8119 | |
8120 | if (reduction_type == COND_REDUCTION) |
8121 | { |
8122 | widest_int ni; |
8123 | |
8124 | if (! max_loop_iterations (loop, &ni)) |
8125 | { |
8126 | if (dump_enabled_p ()) |
8127 | dump_printf_loc (MSG_NOTE, vect_location, |
8128 | "loop count not known, cannot create cond " |
8129 | "reduction.\n" ); |
8130 | return false; |
8131 | } |
8132 | /* Convert backedges to iterations. */ |
8133 | ni += 1; |
8134 | |
8135 | /* The additional index will be the same type as the condition. Check |
8136 | that the loop can fit into this less one (because we'll use up the |
8137 | zero slot for when there are no matches). */ |
8138 | tree max_index = TYPE_MAX_VALUE (cr_index_scalar_type); |
8139 | if (wi::geu_p (x: ni, y: wi::to_widest (t: max_index))) |
8140 | { |
8141 | if (dump_enabled_p ()) |
8142 | dump_printf_loc (MSG_NOTE, vect_location, |
8143 | "loop size is greater than data size.\n" ); |
8144 | return false; |
8145 | } |
8146 | } |
8147 | |
8148 | /* In case the vectorization factor (VF) is bigger than the number |
8149 | of elements that we can fit in a vectype (nunits), we have to generate |
8150 | more than one vector stmt - i.e - we need to "unroll" the |
8151 | vector stmt by a factor VF/nunits. For more details see documentation |
8152 | in vectorizable_operation. */ |
8153 | |
8154 | /* If the reduction is used in an outer loop we need to generate |
8155 | VF intermediate results, like so (e.g. for ncopies=2): |
8156 | r0 = phi (init, r0) |
8157 | r1 = phi (init, r1) |
8158 | r0 = x0 + r0; |
8159 | r1 = x1 + r1; |
8160 | (i.e. we generate VF results in 2 registers). |
8161 | In this case we have a separate def-use cycle for each copy, and therefore |
8162 | for each copy we get the vector def for the reduction variable from the |
8163 | respective phi node created for this copy. |
8164 | |
8165 | Otherwise (the reduction is unused in the loop nest), we can combine |
8166 | together intermediate results, like so (e.g. for ncopies=2): |
8167 | r = phi (init, r) |
8168 | r = x0 + r; |
8169 | r = x1 + r; |
8170 | (i.e. we generate VF/2 results in a single register). |
8171 | In this case for each copy we get the vector def for the reduction variable |
8172 | from the vectorized reduction operation generated in the previous iteration. |
8173 | |
8174 | This only works when we see both the reduction PHI and its only consumer |
8175 | in vectorizable_reduction and there are no intermediate stmts |
8176 | participating. When unrolling we want each unrolled iteration to have its |
8177 | own reduction accumulator since one of the main goals of unrolling a |
8178 | reduction is to reduce the aggregate loop-carried latency. */ |
8179 | if (ncopies > 1 |
8180 | && (STMT_VINFO_RELEVANT (stmt_info) <= vect_used_only_live) |
8181 | && reduc_chain_length == 1 |
8182 | && loop_vinfo->suggested_unroll_factor == 1) |
8183 | single_defuse_cycle = true; |
8184 | |
8185 | if (single_defuse_cycle || lane_reduc_code_p) |
8186 | { |
8187 | gcc_assert (op.code != COND_EXPR); |
8188 | |
8189 | /* 4. Supportable by target? */ |
8190 | bool ok = true; |
8191 | |
8192 | /* 4.1. check support for the operation in the loop |
8193 | |
8194 | This isn't necessary for the lane reduction codes, since they |
8195 | can only be produced by pattern matching, and it's up to the |
8196 | pattern matcher to test for support. The main reason for |
8197 | specifically skipping this step is to avoid rechecking whether |
8198 | mixed-sign dot-products can be implemented using signed |
8199 | dot-products. */ |
8200 | machine_mode vec_mode = TYPE_MODE (vectype_in); |
8201 | if (!lane_reduc_code_p |
8202 | && !directly_supported_p (op.code, vectype_in, optab_vector)) |
8203 | { |
8204 | if (dump_enabled_p ()) |
8205 | dump_printf (MSG_NOTE, "op not supported by target.\n" ); |
8206 | if (maybe_ne (a: GET_MODE_SIZE (mode: vec_mode), UNITS_PER_WORD) |
8207 | || !vect_can_vectorize_without_simd_p (op.code)) |
8208 | ok = false; |
8209 | else |
8210 | if (dump_enabled_p ()) |
8211 | dump_printf (MSG_NOTE, "proceeding using word mode.\n" ); |
8212 | } |
8213 | |
8214 | if (vect_emulated_vector_p (vectype_in) |
8215 | && !vect_can_vectorize_without_simd_p (op.code)) |
8216 | { |
8217 | if (dump_enabled_p ()) |
8218 | dump_printf (MSG_NOTE, "using word mode not possible.\n" ); |
8219 | return false; |
8220 | } |
8221 | |
8222 | /* lane-reducing operations have to go through vect_transform_reduction. |
8223 | For the other cases try without the single cycle optimization. */ |
8224 | if (!ok) |
8225 | { |
8226 | if (lane_reduc_code_p) |
8227 | return false; |
8228 | else |
8229 | single_defuse_cycle = false; |
8230 | } |
8231 | } |
8232 | STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info) = single_defuse_cycle; |
8233 | |
8234 | /* If the reduction stmt is one of the patterns that have lane |
8235 | reduction embedded we cannot handle the case of ! single_defuse_cycle. */ |
8236 | if ((ncopies > 1 && ! single_defuse_cycle) |
8237 | && lane_reduc_code_p) |
8238 | { |
8239 | if (dump_enabled_p ()) |
8240 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8241 | "multi def-use cycle not possible for lane-reducing " |
8242 | "reduction operation\n" ); |
8243 | return false; |
8244 | } |
8245 | |
8246 | if (slp_node |
8247 | && !(!single_defuse_cycle |
8248 | && !lane_reduc_code_p |
8249 | && reduction_type != FOLD_LEFT_REDUCTION)) |
8250 | for (i = 0; i < (int) op.num_ops; i++) |
8251 | if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_op[i])) |
8252 | { |
8253 | if (dump_enabled_p ()) |
8254 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8255 | "incompatible vector types for invariants\n" ); |
8256 | return false; |
8257 | } |
8258 | |
8259 | if (slp_node) |
8260 | vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); |
8261 | else |
8262 | vec_num = 1; |
8263 | |
8264 | vect_model_reduction_cost (loop_vinfo, stmt_info, reduc_fn, |
8265 | reduction_type, ncopies, cost_vec); |
8266 | /* Cost the reduction op inside the loop if transformed via |
8267 | vect_transform_reduction. Otherwise this is costed by the |
8268 | separate vectorizable_* routines. */ |
8269 | if (single_defuse_cycle || lane_reduc_code_p) |
8270 | { |
8271 | int factor = 1; |
8272 | if (vect_is_emulated_mixed_dot_prod (loop_vinfo, stmt_info)) |
8273 | /* Three dot-products and a subtraction. */ |
8274 | factor = 4; |
8275 | record_stmt_cost (body_cost_vec: cost_vec, count: ncopies * factor, kind: vector_stmt, |
8276 | stmt_info, misalign: 0, where: vect_body); |
8277 | } |
8278 | |
8279 | if (dump_enabled_p () |
8280 | && reduction_type == FOLD_LEFT_REDUCTION) |
8281 | dump_printf_loc (MSG_NOTE, vect_location, |
8282 | "using an in-order (fold-left) reduction.\n" ); |
8283 | STMT_VINFO_TYPE (orig_stmt_of_analysis) = cycle_phi_info_type; |
8284 | /* All but single defuse-cycle optimized, lane-reducing and fold-left |
8285 | reductions go through their own vectorizable_* routines. */ |
8286 | if (!single_defuse_cycle |
8287 | && !lane_reduc_code_p |
8288 | && reduction_type != FOLD_LEFT_REDUCTION) |
8289 | { |
8290 | stmt_vec_info tem |
8291 | = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info)); |
8292 | if (slp_node && REDUC_GROUP_FIRST_ELEMENT (tem)) |
8293 | { |
8294 | gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (tem)); |
8295 | tem = REDUC_GROUP_FIRST_ELEMENT (tem); |
8296 | } |
8297 | STMT_VINFO_DEF_TYPE (vect_orig_stmt (tem)) = vect_internal_def; |
8298 | STMT_VINFO_DEF_TYPE (tem) = vect_internal_def; |
8299 | } |
8300 | else if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) |
8301 | { |
8302 | vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); |
8303 | vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo); |
8304 | internal_fn cond_fn = get_conditional_internal_fn (op.code, op.type); |
8305 | |
8306 | if (reduction_type != FOLD_LEFT_REDUCTION |
8307 | && !use_mask_by_cond_expr_p (code: op.code, cond_fn, vectype_in) |
8308 | && (cond_fn == IFN_LAST |
8309 | || !direct_internal_fn_supported_p (cond_fn, vectype_in, |
8310 | OPTIMIZE_FOR_SPEED))) |
8311 | { |
8312 | if (dump_enabled_p ()) |
8313 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8314 | "can't operate on partial vectors because" |
8315 | " no conditional operation is available.\n" ); |
8316 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; |
8317 | } |
8318 | else if (reduction_type == FOLD_LEFT_REDUCTION |
8319 | && reduc_fn == IFN_LAST |
8320 | && !expand_vec_cond_expr_p (vectype_in, |
8321 | truth_type_for (vectype_in), |
8322 | SSA_NAME)) |
8323 | { |
8324 | if (dump_enabled_p ()) |
8325 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8326 | "can't operate on partial vectors because" |
8327 | " no conditional operation is available.\n" ); |
8328 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; |
8329 | } |
8330 | else if (reduction_type == FOLD_LEFT_REDUCTION |
8331 | && internal_fn_mask_index (reduc_fn) == -1 |
8332 | && FLOAT_TYPE_P (vectype_in) |
8333 | && HONOR_SIGN_DEPENDENT_ROUNDING (vectype_in)) |
8334 | { |
8335 | if (dump_enabled_p ()) |
8336 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8337 | "can't operate on partial vectors because" |
8338 | " signed zeros cannot be preserved.\n" ); |
8339 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; |
8340 | } |
8341 | else |
8342 | { |
8343 | internal_fn mask_reduc_fn |
8344 | = get_masked_reduction_fn (reduc_fn, vectype_in); |
8345 | |
8346 | if (mask_reduc_fn == IFN_MASK_LEN_FOLD_LEFT_PLUS) |
8347 | vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num, |
8348 | vectype_in, 1); |
8349 | else |
8350 | vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num, |
8351 | vectype_in, NULL); |
8352 | } |
8353 | } |
8354 | return true; |
8355 | } |
8356 | |
8357 | /* STMT_INFO is a dot-product reduction whose multiplication operands |
8358 | have different signs. Emit a sequence to emulate the operation |
8359 | using a series of signed DOT_PROD_EXPRs and return the last |
8360 | statement generated. VEC_DEST is the result of the vector operation |
8361 | and VOP lists its inputs. */ |
8362 | |
8363 | static gassign * |
8364 | vect_emulate_mixed_dot_prod (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, |
8365 | gimple_stmt_iterator *gsi, tree vec_dest, |
8366 | tree vop[3]) |
8367 | { |
8368 | tree wide_vectype = signed_type_for (TREE_TYPE (vec_dest)); |
8369 | tree narrow_vectype = signed_type_for (TREE_TYPE (vop[0])); |
8370 | tree narrow_elttype = TREE_TYPE (narrow_vectype); |
8371 | gimple *new_stmt; |
8372 | |
8373 | /* Make VOP[0] the unsigned operand VOP[1] the signed operand. */ |
8374 | if (!TYPE_UNSIGNED (TREE_TYPE (vop[0]))) |
8375 | std::swap (a&: vop[0], b&: vop[1]); |
8376 | |
8377 | /* Convert all inputs to signed types. */ |
8378 | for (int i = 0; i < 3; ++i) |
8379 | if (TYPE_UNSIGNED (TREE_TYPE (vop[i]))) |
8380 | { |
8381 | tree tmp = make_ssa_name (var: signed_type_for (TREE_TYPE (vop[i]))); |
8382 | new_stmt = gimple_build_assign (tmp, NOP_EXPR, vop[i]); |
8383 | vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt, gsi); |
8384 | vop[i] = tmp; |
8385 | } |
8386 | |
8387 | /* In the comments below we assume 8-bit inputs for simplicity, |
8388 | but the approach works for any full integer type. */ |
8389 | |
8390 | /* Create a vector of -128. */ |
8391 | tree min_narrow_elttype = TYPE_MIN_VALUE (narrow_elttype); |
8392 | tree min_narrow = build_vector_from_val (narrow_vectype, |
8393 | min_narrow_elttype); |
8394 | |
8395 | /* Create a vector of 64. */ |
8396 | auto half_wi = wi::lrshift (x: wi::to_wide (t: min_narrow_elttype), y: 1); |
8397 | tree half_narrow = wide_int_to_tree (type: narrow_elttype, cst: half_wi); |
8398 | half_narrow = build_vector_from_val (narrow_vectype, half_narrow); |
8399 | |
8400 | /* Emit: SUB_RES = VOP[0] - 128. */ |
8401 | tree sub_res = make_ssa_name (var: narrow_vectype); |
8402 | new_stmt = gimple_build_assign (sub_res, PLUS_EXPR, vop[0], min_narrow); |
8403 | vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt, gsi); |
8404 | |
8405 | /* Emit: |
8406 | |
8407 | STAGE1 = DOT_PROD_EXPR <VOP[1], 64, VOP[2]>; |
8408 | STAGE2 = DOT_PROD_EXPR <VOP[1], 64, STAGE1>; |
8409 | STAGE3 = DOT_PROD_EXPR <SUB_RES, -128, STAGE2>; |
8410 | |
8411 | on the basis that x * y == (x - 128) * y + 64 * y + 64 * y |
8412 | Doing the two 64 * y steps first allows more time to compute x. */ |
8413 | tree stage1 = make_ssa_name (var: wide_vectype); |
8414 | new_stmt = gimple_build_assign (stage1, DOT_PROD_EXPR, |
8415 | vop[1], half_narrow, vop[2]); |
8416 | vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt, gsi); |
8417 | |
8418 | tree stage2 = make_ssa_name (var: wide_vectype); |
8419 | new_stmt = gimple_build_assign (stage2, DOT_PROD_EXPR, |
8420 | vop[1], half_narrow, stage1); |
8421 | vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt, gsi); |
8422 | |
8423 | tree stage3 = make_ssa_name (var: wide_vectype); |
8424 | new_stmt = gimple_build_assign (stage3, DOT_PROD_EXPR, |
8425 | sub_res, vop[1], stage2); |
8426 | vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt, gsi); |
8427 | |
8428 | /* Convert STAGE3 to the reduction type. */ |
8429 | return gimple_build_assign (vec_dest, CONVERT_EXPR, stage3); |
8430 | } |
8431 | |
8432 | /* Transform the definition stmt STMT_INFO of a reduction PHI backedge |
8433 | value. */ |
8434 | |
bool
vect_transform_reduction (loop_vec_info loop_vinfo,
			  stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			  gimple **vec_stmt, slp_tree slp_node)
{
  tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int i;
  int ncopies;
  int vec_num;

  /* Fetch the reduction descriptor recorded during analysis.  */
  stmt_vec_info reduc_info = info_for_reduction (vinfo: loop_vinfo, stmt_info);
  gcc_assert (reduc_info->is_reduc_info);

  /* For a reduction nested in an outer loop (a double reduction),
     operate relative to the inner loop.  */
  if (nested_in_vect_loop_p (loop, stmt_info))
    {
      loop = loop->inner;
      gcc_assert (STMT_VINFO_DEF_TYPE (reduc_info) == vect_double_reduction_def);
    }

  /* Decompose the reduction statement into its code and operands.  */
  gimple_match_op op;
  if (!gimple_extract_op (stmt_info->stmt, &op))
    gcc_unreachable ();

  /* All uses but the last are expected to be defined in the loop.
     The last use is the reduction variable. In case of nested cycle this
     assumption is not true: we use reduc_index to record the index of the
     reduction variable. */
  stmt_vec_info phi_info = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info));
  gphi *reduc_def_phi = as_a <gphi *> (p: phi_info->stmt);
  int reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
  tree vectype_in = STMT_VINFO_REDUC_VECTYPE_IN (reduc_info);

  /* With SLP the copies are expressed as multiple SLP vector stmts;
     without SLP we unroll the single stmt NCOPIES times.  */
  if (slp_node)
    {
      ncopies = 1;
      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
    }
  else
    {
      ncopies = vect_get_num_copies (loop_vinfo, vectype: vectype_in);
      vec_num = 1;
    }

  code_helper code = canonicalize_code (op.code, op.type);
  /* The conditional variant of CODE, if any, used to apply loop masks.  */
  internal_fn cond_fn = get_conditional_internal_fn (code, op.type);

  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
  vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
  /* Whether masking is done by wrapping the op in a VEC_COND_EXPR
     rather than by calling a conditional internal function.  */
  bool mask_by_cond_expr = use_mask_by_cond_expr_p (code, cond_fn, vectype_in);

  /* Transform. */
  tree new_temp = NULL_TREE;
  auto_vec<tree> vec_oprnds0;
  auto_vec<tree> vec_oprnds1;
  auto_vec<tree> vec_oprnds2;
  tree def0;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n" );

  /* FORNOW: Multiple types are not supported for condition. */
  if (code == COND_EXPR)
    gcc_assert (ncopies == 1);

  /* A binary COND_OP reduction must have the same definition and else
     value. */
  bool cond_fn_p = code.is_internal_fn ()
    && conditional_internal_fn_code (internal_fn (code)) != ERROR_MARK;
  if (cond_fn_p)
    {
      gcc_assert (code == IFN_COND_ADD || code == IFN_COND_SUB
		  || code == IFN_COND_MUL || code == IFN_COND_AND
		  || code == IFN_COND_IOR || code == IFN_COND_XOR);
      gcc_assert (op.num_ops == 4
		  && (op.ops[reduc_index]
		      == op.ops[internal_fn_else_index ((internal_fn) code)]));
    }

  bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);

  vect_reduction_type reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
  /* In-order (fold-left) reductions are handled by their own routine,
     which preserves the scalar evaluation order.  */
  if (reduction_type == FOLD_LEFT_REDUCTION)
    {
      internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info);
      gcc_assert (code.is_tree_code () || cond_fn_p);
      return vectorize_fold_left_reduction
	  (loop_vinfo, stmt_info, gsi, vec_stmt, slp_node, reduc_def_stmt: reduc_def_phi,
	   code, reduc_fn, ops: op.ops, num_ops: op.num_ops, vectype_in,
	   reduc_index, masks, lens);
    }

  /* Only single-def-use-cycle reductions and the lane-reducing codes
     (which embed the reduction in a pattern stmt) reach this point.  */
  bool single_defuse_cycle = STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info);
  gcc_assert (single_defuse_cycle
	      || code == DOT_PROD_EXPR
	      || code == WIDEN_SUM_EXPR
	      || code == SAD_EXPR);

  /* Create the destination vector */
  tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
  tree vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  /* Get NCOPIES vector definitions for all operands except the reduction
     definition. */
  if (!cond_fn_p)
    {
      /* For a single def-use cycle the reduction operand's defs are
	 supplied separately below, so pass NULL_TREE for it here.  */
      vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, ncopies,
			 single_defuse_cycle && reduc_index == 0
			 ? NULL_TREE : op.ops[0], &vec_oprnds0,
			 single_defuse_cycle && reduc_index == 1
			 ? NULL_TREE : op.ops[1], &vec_oprnds1,
			 op.num_ops == 3
			 && !(single_defuse_cycle && reduc_index == 2)
			 ? op.ops[2] : NULL_TREE, &vec_oprnds2);
    }
  else
    {
      /* For a conditional operation pass the truth type as mask
	 vectype. */
      gcc_assert (single_defuse_cycle
		  && (reduc_index == 1 || reduc_index == 2));
      vect_get_vec_defs (loop_vinfo, stmt_info, slp_node, ncopies,
			 op.ops[0], truth_type_for (vectype_in), &vec_oprnds0,
			 reduc_index == 1 ? NULL_TREE : op.ops[1],
			 NULL_TREE, &vec_oprnds1,
			 reduc_index == 2 ? NULL_TREE : op.ops[2],
			 NULL_TREE, &vec_oprnds2);
    }

  /* For single def-use cycles get one copy of the vectorized reduction
     definition. */
  if (single_defuse_cycle)
    {
      gcc_assert (!slp_node);
      vect_get_vec_defs_for_operand (vinfo: loop_vinfo, stmt_info, 1,
				     op: op.ops[reduc_index],
				     reduc_index == 0 ? &vec_oprnds0
				     : (reduc_index == 1 ? &vec_oprnds1
					: &vec_oprnds2));
    }

  /* A mixed-sign dot-product without direct target support is emulated
     via a sequence of signed dot-products.  */
  bool emulated_mixed_dot_prod
    = vect_is_emulated_mixed_dot_prod (loop_vinfo, stmt_info);
  /* Emit one vector stmt per copy.  */
  FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
    {
      gimple *new_stmt;
      tree vop[3] = { def0, vec_oprnds1[i], NULL_TREE };
      if (masked_loop_p && !mask_by_cond_expr)
	{
	  /* No conditional ifns have been defined for dot-product yet. */
	  gcc_assert (code != DOT_PROD_EXPR);

	  /* Make sure that the reduction accumulator is vop[0]. */
	  if (reduc_index == 1)
	    {
	      gcc_assert (commutative_binary_op_p (code, op.type));
	      std::swap (a&: vop[0], b&: vop[1]);
	    }
	  /* Call COND_FN (MASK, ACC, VOP1, ACC): inactive lanes keep
	     the accumulator value unchanged.  */
	  tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
					  vec_num * ncopies, vectype_in, i);
	  gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
						    vop[0], vop[1], vop[0]);
	  new_temp = make_ssa_name (var: vec_dest, stmt: call);
	  gimple_call_set_lhs (gs: call, lhs: new_temp);
	  gimple_call_set_nothrow (s: call, nothrow_p: true);
	  vect_finish_stmt_generation (loop_vinfo, stmt_info, call, gsi);
	  new_stmt = call;
	}
      else
	{
	  if (op.num_ops >= 3)
	    vop[2] = vec_oprnds2[i];

	  /* Apply the loop mask by rewriting the masked operand with a
	     VEC_COND_EXPR selecting between it and a neutral value.  */
	  if (masked_loop_p && mask_by_cond_expr)
	    {
	      tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
					      vec_num * ncopies, vectype_in, i);
	      build_vect_cond_expr (code, vop, mask, gsi);
	    }

	  if (emulated_mixed_dot_prod)
	    new_stmt = vect_emulate_mixed_dot_prod (loop_vinfo, stmt_info, gsi,
						    vec_dest, vop);

	  else if (code.is_internal_fn () && !cond_fn_p)
	    new_stmt = gimple_build_call_internal (internal_fn (code),
						   op.num_ops,
						   vop[0], vop[1], vop[2]);
	  else if (code.is_internal_fn () && cond_fn_p)
	    /* A conditional reduction op takes VOP[1] as the else
	       value as well (asserted equal to the reduction def
	       above).  */
	    new_stmt = gimple_build_call_internal (internal_fn (code),
						   op.num_ops,
						   vop[0], vop[1], vop[2],
						   vop[1]);
	  else
	    new_stmt = gimple_build_assign (vec_dest, tree_code (op.code),
					    vop[0], vop[1], vop[2]);
	  new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt);
	  gimple_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt, gsi);
	}

      /* Record the result; for a single def-use cycle feed this copy's
	 result in as the reduction operand of the next copy.  */
      if (slp_node)
	slp_node->push_vec_def (def: new_stmt);
      else if (single_defuse_cycle
	       && i < ncopies - 1)
	{
	  if (reduc_index == 0)
	    vec_oprnds0.safe_push (obj: gimple_get_lhs (new_stmt));
	  else if (reduc_index == 1)
	    vec_oprnds1.safe_push (obj: gimple_get_lhs (new_stmt));
	  else if (reduc_index == 2)
	    vec_oprnds2.safe_push (obj: gimple_get_lhs (new_stmt));
	}
      else
	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
    }

  if (!slp_node)
    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];

  return true;
}
8657 | |
/* Transform phase of a cycle PHI: emit the vector PHI node(s) for the
   reduction or nested cycle described by STMT_INFO and set their
   loop-entry arguments.  The loop-latch arguments are filled in later
   during epilogue processing.  In the non-SLP case the first generated
   PHI is returned via *VEC_STMT.  Returns true on success.  */

bool
vect_transform_cycle_phi (loop_vec_info loop_vinfo,
                          stmt_vec_info stmt_info, gimple **vec_stmt,
                          slp_tree slp_node, slp_instance slp_node_instance)
{
  tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int i;
  int ncopies;
  int j;
  bool nested_cycle = false;
  int vec_num;

  /* For a PHI in the inner loop of an outer-loop vectorization the
     relevant loop is the inner one.  */
  if (nested_in_vect_loop_p (loop, stmt_info))
    {
      loop = loop->inner;
      nested_cycle = true;
    }

  stmt_vec_info reduc_stmt_info = STMT_VINFO_REDUC_DEF (stmt_info);
  reduc_stmt_info = vect_stmt_to_vectorize (stmt_info: reduc_stmt_info);
  stmt_vec_info reduc_info = info_for_reduction (vinfo: loop_vinfo, stmt_info);
  gcc_assert (reduc_info->is_reduc_info);

  /* These reduction schemes keep the scalar PHI; nothing to emit here.  */
  if (STMT_VINFO_REDUC_TYPE (reduc_info) == EXTRACT_LAST_REDUCTION
      || STMT_VINFO_REDUC_TYPE (reduc_info) == FOLD_LEFT_REDUCTION)
    /* Leave the scalar phi in place.  */
    return true;

  tree vectype_in = STMT_VINFO_REDUC_VECTYPE_IN (reduc_info);
  /* For a nested cycle we do not fill the above.  */
  if (!vectype_in)
    vectype_in = STMT_VINFO_VECTYPE (stmt_info);
  gcc_assert (vectype_in);

  if (slp_node)
    {
      /* The size vect_schedule_slp_instance computes is off for us.  */
      vec_num = vect_get_num_vectors (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
                                      * SLP_TREE_LANES (slp_node), vectype: vectype_in);
      ncopies = 1;
    }
  else
    {
      vec_num = 1;
      ncopies = vect_get_num_copies (loop_vinfo, vectype: vectype_in);
    }

  /* Check whether we should use a single PHI node and accumulate
     vectors to one before the backedge.  */
  if (STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info))
    ncopies = 1;

  /* Create the destination vector  */
  gphi *phi = as_a <gphi *> (p: stmt_info->stmt);
  tree vec_dest = vect_create_destination_var (gimple_phi_result (gs: phi),
                                               vectype_out);

  /* Get the loop-entry arguments.  */
  tree vec_initial_def = NULL_TREE;
  auto_vec<tree> vec_initial_defs;
  if (slp_node)
    {
      vec_initial_defs.reserve (nelems: vec_num);
      if (nested_cycle)
        {
          /* Nested cycles take their initial values directly from the
             SLP child feeding the preheader argument.  */
          unsigned phi_idx = loop_preheader_edge (loop)->dest_idx;
          vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[phi_idx],
                             &vec_initial_defs);
        }
      else
        {
          gcc_assert (slp_node == slp_node_instance->reduc_phis);
          vec<tree> &initial_values = reduc_info->reduc_initial_values;
          vec<stmt_vec_info> &stmts = SLP_TREE_SCALAR_STMTS (slp_node);

          /* A reduction chain has a single meaningful initial value.  */
          unsigned int num_phis = stmts.length ();
          if (REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info))
            num_phis = 1;
          initial_values.reserve (nelems: num_phis);
          for (unsigned int i = 0; i < num_phis; ++i)
            {
              gphi *this_phi = as_a<gphi *> (p: stmts[i]->stmt);
              initial_values.quick_push (obj: vect_phi_initial_value (phi: this_phi));
            }
          if (vec_num == 1)
            vect_find_reusable_accumulator (loop_vinfo, reduc_info);
          /* vect_find_reusable_accumulator may have emptied
             initial_values when an accumulator is reused.  */
          if (!initial_values.is_empty ())
            {
              tree initial_value
                = (num_phis == 1 ? initial_values[0] : NULL_TREE);
              code_helper code = STMT_VINFO_REDUC_CODE (reduc_info);
              tree neutral_op
                = neutral_op_for_reduction (TREE_TYPE (vectype_out),
                                            code, initial_value);
              get_initial_defs_for_reduction (loop_vinfo, reduc_info,
                                              vec_oprnds: &vec_initial_defs, number_of_vectors: vec_num,
                                              group_size: stmts.length (), neutral_op);
            }
        }
    }
  else
    {
      /* Get at the scalar def before the loop, that defines the initial
         value of the reduction variable.  */
      tree initial_def = vect_phi_initial_value (phi);
      reduc_info->reduc_initial_values.safe_push (obj: initial_def);
      /* Optimize: if initial_def is for REDUC_MAX smaller than the base
         and we can't use zero for induc_val, use initial_def.  Similarly
         for REDUC_MIN and initial_def larger than the base.  */
      if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
        {
          tree induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info);
          if (TREE_CODE (initial_def) == INTEGER_CST
              && !integer_zerop (induc_val)
              && ((STMT_VINFO_REDUC_CODE (reduc_info) == MAX_EXPR
                   && tree_int_cst_lt (t1: initial_def, t2: induc_val))
                  || (STMT_VINFO_REDUC_CODE (reduc_info) == MIN_EXPR
                      && tree_int_cst_lt (t1: induc_val, t2: initial_def))))
            {
              induc_val = initial_def;
              /* Communicate we used the initial_def to epilogue
                 generation.  */
              STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info) = NULL_TREE;
            }
          vec_initial_def = build_vector_from_val (vectype_out, induc_val);
        }
      else if (nested_cycle)
        {
          /* Do not use an adjustment def as that case is not supported
             correctly if ncopies is not one.  */
          vect_get_vec_defs_for_operand (vinfo: loop_vinfo, reduc_stmt_info,
                                         ncopies, op: initial_def,
                                         &vec_initial_defs);
        }
      else if (STMT_VINFO_REDUC_TYPE (reduc_info) == CONST_COND_REDUCTION
               || STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION)
        /* Fill the initial vector with the initial scalar value.  */
        vec_initial_def
          = get_initial_def_for_reduction (loop_vinfo, reduc_info: reduc_stmt_info,
                                           init_val: initial_def, neutral_op: initial_def);
      else
        {
          if (ncopies == 1)
            vect_find_reusable_accumulator (loop_vinfo, reduc_info);
          if (!reduc_info->reduc_initial_values.is_empty ())
            {
              initial_def = reduc_info->reduc_initial_values[0];
              code_helper code = STMT_VINFO_REDUC_CODE (reduc_info);
              tree neutral_op
                = neutral_op_for_reduction (TREE_TYPE (initial_def),
                                            code, initial_value: initial_def);
              gcc_assert (neutral_op);
              /* Try to simplify the vector initialization by applying an
                 adjustment after the reduction has been performed.  */
              if (!reduc_info->reused_accumulator
                  && STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
                  && !operand_equal_p (neutral_op, initial_def))
                {
                  STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info)
                    = initial_def;
                  initial_def = neutral_op;
                }
              vec_initial_def
                = get_initial_def_for_reduction (loop_vinfo, reduc_info,
                                                 init_val: initial_def, neutral_op);
            }
        }
    }

  /* A single initial value is replicated for all ncopies.  */
  if (vec_initial_def)
    {
      vec_initial_defs.create (nelems: ncopies);
      for (i = 0; i < ncopies; ++i)
        vec_initial_defs.quick_push (obj: vec_initial_def);
    }

  /* When reusing an accumulator from the main loop, massage it into a
     shape compatible with this (epilogue) loop's vector type.  */
  if (auto *accumulator = reduc_info->reused_accumulator)
    {
      tree def = accumulator->reduc_input;
      if (!useless_type_conversion_p (vectype_out, TREE_TYPE (def)))
        {
          unsigned int nreduc;
          bool res = constant_multiple_p (a: TYPE_VECTOR_SUBPARTS
                                               (TREE_TYPE (def)),
                                          b: TYPE_VECTOR_SUBPARTS (node: vectype_out),
                                          multiple: &nreduc);
          gcc_assert (res);
          gimple_seq stmts = NULL;
          /* Reduce the single vector to a smaller one.  */
          if (nreduc != 1)
            {
              /* Perform the reduction in the appropriate type.  */
              tree rvectype = vectype_out;
              if (!useless_type_conversion_p (TREE_TYPE (vectype_out),
                                              TREE_TYPE (TREE_TYPE (def))))
                rvectype = build_vector_type (TREE_TYPE (TREE_TYPE (def)),
                                              TYPE_VECTOR_SUBPARTS
                                                (node: vectype_out));
              def = vect_create_partial_epilog (vec_def: def, vectype: rvectype,
                                                STMT_VINFO_REDUC_CODE
                                                  (reduc_info),
                                                seq: &stmts);
            }
          /* The epilogue loop might use a different vector mode, like
             VNx2DI vs. V2DI.  */
          if (TYPE_MODE (vectype_out) != TYPE_MODE (TREE_TYPE (def)))
            {
              tree reduc_type = build_vector_type_for_mode
                (TREE_TYPE (TREE_TYPE (def)), TYPE_MODE (vectype_out));
              def = gimple_convert (seq: &stmts, type: reduc_type, op: def);
            }
          /* Adjust the input so we pick up the partially reduced value
             for the skip edge in vect_create_epilog_for_reduction.  */
          accumulator->reduc_input = def;
          /* And the reduction could be carried out using a different sign.  */
          if (!useless_type_conversion_p (vectype_out, TREE_TYPE (def)))
            def = gimple_convert (seq: &stmts, type: vectype_out, op: def);
          if (loop_vinfo->main_loop_edge)
            {
              /* While we'd like to insert on the edge this will split
                 blocks and disturb bookkeeping, we also will eventually
                 need this on the skip edge.  Rely on sinking to
                 fixup optimal placement and insert in the pred.  */
              gimple_stmt_iterator gsi
                = gsi_last_bb (bb: loop_vinfo->main_loop_edge->src);
              /* Insert before a cond that eventually skips the
                 epilogue.  */
              if (!gsi_end_p (i: gsi) && stmt_ends_bb_p (gsi_stmt (i: gsi)))
                gsi_prev (i: &gsi);
              gsi_insert_seq_after (&gsi, stmts, GSI_CONTINUE_LINKING);
            }
          else
            gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop),
                                              stmts);
        }
      /* Blend the accumulator into the initial defs (or select it on
         the main-loop skip edge).  */
      if (loop_vinfo->main_loop_edge)
        vec_initial_defs[0]
          = vect_get_main_loop_result (loop_vinfo, def,
                                       vec_initial_defs[0]);
      else
        vec_initial_defs.safe_push (obj: def);
    }

  /* Generate the reduction PHIs upfront.  */
  for (i = 0; i < vec_num; i++)
    {
      tree vec_init_def = vec_initial_defs[i];
      for (j = 0; j < ncopies; j++)
        {
          /* Create the reduction-phi that defines the reduction
             operand.  */
          gphi *new_phi = create_phi_node (vec_dest, loop->header);

          /* Set the loop-entry arg of the reduction-phi.  */
          if (j != 0 && nested_cycle)
            vec_init_def = vec_initial_defs[j];
          add_phi_arg (new_phi, vec_init_def, loop_preheader_edge (loop),
                       UNKNOWN_LOCATION);

          /* The loop-latch arg is set in epilogue processing.  */

          if (slp_node)
            slp_node->push_vec_def (def: new_phi);
          else
            {
              if (j == 0)
                *vec_stmt = new_phi;
              STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_phi);
            }
        }
    }

  return true;
}
8935 | |
/* Vectorizes LC PHIs, i.e. single-argument PHI nodes (loop-closed SSA
   copies).  When VEC_STMT is NULL only applicability is checked and
   the stmt type is recorded; otherwise one vector PHI is emitted per
   vector def of the single argument.  Only vect_internal_def and
   vect_double_reduction_def PHIs are handled.  Returns true if the
   PHI was analyzed/transformed successfully.  */

bool
vectorizable_lc_phi (loop_vec_info loop_vinfo,
                     stmt_vec_info stmt_info, gimple **vec_stmt,
                     slp_tree slp_node)
{
  /* Only applies to single-argument PHIs inside a vectorized loop.  */
  if (!loop_vinfo
      || !is_a <gphi *> (p: stmt_info->stmt)
      || gimple_phi_num_args (gs: stmt_info->stmt) != 1)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def)
    return false;

  if (!vec_stmt) /* transformation not required.  */
    {
      /* Deal with copies from externs or constants that disguise as
	 loop-closed PHI nodes (PR97886).  */
      if (slp_node
	  && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
						SLP_TREE_VECTYPE (slp_node)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "incompatible vector types for invariants\n");
	  return false;
	}
      STMT_VINFO_TYPE (stmt_info) = lc_phi_info_type;
      return true;
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree scalar_dest = gimple_phi_result (gs: stmt_info->stmt);
  basic_block bb = gimple_bb (g: stmt_info->stmt);
  /* A single-argument PHI lives in a block with a single predecessor.  */
  edge e = single_pred_edge (bb);
  tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
  auto_vec<tree> vec_oprnds;
  vect_get_vec_defs (loop_vinfo, stmt_info, slp_node,
		     !slp_node ? vect_get_num_copies (loop_vinfo, vectype) : 1,
		     gimple_phi_arg_def (gs: stmt_info->stmt, index: 0), &vec_oprnds);
  for (unsigned i = 0; i < vec_oprnds.length (); i++)
    {
      /* Create the vectorized LC PHI node.  */
      gphi *new_phi = create_phi_node (vec_dest, bb);
      add_phi_arg (new_phi, vec_oprnds[i], e, UNKNOWN_LOCATION);
      if (slp_node)
	slp_node->push_vec_def (def: new_phi);
      else
	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_phi);
    }
  if (!slp_node)
    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];

  return true;
}
8993 | |
/* Vectorizes PHIs.  Handles SLP-only vectorization of vect_internal_def
   PHI nodes.  During analysis (VEC_STMT == NULL) verifies every SLP
   child has a compatible vector type and records the cost; during
   transform emits the vector PHI nodes, adding arguments for the
   children that are already vectorized.  Arguments for not-yet
   vectorized (backedge) children are filled in later.  Returns true
   on success.  */

bool
vectorizable_phi (vec_info *,
                  stmt_vec_info stmt_info, gimple **vec_stmt,
                  slp_tree slp_node, stmt_vector_for_cost *cost_vec)
{
  if (!is_a <gphi *> (p: stmt_info->stmt) || !slp_node)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  tree vectype = SLP_TREE_VECTYPE (slp_node);

  if (!vec_stmt) /* transformation not required.  */
    {
      slp_tree child;
      unsigned i;
      /* Validate each child: it must exist and agree with our vector
	 type.  */
      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), i, child)
	if (!child)
	  {
	    if (dump_enabled_p ())
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "PHI node with unvectorized backedge def\n");
	    return false;
	  }
	else if (!vect_maybe_update_slp_op_vectype (child, vectype))
	  {
	    if (dump_enabled_p ())
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "incompatible vector types for invariants\n");
	    return false;
	  }
	else if (SLP_TREE_DEF_TYPE (child) == vect_internal_def
		 && !useless_type_conversion_p (vectype,
						SLP_TREE_VECTYPE (child)))
	  {
	    /* With bools we can have mask and non-mask precision vectors
	       or different non-mask precisions.  while pattern recog is
	       supposed to guarantee consistency here bugs in it can cause
	       mismatches (PR103489 and PR103800 for example).
	       Deal with them here instead of ICEing later.  */
	    if (dump_enabled_p ())
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "incompatible vector type setup from "
			       "bool pattern detection\n");
	    return false;
	  }

      /* For single-argument PHIs assume coalescing which means zero cost
	 for the scalar and the vector PHIs.  This avoids artificially
	 favoring the vector path (but may pessimize it in some cases).  */
      if (gimple_phi_num_args (gs: as_a <gphi *> (p: stmt_info->stmt)) > 1)
	record_stmt_cost (cost_vec, SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
			  vector_stmt, stmt_info, vectype, 0, vect_body);
      STMT_VINFO_TYPE (stmt_info) = phi_info_type;
      return true;
    }

  tree scalar_dest = gimple_phi_result (gs: stmt_info->stmt);
  basic_block bb = gimple_bb (g: stmt_info->stmt);
  tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
  auto_vec<gphi *> new_phis;
  for (unsigned i = 0; i < gimple_phi_num_args (gs: stmt_info->stmt); ++i)
    {
      slp_tree child = SLP_TREE_CHILDREN (slp_node)[i];

      /* Skip not yet vectorized defs.  */
      if (SLP_TREE_DEF_TYPE (child) == vect_internal_def
	  && SLP_TREE_VEC_DEFS (child).is_empty ())
	continue;

      auto_vec<tree> vec_oprnds;
      vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[i], &vec_oprnds);
      /* Lazily create the vector PHIs on the first vectorized child;
	 one PHI per vector def.  */
      if (!new_phis.exists ())
	{
	  new_phis.create (nelems: vec_oprnds.length ());
	  for (unsigned j = 0; j < vec_oprnds.length (); j++)
	    {
	      /* Create the vectorized PHI node.  */
	      new_phis.quick_push (obj: create_phi_node (vec_dest, bb));
	      slp_node->push_vec_def (def: new_phis[j]);
	    }
	}
      edge e = gimple_phi_arg_edge (phi: as_a <gphi *> (p: stmt_info->stmt), i);
      for (unsigned j = 0; j < vec_oprnds.length (); j++)
	add_phi_arg (new_phis[j], vec_oprnds[j], e, UNKNOWN_LOCATION);
    }
  /* We should have at least one already vectorized child.  */
  gcc_assert (new_phis.exists ());

  return true;
}
9088 | |
9089 | /* Vectorizes first order recurrences. An overview of the transformation |
9090 | is described below. Suppose we have the following loop. |
9091 | |
9092 | int t = 0; |
9093 | for (int i = 0; i < n; ++i) |
9094 | { |
9095 | b[i] = a[i] - t; |
9096 | t = a[i]; |
9097 | } |
9098 | |
9099 | There is a first-order recurrence on 'a'. For this loop, the scalar IR |
9100 | looks (simplified) like: |
9101 | |
9102 | scalar.preheader: |
9103 | init = 0; |
9104 | |
9105 | scalar.body: |
9106 | i = PHI <0(scalar.preheader), i+1(scalar.body)> |
9107 | _2 = PHI <(init(scalar.preheader), <_1(scalar.body)> |
9108 | _1 = a[i] |
9109 | b[i] = _1 - _2 |
9110 | if (i < n) goto scalar.body |
9111 | |
9112 | In this example, _2 is a recurrence because it's value depends on the |
9113 | previous iteration. We vectorize this as (VF = 4) |
9114 | |
9115 | vector.preheader: |
9116 | vect_init = vect_cst(..., ..., ..., 0) |
9117 | |
9118 | vector.body |
9119 | i = PHI <0(vector.preheader), i+4(vector.body)> |
9120 | vect_1 = PHI <vect_init(vector.preheader), v2(vector.body)> |
9121 | vect_2 = a[i, i+1, i+2, i+3]; |
9122 | vect_3 = vec_perm (vect_1, vect_2, { 3, 4, 5, 6 }) |
9123 | b[i, i+1, i+2, i+3] = vect_2 - vect_3 |
9124 | if (..) goto vector.body |
9125 | |
9126 | In this function, vectorizable_recurr, we code generate both the |
9127 | vector PHI node and the permute since those together compute the |
9128 | vectorized value of the scalar PHI. We do not yet have the |
9129 | backedge value to fill in there nor into the vec_perm. Those |
9130 | are filled in maybe_set_vectorized_backedge_value and |
9131 | vect_schedule_scc. |
9132 | |
9133 | TODO: Since the scalar loop does not have a use of the recurrence |
9134 | outside of the loop the natural way to implement peeling via |
9135 | vectorizing the live value doesn't work. For now peeling of loops |
9136 | with a recurrence is not implemented. For SLP the supported cases |
9137 | are restricted to those requiring a single vector recurrence PHI. */ |
9138 | |
9139 | bool |
9140 | vectorizable_recurr (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, |
9141 | gimple **vec_stmt, slp_tree slp_node, |
9142 | stmt_vector_for_cost *cost_vec) |
9143 | { |
9144 | if (!loop_vinfo || !is_a<gphi *> (p: stmt_info->stmt)) |
9145 | return false; |
9146 | |
9147 | gphi *phi = as_a<gphi *> (p: stmt_info->stmt); |
9148 | |
9149 | /* So far we only support first-order recurrence auto-vectorization. */ |
9150 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_first_order_recurrence) |
9151 | return false; |
9152 | |
9153 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
9154 | unsigned ncopies; |
9155 | if (slp_node) |
9156 | ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); |
9157 | else |
9158 | ncopies = vect_get_num_copies (loop_vinfo, vectype); |
9159 | poly_int64 nunits = TYPE_VECTOR_SUBPARTS (node: vectype); |
9160 | unsigned dist = slp_node ? SLP_TREE_LANES (slp_node) : 1; |
9161 | /* We need to be able to make progress with a single vector. */ |
9162 | if (maybe_gt (dist * 2, nunits)) |
9163 | { |
9164 | if (dump_enabled_p ()) |
9165 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
9166 | "first order recurrence exceeds half of " |
9167 | "a vector\n" ); |
9168 | return false; |
9169 | } |
9170 | |
9171 | /* First-order recurrence autovectorization needs to handle permutation |
9172 | with indices = [nunits-1, nunits, nunits+1, ...]. */ |
9173 | vec_perm_builder sel (nunits, 1, 3); |
9174 | for (int i = 0; i < 3; ++i) |
9175 | sel.quick_push (obj: nunits - dist + i); |
9176 | vec_perm_indices indices (sel, 2, nunits); |
9177 | |
9178 | if (!vec_stmt) /* transformation not required. */ |
9179 | { |
9180 | if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype), |
9181 | indices)) |
9182 | return false; |
9183 | |
9184 | if (slp_node) |
9185 | { |
9186 | /* We eventually need to set a vector type on invariant |
9187 | arguments. */ |
9188 | unsigned j; |
9189 | slp_tree child; |
9190 | FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child) |
9191 | if (!vect_maybe_update_slp_op_vectype |
9192 | (child, SLP_TREE_VECTYPE (slp_node))) |
9193 | { |
9194 | if (dump_enabled_p ()) |
9195 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
9196 | "incompatible vector types for " |
9197 | "invariants\n" ); |
9198 | return false; |
9199 | } |
9200 | } |
9201 | |
9202 | /* Verify we have set up compatible types. */ |
9203 | edge le = loop_latch_edge (LOOP_VINFO_LOOP (loop_vinfo)); |
9204 | tree latch_vectype = NULL_TREE; |
9205 | if (slp_node) |
9206 | { |
9207 | slp_tree latch_def = SLP_TREE_CHILDREN (slp_node)[le->dest_idx]; |
9208 | latch_vectype = SLP_TREE_VECTYPE (latch_def); |
9209 | } |
9210 | else |
9211 | { |
9212 | tree latch_def = PHI_ARG_DEF_FROM_EDGE (phi, le); |
9213 | if (TREE_CODE (latch_def) == SSA_NAME) |
9214 | { |
9215 | stmt_vec_info latch_def_info = loop_vinfo->lookup_def (latch_def); |
9216 | latch_def_info = vect_stmt_to_vectorize (stmt_info: latch_def_info); |
9217 | latch_vectype = STMT_VINFO_VECTYPE (latch_def_info); |
9218 | } |
9219 | } |
9220 | if (!types_compatible_p (type1: latch_vectype, type2: vectype)) |
9221 | return false; |
9222 | |
9223 | /* The recurrence costs the initialization vector and one permute |
9224 | for each copy. */ |
9225 | unsigned prologue_cost = record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: scalar_to_vec, |
9226 | stmt_info, misalign: 0, where: vect_prologue); |
9227 | unsigned inside_cost = record_stmt_cost (body_cost_vec: cost_vec, count: ncopies, kind: vector_stmt, |
9228 | stmt_info, misalign: 0, where: vect_body); |
9229 | if (dump_enabled_p ()) |
9230 | dump_printf_loc (MSG_NOTE, vect_location, |
9231 | "vectorizable_recurr: inside_cost = %d, " |
9232 | "prologue_cost = %d .\n" , inside_cost, |
9233 | prologue_cost); |
9234 | |
9235 | STMT_VINFO_TYPE (stmt_info) = recurr_info_type; |
9236 | return true; |
9237 | } |
9238 | |
9239 | edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); |
9240 | basic_block bb = gimple_bb (g: phi); |
9241 | tree = PHI_ARG_DEF_FROM_EDGE (phi, pe); |
9242 | if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (preheader))) |
9243 | { |
9244 | gimple_seq stmts = NULL; |
9245 | preheader = gimple_convert (seq: &stmts, TREE_TYPE (vectype), op: preheader); |
9246 | gsi_insert_seq_on_edge_immediate (pe, stmts); |
9247 | } |
9248 | tree vec_init = build_vector_from_val (vectype, preheader); |
9249 | vec_init = vect_init_vector (loop_vinfo, stmt_info, vec_init, vectype, NULL); |
9250 | |
9251 | /* Create the vectorized first-order PHI node. */ |
9252 | tree vec_dest = vect_get_new_vect_var (vectype, |
9253 | vect_simple_var, "vec_recur_" ); |
9254 | gphi *new_phi = create_phi_node (vec_dest, bb); |
9255 | add_phi_arg (new_phi, vec_init, pe, UNKNOWN_LOCATION); |
9256 | |
9257 | /* Insert shuffles the first-order recurrence autovectorization. |
9258 | result = VEC_PERM <vec_recur, vect_1, index[nunits-1, nunits, ...]>. */ |
9259 | tree perm = vect_gen_perm_mask_checked (vectype, indices); |
9260 | |
9261 | /* Insert the required permute after the latch definition. The |
9262 | second and later operands are tentative and will be updated when we have |
9263 | vectorized the latch definition. */ |
9264 | edge le = loop_latch_edge (LOOP_VINFO_LOOP (loop_vinfo)); |
9265 | gimple *latch_def = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, le)); |
9266 | gimple_stmt_iterator gsi2 = gsi_for_stmt (latch_def); |
9267 | gsi_next (i: &gsi2); |
9268 | |
9269 | for (unsigned i = 0; i < ncopies; ++i) |
9270 | { |
9271 | vec_dest = make_ssa_name (var: vectype); |
9272 | gassign *vperm |
9273 | = gimple_build_assign (vec_dest, VEC_PERM_EXPR, |
9274 | i == 0 ? gimple_phi_result (gs: new_phi) : NULL, |
9275 | NULL, perm); |
9276 | vect_finish_stmt_generation (loop_vinfo, stmt_info, vperm, &gsi2); |
9277 | |
9278 | if (slp_node) |
9279 | slp_node->push_vec_def (def: vperm); |
9280 | else |
9281 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: vperm); |
9282 | } |
9283 | |
9284 | if (!slp_node) |
9285 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
9286 | return true; |
9287 | } |
9288 | |
9289 | /* Return true if VECTYPE represents a vector that requires lowering |
9290 | by the vector lowering pass. */ |
9291 | |
9292 | bool |
9293 | vect_emulated_vector_p (tree vectype) |
9294 | { |
9295 | return (!VECTOR_MODE_P (TYPE_MODE (vectype)) |
9296 | && (!VECTOR_BOOLEAN_TYPE_P (vectype) |
9297 | || TYPE_PRECISION (TREE_TYPE (vectype)) != 1)); |
9298 | } |
9299 | |
9300 | /* Return true if we can emulate CODE on an integer mode representation |
9301 | of a vector. */ |
9302 | |
9303 | bool |
9304 | vect_can_vectorize_without_simd_p (tree_code code) |
9305 | { |
9306 | switch (code) |
9307 | { |
9308 | case PLUS_EXPR: |
9309 | case MINUS_EXPR: |
9310 | case NEGATE_EXPR: |
9311 | case BIT_AND_EXPR: |
9312 | case BIT_IOR_EXPR: |
9313 | case BIT_XOR_EXPR: |
9314 | case BIT_NOT_EXPR: |
9315 | return true; |
9316 | |
9317 | default: |
9318 | return false; |
9319 | } |
9320 | } |
9321 | |
9322 | /* Likewise, but taking a code_helper. */ |
9323 | |
9324 | bool |
9325 | vect_can_vectorize_without_simd_p (code_helper code) |
9326 | { |
9327 | return (code.is_tree_code () |
9328 | && vect_can_vectorize_without_simd_p (code: tree_code (code))); |
9329 | } |
9330 | |
/* Create vector init for vectorized iv.  Build (appending statements
   to STMTS) the initial vector value for a nonlinear induction with
   scalar initial value INIT_EXPR and step STEP_EXPR.  NUNITS is the
   number of vector elements, VECTYPE the vector type, and
   INDUCTION_TYPE the update operation (shift right/left, negate or
   multiply).  Returns the initial vector value.  */
static tree
vect_create_nonlinear_iv_init (gimple_seq* stmts, tree init_expr,
			       tree step_expr, poly_uint64 nunits,
			       tree vectype,
			       enum vect_induction_op_type induction_type)
{
  unsigned HOST_WIDE_INT const_nunits;
  tree vec_shift, vec_init, new_name;
  unsigned i;
  tree itype = TREE_TYPE (vectype);

  /* iv_loop is the loop to be vectorized. Create:
     vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr).  */
  new_name = gimple_convert (seq: stmts, type: itype, op: init_expr);
  switch (induction_type)
    {
    case vect_step_op_shr:
    case vect_step_op_shl:
      /* Build the Initial value from shift_expr: duplicate X, then
	 shift element I by I*S using a VEC_SERIES [0, S, 2*S, ...]
	 of shift amounts.  */
      vec_init = gimple_build_vector_from_val (seq: stmts,
					       type: vectype,
					       op: new_name);
      vec_shift = gimple_build (seq: stmts, code: VEC_SERIES_EXPR, type: vectype,
				ops: build_zero_cst (itype), ops: step_expr);
      vec_init = gimple_build (seq: stmts,
			       code: (induction_type == vect_step_op_shr
				      ? RSHIFT_EXPR : LSHIFT_EXPR),
			       type: vectype, ops: vec_init, ops: vec_shift);
      break;

    case vect_step_op_neg:
      {
	/* Blend duplicates of X and -X into [X, -X, X, -X, ...].  */
	vec_init = gimple_build_vector_from_val (seq: stmts,
						 type: vectype,
						 op: new_name);
	tree vec_neg = gimple_build (seq: stmts, code: NEGATE_EXPR,
				     type: vectype, ops: vec_init);
	/* The encoding has 2 interleaved stepped patterns.  */
	vec_perm_builder sel (nunits, 2, 3);
	sel.quick_grow (len: 6);
	for (i = 0; i < 3; i++)
	  {
	    /* Even lanes pick element I of vec_init, odd lanes pick
	       element I of vec_neg (indices >= nunits select from the
	       second permute operand).  */
	    sel[2 * i] = i;
	    sel[2 * i + 1] = i + nunits;
	  }
	vec_perm_indices indices (sel, 2, nunits);
	/* Don't use vect_gen_perm_mask_checked since can_vec_perm_const_p may
	   fail when vec_init is const vector. In that situation vec_perm is not
	   really needed.  */
	tree perm_mask_even
	  = vect_gen_perm_mask_any (vectype, indices);
	vec_init = gimple_build (seq: stmts, code: VEC_PERM_EXPR,
				 type: vectype,
				 ops: vec_init, ops: vec_neg,
				 ops: perm_mask_even);
      }
      break;

    case vect_step_op_mul:
      {
	/* Use unsigned mult to avoid UD integer overflow.  */
	gcc_assert (nunits.is_constant (&const_nunits));
	tree utype = unsigned_type_for (itype);
	tree uvectype = build_vector_type (utype,
					   TYPE_VECTOR_SUBPARTS (node: vectype));
	new_name = gimple_convert (seq: stmts, type: utype, op: new_name);
	vec_init = gimple_build_vector_from_val (seq: stmts,
						 type: uvectype,
						 op: new_name);
	/* Build the multiplier vector [1, S, S^2, ..., S^(nunits-1)].  */
	tree_vector_builder elts (uvectype, const_nunits, 1);
	tree elt_step = build_one_cst (utype);

	elts.quick_push (obj: elt_step);
	for (i = 1; i < const_nunits; i++)
	  {
	    /* Create: new_name_i = new_name + step_expr.  */
	    elt_step = gimple_build (seq: stmts, code: MULT_EXPR,
				     type: utype, ops: elt_step, ops: step_expr);
	    elts.quick_push (obj: elt_step);
	  }
	/* Create a vector from [new_name_0, new_name_1, ...,
	   new_name_nunits-1].  */
	tree vec_mul = gimple_build_vector (seq: stmts, builder: &elts);
	vec_init = gimple_build (seq: stmts, code: MULT_EXPR, type: uvectype,
				 ops: vec_init, ops: vec_mul);
	/* Convert back to the (possibly signed) element type.  */
	vec_init = gimple_convert (seq: stmts, type: vectype, op: vec_init);
      }
      break;

    default:
      gcc_unreachable ();
    }

  return vec_init;
}
9427 | |
9428 | /* Peel init_expr by skip_niter for induction_type. */ |
9429 | tree |
9430 | vect_peel_nonlinear_iv_init (gimple_seq* stmts, tree init_expr, |
9431 | tree skip_niters, tree step_expr, |
9432 | enum vect_induction_op_type induction_type) |
9433 | { |
9434 | gcc_assert (TREE_CODE (skip_niters) == INTEGER_CST); |
9435 | tree type = TREE_TYPE (init_expr); |
9436 | unsigned prec = TYPE_PRECISION (type); |
9437 | switch (induction_type) |
9438 | { |
9439 | case vect_step_op_neg: |
9440 | if (TREE_INT_CST_LOW (skip_niters) % 2) |
9441 | init_expr = gimple_build (seq: stmts, code: NEGATE_EXPR, type, ops: init_expr); |
9442 | /* else no change. */ |
9443 | break; |
9444 | |
9445 | case vect_step_op_shr: |
9446 | case vect_step_op_shl: |
9447 | skip_niters = gimple_convert (seq: stmts, type, op: skip_niters); |
9448 | step_expr = gimple_build (seq: stmts, code: MULT_EXPR, type, ops: step_expr, ops: skip_niters); |
9449 | /* When shift mount >= precision, need to avoid UD. |
9450 | In the original loop, there's no UD, and according to semantic, |
9451 | init_expr should be 0 for lshr, ashl, and >>= (prec - 1) for ashr. */ |
9452 | if (!tree_fits_uhwi_p (step_expr) |
9453 | || tree_to_uhwi (step_expr) >= prec) |
9454 | { |
9455 | if (induction_type == vect_step_op_shl |
9456 | || TYPE_UNSIGNED (type)) |
9457 | init_expr = build_zero_cst (type); |
9458 | else |
9459 | init_expr = gimple_build (seq: stmts, code: RSHIFT_EXPR, type, |
9460 | ops: init_expr, |
9461 | ops: wide_int_to_tree (type, cst: prec - 1)); |
9462 | } |
9463 | else |
9464 | init_expr = gimple_build (seq: stmts, code: (induction_type == vect_step_op_shr |
9465 | ? RSHIFT_EXPR : LSHIFT_EXPR), |
9466 | type, ops: init_expr, ops: step_expr); |
9467 | break; |
9468 | |
9469 | case vect_step_op_mul: |
9470 | { |
9471 | tree utype = unsigned_type_for (type); |
9472 | init_expr = gimple_convert (seq: stmts, type: utype, op: init_expr); |
9473 | wide_int skipn = wi::to_wide (t: skip_niters); |
9474 | wide_int begin = wi::to_wide (t: step_expr); |
9475 | auto_mpz base, exp, mod, res; |
9476 | wi::to_mpz (begin, base, TYPE_SIGN (type)); |
9477 | wi::to_mpz (skipn, exp, UNSIGNED); |
9478 | mpz_ui_pow_ui (mod, 2, TYPE_PRECISION (type)); |
9479 | mpz_powm (res, base, exp, mod); |
9480 | begin = wi::from_mpz (utype, res, true); |
9481 | tree mult_expr = wide_int_to_tree (type: utype, cst: begin); |
9482 | init_expr = gimple_build (seq: stmts, code: MULT_EXPR, type: utype, |
9483 | ops: init_expr, ops: mult_expr); |
9484 | init_expr = gimple_convert (seq: stmts, type, op: init_expr); |
9485 | } |
9486 | break; |
9487 | |
9488 | default: |
9489 | gcc_unreachable (); |
9490 | } |
9491 | |
9492 | return init_expr; |
9493 | } |
9494 | |
/* Create the scalar step for one vector iteration of a nonlinear iv:
   pow (STEP_EXPR, VF) for mult induction, VF * STEP_EXPR for the shift
   inductions, and NULL for neg induction (which needs no step at all).
   Any statements needed to compute the step are appended to STMTS.  */
static tree
vect_create_nonlinear_iv_step (gimple_seq* stmts, tree step_expr,
			       poly_uint64 vf,
			       enum vect_induction_op_type induction_type)
{
  tree expr = build_int_cst (TREE_TYPE (step_expr), vf);
  tree new_name = NULL;
  /* Step should be pow (step, vf) for mult induction. */
  if (induction_type == vect_step_op_mul)
    {
      /* Computing pow () below requires a compile-time constant VF.  */
      gcc_assert (vf.is_constant ());
      wide_int begin = wi::to_wide (t: step_expr);

      /* step^vf by repeated wide-int multiplication (wraps modulo the
	 type precision, matching the scalar loop's behavior).  */
      for (unsigned i = 0; i != vf.to_constant () - 1; i++)
	begin = wi::mul (x: begin, y: wi::to_wide (t: step_expr));

      new_name = wide_int_to_tree (TREE_TYPE (step_expr), cst: begin);
    }
  else if (induction_type == vect_step_op_neg)
    /* Do nothing. */
    ;
  else
    /* shr/shl: the shift amount accumulated over VF iterations
       is VF * step.  */
    new_name = gimple_build (seq: stmts, code: MULT_EXPR, TREE_TYPE (step_expr),
			     ops: expr, ops: step_expr);
  return new_name;
}
9522 | |
/* Broadcast the scalar step NEW_NAME into a vector of type VECTYPE and
   materialize it (in the loop preheader, via vect_init_vector).  Returns
   the vector step, or NULL for neg induction which needs no step.  */
static tree
vect_create_nonlinear_iv_vec_step (loop_vec_info loop_vinfo,
				   stmt_vec_info stmt_info,
				   tree new_name, tree vectype,
				   enum vect_induction_op_type induction_type)
{
  /* No step is needed for neg induction. */
  if (induction_type == vect_step_op_neg)
    return NULL;

  tree t = unshare_expr (new_name);
  /* The step must be a constant or an SSA name so it can be splat
     across all lanes.  */
  gcc_assert (CONSTANT_CLASS_P (new_name)
	      || TREE_CODE (new_name) == SSA_NAME);
  tree new_vec = build_vector_from_val (vectype, t);
  tree vec_step = vect_init_vector (loop_vinfo, stmt_info,
				    new_vec, vectype, NULL);
  return vec_step;
}
9541 | |
/* Emit the in-loop update of a vectorized nonlinear iv: combine the
   current vector value INDUC_DEF with VEC_STEP according to
   INDUCTION_TYPE (multiply, shift right, shift left, or no-op for neg).
   The generated statements are appended to STMTS; the updated vector
   def is returned.  */
static tree
vect_update_nonlinear_iv (gimple_seq* stmts, tree vectype,
			  tree induc_def, tree vec_step,
			  enum vect_induction_op_type induction_type)
{
  tree vec_def = induc_def;
  switch (induction_type)
    {
    case vect_step_op_mul:
      {
	/* Use unsigned mult to avoid UD integer overflow. */
	tree uvectype
	  = build_vector_type (unsigned_type_for (TREE_TYPE (vectype)),
			       TYPE_VECTOR_SUBPARTS (node: vectype));
	vec_def = gimple_convert (seq: stmts, type: uvectype, op: vec_def);
	vec_step = gimple_convert (seq: stmts, type: uvectype, op: vec_step);
	vec_def = gimple_build (seq: stmts, code: MULT_EXPR, type: uvectype,
				ops: vec_def, ops: vec_step);
	/* Convert back to the (possibly signed) element type.  */
	vec_def = gimple_convert (seq: stmts, type: vectype, op: vec_def);
      }
      break;

    case vect_step_op_shr:
      vec_def = gimple_build (seq: stmts, code: RSHIFT_EXPR, type: vectype,
			      ops: vec_def, ops: vec_step);
      break;

    case vect_step_op_shl:
      vec_def = gimple_build (seq: stmts, code: LSHIFT_EXPR, type: vectype,
			      ops: vec_def, ops: vec_step);
      break;
    case vect_step_op_neg:
      /* Neg induction alternates between init and -init; the vector
	 value is invariant across iterations, so no update is needed.  */
      vec_def = induc_def;
      /* Do nothing. */
      break;
    default:
      gcc_unreachable ();
    }

  return vec_def;

}
9585 | |
/* Function vectorizable_nonlinear_induction

   Check if STMT_INFO performs a nonlinear induction computation that can be
   vectorized. If VEC_STMT is also passed, vectorize the induction PHI: create
   a vectorized phi to replace it, put it in VEC_STMT, and add it to the same
   basic block.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_nonlinear_induction (loop_vec_info loop_vinfo,
				  stmt_vec_info stmt_info,
				  gimple **vec_stmt, slp_tree slp_node,
				  stmt_vector_for_cost *cost_vec)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  unsigned ncopies;
  bool nested_in_vect_loop = false;
  class loop *iv_loop;
  tree vec_def;
  edge pe = loop_preheader_edge (loop);
  basic_block new_bb;
  tree vec_init, vec_step;
  tree new_name;
  gimple *new_stmt;
  gphi *induction_phi;
  tree induc_def, vec_dest;
  tree init_expr, step_expr;
  tree niters_skip;
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  unsigned i;
  gimple_stmt_iterator si;

  gphi *phi = dyn_cast <gphi *> (p: stmt_info->stmt);

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (node: vectype);
  enum vect_induction_op_type induction_type
    = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info);

  /* The caller (vectorizable_induction) only dispatches here for
     non-add evolutions.  */
  gcc_assert (induction_type > vect_step_op_add);

  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);
  gcc_assert (ncopies >= 1);

  /* FORNOW. Only handle nonlinear induction in the same loop. */
  if (nested_in_vect_loop_p (loop, stmt_info))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "nonlinear induction in nested loop.\n" );
      return false;
    }

  iv_loop = loop;
  gcc_assert (iv_loop == (gimple_bb (phi))->loop_father);

  /* TODO: Support slp for nonlinear iv. There should be separate vector iv
     update for each iv and a permutation to generate wanted vector iv. */
  if (slp_node)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "SLP induction not supported for nonlinear"
			 " induction.\n" );
      return false;
    }

  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "floating point nonlinear induction vectorization"
			 " not supported.\n" );
      return false;
    }

  step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
  init_expr = vect_phi_initial_value (phi);
  gcc_assert (step_expr != NULL_TREE && init_expr != NULL
	      && TREE_CODE (step_expr) == INTEGER_CST);
  /* step_expr should be aligned with init_expr,
     i.e. uint64 a >> 1, step is int, but vector<uint64> shift is used. */
  step_expr = fold_convert (TREE_TYPE (vectype), step_expr);

  if (TREE_CODE (init_expr) == INTEGER_CST)
    init_expr = fold_convert (TREE_TYPE (vectype), init_expr);
  else if (!tree_nop_conversion_p (TREE_TYPE (vectype), TREE_TYPE (init_expr)))
    {
      /* INIT_EXPR could be a bit_field, bail out for such case. */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "nonlinear induction vectorization failed:"
			 " component type of vectype is not a nop conversion"
			 " from type of init_expr.\n" );
      return false;
    }

  /* Per-evolution feasibility checks (target support, constant bounds).  */
  switch (induction_type)
    {
    case vect_step_op_neg:
      if (maybe_eq (a: TYPE_VECTOR_SUBPARTS (node: vectype), b: 1u))
	return false;
      if (TREE_CODE (init_expr) != INTEGER_CST
	  && TREE_CODE (init_expr) != REAL_CST)
	{
	  /* Check for backend support of NEGATE_EXPR and vec_perm. */
	  if (!directly_supported_p (NEGATE_EXPR, vectype))
	    return false;

	  /* The encoding has 2 interleaved stepped patterns. */
	  vec_perm_builder sel (nunits, 2, 3);
	  machine_mode mode = TYPE_MODE (vectype);
	  sel.quick_grow (len: 6);
	  for (i = 0; i < 3; i++)
	    {
	      sel[i * 2] = i;
	      sel[i * 2 + 1] = i + nunits;
	    }
	  vec_perm_indices indices (sel, 2, nunits);
	  if (!can_vec_perm_const_p (mode, mode, indices))
	    return false;
	}
      break;

    case vect_step_op_mul:
      {
	/* Check for backend support of MULT_EXPR. */
	if (!directly_supported_p (MULT_EXPR, vectype))
	  return false;

	/* ?? How to construct vector step for variable number vector.
	   [ 1, step, pow (step, 2), pow (step, 4), .. ]. */
	if (!vf.is_constant ())
	  return false;
      }
      break;

    case vect_step_op_shr:
      /* Check for backend support of RSHIFT_EXPR. */
      if (!directly_supported_p (RSHIFT_EXPR, vectype, optab_vector))
	return false;

      /* Don't shift more than type precision to avoid UD. */
      if (!tree_fits_uhwi_p (step_expr)
	  || maybe_ge (nunits * tree_to_uhwi (step_expr),
		       TYPE_PRECISION (TREE_TYPE (init_expr))))
	return false;
      break;

    case vect_step_op_shl:
      /* Check for backend support of LSHIFT_EXPR. */
      if (!directly_supported_p (LSHIFT_EXPR, vectype, optab_vector))
	return false;

      /* Don't shift more than type precision to avoid UD. */
      if (!tree_fits_uhwi_p (step_expr)
	  || maybe_ge (nunits * tree_to_uhwi (step_expr),
		       TYPE_PRECISION (TREE_TYPE (init_expr))))
	return false;

      break;

    default:
      gcc_unreachable ();
    }

  if (!vec_stmt) /* transformation not required. */
    {
      unsigned inside_cost = 0, prologue_cost = 0;
      /* loop cost for vec_loop.  */
      inside_cost = record_stmt_cost (body_cost_vec: cost_vec, count: ncopies, kind: vector_stmt,
				      stmt_info, misalign: 0, where: vect_body);

      /* Neg induction doesn't have any inside_cost: the negated vector
	 is invariant, so no in-loop update is emitted.  */
      if (induction_type == vect_step_op_neg)
	inside_cost = 0;

      /* prologue cost for vec_init and vec_step. */
      prologue_cost = record_stmt_cost (body_cost_vec: cost_vec, count: 2, kind: scalar_to_vec,
					stmt_info, misalign: 0, where: vect_prologue);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_induction_cost: inside_cost = %d, "
			 "prologue_cost = %d. \n" , inside_cost,
			 prologue_cost);

      STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_nonlinear_induction" );
      return true;
    }

  /* Transform. */

  /* Compute a vector variable, initialized with the first VF values of
     the induction variable. E.g., for an iv with IV_PHI='X' and
     evolution S, for a vector of 4 units, we want to compute:
     [X, X + S, X + 2*S, X + 3*S]. */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.\n" );

  pe = loop_preheader_edge (iv_loop);
  /* Find the first insertion point in the BB. */
  basic_block bb = gimple_bb (g: phi);
  si = gsi_after_labels (bb);

  gimple_seq stmts = NULL;

  niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
  /* If we are using the loop mask to "peel" for alignment then we need
     to adjust the start value here. */
  if (niters_skip != NULL_TREE)
    init_expr = vect_peel_nonlinear_iv_init (stmts: &stmts, init_expr, skip_niters: niters_skip,
					     step_expr, induction_type);

  vec_init = vect_create_nonlinear_iv_init (stmts: &stmts, init_expr,
					    step_expr, nunits, vectype,
					    induction_type);
  if (stmts)
    {
      /* Init computations belong on the preheader edge, before the loop.  */
      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
      gcc_assert (!new_bb);
    }

  stmts = NULL;
  new_name = vect_create_nonlinear_iv_step (stmts: &stmts, step_expr,
					    vf, induction_type);
  if (stmts)
    {
      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
      gcc_assert (!new_bb);
    }

  vec_step = vect_create_nonlinear_iv_vec_step (loop_vinfo, stmt_info,
						new_name, vectype,
						induction_type);
  /* Create the following def-use cycle:
     loop prolog:
	vec_init = ...
	vec_step = ...
     loop:
	vec_iv = PHI <vec_init, vec_loop>
	...
	STMT
	...
	vec_loop = vec_iv + vec_step;  */

  /* Create the induction-phi that defines the induction-operand. */
  vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_" );
  induction_phi = create_phi_node (vec_dest, iv_loop->header);
  induc_def = PHI_RESULT (induction_phi);

  /* Create the iv update inside the loop. */
  stmts = NULL;
  vec_def = vect_update_nonlinear_iv (stmts: &stmts, vectype,
				      induc_def, vec_step,
				      induction_type);

  gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
  new_stmt = SSA_NAME_DEF_STMT (vec_def);

  /* Set the arguments of the phi node: */
  add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
  add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
	       UNKNOWN_LOCATION);

  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: induction_phi);
  *vec_stmt = induction_phi;

  /* In case that vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits. For more details see documentation
     in vectorizable_operation. */

  if (ncopies > 1)
    {
      stmts = NULL;
      /* FORNOW. This restriction should be relaxed. */
      gcc_assert (!nested_in_vect_loop);

      /* Each extra copy advances by nunits (not VF) iterations, so build
	 a step for nunits here.  */
      new_name = vect_create_nonlinear_iv_step (stmts: &stmts, step_expr,
						vf: nunits, induction_type);

      vec_step = vect_create_nonlinear_iv_vec_step (loop_vinfo, stmt_info,
						    new_name, vectype,
						    induction_type);
      vec_def = induc_def;
      for (i = 1; i < ncopies; i++)
	{
	  /* vec_i = vec_prev + vec_step. */
	  stmts = NULL;
	  vec_def = vect_update_nonlinear_iv (stmts: &stmts, vectype,
					      induc_def: vec_def, vec_step,
					      induction_type);
	  gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
	  new_stmt = SSA_NAME_DEF_STMT (vec_def);
	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt);
	}
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform induction: created def-use cycle: %G%G" ,
		     (gimple *) induction_phi, SSA_NAME_DEF_STMT (vec_def));

  return true;
}
9900 | |
9901 | /* Function vectorizable_induction |
9902 | |
9903 | Check if STMT_INFO performs an induction computation that can be vectorized. |
9904 | If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized |
9905 | phi to replace it, put it in VEC_STMT, and add it to the same basic block. |
9906 | Return true if STMT_INFO is vectorizable in this way. */ |
9907 | |
9908 | bool |
9909 | vectorizable_induction (loop_vec_info loop_vinfo, |
9910 | stmt_vec_info stmt_info, |
9911 | gimple **vec_stmt, slp_tree slp_node, |
9912 | stmt_vector_for_cost *cost_vec) |
9913 | { |
9914 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
9915 | unsigned ncopies; |
9916 | bool nested_in_vect_loop = false; |
9917 | class loop *iv_loop; |
9918 | tree vec_def; |
9919 | edge pe = loop_preheader_edge (loop); |
9920 | basic_block new_bb; |
9921 | tree new_vec, vec_init, vec_step, t; |
9922 | tree new_name; |
9923 | gimple *new_stmt; |
9924 | gphi *induction_phi; |
9925 | tree induc_def, vec_dest; |
9926 | tree init_expr, step_expr; |
9927 | poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
9928 | unsigned i; |
9929 | tree expr; |
9930 | gimple_stmt_iterator si; |
9931 | enum vect_induction_op_type induction_type |
9932 | = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info); |
9933 | |
9934 | gphi *phi = dyn_cast <gphi *> (p: stmt_info->stmt); |
9935 | if (!phi) |
9936 | return false; |
9937 | |
9938 | if (!STMT_VINFO_RELEVANT_P (stmt_info)) |
9939 | return false; |
9940 | |
9941 | /* Make sure it was recognized as induction computation. */ |
9942 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def) |
9943 | return false; |
9944 | |
9945 | /* Handle nonlinear induction in a separate place. */ |
9946 | if (induction_type != vect_step_op_add) |
9947 | return vectorizable_nonlinear_induction (loop_vinfo, stmt_info, |
9948 | vec_stmt, slp_node, cost_vec); |
9949 | |
9950 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
9951 | poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (node: vectype); |
9952 | |
9953 | if (slp_node) |
9954 | ncopies = 1; |
9955 | else |
9956 | ncopies = vect_get_num_copies (loop_vinfo, vectype); |
9957 | gcc_assert (ncopies >= 1); |
9958 | |
9959 | /* FORNOW. These restrictions should be relaxed. */ |
9960 | if (nested_in_vect_loop_p (loop, stmt_info)) |
9961 | { |
9962 | imm_use_iterator imm_iter; |
9963 | use_operand_p use_p; |
9964 | gimple *exit_phi; |
9965 | edge latch_e; |
9966 | tree loop_arg; |
9967 | |
9968 | if (ncopies > 1) |
9969 | { |
9970 | if (dump_enabled_p ()) |
9971 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
9972 | "multiple types in nested loop.\n" ); |
9973 | return false; |
9974 | } |
9975 | |
9976 | exit_phi = NULL; |
9977 | latch_e = loop_latch_edge (loop->inner); |
9978 | loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); |
9979 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg) |
9980 | { |
9981 | gimple *use_stmt = USE_STMT (use_p); |
9982 | if (is_gimple_debug (gs: use_stmt)) |
9983 | continue; |
9984 | |
9985 | if (!flow_bb_inside_loop_p (loop->inner, gimple_bb (g: use_stmt))) |
9986 | { |
9987 | exit_phi = use_stmt; |
9988 | break; |
9989 | } |
9990 | } |
9991 | if (exit_phi) |
9992 | { |
9993 | stmt_vec_info exit_phi_vinfo = loop_vinfo->lookup_stmt (exit_phi); |
9994 | if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo) |
9995 | && !STMT_VINFO_LIVE_P (exit_phi_vinfo))) |
9996 | { |
9997 | if (dump_enabled_p ()) |
9998 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
9999 | "inner-loop induction only used outside " |
10000 | "of the outer vectorized loop.\n" ); |
10001 | return false; |
10002 | } |
10003 | } |
10004 | |
10005 | nested_in_vect_loop = true; |
10006 | iv_loop = loop->inner; |
10007 | } |
10008 | else |
10009 | iv_loop = loop; |
10010 | gcc_assert (iv_loop == (gimple_bb (phi))->loop_father); |
10011 | |
10012 | if (slp_node && !nunits.is_constant ()) |
10013 | { |
10014 | /* The current SLP code creates the step value element-by-element. */ |
10015 | if (dump_enabled_p ()) |
10016 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
10017 | "SLP induction not supported for variable-length" |
10018 | " vectors.\n" ); |
10019 | return false; |
10020 | } |
10021 | |
10022 | if (FLOAT_TYPE_P (vectype) && !param_vect_induction_float) |
10023 | { |
10024 | if (dump_enabled_p ()) |
10025 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
10026 | "floating point induction vectorization disabled\n" ); |
10027 | return false; |
10028 | } |
10029 | |
10030 | step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info); |
10031 | gcc_assert (step_expr != NULL_TREE); |
10032 | if (INTEGRAL_TYPE_P (TREE_TYPE (step_expr)) |
10033 | && !type_has_mode_precision_p (TREE_TYPE (step_expr))) |
10034 | { |
10035 | if (dump_enabled_p ()) |
10036 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
10037 | "bit-precision induction vectorization not " |
10038 | "supported.\n" ); |
10039 | return false; |
10040 | } |
10041 | tree step_vectype = get_same_sized_vectype (TREE_TYPE (step_expr), vectype); |
10042 | |
10043 | /* Check for backend support of PLUS/MINUS_EXPR. */ |
10044 | if (!directly_supported_p (PLUS_EXPR, step_vectype) |
10045 | || !directly_supported_p (MINUS_EXPR, step_vectype)) |
10046 | return false; |
10047 | |
10048 | if (!vec_stmt) /* transformation not required. */ |
10049 | { |
10050 | unsigned inside_cost = 0, prologue_cost = 0; |
10051 | if (slp_node) |
10052 | { |
10053 | /* We eventually need to set a vector type on invariant |
10054 | arguments. */ |
10055 | unsigned j; |
10056 | slp_tree child; |
10057 | FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child) |
10058 | if (!vect_maybe_update_slp_op_vectype |
10059 | (child, SLP_TREE_VECTYPE (slp_node))) |
10060 | { |
10061 | if (dump_enabled_p ()) |
10062 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
10063 | "incompatible vector types for " |
10064 | "invariants\n" ); |
10065 | return false; |
10066 | } |
10067 | /* loop cost for vec_loop. */ |
10068 | inside_cost |
10069 | = record_stmt_cost (body_cost_vec: cost_vec, |
10070 | SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node), |
10071 | kind: vector_stmt, stmt_info, misalign: 0, where: vect_body); |
10072 | /* prologue cost for vec_init (if not nested) and step. */ |
10073 | prologue_cost = record_stmt_cost (body_cost_vec: cost_vec, count: 1 + !nested_in_vect_loop, |
10074 | kind: scalar_to_vec, |
10075 | stmt_info, misalign: 0, where: vect_prologue); |
10076 | } |
10077 | else /* if (!slp_node) */ |
10078 | { |
10079 | /* loop cost for vec_loop. */ |
10080 | inside_cost = record_stmt_cost (body_cost_vec: cost_vec, count: ncopies, kind: vector_stmt, |
10081 | stmt_info, misalign: 0, where: vect_body); |
10082 | /* prologue cost for vec_init and vec_step. */ |
10083 | prologue_cost = record_stmt_cost (body_cost_vec: cost_vec, count: 2, kind: scalar_to_vec, |
10084 | stmt_info, misalign: 0, where: vect_prologue); |
10085 | } |
10086 | if (dump_enabled_p ()) |
10087 | dump_printf_loc (MSG_NOTE, vect_location, |
10088 | "vect_model_induction_cost: inside_cost = %d, " |
10089 | "prologue_cost = %d .\n" , inside_cost, |
10090 | prologue_cost); |
10091 | |
10092 | STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type; |
10093 | DUMP_VECT_SCOPE ("vectorizable_induction" ); |
10094 | return true; |
10095 | } |
10096 | |
10097 | /* Transform. */ |
10098 | |
10099 | /* Compute a vector variable, initialized with the first VF values of |
10100 | the induction variable. E.g., for an iv with IV_PHI='X' and |
10101 | evolution S, for a vector of 4 units, we want to compute: |
10102 | [X, X + S, X + 2*S, X + 3*S]. */ |
10103 | |
10104 | if (dump_enabled_p ()) |
10105 | dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.\n" ); |
10106 | |
10107 | pe = loop_preheader_edge (iv_loop); |
10108 | /* Find the first insertion point in the BB. */ |
10109 | basic_block bb = gimple_bb (g: phi); |
10110 | si = gsi_after_labels (bb); |
10111 | |
10112 | /* For SLP induction we have to generate several IVs as for example |
10113 | with group size 3 we need |
10114 | [i0, i1, i2, i0 + S0] [i1 + S1, i2 + S2, i0 + 2*S0, i1 + 2*S1] |
10115 | [i2 + 2*S2, i0 + 3*S0, i1 + 3*S1, i2 + 3*S2]. */ |
10116 | if (slp_node) |
10117 | { |
10118 | /* Enforced above. */ |
10119 | unsigned int const_nunits = nunits.to_constant (); |
10120 | |
10121 | /* The initial values are vectorized, but any lanes > group_size |
10122 | need adjustment. */ |
10123 | slp_tree init_node |
10124 | = SLP_TREE_CHILDREN (slp_node)[pe->dest_idx]; |
10125 | |
10126 | /* Gather steps. Since we do not vectorize inductions as |
10127 | cycles we have to reconstruct the step from SCEV data. */ |
10128 | unsigned group_size = SLP_TREE_LANES (slp_node); |
10129 | tree *steps = XALLOCAVEC (tree, group_size); |
10130 | tree *inits = XALLOCAVEC (tree, group_size); |
10131 | stmt_vec_info phi_info; |
10132 | FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, phi_info) |
10133 | { |
10134 | steps[i] = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info); |
10135 | if (!init_node) |
10136 | inits[i] = gimple_phi_arg_def (gs: as_a<gphi *> (p: phi_info->stmt), |
10137 | index: pe->dest_idx); |
10138 | } |
10139 | |
10140 | /* Now generate the IVs. */ |
10141 | unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); |
10142 | gcc_assert ((const_nunits * nvects) % group_size == 0); |
10143 | unsigned nivs; |
10144 | if (nested_in_vect_loop) |
10145 | nivs = nvects; |
10146 | else |
10147 | { |
10148 | /* Compute the number of distinct IVs we need. First reduce |
10149 | group_size if it is a multiple of const_nunits so we get |
10150 | one IV for a group_size of 4 but const_nunits 2. */ |
10151 | unsigned group_sizep = group_size; |
10152 | if (group_sizep % const_nunits == 0) |
10153 | group_sizep = group_sizep / const_nunits; |
10154 | nivs = least_common_multiple (group_sizep, |
10155 | const_nunits) / const_nunits; |
10156 | } |
10157 | tree stept = TREE_TYPE (step_vectype); |
10158 | tree lupdate_mul = NULL_TREE; |
10159 | if (!nested_in_vect_loop) |
10160 | { |
10161 | /* The number of iterations covered in one vector iteration. */ |
10162 | unsigned lup_mul = (nvects * const_nunits) / group_size; |
10163 | lupdate_mul |
10164 | = build_vector_from_val (step_vectype, |
10165 | SCALAR_FLOAT_TYPE_P (stept) |
10166 | ? build_real_from_wide (stept, lup_mul, |
10167 | UNSIGNED) |
10168 | : build_int_cstu (type: stept, lup_mul)); |
10169 | } |
10170 | tree peel_mul = NULL_TREE; |
10171 | gimple_seq init_stmts = NULL; |
10172 | if (LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo)) |
10173 | { |
10174 | if (SCALAR_FLOAT_TYPE_P (stept)) |
10175 | peel_mul = gimple_build (seq: &init_stmts, code: FLOAT_EXPR, type: stept, |
10176 | LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo)); |
10177 | else |
10178 | peel_mul = gimple_convert (seq: &init_stmts, type: stept, |
10179 | LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo)); |
10180 | peel_mul = gimple_build_vector_from_val (seq: &init_stmts, |
10181 | type: step_vectype, op: peel_mul); |
10182 | } |
10183 | unsigned ivn; |
10184 | auto_vec<tree> vec_steps; |
10185 | for (ivn = 0; ivn < nivs; ++ivn) |
10186 | { |
10187 | tree_vector_builder step_elts (step_vectype, const_nunits, 1); |
10188 | tree_vector_builder init_elts (vectype, const_nunits, 1); |
10189 | tree_vector_builder mul_elts (step_vectype, const_nunits, 1); |
10190 | for (unsigned eltn = 0; eltn < const_nunits; ++eltn) |
10191 | { |
10192 | /* The scalar steps of the IVs. */ |
10193 | tree elt = steps[(ivn*const_nunits + eltn) % group_size]; |
10194 | elt = gimple_convert (seq: &init_stmts, TREE_TYPE (step_vectype), op: elt); |
10195 | step_elts.quick_push (obj: elt); |
10196 | if (!init_node) |
10197 | { |
10198 | /* The scalar inits of the IVs if not vectorized. */ |
10199 | elt = inits[(ivn*const_nunits + eltn) % group_size]; |
10200 | if (!useless_type_conversion_p (TREE_TYPE (vectype), |
10201 | TREE_TYPE (elt))) |
10202 | elt = gimple_build (seq: &init_stmts, code: VIEW_CONVERT_EXPR, |
10203 | TREE_TYPE (vectype), ops: elt); |
10204 | init_elts.quick_push (obj: elt); |
10205 | } |
10206 | /* The number of steps to add to the initial values. */ |
10207 | unsigned mul_elt = (ivn*const_nunits + eltn) / group_size; |
10208 | mul_elts.quick_push (SCALAR_FLOAT_TYPE_P (stept) |
10209 | ? build_real_from_wide (stept, |
10210 | mul_elt, UNSIGNED) |
10211 | : build_int_cstu (type: stept, mul_elt)); |
10212 | } |
10213 | vec_step = gimple_build_vector (seq: &init_stmts, builder: &step_elts); |
10214 | vec_steps.safe_push (obj: vec_step); |
10215 | tree step_mul = gimple_build_vector (seq: &init_stmts, builder: &mul_elts); |
10216 | if (peel_mul) |
10217 | step_mul = gimple_build (seq: &init_stmts, code: PLUS_EXPR, type: step_vectype, |
10218 | ops: step_mul, ops: peel_mul); |
10219 | if (!init_node) |
10220 | vec_init = gimple_build_vector (seq: &init_stmts, builder: &init_elts); |
10221 | |
10222 | /* Create the induction-phi that defines the induction-operand. */ |
10223 | vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, |
10224 | "vec_iv_" ); |
10225 | induction_phi = create_phi_node (vec_dest, iv_loop->header); |
10226 | induc_def = PHI_RESULT (induction_phi); |
10227 | |
10228 | /* Create the iv update inside the loop */ |
10229 | tree up = vec_step; |
10230 | if (lupdate_mul) |
10231 | up = gimple_build (seq: &init_stmts, code: MULT_EXPR, type: step_vectype, |
10232 | ops: vec_step, ops: lupdate_mul); |
10233 | gimple_seq stmts = NULL; |
10234 | vec_def = gimple_convert (seq: &stmts, type: step_vectype, op: induc_def); |
10235 | vec_def = gimple_build (seq: &stmts, |
10236 | code: PLUS_EXPR, type: step_vectype, ops: vec_def, ops: up); |
10237 | vec_def = gimple_convert (seq: &stmts, type: vectype, op: vec_def); |
10238 | gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); |
10239 | add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop), |
10240 | UNKNOWN_LOCATION); |
10241 | |
10242 | if (init_node) |
10243 | vec_init = vect_get_slp_vect_def (init_node, ivn); |
10244 | if (!nested_in_vect_loop |
10245 | && !integer_zerop (step_mul)) |
10246 | { |
10247 | vec_def = gimple_convert (seq: &init_stmts, type: step_vectype, op: vec_init); |
10248 | up = gimple_build (seq: &init_stmts, code: MULT_EXPR, type: step_vectype, |
10249 | ops: vec_step, ops: step_mul); |
10250 | vec_def = gimple_build (seq: &init_stmts, code: PLUS_EXPR, type: step_vectype, |
10251 | ops: vec_def, ops: up); |
10252 | vec_init = gimple_convert (seq: &init_stmts, type: vectype, op: vec_def); |
10253 | } |
10254 | |
10255 | /* Set the arguments of the phi node: */ |
10256 | add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION); |
10257 | |
10258 | slp_node->push_vec_def (def: induction_phi); |
10259 | } |
10260 | if (!nested_in_vect_loop) |
10261 | { |
10262 | /* Fill up to the number of vectors we need for the whole group. */ |
10263 | nivs = least_common_multiple (group_size, |
10264 | const_nunits) / const_nunits; |
10265 | vec_steps.reserve (nelems: nivs-ivn); |
10266 | for (; ivn < nivs; ++ivn) |
10267 | { |
10268 | slp_node->push_vec_def (SLP_TREE_VEC_DEFS (slp_node)[0]); |
10269 | vec_steps.quick_push (obj: vec_steps[0]); |
10270 | } |
10271 | } |
10272 | |
10273 | /* Re-use IVs when we can. We are generating further vector |
10274 | stmts by adding VF' * stride to the IVs generated above. */ |
10275 | if (ivn < nvects) |
10276 | { |
10277 | unsigned vfp |
10278 | = least_common_multiple (group_size, const_nunits) / group_size; |
10279 | tree lupdate_mul |
10280 | = build_vector_from_val (step_vectype, |
10281 | SCALAR_FLOAT_TYPE_P (stept) |
10282 | ? build_real_from_wide (stept, |
10283 | vfp, UNSIGNED) |
10284 | : build_int_cstu (type: stept, vfp)); |
10285 | for (; ivn < nvects; ++ivn) |
10286 | { |
10287 | gimple *iv |
10288 | = SSA_NAME_DEF_STMT (SLP_TREE_VEC_DEFS (slp_node)[ivn - nivs]); |
10289 | tree def = gimple_get_lhs (iv); |
10290 | if (ivn < 2*nivs) |
10291 | vec_steps[ivn - nivs] |
10292 | = gimple_build (seq: &init_stmts, code: MULT_EXPR, type: step_vectype, |
10293 | ops: vec_steps[ivn - nivs], ops: lupdate_mul); |
10294 | gimple_seq stmts = NULL; |
10295 | def = gimple_convert (seq: &stmts, type: step_vectype, op: def); |
10296 | def = gimple_build (seq: &stmts, code: PLUS_EXPR, type: step_vectype, |
10297 | ops: def, ops: vec_steps[ivn % nivs]); |
10298 | def = gimple_convert (seq: &stmts, type: vectype, op: def); |
10299 | if (gimple_code (g: iv) == GIMPLE_PHI) |
10300 | gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); |
10301 | else |
10302 | { |
10303 | gimple_stmt_iterator tgsi = gsi_for_stmt (iv); |
10304 | gsi_insert_seq_after (&tgsi, stmts, GSI_CONTINUE_LINKING); |
10305 | } |
10306 | slp_node->push_vec_def (def); |
10307 | } |
10308 | } |
10309 | |
10310 | new_bb = gsi_insert_seq_on_edge_immediate (pe, init_stmts); |
10311 | gcc_assert (!new_bb); |
10312 | |
10313 | return true; |
10314 | } |
10315 | |
10316 | init_expr = vect_phi_initial_value (phi); |
10317 | |
10318 | gimple_seq stmts = NULL; |
10319 | if (!nested_in_vect_loop) |
10320 | { |
10321 | /* Convert the initial value to the IV update type. */ |
10322 | tree new_type = TREE_TYPE (step_expr); |
10323 | init_expr = gimple_convert (seq: &stmts, type: new_type, op: init_expr); |
10324 | |
10325 | /* If we are using the loop mask to "peel" for alignment then we need |
10326 | to adjust the start value here. */ |
10327 | tree skip_niters = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); |
10328 | if (skip_niters != NULL_TREE) |
10329 | { |
10330 | if (FLOAT_TYPE_P (vectype)) |
10331 | skip_niters = gimple_build (seq: &stmts, code: FLOAT_EXPR, type: new_type, |
10332 | ops: skip_niters); |
10333 | else |
10334 | skip_niters = gimple_convert (seq: &stmts, type: new_type, op: skip_niters); |
10335 | tree skip_step = gimple_build (seq: &stmts, code: MULT_EXPR, type: new_type, |
10336 | ops: skip_niters, ops: step_expr); |
10337 | init_expr = gimple_build (seq: &stmts, code: MINUS_EXPR, type: new_type, |
10338 | ops: init_expr, ops: skip_step); |
10339 | } |
10340 | } |
10341 | |
10342 | if (stmts) |
10343 | { |
10344 | new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); |
10345 | gcc_assert (!new_bb); |
10346 | } |
10347 | |
10348 | /* Create the vector that holds the initial_value of the induction. */ |
10349 | if (nested_in_vect_loop) |
10350 | { |
10351 | /* iv_loop is nested in the loop to be vectorized. init_expr had already |
10352 | been created during vectorization of previous stmts. We obtain it |
10353 | from the STMT_VINFO_VEC_STMT of the defining stmt. */ |
10354 | auto_vec<tree> vec_inits; |
10355 | vect_get_vec_defs_for_operand (vinfo: loop_vinfo, stmt_info, 1, |
10356 | op: init_expr, &vec_inits); |
10357 | vec_init = vec_inits[0]; |
10358 | /* If the initial value is not of proper type, convert it. */ |
10359 | if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init))) |
10360 | { |
10361 | new_stmt |
10362 | = gimple_build_assign (vect_get_new_ssa_name (vectype, |
10363 | vect_simple_var, |
10364 | "vec_iv_" ), |
10365 | VIEW_CONVERT_EXPR, |
10366 | build1 (VIEW_CONVERT_EXPR, vectype, |
10367 | vec_init)); |
10368 | vec_init = gimple_assign_lhs (gs: new_stmt); |
10369 | new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop), |
10370 | new_stmt); |
10371 | gcc_assert (!new_bb); |
10372 | } |
10373 | } |
10374 | else |
10375 | { |
10376 | /* iv_loop is the loop to be vectorized. Create: |
10377 | vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */ |
10378 | stmts = NULL; |
10379 | new_name = gimple_convert (seq: &stmts, TREE_TYPE (step_expr), op: init_expr); |
10380 | |
10381 | unsigned HOST_WIDE_INT const_nunits; |
10382 | if (nunits.is_constant (const_value: &const_nunits)) |
10383 | { |
10384 | tree_vector_builder elts (step_vectype, const_nunits, 1); |
10385 | elts.quick_push (obj: new_name); |
10386 | for (i = 1; i < const_nunits; i++) |
10387 | { |
10388 | /* Create: new_name_i = new_name + step_expr */ |
10389 | new_name = gimple_build (seq: &stmts, code: PLUS_EXPR, TREE_TYPE (new_name), |
10390 | ops: new_name, ops: step_expr); |
10391 | elts.quick_push (obj: new_name); |
10392 | } |
10393 | /* Create a vector from [new_name_0, new_name_1, ..., |
10394 | new_name_nunits-1] */ |
10395 | vec_init = gimple_build_vector (seq: &stmts, builder: &elts); |
10396 | } |
10397 | else if (INTEGRAL_TYPE_P (TREE_TYPE (step_expr))) |
10398 | /* Build the initial value directly from a VEC_SERIES_EXPR. */ |
10399 | vec_init = gimple_build (seq: &stmts, code: VEC_SERIES_EXPR, type: step_vectype, |
10400 | ops: new_name, ops: step_expr); |
10401 | else |
10402 | { |
10403 | /* Build: |
10404 | [base, base, base, ...] |
10405 | + (vectype) [0, 1, 2, ...] * [step, step, step, ...]. */ |
10406 | gcc_assert (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))); |
10407 | gcc_assert (flag_associative_math); |
10408 | tree index = build_index_vector (step_vectype, 0, 1); |
10409 | tree base_vec = gimple_build_vector_from_val (seq: &stmts, type: step_vectype, |
10410 | op: new_name); |
10411 | tree step_vec = gimple_build_vector_from_val (seq: &stmts, type: step_vectype, |
10412 | op: step_expr); |
10413 | vec_init = gimple_build (seq: &stmts, code: FLOAT_EXPR, type: step_vectype, ops: index); |
10414 | vec_init = gimple_build (seq: &stmts, code: MULT_EXPR, type: step_vectype, |
10415 | ops: vec_init, ops: step_vec); |
10416 | vec_init = gimple_build (seq: &stmts, code: PLUS_EXPR, type: step_vectype, |
10417 | ops: vec_init, ops: base_vec); |
10418 | } |
10419 | vec_init = gimple_convert (seq: &stmts, type: vectype, op: vec_init); |
10420 | |
10421 | if (stmts) |
10422 | { |
10423 | new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); |
10424 | gcc_assert (!new_bb); |
10425 | } |
10426 | } |
10427 | |
10428 | |
10429 | /* Create the vector that holds the step of the induction. */ |
10430 | gimple_stmt_iterator *step_iv_si = NULL; |
10431 | if (nested_in_vect_loop) |
10432 | /* iv_loop is nested in the loop to be vectorized. Generate: |
10433 | vec_step = [S, S, S, S] */ |
10434 | new_name = step_expr; |
10435 | else if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)) |
10436 | { |
      /* When we're using loop_len produced by SELECT_VL, the non-final
10438 | iterations are not always processing VF elements. So vectorize |
10439 | induction variable instead of |
10440 | |
10441 | _21 = vect_vec_iv_.6_22 + { VF, ... }; |
10442 | |
10443 | We should generate: |
10444 | |
10445 | _35 = .SELECT_VL (ivtmp_33, VF); |
10446 | vect_cst__22 = [vec_duplicate_expr] _35; |
10447 | _21 = vect_vec_iv_.6_22 + vect_cst__22; */ |
10448 | gcc_assert (!slp_node); |
10449 | gimple_seq seq = NULL; |
10450 | vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo); |
10451 | tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0); |
10452 | expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr), |
10453 | unshare_expr (len)), |
10454 | &seq, true, NULL_TREE); |
10455 | new_name = gimple_build (seq: &seq, code: MULT_EXPR, TREE_TYPE (step_expr), ops: expr, |
10456 | ops: step_expr); |
10457 | gsi_insert_seq_before (&si, seq, GSI_SAME_STMT); |
10458 | step_iv_si = &si; |
10459 | } |
10460 | else |
10461 | { |
10462 | /* iv_loop is the loop to be vectorized. Generate: |
10463 | vec_step = [VF*S, VF*S, VF*S, VF*S] */ |
10464 | gimple_seq seq = NULL; |
10465 | if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))) |
10466 | { |
10467 | expr = build_int_cst (integer_type_node, vf); |
10468 | expr = gimple_build (seq: &seq, code: FLOAT_EXPR, TREE_TYPE (step_expr), ops: expr); |
10469 | } |
10470 | else |
10471 | expr = build_int_cst (TREE_TYPE (step_expr), vf); |
10472 | new_name = gimple_build (seq: &seq, code: MULT_EXPR, TREE_TYPE (step_expr), |
10473 | ops: expr, ops: step_expr); |
10474 | if (seq) |
10475 | { |
10476 | new_bb = gsi_insert_seq_on_edge_immediate (pe, seq); |
10477 | gcc_assert (!new_bb); |
10478 | } |
10479 | } |
10480 | |
10481 | t = unshare_expr (new_name); |
10482 | gcc_assert (CONSTANT_CLASS_P (new_name) |
10483 | || TREE_CODE (new_name) == SSA_NAME); |
10484 | new_vec = build_vector_from_val (step_vectype, t); |
10485 | vec_step = vect_init_vector (loop_vinfo, stmt_info, |
10486 | new_vec, step_vectype, step_iv_si); |
10487 | |
10488 | |
10489 | /* Create the following def-use cycle: |
10490 | loop prolog: |
10491 | vec_init = ... |
10492 | vec_step = ... |
10493 | loop: |
10494 | vec_iv = PHI <vec_init, vec_loop> |
10495 | ... |
10496 | STMT |
10497 | ... |
10498 | vec_loop = vec_iv + vec_step; */ |
10499 | |
10500 | /* Create the induction-phi that defines the induction-operand. */ |
10501 | vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_" ); |
10502 | induction_phi = create_phi_node (vec_dest, iv_loop->header); |
10503 | induc_def = PHI_RESULT (induction_phi); |
10504 | |
10505 | /* Create the iv update inside the loop */ |
10506 | stmts = NULL; |
10507 | vec_def = gimple_convert (seq: &stmts, type: step_vectype, op: induc_def); |
10508 | vec_def = gimple_build (seq: &stmts, code: PLUS_EXPR, type: step_vectype, ops: vec_def, ops: vec_step); |
10509 | vec_def = gimple_convert (seq: &stmts, type: vectype, op: vec_def); |
10510 | gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); |
10511 | new_stmt = SSA_NAME_DEF_STMT (vec_def); |
10512 | |
10513 | /* Set the arguments of the phi node: */ |
10514 | add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION); |
10515 | add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop), |
10516 | UNKNOWN_LOCATION); |
10517 | |
10518 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: induction_phi); |
10519 | *vec_stmt = induction_phi; |
10520 | |
10521 | /* In case that vectorization factor (VF) is bigger than the number |
10522 | of elements that we can fit in a vectype (nunits), we have to generate |
10523 | more than one vector stmt - i.e - we need to "unroll" the |
10524 | vector stmt by a factor VF/nunits. For more details see documentation |
10525 | in vectorizable_operation. */ |
10526 | |
10527 | if (ncopies > 1) |
10528 | { |
10529 | gimple_seq seq = NULL; |
10530 | /* FORNOW. This restriction should be relaxed. */ |
10531 | gcc_assert (!nested_in_vect_loop); |
10532 | /* We expect LOOP_VINFO_USING_SELECT_VL_P to be false if ncopies > 1. */ |
10533 | gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)); |
10534 | |
10535 | /* Create the vector that holds the step of the induction. */ |
10536 | if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))) |
10537 | { |
10538 | expr = build_int_cst (integer_type_node, nunits); |
10539 | expr = gimple_build (seq: &seq, code: FLOAT_EXPR, TREE_TYPE (step_expr), ops: expr); |
10540 | } |
10541 | else |
10542 | expr = build_int_cst (TREE_TYPE (step_expr), nunits); |
10543 | new_name = gimple_build (seq: &seq, code: MULT_EXPR, TREE_TYPE (step_expr), |
10544 | ops: expr, ops: step_expr); |
10545 | if (seq) |
10546 | { |
10547 | new_bb = gsi_insert_seq_on_edge_immediate (pe, seq); |
10548 | gcc_assert (!new_bb); |
10549 | } |
10550 | |
10551 | t = unshare_expr (new_name); |
10552 | gcc_assert (CONSTANT_CLASS_P (new_name) |
10553 | || TREE_CODE (new_name) == SSA_NAME); |
10554 | new_vec = build_vector_from_val (step_vectype, t); |
10555 | vec_step = vect_init_vector (loop_vinfo, stmt_info, |
10556 | new_vec, step_vectype, NULL); |
10557 | |
10558 | vec_def = induc_def; |
10559 | for (i = 1; i < ncopies + 1; i++) |
10560 | { |
10561 | /* vec_i = vec_prev + vec_step */ |
10562 | gimple_seq stmts = NULL; |
10563 | vec_def = gimple_convert (seq: &stmts, type: step_vectype, op: vec_def); |
10564 | vec_def = gimple_build (seq: &stmts, |
10565 | code: PLUS_EXPR, type: step_vectype, ops: vec_def, ops: vec_step); |
10566 | vec_def = gimple_convert (seq: &stmts, type: vectype, op: vec_def); |
10567 | |
10568 | gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); |
10569 | if (i < ncopies) |
10570 | { |
10571 | new_stmt = SSA_NAME_DEF_STMT (vec_def); |
10572 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt); |
10573 | } |
10574 | else |
10575 | { |
10576 | /* vec_1 = vec_iv + (VF/n * S) |
10577 | vec_2 = vec_1 + (VF/n * S) |
10578 | ... |
10579 | vec_n = vec_prev + (VF/n * S) = vec_iv + VF * S = vec_loop |
10580 | |
10581 | vec_n is used as vec_loop to save the large step register and |
10582 | related operations. */ |
10583 | add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop), |
10584 | UNKNOWN_LOCATION); |
10585 | } |
10586 | } |
10587 | } |
10588 | |
10589 | if (dump_enabled_p ()) |
10590 | dump_printf_loc (MSG_NOTE, vect_location, |
10591 | "transform induction: created def-use cycle: %G%G" , |
10592 | (gimple *) induction_phi, SSA_NAME_DEF_STMT (vec_def)); |
10593 | |
10594 | return true; |
10595 | } |
10596 | |
/* Function vectorizable_live_operation_1.

   Helper function for vectorizable_live_operation.  Generate the code in
   EXIT_BB that extracts the scalar live lane from the vectorized value
   VEC_LHS and converts it to LHS_TYPE.  The lane is selected by BITSTART
   (a bit offset into the vector; lane width is BITSIZE), except for
   fully-masked / length-controlled loops where the last active lane is
   extracted via EXTRACT_LAST resp. VEC_EXTRACT.  Returns the extracted
   value and sets *EXIT_GSI to an iterator in EXIT_BB placed after the
   statements this function inserted.  */

static tree
vectorizable_live_operation_1 (loop_vec_info loop_vinfo,
			       stmt_vec_info stmt_info, basic_block exit_bb,
			       tree vectype, int ncopies, slp_tree slp_node,
			       tree bitsize, tree bitstart, tree vec_lhs,
			       tree lhs_type, gimple_stmt_iterator *exit_gsi)
{
  /* The exit block has a single predecessor unless we are vectorizing a
     loop with early breaks (which creates merge blocks).  */
  gcc_assert (single_pred_p (exit_bb) || LOOP_VINFO_EARLY_BREAKS (loop_vinfo));

  /* Make VEC_LHS available in EXIT_BB in loop-closed SSA form by
     materializing a PHI for it, fed by VEC_LHS on every incoming edge.  */
  tree vec_lhs_phi = copy_ssa_name (var: vec_lhs);
  gimple *phi = create_phi_node (vec_lhs_phi, exit_bb);
  for (unsigned i = 0; i < gimple_phi_num_args (gs: phi); i++)
    SET_PHI_ARG_DEF (phi, i, vec_lhs);

  gimple_seq stmts = NULL;
  tree new_tree;

  /* If bitstart is 0 then we can use a BIT_FIELD_REF  */
  if (integer_zerop (bitstart))
    {
      tree scalar_res = gimple_build (seq: &stmts, code: BIT_FIELD_REF, TREE_TYPE (vectype),
				      ops: vec_lhs_phi, ops: bitsize, ops: bitstart);

      /* Convert the extracted vector element to the scalar type.  */
      new_tree = gimple_convert (seq: &stmts, type: lhs_type, op: scalar_res);
    }
  else if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
    {
      /* Emit:

	 SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN + BIAS - 1>

	 where VEC_LHS is the vectorized live-out result, LEN is the loop
	 length of the final iteration and BIAS is the target's partial
	 load/store bias.  */
      gcc_assert (ncopies == 1 && !slp_node);
      gimple_seq tem = NULL;
      gimple_stmt_iterator gsi = gsi_last (seq&: tem);
      tree len = vect_get_loop_len (loop_vinfo, &gsi,
				    &LOOP_VINFO_LENS (loop_vinfo),
				    1, vectype, 0, 0);

      /* BIAS - 1.  */
      signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
      tree bias_minus_one
	= int_const_binop (MINUS_EXPR,
			   build_int_cst (TREE_TYPE (len), biasval),
			   build_one_cst (TREE_TYPE (len)));

      /* LAST_INDEX = LEN + (BIAS - 1).  */
      tree last_index = gimple_build (seq: &stmts, code: PLUS_EXPR, TREE_TYPE (len),
				      ops: len, ops: bias_minus_one);

      /* SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN + BIAS - 1>.  */
      tree scalar_res
	= gimple_build (seq: &stmts, fn: CFN_VEC_EXTRACT, TREE_TYPE (vectype),
			args: vec_lhs_phi, args: last_index);

      /* Convert the extracted vector element to the scalar type.  */
      new_tree = gimple_convert (seq: &stmts, type: lhs_type, op: scalar_res);
    }
  else if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
    {
      /* Emit:

	 SCALAR_RES = EXTRACT_LAST <VEC_LHS, MASK>

	 where VEC_LHS is the vectorized live-out result and MASK is
	 the loop mask for the final iteration.  */
      gcc_assert (!slp_node);
      tree scalar_type = TREE_TYPE (STMT_VINFO_VECTYPE (stmt_info));
      gimple_seq tem = NULL;
      gimple_stmt_iterator gsi = gsi_last (seq&: tem);
      tree mask = vect_get_loop_mask (loop_vinfo, &gsi,
				      &LOOP_VINFO_MASKS (loop_vinfo),
				      1, vectype, 0);
      tree scalar_res;
      gimple_seq_add_seq (&stmts, tem);

      scalar_res = gimple_build (seq: &stmts, fn: CFN_EXTRACT_LAST, type: scalar_type,
				 args: mask, args: vec_lhs_phi);

      /* Convert the extracted vector element to the scalar type.  */
      new_tree = gimple_convert (seq: &stmts, type: lhs_type, op: scalar_res);
    }
  else
    {
      /* Fixed non-zero lane: extract with a BIT_FIELD_REF.  For vector
	 booleans extract through an unsigned integer type of the
	 element's bit width instead of the boolean element type.  */
      tree bftype = TREE_TYPE (vectype);
      if (VECTOR_BOOLEAN_TYPE_P (vectype))
	bftype = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 1);
      new_tree = build3 (BIT_FIELD_REF, bftype, vec_lhs_phi, bitsize, bitstart);
      new_tree = force_gimple_operand (fold_convert (lhs_type, new_tree),
				       &stmts, true, NULL_TREE);
    }

  /* Insert the extraction code right after the labels of EXIT_BB and
     hand the insertion point back to the caller.  */
  *exit_gsi = gsi_after_labels (bb: exit_bb);
  if (stmts)
    gsi_insert_seq_before (exit_gsi, stmts, GSI_SAME_STMT);

  return new_tree;
}
10701 | |
/* Function vectorizable_live_operation.

   STMT_INFO computes a value that is used outside the loop.  Check if
   it can be supported.  When VEC_STMT_P, also perform the transform:
   extract the live lane from the vectorized value and redirect the
   out-of-loop uses to it.  */

bool
vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
			     slp_tree slp_node, slp_instance slp_node_instance,
			     int slp_index, bool vec_stmt_p,
			     stmt_vector_for_cost *cost_vec)
{
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo);
  imm_use_iterator imm_iter;
  tree lhs, lhs_type, bitsize;
  tree vectype = (slp_node
		  ? SLP_TREE_VECTYPE (slp_node)
		  : STMT_VINFO_VECTYPE (stmt_info));
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (node: vectype);
  int ncopies;
  gimple *use_stmt;
  use_operand_p use_p;
  auto_vec<tree> vec_oprnds;
  int vec_entry = 0;
  poly_uint64 vec_index = 0;

  gcc_assert (STMT_VINFO_LIVE_P (stmt_info)
	      || LOOP_VINFO_EARLY_BREAKS (loop_vinfo));

  /* If a stmt of a reduction is live, vectorize it via
     vect_create_epilog_for_reduction.  vectorizable_reduction assessed
     validity so just trigger the transform here.  */
  if (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)))
    {
      if (!vec_stmt_p)
	return true;
      /* For SLP reductions we vectorize the epilogue for all involved stmts
	 together.  */
      if (slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info) && slp_index != 0)
	return true;
      stmt_vec_info reduc_info = info_for_reduction (vinfo: loop_vinfo, stmt_info);
      gcc_assert (reduc_info->is_reduc_info);
      /* FOLD_LEFT and EXTRACT_LAST reductions already produce the scalar
	 result inside the loop; no epilogue is needed here.  */
      if (STMT_VINFO_REDUC_TYPE (reduc_info) == FOLD_LEFT_REDUCTION
	  || STMT_VINFO_REDUC_TYPE (reduc_info) == EXTRACT_LAST_REDUCTION)
	return true;

      if (!LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
	  || !LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
	vect_create_epilog_for_reduction (loop_vinfo, stmt_info, slp_node,
					  slp_node_instance,
					  LOOP_VINFO_IV_EXIT (loop_vinfo));

      /* If early break we only have to materialize the reduction on the merge
	 block, but we have to find an alternate exit first.  */
      if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
	{
	  slp_tree phis_node = slp_node ? slp_node_instance->reduc_phis : NULL;
	  /* Use the first exit that is not the IV (main) exit.  */
	  for (auto exit : get_loop_exit_edges (LOOP_VINFO_LOOP (loop_vinfo)))
	    if (exit != LOOP_VINFO_IV_EXIT (loop_vinfo))
	      {
		vect_create_epilog_for_reduction (loop_vinfo, stmt_info: reduc_info,
						  slp_node: phis_node, slp_node_instance,
						  loop_exit: exit);
		break;
	      }
	  if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
	    vect_create_epilog_for_reduction (loop_vinfo, stmt_info: reduc_info,
					      slp_node: phis_node, slp_node_instance,
					      LOOP_VINFO_IV_EXIT (loop_vinfo));
	}

      return true;
    }

  /* If STMT is not relevant and it is a simple assignment and its inputs are
     invariant then it can remain in place, unvectorized.  The original last
     scalar value that it computes will be used.  */
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    {
      gcc_assert (is_simple_and_all_uses_invariant (stmt_info, loop_vinfo));
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "statement is simple and uses invariant. Leaving in "
			 "place.\n" );
      return true;
    }

  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  if (slp_node)
    {
      gcc_assert (slp_index >= 0);

      /* Get the last occurrence of the scalar index from the concatenation of
	 all the slp vectors. Calculate which slp vector it is and the index
	 within.  */
      int num_scalar = SLP_TREE_LANES (slp_node);
      int num_vec = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
      poly_uint64 pos = (num_vec * nunits) - num_scalar + slp_index;

      /* Calculate which vector contains the result, and which lane of
	 that vector we need.  */
      if (!can_div_trunc_p (a: pos, b: nunits, quotient: &vec_entry, remainder: &vec_index))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Cannot determine which vector holds the"
			     " final result.\n" );
	  return false;
	}
    }

  if (!vec_stmt_p)
    {
      /* No transformation required.  Analysis only: decide whether the
	 live operation is compatible with partial vectors (recording the
	 required mask or length if so) and record the cost.  */
      if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
	{
	  if (slp_node)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "can't operate on partial vectors "
				 "because an SLP statement is live after "
				 "the loop.\n" );
	      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
	    }
	  else if (ncopies > 1)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "can't operate on partial vectors "
				 "because ncopies is greater than 1.\n" );
	      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
	    }
	  else
	    {
	      gcc_assert (ncopies == 1 && !slp_node);
	      /* Prefer EXTRACT_LAST on a mask; otherwise fall back to a
		 variable-index VEC_EXTRACT using the loop length.  */
	      if (direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype,
						  OPTIMIZE_FOR_SPEED))
		vect_record_loop_mask (loop_vinfo,
				       &LOOP_VINFO_MASKS (loop_vinfo),
				       1, vectype, NULL);
	      else if (can_vec_extract_var_idx_p (
		TYPE_MODE (vectype), TYPE_MODE (TREE_TYPE (vectype))))
		vect_record_loop_len (loop_vinfo,
				      &LOOP_VINFO_LENS (loop_vinfo),
				      1, vectype, 1);
	      else
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (
		      MSG_MISSED_OPTIMIZATION, vect_location,
		      "can't operate on partial vectors "
		      "because the target doesn't support extract "
		      "last reduction.\n" );
		  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
		}
	    }
	}
      /* ???  Enable for loop costing as well.  */
      if (!loop_vinfo)
	record_stmt_cost (cost_vec, 1, vec_to_scalar, stmt_info, NULL_TREE,
			  0, vect_epilogue);
      return true;
    }

  /* Transform phase below.  */

  /* Use the lhs of the original scalar statement.  */
  gimple *stmt = vect_orig_stmt (stmt_info)->stmt;
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "extracting lane for live "
		     "stmt %G" , stmt);

  lhs = gimple_get_lhs (stmt);
  lhs_type = TREE_TYPE (lhs);

  bitsize = vector_element_bits_tree (vectype);

  /* Get the vectorized lhs of STMT and the lane to use (counted in bits).  */
  tree vec_lhs, vec_lhs0, bitstart;
  gimple *vec_stmt, *vec_stmt0;
  if (slp_node)
    {
      gcc_assert (!loop_vinfo
		  || (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
		      && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)));

      /* Get the correct slp vectorized stmt.  */
      vec_lhs = SLP_TREE_VEC_DEFS (slp_node)[vec_entry];
      vec_stmt = SSA_NAME_DEF_STMT (vec_lhs);

      /* In case we need to early break vectorize also get the first stmt.  */
      vec_lhs0 = SLP_TREE_VEC_DEFS (slp_node)[0];
      vec_stmt0 = SSA_NAME_DEF_STMT (vec_lhs0);

      /* Get entry to use.  */
      bitstart = bitsize_int (vec_index);
      bitstart = int_const_binop (MULT_EXPR, bitsize, bitstart);
    }
  else
    {
      /* For multiple copies, get the last copy.  */
      vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info).last ();
      vec_lhs = gimple_get_lhs (vec_stmt);

      /* In case we need to early break vectorize also get the first stmt.  */
      vec_stmt0 = STMT_VINFO_VEC_STMTS (stmt_info)[0];
      vec_lhs0 = gimple_get_lhs (vec_stmt0);

      /* Get the last lane in the vector.  */
      bitstart = int_const_binop (MULT_EXPR, bitsize, bitsize_int (nunits - 1));
    }

  if (loop_vinfo)
    {
      /* Ensure the VEC_LHS for lane extraction stmts satisfy loop-closed PHI
	 requirement, insert one phi node for it.  It looks like:
	   loop;
	 BB:
	   # lhs' = PHI <lhs>
	 ==>
	   loop;
	 BB:
	   # vec_lhs' = PHI <vec_lhs>
	   new_tree = lane_extract <vec_lhs', ...>;
	   lhs' = new_tree;  */

      class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      /* Check if we have a loop where the chosen exit is not the main exit,
	 in these cases for an early break we restart the iteration the vector code
	 did.  For the live values we want the value at the start of the iteration
	 rather than at the end.  */
      edge main_e = LOOP_VINFO_IV_EXIT (loop_vinfo);
      bool all_exits_as_early_p = LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo);
      /* Rewrite each out-of-loop use of LHS (an LC PHI argument) to the
	 extracted lane on the corresponding exit edge; a single PHI is
	 expected per use stmt, hence the break after handling it.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
	if (!is_gimple_debug (gs: use_stmt)
	    && !flow_bb_inside_loop_p (loop, gimple_bb (g: use_stmt)))
	  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
	    {
	      edge e = gimple_phi_arg_edge (phi: as_a <gphi *> (p: use_stmt),
					    i: phi_arg_index_from_use (use: use_p));
	      gcc_assert (loop_exit_edge_p (loop, e));
	      bool main_exit_edge = e == main_e;
	      tree tmp_vec_lhs = vec_lhs;
	      tree tmp_bitstart = bitstart;

	      /* For early exit where the exit is not in the BB that leads
		 to the latch then we're restarting the iteration in the
		 scalar loop.  So get the first live value.  */
	      if ((all_exits_as_early_p || !main_exit_edge)
		  && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
		{
		  tmp_vec_lhs = vec_lhs0;
		  tmp_bitstart = build_zero_cst (TREE_TYPE (bitstart));
		}

	      gimple_stmt_iterator exit_gsi;
	      tree new_tree
		= vectorizable_live_operation_1 (loop_vinfo, stmt_info,
						 exit_bb: e->dest, vectype, ncopies,
						 slp_node, bitsize,
						 bitstart: tmp_bitstart, vec_lhs: tmp_vec_lhs,
						 lhs_type, exit_gsi: &exit_gsi);

	      /* Replace the LC PHI with a plain assignment from the
		 extracted lane.  */
	      auto gsi = gsi_for_stmt (use_stmt);
	      tree lhs_phi = gimple_phi_result (gs: use_stmt);
	      remove_phi_node (&gsi, false);
	      gimple *copy = gimple_build_assign (lhs_phi, new_tree);
	      gsi_insert_before (&exit_gsi, copy, GSI_SAME_STMT);
	      break;
	    }

      /* There are no further out-of-loop uses of lhs by LC-SSA construction.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
	gcc_assert (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)));
    }
  else
    {
      /* For basic-block vectorization simply insert the lane-extraction.  */
      tree bftype = TREE_TYPE (vectype);
      if (VECTOR_BOOLEAN_TYPE_P (vectype))
	bftype = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 1);
      tree new_tree = build3 (BIT_FIELD_REF, bftype,
			      vec_lhs, bitsize, bitstart);
      gimple_seq stmts = NULL;
      new_tree = force_gimple_operand (fold_convert (lhs_type, new_tree),
				       &stmts, true, NULL_TREE);
      if (TREE_CODE (new_tree) == SSA_NAME
	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
	SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_tree) = 1;
      /* Insert the extraction right after the defining vector stmt (or,
	 for a PHI def, after the labels of its block).  */
      if (is_a <gphi *> (p: vec_stmt))
	{
	  gimple_stmt_iterator si = gsi_after_labels (bb: gimple_bb (g: vec_stmt));
	  gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT);
	}
      else
	{
	  gimple_stmt_iterator si = gsi_for_stmt (vec_stmt);
	  gsi_insert_seq_after (&si, stmts, GSI_SAME_STMT);
	}

      /* Replace use of lhs with newly computed result.  If the use stmt is a
	 single arg PHI, just replace all uses of PHI result.  It's necessary
	 because lcssa PHI defining lhs may be before newly inserted stmt.  */
      use_operand_p use_p;
      stmt_vec_info use_stmt_info;
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
	if (!is_gimple_debug (gs: use_stmt)
	    && (!(use_stmt_info = vinfo->lookup_stmt (use_stmt))
		|| !PURE_SLP_STMT (vect_stmt_to_vectorize (use_stmt_info))))
	  {
	    /* ???  This can happen when the live lane ends up being
	       rooted in a vector construction code-generated by an
	       external SLP node (and code-generation for that already
	       happened).  See gcc.dg/vect/bb-slp-47.c.
	       Doing this is what would happen if that vector CTOR
	       were not code-generated yet so it is not too bad.
	       ???  In fact we'd likely want to avoid this situation
	       in the first place.  */
	    if (TREE_CODE (new_tree) == SSA_NAME
		&& !SSA_NAME_IS_DEFAULT_DEF (new_tree)
		&& gimple_code (g: use_stmt) != GIMPLE_PHI
		&& !vect_stmt_dominates_stmt_p (SSA_NAME_DEF_STMT (new_tree),
						use_stmt))
	      {
		if (dump_enabled_p ())
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Using original scalar computation for "
				   "live lane because use preceeds vector "
				   "def\n" );
		continue;
	      }
	    /* ???  It can also happen that we end up pulling a def into
	       a loop where replacing out-of-loop uses would require
	       a new LC SSA PHI node.  Retain the original scalar in
	       those cases as well.  PR98064.  */
	    if (TREE_CODE (new_tree) == SSA_NAME
		&& !SSA_NAME_IS_DEFAULT_DEF (new_tree)
		&& (gimple_bb (g: use_stmt)->loop_father
		    != gimple_bb (g: vec_stmt)->loop_father)
		&& !flow_loop_nested_p (gimple_bb (g: vec_stmt)->loop_father,
					gimple_bb (g: use_stmt)->loop_father))
	      {
		if (dump_enabled_p ())
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Using original scalar computation for "
				   "live lane because there is an out-of-loop "
				   "definition for it\n" );
		continue;
	      }
	    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
	      SET_USE (use_p, new_tree);
	    update_stmt (s: use_stmt);
	  }
    }

  return true;
}
11061 | |
11062 | /* Kill any debug uses outside LOOP of SSA names defined in STMT_INFO. */ |
11063 | |
11064 | static void |
11065 | vect_loop_kill_debug_uses (class loop *loop, stmt_vec_info stmt_info) |
11066 | { |
11067 | ssa_op_iter op_iter; |
11068 | imm_use_iterator imm_iter; |
11069 | def_operand_p def_p; |
11070 | gimple *ustmt; |
11071 | |
11072 | FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF) |
11073 | { |
11074 | FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p)) |
11075 | { |
11076 | basic_block bb; |
11077 | |
11078 | if (!is_gimple_debug (gs: ustmt)) |
11079 | continue; |
11080 | |
11081 | bb = gimple_bb (g: ustmt); |
11082 | |
11083 | if (!flow_bb_inside_loop_p (loop, bb)) |
11084 | { |
11085 | if (gimple_debug_bind_p (s: ustmt)) |
11086 | { |
11087 | if (dump_enabled_p ()) |
11088 | dump_printf_loc (MSG_NOTE, vect_location, |
11089 | "killing debug use\n" ); |
11090 | |
11091 | gimple_debug_bind_reset_value (dbg: ustmt); |
11092 | update_stmt (s: ustmt); |
11093 | } |
11094 | else |
11095 | gcc_unreachable (); |
11096 | } |
11097 | } |
11098 | } |
11099 | } |
11100 | |
11101 | /* Given loop represented by LOOP_VINFO, return true if computation of |
11102 | LOOP_VINFO_NITERS (= LOOP_VINFO_NITERSM1 + 1) doesn't overflow, false |
11103 | otherwise. */ |
11104 | |
11105 | static bool |
11106 | loop_niters_no_overflow (loop_vec_info loop_vinfo) |
11107 | { |
11108 | /* Constant case. */ |
11109 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) |
11110 | { |
11111 | tree cst_niters = LOOP_VINFO_NITERS (loop_vinfo); |
11112 | tree cst_nitersm1 = LOOP_VINFO_NITERSM1 (loop_vinfo); |
11113 | |
11114 | gcc_assert (TREE_CODE (cst_niters) == INTEGER_CST); |
11115 | gcc_assert (TREE_CODE (cst_nitersm1) == INTEGER_CST); |
11116 | if (wi::to_widest (t: cst_nitersm1) < wi::to_widest (t: cst_niters)) |
11117 | return true; |
11118 | } |
11119 | |
11120 | widest_int max; |
11121 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
11122 | /* Check the upper bound of loop niters. */ |
11123 | if (get_max_loop_iterations (loop, nit: &max)) |
11124 | { |
11125 | tree type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)); |
11126 | signop sgn = TYPE_SIGN (type); |
11127 | widest_int type_max = widest_int::from (x: wi::max_value (type), sgn); |
11128 | if (max < type_max) |
11129 | return true; |
11130 | } |
11131 | return false; |
11132 | } |
11133 | |
11134 | /* Return a mask type with half the number of elements as OLD_TYPE, |
11135 | given that it should have mode NEW_MODE. */ |
11136 | |
11137 | tree |
11138 | vect_halve_mask_nunits (tree old_type, machine_mode new_mode) |
11139 | { |
11140 | poly_uint64 nunits = exact_div (a: TYPE_VECTOR_SUBPARTS (node: old_type), b: 2); |
11141 | return build_truth_vector_type_for_mode (nunits, new_mode); |
11142 | } |
11143 | |
11144 | /* Return a mask type with twice as many elements as OLD_TYPE, |
11145 | given that it should have mode NEW_MODE. */ |
11146 | |
11147 | tree |
11148 | vect_double_mask_nunits (tree old_type, machine_mode new_mode) |
11149 | { |
11150 | poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (node: old_type) * 2; |
11151 | return build_truth_vector_type_for_mode (nunits, new_mode); |
11152 | } |
11153 | |
11154 | /* Record that a fully-masked version of LOOP_VINFO would need MASKS to |
11155 | contain a sequence of NVECTORS masks that each control a vector of type |
11156 | VECTYPE. If SCALAR_MASK is nonnull, the fully-masked loop would AND |
11157 | these vector masks with the vector version of SCALAR_MASK. */ |
11158 | |
11159 | void |
11160 | vect_record_loop_mask (loop_vec_info loop_vinfo, vec_loop_masks *masks, |
11161 | unsigned int nvectors, tree vectype, tree scalar_mask) |
11162 | { |
11163 | gcc_assert (nvectors != 0); |
11164 | |
11165 | if (scalar_mask) |
11166 | { |
11167 | scalar_cond_masked_key cond (scalar_mask, nvectors); |
11168 | loop_vinfo->scalar_cond_masked_set.add (k: cond); |
11169 | } |
11170 | |
11171 | masks->mask_set.add (k: std::make_pair (x&: vectype, y&: nvectors)); |
11172 | } |
11173 | |
11174 | /* Given a complete set of masks MASKS, extract mask number INDEX |
11175 | for an rgroup that operates on NVECTORS vectors of type VECTYPE, |
11176 | where 0 <= INDEX < NVECTORS. Insert any set-up statements before GSI. |
11177 | |
11178 | See the comment above vec_loop_masks for more details about the mask |
11179 | arrangement. */ |
11180 | |
tree
vect_get_loop_mask (loop_vec_info loop_vinfo,
		    gimple_stmt_iterator *gsi, vec_loop_masks *masks,
		    unsigned int nvectors, tree vectype, unsigned int index)
{
  if (LOOP_VINFO_PARTIAL_VECTORS_STYLE (loop_vinfo)
      == vect_partial_vectors_while_ult)
    {
      /* WHILE_ULT style: one rgroup per NVECTORS, indexed NVECTORS - 1.  */
      rgroup_controls *rgm = &(masks->rgc_vec)[nvectors - 1];
      tree mask_type = rgm->type;

      /* Populate the rgroup's mask array, if this is the first time we've
	 used it.  */
      if (rgm->controls.is_empty ())
	{
	  rgm->controls.safe_grow_cleared (len: nvectors, exact: true);
	  for (unsigned int i = 0; i < nvectors; ++i)
	    {
	      tree mask = make_temp_ssa_name (type: mask_type, NULL, name: "loop_mask");
	      /* Provide a dummy definition until the real one is available.  */
	      SSA_NAME_DEF_STMT (mask) = gimple_build_nop ();
	      rgm->controls[i] = mask;
	    }
	}

      tree mask = rgm->controls[index];
      if (maybe_ne (a: TYPE_VECTOR_SUBPARTS (node: mask_type),
		    b: TYPE_VECTOR_SUBPARTS (node: vectype)))
	{
	  /* A loop mask for data type X can be reused for data type Y
	     if X has N times more elements than Y and if Y's elements
	     are N times bigger than X's.  In this case each sequence
	     of N elements in the loop mask will be all-zero or all-one.
	     We can then view-convert the mask so that each sequence of
	     N elements is replaced by a single element.  */
	  gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (mask_type),
				  TYPE_VECTOR_SUBPARTS (vectype)));
	  gimple_seq seq = NULL;
	  mask_type = truth_type_for (vectype);
	  mask = gimple_build (seq: &seq, code: VIEW_CONVERT_EXPR, type: mask_type, ops: mask);
	  if (seq)
	    gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
	}
      return mask;
    }
  else if (LOOP_VINFO_PARTIAL_VECTORS_STYLE (loop_vinfo)
	   == vect_partial_vectors_avx512)
    {
      /* The number of scalars per iteration and the number of vectors are
	 both compile-time constants.  */
      unsigned int nscalars_per_iter
	= exact_div (a: nvectors * TYPE_VECTOR_SUBPARTS (node: vectype),
		     LOOP_VINFO_VECT_FACTOR (loop_vinfo)).to_constant ();

      /* AVX512 style: rgroups are indexed by scalars-per-iteration, not by
	 NVECTORS as in the WHILE_ULT case above.  */
      rgroup_controls *rgm = &masks->rgc_vec[nscalars_per_iter - 1];

      /* The stored nV is dependent on the mask type produced.  */
      gcc_assert (exact_div (nvectors * TYPE_VECTOR_SUBPARTS (vectype),
			     TYPE_VECTOR_SUBPARTS (rgm->type)).to_constant ()
		  == rgm->factor);
      nvectors = rgm->factor;

      /* Populate the rgroup's mask array, if this is the first time we've
	 used it.  */
      if (rgm->controls.is_empty ())
	{
	  rgm->controls.safe_grow_cleared (len: nvectors, exact: true);
	  for (unsigned int i = 0; i < nvectors; ++i)
	    {
	      tree mask = make_temp_ssa_name (type: rgm->type, NULL, name: "loop_mask");
	      /* Provide a dummy definition until the real one is available.  */
	      SSA_NAME_DEF_STMT (mask) = gimple_build_nop ();
	      rgm->controls[i] = mask;
	    }
	}
      /* If the stored mask width already matches VECTYPE we can hand the
	 control out directly without any splitting.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (rgm->type),
		    TYPE_VECTOR_SUBPARTS (vectype)))
	return rgm->controls[index];

      /* Split the vector if needed.  Since we are dealing with integer mode
	 masks with AVX512 we can operate on the integer representation
	 performing the whole vector shifting.  */
      unsigned HOST_WIDE_INT factor;
      bool ok = constant_multiple_p (a: TYPE_VECTOR_SUBPARTS (node: rgm->type),
				     b: TYPE_VECTOR_SUBPARTS (node: vectype), multiple: &factor);
      gcc_assert (ok);
      gcc_assert (GET_MODE_CLASS (TYPE_MODE (rgm->type)) == MODE_INT);
      tree mask_type = truth_type_for (vectype);
      gcc_assert (GET_MODE_CLASS (TYPE_MODE (mask_type)) == MODE_INT);
      /* INDEX selects sub-part VPART of stored control VI.  */
      unsigned vi = index / factor;
      unsigned vpart = index % factor;
      tree vec = rgm->controls[vi];
      gimple_seq seq = NULL;
      vec = gimple_build (seq: &seq, code: VIEW_CONVERT_EXPR,
			  type: lang_hooks.types.type_for_mode
				(TYPE_MODE (rgm->type), 1), ops: vec);
      /* For integer mode masks simply shift the right bits into position.  */
      if (vpart != 0)
	vec = gimple_build (seq: &seq, code: RSHIFT_EXPR, TREE_TYPE (vec), ops: vec,
			    ops: build_int_cst (integer_type_node,
					     (TYPE_VECTOR_SUBPARTS (node: vectype)
					      * vpart)));
      vec = gimple_convert (seq: &seq, type: lang_hooks.types.type_for_mode
				   (TYPE_MODE (mask_type), 1), op: vec);
      vec = gimple_build (seq: &seq, code: VIEW_CONVERT_EXPR, type: mask_type, ops: vec);
      if (seq)
	gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
      return vec;
    }
  else
    gcc_unreachable ();
}
11293 | |
11294 | /* Record that LOOP_VINFO would need LENS to contain a sequence of NVECTORS |
11295 | lengths for controlling an operation on VECTYPE. The operation splits |
11296 | each element of VECTYPE into FACTOR separate subelements, measuring the |
11297 | length as a number of these subelements. */ |
11298 | |
void
vect_record_loop_len (loop_vec_info loop_vinfo, vec_loop_lens *lens,
		      unsigned int nvectors, tree vectype, unsigned int factor)
{
  gcc_assert (nvectors != 0);
  if (lens->length () < nvectors)
    lens->safe_grow_cleared (len: nvectors, exact: true);
  rgroup_controls *rgl = &(*lens)[nvectors - 1];

  /* The number of scalars per iteration, the scalar occupied bytes and
     the number of vectors are all compile-time constants.  */
  unsigned int nscalars_per_iter
    = exact_div (a: nvectors * TYPE_VECTOR_SUBPARTS (node: vectype),
		 LOOP_VINFO_VECT_FACTOR (loop_vinfo)).to_constant ();

  /* Keep the maximum nscalars_per_iter recorded for this rgroup.  */
  if (rgl->max_nscalars_per_iter < nscalars_per_iter)
    {
      /* For now, we only support cases in which all loads and stores fall back
	 to VnQI or none do.  */
      gcc_assert (!rgl->max_nscalars_per_iter
		  || (rgl->factor == 1 && factor == 1)
		  || (rgl->max_nscalars_per_iter * rgl->factor
		      == nscalars_per_iter * factor));
      rgl->max_nscalars_per_iter = nscalars_per_iter;
      rgl->type = vectype;
      rgl->factor = factor;
    }
}
11327 | |
/* Given a complete set of lengths LENS, extract length number INDEX
   for an rgroup that operates on NVECTORS vectors of type VECTYPE,
   where 0 <= INDEX < NVECTORS.  Return a value that contains FACTOR
   multiplied by the number of elements that should be processed.
   Insert any set-up statements before GSI.  */

tree
vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
		   vec_loop_lens *lens, unsigned int nvectors, tree vectype,
		   unsigned int index, unsigned int factor)
{
  rgroup_controls *rgl = &(*lens)[nvectors - 1];
  /* A nonzero partial load/store bias means the bias-adjusted control
     should be handed out instead of the plain one.  */
  bool use_bias_adjusted_len =
    LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo) != 0;

  /* Populate the rgroup's len array, if this is the first time we've
     used it.  */
  if (rgl->controls.is_empty ())
    {
      rgl->controls.safe_grow_cleared (len: nvectors, exact: true);
      for (unsigned int i = 0; i < nvectors; ++i)
	{
	  tree len_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
	  gcc_assert (len_type != NULL_TREE);

	  tree len = make_temp_ssa_name (type: len_type, NULL, name: "loop_len");

	  /* Provide a dummy definition until the real one is available.  */
	  SSA_NAME_DEF_STMT (len) = gimple_build_nop ();
	  rgl->controls[i] = len;

	  if (use_bias_adjusted_len)
	    {
	      /* The bias-adjusted control is only created for the first
		 (and, per this assert, only) vector of the rgroup.  */
	      gcc_assert (i == 0);
	      tree adjusted_len =
		make_temp_ssa_name (type: len_type, NULL, name: "adjusted_loop_len");
	      SSA_NAME_DEF_STMT (adjusted_len) = gimple_build_nop ();
	      rgl->bias_adjusted_ctrl = adjusted_len;
	    }
	}
    }

  if (use_bias_adjusted_len)
    return rgl->bias_adjusted_ctrl;

  tree loop_len = rgl->controls[index];
  if (rgl->factor == 1 && factor == 1)
    {
      poly_int64 nunits1 = TYPE_VECTOR_SUBPARTS (node: rgl->type);
      poly_int64 nunits2 = TYPE_VECTOR_SUBPARTS (node: vectype);
      if (maybe_ne (a: nunits1, b: nunits2))
	{
	  /* A loop len for data type X can be reused for data type Y
	     if X has N times more elements than Y and if Y's elements
	     are N times bigger than X's.  */
	  gcc_assert (multiple_p (nunits1, nunits2));
	  factor = exact_div (a: nunits1, b: nunits2).to_constant ();
	  tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
	  gimple_seq seq = NULL;
	  loop_len = gimple_build (seq: &seq, code: RDIV_EXPR, type: iv_type, ops: loop_len,
				   ops: build_int_cst (iv_type, factor));
	  if (seq)
	    gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
	}
    }
  return loop_len;
}
11395 | |
11396 | /* Scale profiling counters by estimation for LOOP which is vectorized |
11397 | by factor VF. |
11398 | If FLAT is true, the loop we started with had unrealistically flat |
11399 | profile. */ |
11400 | |
11401 | static void |
11402 | scale_profile_for_vect_loop (class loop *loop, edge exit_e, unsigned vf, bool flat) |
11403 | { |
11404 | /* For flat profiles do not scale down proportionally by VF and only |
11405 | cap by known iteration count bounds. */ |
11406 | if (flat) |
11407 | { |
11408 | if (dump_file && (dump_flags & TDF_DETAILS)) |
11409 | fprintf (stream: dump_file, |
11410 | format: "Vectorized loop profile seems flat; not scaling iteration " |
11411 | "count down by the vectorization factor %i\n" , vf); |
11412 | scale_loop_profile (loop, profile_probability::always (), |
11413 | get_likely_max_loop_iterations_int (loop)); |
11414 | return; |
11415 | } |
11416 | /* Loop body executes VF fewer times and exit increases VF times. */ |
11417 | profile_count entry_count = loop_preheader_edge (loop)->count (); |
11418 | |
11419 | /* If we have unreliable loop profile avoid dropping entry |
11420 | count bellow header count. This can happen since loops |
11421 | has unrealistically low trip counts. */ |
11422 | while (vf > 1 |
11423 | && loop->header->count > entry_count |
11424 | && loop->header->count < entry_count * vf) |
11425 | { |
11426 | if (dump_file && (dump_flags & TDF_DETAILS)) |
11427 | fprintf (stream: dump_file, |
11428 | format: "Vectorization factor %i seems too large for profile " |
11429 | "prevoiusly believed to be consistent; reducing.\n" , vf); |
11430 | vf /= 2; |
11431 | } |
11432 | |
11433 | if (entry_count.nonzero_p ()) |
11434 | set_edge_probability_and_rescale_others |
11435 | (exit_e, |
11436 | entry_count.probability_in (overall: loop->header->count / vf)); |
11437 | /* Avoid producing very large exit probability when we do not have |
11438 | sensible profile. */ |
11439 | else if (exit_e->probability < profile_probability::always () / (vf * 2)) |
11440 | set_edge_probability_and_rescale_others (exit_e, exit_e->probability * vf); |
11441 | loop->latch->count = single_pred_edge (bb: loop->latch)->count (); |
11442 | |
11443 | scale_loop_profile (loop, profile_probability::always () / vf, |
11444 | get_likely_max_loop_iterations_int (loop)); |
11445 | } |
11446 | |
11447 | /* For a vectorized stmt DEF_STMT_INFO adjust all vectorized PHI |
11448 | latch edge values originally defined by it. */ |
11449 | |
static void
maybe_set_vectorized_backedge_value (loop_vec_info loop_vinfo,
				     stmt_vec_info def_stmt_info)
{
  /* Only SSA name definitions can feed PHI backedges.  */
  tree def = gimple_get_lhs (vect_orig_stmt (stmt_info: def_stmt_info)->stmt);
  if (!def || TREE_CODE (def) != SSA_NAME)
    return;
  stmt_vec_info phi_info;
  imm_use_iterator iter;
  use_operand_p use_p;
  FOR_EACH_IMM_USE_FAST (use_p, iter, def)
    {
      gphi *phi = dyn_cast <gphi *> (USE_STMT (use_p));
      if (!phi)
	continue;
      /* Only consider relevant loop-header PHIs that this loop_vinfo
	 knows about.  */
      if (!(gimple_bb (g: phi)->loop_father->header == gimple_bb (g: phi)
	    && (phi_info = loop_vinfo->lookup_stmt (phi))
	    && STMT_VINFO_RELEVANT_P (phi_info)))
	continue;
      loop_p loop = gimple_bb (g: phi)->loop_father;
      edge e = loop_latch_edge (loop);
      /* DEF must be the value flowing in over the latch edge.  */
      if (PHI_ARG_DEF_FROM_EDGE (phi, e) != def)
	continue;

      if (VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (phi_info))
	  && STMT_VINFO_REDUC_TYPE (phi_info) != FOLD_LEFT_REDUCTION
	  && STMT_VINFO_REDUC_TYPE (phi_info) != EXTRACT_LAST_REDUCTION)
	{
	  /* Add the latch arguments to the vectorized PHIs, one per
	     vector copy.  */
	  vec<gimple *> &phi_defs = STMT_VINFO_VEC_STMTS (phi_info);
	  vec<gimple *> &latch_defs = STMT_VINFO_VEC_STMTS (def_stmt_info);
	  gcc_assert (phi_defs.length () == latch_defs.length ());
	  for (unsigned i = 0; i < phi_defs.length (); ++i)
	    add_phi_arg (as_a <gphi *> (p: phi_defs[i]),
			 gimple_get_lhs (latch_defs[i]), e,
			 gimple_phi_arg_location (phi, i: e->dest_idx));
	}
      else if (STMT_VINFO_DEF_TYPE (phi_info) == vect_first_order_recurrence)
	{
	  /* For first order recurrences we have to update both uses of
	     the latch definition, the one in the PHI node and the one
	     in the generated VEC_PERM_EXPR.  */
	  vec<gimple *> &phi_defs = STMT_VINFO_VEC_STMTS (phi_info);
	  vec<gimple *> &latch_defs = STMT_VINFO_VEC_STMTS (def_stmt_info);
	  gcc_assert (phi_defs.length () == latch_defs.length ());
	  /* The vectorized PHI is found via the first permute's rhs1.  */
	  tree phidef = gimple_assign_rhs1 (gs: phi_defs[0]);
	  gphi *vphi = as_a <gphi *> (SSA_NAME_DEF_STMT (phidef));
	  for (unsigned i = 0; i < phi_defs.length (); ++i)
	    {
	      gassign *perm = as_a <gassign *> (p: phi_defs[i]);
	      /* Later permutes take the previous latch def as their
		 first input.  */
	      if (i > 0)
		gimple_assign_set_rhs1 (gs: perm, rhs: gimple_get_lhs (latch_defs[i-1]));
	      gimple_assign_set_rhs2 (gs: perm, rhs: gimple_get_lhs (latch_defs[i]));
	      update_stmt (s: perm);
	    }
	  add_phi_arg (vphi, gimple_get_lhs (latch_defs.last ()), e,
		       gimple_phi_arg_location (phi, i: e->dest_idx));
	}
    }
}
11509 | |
11510 | /* Vectorize STMT_INFO if relevant, inserting any new instructions before GSI. |
11511 | When vectorizing STMT_INFO as a store, set *SEEN_STORE to its |
11512 | stmt_vec_info. */ |
11513 | |
static bool
vect_transform_loop_stmt (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
			  gimple_stmt_iterator *gsi, stmt_vec_info *seen_store)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "------>vectorizing statement: %G", stmt_info->stmt);

  /* Debug binds for non-live defs will not survive vectorization;
     kill out-of-loop ones up front.  */
  if (MAY_HAVE_DEBUG_BIND_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
    vect_loop_kill_debug_uses (loop, stmt_info);

  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      /* An irrelevant IFN_MASK_CALL is still reported through *SEEN_STORE.
	 NOTE(review): presumably this makes the caller remove the dead
	 call like a replaced store — confirm against vect_transform_loop.  */
      if (is_gimple_call (gs: stmt_info->stmt)
	  && gimple_call_internal_p (gs: stmt_info->stmt, fn: IFN_MASK_CALL))
	{
	  gcc_assert (!gimple_call_lhs (stmt_info->stmt));
	  *seen_store = stmt_info;
	  return false;
	}
      return false;
    }

  if (STMT_VINFO_VECTYPE (stmt_info))
    {
      poly_uint64 nunits
	= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
      if (!STMT_SLP_TYPE (stmt_info)
	  && maybe_ne (a: nunits, b: vf)
	  && dump_enabled_p ())
	/* For SLP VF is set according to unrolling factor, and not
	   to vector size, hence for SLP this print is not valid.  */
	dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
    }

  /* Pure SLP statements have already been vectorized.  We still need
     to apply loop vectorization to hybrid SLP statements.  */
  if (PURE_SLP_STMT (stmt_info))
    return false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n");

  /* A true return from vect_transform_stmt indicates a store was
     vectorized; record it for the caller.  */
  if (vect_transform_stmt (loop_vinfo, stmt_info, gsi, NULL, NULL))
    *seen_store = stmt_info;

  return true;
}
11566 | |
11567 | /* Helper function to pass to simplify_replace_tree to enable replacing tree's |
11568 | in the hash_map with its corresponding values. */ |
11569 | |
11570 | static tree |
11571 | find_in_mapping (tree t, void *context) |
11572 | { |
11573 | hash_map<tree,tree>* mapping = (hash_map<tree, tree>*) context; |
11574 | |
11575 | tree *value = mapping->get (k: t); |
11576 | return value ? *value : t; |
11577 | } |
11578 | |
/* Update EPILOGUE's loop_vec_info.  EPILOGUE was constructed as a copy of the
   original loop that has now been vectorized.

   The inits of the data_references need to be advanced with the number of
   iterations of the main loop.  This has been computed in vect_do_peeling and
   is stored in parameter ADVANCE.  We first restore the data_references
   initial offset with the values recorded in ORIG_DRS_INIT.

   Since the loop_vec_info of this EPILOGUE was constructed for the original
   loop, its stmt_vec_infos all point to the original statements.  These need
   to be updated to point to their corresponding copies as well as the SSA_NAMES
   in their PATTERN_DEF_SEQs and RELATED_STMTs.

   The data_reference's connections also need to be updated.  Their
   corresponding dr_vec_info need to be reconnected to the EPILOGUE's
   stmt_vec_infos, their statements need to point to their corresponding copy,
   if they are gather loads or scatter stores then their reference needs to be
   updated to point to its corresponding copy.  */

static void
update_epilogue_loop_vinfo (class loop *epilogue, tree advance)
{
  loop_vec_info epilogue_vinfo = loop_vec_info_for_loop (loop: epilogue);
  auto_vec<gimple *> stmt_worklist;
  hash_map<tree,tree> mapping;
  gimple *orig_stmt, *new_stmt;
  gimple_stmt_iterator epilogue_gsi;
  gphi_iterator epilogue_phi_gsi;
  stmt_vec_info stmt_vinfo = NULL, related_vinfo;
  basic_block *epilogue_bbs = get_loop_body (epilogue);
  unsigned i;

  /* Replace the stale BB array with the epilogue's own blocks.  */
  free (LOOP_VINFO_BBS (epilogue_vinfo));
  LOOP_VINFO_BBS (epilogue_vinfo) = epilogue_bbs;

  /* Advance data_reference's with the number of iterations of the previous
     loop and its prologue.  */
  vect_update_inits_of_drs (epilogue_vinfo, advance, PLUS_EXPR);


  /* The EPILOGUE loop is a copy of the original loop so they share the same
     gimple UIDs.  In this loop we update the loop_vec_info of the EPILOGUE to
     point to the copied statements.  We also create a mapping of all LHS' in
     the original loop and all the LHS' in the EPILOGUE and create worklists to
     update the STMT_VINFO_PATTERN_DEF_SEQs and STMT_VINFO_RELATED_STMTs.  */
  for (unsigned i = 0; i < epilogue->num_nodes; ++i)
    {
      for (epilogue_phi_gsi = gsi_start_phis (epilogue_bbs[i]);
	   !gsi_end_p (i: epilogue_phi_gsi); gsi_next (i: &epilogue_phi_gsi))
	{
	  new_stmt = epilogue_phi_gsi.phi ();

	  gcc_assert (gimple_uid (new_stmt) > 0);
	  stmt_vinfo
	    = epilogue_vinfo->stmt_vec_infos[gimple_uid (g: new_stmt) - 1];

	  orig_stmt = STMT_VINFO_STMT (stmt_vinfo);
	  STMT_VINFO_STMT (stmt_vinfo) = new_stmt;

	  mapping.put (k: gimple_phi_result (gs: orig_stmt),
		       v: gimple_phi_result (gs: new_stmt));
	  /* PHI nodes can not have patterns or related statements.  */
	  gcc_assert (STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) == NULL
		      && STMT_VINFO_RELATED_STMT (stmt_vinfo) == NULL);
	}

      for (epilogue_gsi = gsi_start_bb (bb: epilogue_bbs[i]);
	   !gsi_end_p (i: epilogue_gsi); gsi_next (i: &epilogue_gsi))
	{
	  new_stmt = gsi_stmt (i: epilogue_gsi);
	  if (is_gimple_debug (gs: new_stmt))
	    continue;

	  gcc_assert (gimple_uid (new_stmt) > 0);
	  stmt_vinfo
	    = epilogue_vinfo->stmt_vec_infos[gimple_uid (g: new_stmt) - 1];

	  orig_stmt = STMT_VINFO_STMT (stmt_vinfo);
	  STMT_VINFO_STMT (stmt_vinfo) = new_stmt;

	  if (tree old_lhs = gimple_get_lhs (orig_stmt))
	    mapping.put (k: old_lhs, v: gimple_get_lhs (new_stmt));

	  /* Pattern def sequence statements need their operands remapped
	     later; queue them.  */
	  if (STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo))
	    {
	      gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
	      for (gimple_stmt_iterator gsi = gsi_start (seq);
		   !gsi_end_p (i: gsi); gsi_next (i: &gsi))
		stmt_worklist.safe_push (obj: gsi_stmt (i: gsi));
	    }

	  related_vinfo = STMT_VINFO_RELATED_STMT (stmt_vinfo);
	  if (related_vinfo != NULL && related_vinfo != stmt_vinfo)
	    {
	      gimple *stmt = STMT_VINFO_STMT (related_vinfo);
	      stmt_worklist.safe_push (obj: stmt);
	      /* Set BB such that the assert in
		 'get_initial_def_for_reduction' is able to determine that
		 the BB of the related stmt is inside this loop.  */
	      gimple_set_bb (stmt,
			     gimple_bb (g: new_stmt));
	      related_vinfo = STMT_VINFO_RELATED_STMT (related_vinfo);
	      gcc_assert (related_vinfo == NULL
			  || related_vinfo == stmt_vinfo);
	    }
	}
    }

  /* The PATTERN_DEF_SEQs and RELATED_STMTs in the epilogue were constructed
     using the original main loop and thus need to be updated to refer to the
     cloned variables used in the epilogue.  */
  for (unsigned i = 0; i < stmt_worklist.length (); ++i)
    {
      gimple *stmt = stmt_worklist[i];
      tree *new_op;

      for (unsigned j = 1; j < gimple_num_ops (gs: stmt); ++j)
	{
	  tree op = gimple_op (gs: stmt, i: j);
	  if ((new_op = mapping.get(k: op)))
	    gimple_set_op (gs: stmt, i: j, op: *new_op)<![CDATA[;]]>
	  else
	    {
	      /* PR92429: The last argument of simplify_replace_tree disables
	         folding when replacing arguments.  This is required as
	         otherwise you might end up with different statements than the
	         ones analyzed in vect_loop_analyze, leading to different
	         vectorization.  */
	      op = simplify_replace_tree (op, NULL_TREE, NULL_TREE,
					  &find_in_mapping, &mapping, do_fold: false);
	      gimple_set_op (gs: stmt, i: j, op);
	    }
	}
    }

  struct data_reference *dr;
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (epilogue_vinfo);
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      orig_stmt = DR_STMT (dr);
      gcc_assert (gimple_uid (orig_stmt) > 0);
      stmt_vinfo = epilogue_vinfo->stmt_vec_infos[gimple_uid (g: orig_stmt) - 1];
      /* Data references for gather loads and scatter stores do not use the
	 updated offset we set using ADVANCE.  Instead we have to make sure the
	 reference in the data references point to the corresponding copy of
	 the original in the epilogue.  Make sure to update both
	 gather/scatters recognized by dataref analysis and also other
	 refs that get_load_store_type classified as VMAT_GATHER_SCATTER.  */
      auto vstmt_vinfo = vect_stmt_to_vectorize (stmt_info: stmt_vinfo);
      if (STMT_VINFO_MEMORY_ACCESS_TYPE (vstmt_vinfo) == VMAT_GATHER_SCATTER
	  || STMT_VINFO_GATHER_SCATTER_P (vstmt_vinfo))
	{
	  DR_REF (dr)
	    = simplify_replace_tree (DR_REF (dr), NULL_TREE, NULL_TREE,
				     &find_in_mapping, &mapping);
	  DR_BASE_ADDRESS (dr)
	    = simplify_replace_tree (DR_BASE_ADDRESS (dr), NULL_TREE, NULL_TREE,
				     &find_in_mapping, &mapping);
	}
      DR_STMT (dr) = STMT_VINFO_STMT (stmt_vinfo);
      stmt_vinfo->dr_aux.stmt = stmt_vinfo;
    }

  /* The cached dataref copy is stale now; rebuild it from the updated
     datarefs.  */
  epilogue_vinfo->shared->datarefs_copy.release ();
  epilogue_vinfo->shared->save_datarefs ();
}
11745 | |
/* When vectorizing early break statements instructions that happen before
   the early break in the current BB need to be moved to after the early
   break.  This function deals with that and assumes that any validity
   checks have already been performed.

   While moving the instructions if it encounters a VUSE or VDEF it then
   corrects the VUSES as it moves the statements along.  The statements are
   inserted at the destination block recorded in
   LOOP_VINFO_EARLY_BRK_DEST_BB.  */

static void
move_early_exit_stmts (loop_vec_info loop_vinfo)
{
  DUMP_VECT_SCOPE ("move_early_exit_stmts");

  if (LOOP_VINFO_EARLY_BRK_STORES (loop_vinfo).is_empty ())
    return;

  /* Move all stmts that need moving.  */
  basic_block dest_bb = LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo);
  gimple_stmt_iterator dest_gsi = gsi_after_labels (bb: dest_bb);

  tree last_seen_vuse = NULL_TREE;
  for (gimple *stmt : LOOP_VINFO_EARLY_BRK_STORES (loop_vinfo))
    {
      /* We have to update crossed degenerate virtual PHIs.  Simply
	 elide them.  */
      if (gphi *vphi = dyn_cast <gphi *> (p: stmt))
	{
	  tree vdef = gimple_phi_result (gs: vphi);
	  tree vuse = gimple_phi_arg_def (gs: vphi, index: 0);
	  imm_use_iterator iter;
	  use_operand_p use_p;
	  gimple *use_stmt;
	  /* Redirect all uses of the PHI result to its single incoming
	     virtual operand before removing the PHI.  */
	  FOR_EACH_IMM_USE_STMT (use_stmt, iter, vdef)
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
		SET_USE (use_p, vuse);
	    }
	  auto gsi = gsi_for_stmt (stmt);
	  remove_phi_node (&gsi, true);
	  last_seen_vuse = vuse;
	  continue;
	}

      /* Check to see if statement is still required for vect or has been
	 elided.  */
      auto stmt_info = loop_vinfo->lookup_stmt (stmt);
      if (!stmt_info)
	continue;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "moving stmt %G", stmt);

      gimple_stmt_iterator stmt_gsi = gsi_for_stmt (stmt);
      gsi_move_before (&stmt_gsi, &dest_gsi, GSI_NEW_STMT);
      last_seen_vuse = gimple_vuse (g: stmt);
    }

  /* Update all the stmts with their new reaching VUSES.  */
  for (auto p : LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "updating vuse to %T for load %G",
			 last_seen_vuse, p);
      gimple_set_vuse (g: p, vuse: last_seen_vuse);
      update_stmt (s: p);
    }

  /* And update the LC PHIs on exits.  */
  for (edge e : get_loop_exit_edges (LOOP_VINFO_LOOP (loop_vinfo)))
    if (!dominated_by_p (CDI_DOMINATORS, e->src, dest_bb))
      if (gphi *phi = get_virtual_phi (e->dest))
	SET_PHI_ARG_DEF_ON_EDGE (phi, e, last_seen_vuse);
}
11821 | |
11822 | /* Function vect_transform_loop. |
11823 | |
11824 | The analysis phase has determined that the loop is vectorizable. |
11825 | Vectorize the loop - created vectorized stmts to replace the scalar |
11826 | stmts in the loop, and update the loop exit condition. |
11827 | Returns scalar epilogue loop if any. */ |
11828 | |
11829 | class loop * |
11830 | vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call) |
11831 | { |
11832 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
11833 | class loop *epilogue = NULL; |
11834 | basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); |
11835 | int nbbs = loop->num_nodes; |
11836 | int i; |
11837 | tree niters_vector = NULL_TREE; |
11838 | tree step_vector = NULL_TREE; |
11839 | tree niters_vector_mult_vf = NULL_TREE; |
11840 | poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
11841 | unsigned int lowest_vf = constant_lower_bound (a: vf); |
11842 | gimple *stmt; |
11843 | bool check_profitability = false; |
11844 | unsigned int th; |
11845 | bool flat = maybe_flat_loop_profile (loop); |
11846 | |
11847 | DUMP_VECT_SCOPE ("vec_transform_loop" ); |
11848 | |
11849 | loop_vinfo->shared->check_datarefs (); |
11850 | |
11851 | /* Use the more conservative vectorization threshold. If the number |
11852 | of iterations is constant assume the cost check has been performed |
11853 | by our caller. If the threshold makes all loops profitable that |
11854 | run at least the (estimated) vectorization factor number of times |
11855 | checking is pointless, too. */ |
11856 | th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo); |
11857 | if (vect_apply_runtime_profitability_check_p (loop_vinfo)) |
11858 | { |
11859 | if (dump_enabled_p ()) |
11860 | dump_printf_loc (MSG_NOTE, vect_location, |
11861 | "Profitability threshold is %d loop iterations.\n" , |
11862 | th); |
11863 | check_profitability = true; |
11864 | } |
11865 | |
11866 | /* Make sure there exists a single-predecessor exit bb. Do this before |
11867 | versioning. */ |
11868 | edge e = LOOP_VINFO_IV_EXIT (loop_vinfo); |
11869 | if (! single_pred_p (bb: e->dest) && !LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) |
11870 | { |
11871 | split_loop_exit_edge (e, true); |
11872 | if (dump_enabled_p ()) |
11873 | dump_printf (MSG_NOTE, "split exit edge\n" ); |
11874 | } |
11875 | |
11876 | /* Version the loop first, if required, so the profitability check |
11877 | comes first. */ |
11878 | |
11879 | if (LOOP_REQUIRES_VERSIONING (loop_vinfo)) |
11880 | { |
11881 | class loop *sloop |
11882 | = vect_loop_versioning (loop_vinfo, loop_vectorized_call); |
11883 | sloop->force_vectorize = false; |
11884 | check_profitability = false; |
11885 | } |
11886 | |
11887 | /* Make sure there exists a single-predecessor exit bb also on the |
11888 | scalar loop copy. Do this after versioning but before peeling |
11889 | so CFG structure is fine for both scalar and if-converted loop |
11890 | to make slpeel_duplicate_current_defs_from_edges face matched |
11891 | loop closed PHI nodes on the exit. */ |
11892 | if (LOOP_VINFO_SCALAR_LOOP (loop_vinfo)) |
11893 | { |
11894 | e = LOOP_VINFO_SCALAR_IV_EXIT (loop_vinfo); |
11895 | if (! single_pred_p (bb: e->dest)) |
11896 | { |
11897 | split_loop_exit_edge (e, true); |
11898 | if (dump_enabled_p ()) |
11899 | dump_printf (MSG_NOTE, "split exit edge of scalar loop\n" ); |
11900 | } |
11901 | } |
11902 | |
11903 | tree niters = vect_build_loop_niters (loop_vinfo); |
11904 | LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = niters; |
11905 | tree nitersm1 = unshare_expr (LOOP_VINFO_NITERSM1 (loop_vinfo)); |
11906 | bool niters_no_overflow = loop_niters_no_overflow (loop_vinfo); |
11907 | tree advance; |
11908 | drs_init_vec orig_drs_init; |
11909 | |
11910 | epilogue = vect_do_peeling (loop_vinfo, niters, nitersm1, &niters_vector, |
11911 | &step_vector, &niters_vector_mult_vf, th, |
11912 | check_profitability, niters_no_overflow, |
11913 | &advance); |
11914 | if (LOOP_VINFO_SCALAR_LOOP (loop_vinfo) |
11915 | && LOOP_VINFO_SCALAR_LOOP_SCALING (loop_vinfo).initialized_p ()) |
11916 | { |
11917 | /* Ifcvt duplicates loop preheader, loop body and produces an basic |
11918 | block after loop exit. We need to scale all that. */ |
11919 | basic_block |
11920 | = loop_preheader_edge (LOOP_VINFO_SCALAR_LOOP (loop_vinfo))->src; |
11921 | preheader->count |
11922 | = preheader->count.apply_probability |
11923 | (LOOP_VINFO_SCALAR_LOOP_SCALING (loop_vinfo)); |
11924 | scale_loop_frequencies (LOOP_VINFO_SCALAR_LOOP (loop_vinfo), |
11925 | LOOP_VINFO_SCALAR_LOOP_SCALING (loop_vinfo)); |
11926 | LOOP_VINFO_SCALAR_IV_EXIT (loop_vinfo)->dest->count = preheader->count; |
11927 | } |
11928 | |
11929 | if (niters_vector == NULL_TREE) |
11930 | { |
11931 | if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) |
11932 | && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) |
11933 | && known_eq (lowest_vf, vf)) |
11934 | { |
11935 | niters_vector |
11936 | = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)), |
11937 | LOOP_VINFO_INT_NITERS (loop_vinfo) / lowest_vf); |
11938 | step_vector = build_one_cst (TREE_TYPE (niters)); |
11939 | } |
11940 | else if (vect_use_loop_mask_for_alignment_p (loop_vinfo)) |
11941 | vect_gen_vector_loop_niters (loop_vinfo, niters, &niters_vector, |
11942 | &step_vector, niters_no_overflow); |
11943 | else |
11944 | /* vect_do_peeling subtracted the number of peeled prologue |
11945 | iterations from LOOP_VINFO_NITERS. */ |
11946 | vect_gen_vector_loop_niters (loop_vinfo, LOOP_VINFO_NITERS (loop_vinfo), |
11947 | &niters_vector, &step_vector, |
11948 | niters_no_overflow); |
11949 | } |
11950 | |
11951 | /* 1) Make sure the loop header has exactly two entries |
11952 | 2) Make sure we have a preheader basic block. */ |
11953 | |
11954 | gcc_assert (EDGE_COUNT (loop->header->preds) == 2); |
11955 | |
11956 | split_edge (loop_preheader_edge (loop)); |
11957 | |
11958 | if (vect_use_loop_mask_for_alignment_p (loop_vinfo)) |
11959 | /* This will deal with any possible peeling. */ |
11960 | vect_prepare_for_masked_peels (loop_vinfo); |
11961 | |
11962 | /* Handle any code motion that we need to for early-break vectorization after |
11963 | we've done peeling but just before we start vectorizing. */ |
11964 | if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) |
11965 | move_early_exit_stmts (loop_vinfo); |
11966 | |
11967 | /* Schedule the SLP instances first, then handle loop vectorization |
11968 | below. */ |
11969 | if (!loop_vinfo->slp_instances.is_empty ()) |
11970 | { |
11971 | DUMP_VECT_SCOPE ("scheduling SLP instances" ); |
11972 | vect_schedule_slp (loop_vinfo, LOOP_VINFO_SLP_INSTANCES (loop_vinfo)); |
11973 | } |
11974 | |
11975 | /* FORNOW: the vectorizer supports only loops which body consist |
11976 | of one basic block (header + empty latch). When the vectorizer will |
11977 | support more involved loop forms, the order by which the BBs are |
11978 | traversed need to be reconsidered. */ |
11979 | |
11980 | for (i = 0; i < nbbs; i++) |
11981 | { |
11982 | basic_block bb = bbs[i]; |
11983 | stmt_vec_info stmt_info; |
11984 | |
11985 | for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (i: si); |
11986 | gsi_next (i: &si)) |
11987 | { |
11988 | gphi *phi = si.phi (); |
11989 | if (dump_enabled_p ()) |
11990 | dump_printf_loc (MSG_NOTE, vect_location, |
11991 | "------>vectorizing phi: %G" , (gimple *) phi); |
11992 | stmt_info = loop_vinfo->lookup_stmt (phi); |
11993 | if (!stmt_info) |
11994 | continue; |
11995 | |
11996 | if (MAY_HAVE_DEBUG_BIND_STMTS && !STMT_VINFO_LIVE_P (stmt_info)) |
11997 | vect_loop_kill_debug_uses (loop, stmt_info); |
11998 | |
11999 | if (!STMT_VINFO_RELEVANT_P (stmt_info) |
12000 | && !STMT_VINFO_LIVE_P (stmt_info)) |
12001 | continue; |
12002 | |
12003 | if (STMT_VINFO_VECTYPE (stmt_info) |
12004 | && (maybe_ne |
12005 | (a: TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)), b: vf)) |
12006 | && dump_enabled_p ()) |
12007 | dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n" ); |
12008 | |
12009 | if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def |
12010 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def |
12011 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def |
12012 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle |
12013 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_first_order_recurrence |
12014 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def) |
12015 | && ! PURE_SLP_STMT (stmt_info)) |
12016 | { |
12017 | if (dump_enabled_p ()) |
12018 | dump_printf_loc (MSG_NOTE, vect_location, "transform phi.\n" ); |
12019 | vect_transform_stmt (loop_vinfo, stmt_info, NULL, NULL, NULL); |
12020 | } |
12021 | } |
12022 | |
12023 | for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (i: si); |
12024 | gsi_next (i: &si)) |
12025 | { |
12026 | gphi *phi = si.phi (); |
12027 | stmt_info = loop_vinfo->lookup_stmt (phi); |
12028 | if (!stmt_info) |
12029 | continue; |
12030 | |
12031 | if (!STMT_VINFO_RELEVANT_P (stmt_info) |
12032 | && !STMT_VINFO_LIVE_P (stmt_info)) |
12033 | continue; |
12034 | |
12035 | if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def |
12036 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def |
12037 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def |
12038 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle |
12039 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def |
12040 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_first_order_recurrence) |
12041 | && ! PURE_SLP_STMT (stmt_info)) |
12042 | maybe_set_vectorized_backedge_value (loop_vinfo, def_stmt_info: stmt_info); |
12043 | } |
12044 | |
12045 | for (gimple_stmt_iterator si = gsi_start_bb (bb); |
12046 | !gsi_end_p (i: si);) |
12047 | { |
12048 | stmt = gsi_stmt (i: si); |
12049 | /* During vectorization remove existing clobber stmts. */ |
12050 | if (gimple_clobber_p (s: stmt)) |
12051 | { |
12052 | unlink_stmt_vdef (stmt); |
12053 | gsi_remove (&si, true); |
12054 | release_defs (stmt); |
12055 | } |
12056 | else |
12057 | { |
12058 | /* Ignore vector stmts created in the outer loop. */ |
12059 | stmt_info = loop_vinfo->lookup_stmt (stmt); |
12060 | |
12061 | /* vector stmts created in the outer-loop during vectorization of |
12062 | stmts in an inner-loop may not have a stmt_info, and do not |
12063 | need to be vectorized. */ |
12064 | stmt_vec_info seen_store = NULL; |
12065 | if (stmt_info) |
12066 | { |
12067 | if (STMT_VINFO_IN_PATTERN_P (stmt_info)) |
12068 | { |
12069 | gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info); |
12070 | for (gimple_stmt_iterator subsi = gsi_start (seq&: def_seq); |
12071 | !gsi_end_p (i: subsi); gsi_next (i: &subsi)) |
12072 | { |
12073 | stmt_vec_info pat_stmt_info |
12074 | = loop_vinfo->lookup_stmt (gsi_stmt (i: subsi)); |
12075 | vect_transform_loop_stmt (loop_vinfo, stmt_info: pat_stmt_info, |
12076 | gsi: &si, seen_store: &seen_store); |
12077 | } |
12078 | stmt_vec_info pat_stmt_info |
12079 | = STMT_VINFO_RELATED_STMT (stmt_info); |
12080 | if (vect_transform_loop_stmt (loop_vinfo, stmt_info: pat_stmt_info, |
12081 | gsi: &si, seen_store: &seen_store)) |
12082 | maybe_set_vectorized_backedge_value (loop_vinfo, |
12083 | def_stmt_info: pat_stmt_info); |
12084 | } |
12085 | else |
12086 | { |
12087 | if (vect_transform_loop_stmt (loop_vinfo, stmt_info, gsi: &si, |
12088 | seen_store: &seen_store)) |
12089 | maybe_set_vectorized_backedge_value (loop_vinfo, |
12090 | def_stmt_info: stmt_info); |
12091 | } |
12092 | } |
12093 | gsi_next (i: &si); |
12094 | if (seen_store) |
12095 | { |
12096 | if (STMT_VINFO_GROUPED_ACCESS (seen_store)) |
12097 | /* Interleaving. If IS_STORE is TRUE, the |
12098 | vectorization of the interleaving chain was |
12099 | completed - free all the stores in the chain. */ |
12100 | vect_remove_stores (loop_vinfo, |
12101 | DR_GROUP_FIRST_ELEMENT (seen_store)); |
12102 | else |
12103 | /* Free the attached stmt_vec_info and remove the stmt. */ |
12104 | loop_vinfo->remove_stmt (stmt_info); |
12105 | } |
12106 | } |
12107 | } |
12108 | |
12109 | /* Stub out scalar statements that must not survive vectorization. |
12110 | Doing this here helps with grouped statements, or statements that |
12111 | are involved in patterns. */ |
12112 | for (gimple_stmt_iterator gsi = gsi_start_bb (bb); |
12113 | !gsi_end_p (i: gsi); gsi_next (i: &gsi)) |
12114 | { |
12115 | gcall *call = dyn_cast <gcall *> (p: gsi_stmt (i: gsi)); |
12116 | if (!call || !gimple_call_internal_p (gs: call)) |
12117 | continue; |
12118 | internal_fn ifn = gimple_call_internal_fn (gs: call); |
12119 | if (ifn == IFN_MASK_LOAD) |
12120 | { |
12121 | tree lhs = gimple_get_lhs (call); |
12122 | if (!VECTOR_TYPE_P (TREE_TYPE (lhs))) |
12123 | { |
12124 | tree zero = build_zero_cst (TREE_TYPE (lhs)); |
12125 | gimple *new_stmt = gimple_build_assign (lhs, zero); |
12126 | gsi_replace (&gsi, new_stmt, true); |
12127 | } |
12128 | } |
12129 | else if (conditional_internal_fn_code (ifn) != ERROR_MARK) |
12130 | { |
12131 | tree lhs = gimple_get_lhs (call); |
12132 | if (!VECTOR_TYPE_P (TREE_TYPE (lhs))) |
12133 | { |
12134 | tree else_arg |
12135 | = gimple_call_arg (gs: call, index: gimple_call_num_args (gs: call) - 1); |
12136 | gimple *new_stmt = gimple_build_assign (lhs, else_arg); |
12137 | gsi_replace (&gsi, new_stmt, true); |
12138 | } |
12139 | } |
12140 | } |
12141 | } /* BBs in loop */ |
12142 | |
12143 | /* The vectorization factor is always > 1, so if we use an IV increment of 1. |
12144 | a zero NITERS becomes a nonzero NITERS_VECTOR. */ |
12145 | if (integer_onep (step_vector)) |
12146 | niters_no_overflow = true; |
12147 | vect_set_loop_condition (loop, LOOP_VINFO_IV_EXIT (loop_vinfo), loop_vinfo, |
12148 | niters_vector, step_vector, niters_vector_mult_vf, |
12149 | !niters_no_overflow); |
12150 | |
12151 | unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo); |
12152 | |
12153 | /* True if the final iteration might not handle a full vector's |
12154 | worth of scalar iterations. */ |
12155 | bool final_iter_may_be_partial |
12156 | = LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) |
12157 | || LOOP_VINFO_EARLY_BREAKS (loop_vinfo); |
12158 | |
12159 | /* +1 to convert latch counts to loop iteration counts. */ |
12160 | int bias_for_lowest = 1; |
12161 | |
12162 | /* When we are peeling for gaps then we take away one scalar iteration |
12163 | from the vector loop. Thus we can adjust the upper bound by one |
12164 | scalar iteration. But only when we know the bound applies to the |
12165 | IV exit test which might not be true when we have multiple exits. */ |
12166 | if (!LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) |
12167 | bias_for_lowest -= LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? 1 : 0; |
12168 | |
12169 | int bias_for_assumed = bias_for_lowest; |
12170 | int alignment_npeels = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); |
12171 | if (alignment_npeels && LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) |
12172 | { |
12173 | /* When the amount of peeling is known at compile time, the first |
12174 | iteration will have exactly alignment_npeels active elements. |
12175 | In the worst case it will have at least one. */ |
12176 | int min_first_active = (alignment_npeels > 0 ? alignment_npeels : 1); |
12177 | bias_for_lowest += lowest_vf - min_first_active; |
12178 | bias_for_assumed += assumed_vf - min_first_active; |
12179 | } |
12180 | /* In these calculations the "- 1" converts loop iteration counts |
12181 | back to latch counts. */ |
12182 | if (loop->any_upper_bound) |
12183 | { |
12184 | loop_vec_info main_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo); |
12185 | loop->nb_iterations_upper_bound |
12186 | = (final_iter_may_be_partial |
12187 | ? wi::udiv_ceil (x: loop->nb_iterations_upper_bound + bias_for_lowest, |
12188 | y: lowest_vf) - 1 |
12189 | : wi::udiv_floor (x: loop->nb_iterations_upper_bound + bias_for_lowest, |
12190 | y: lowest_vf) - 1); |
12191 | if (main_vinfo |
12192 | /* Both peeling for alignment and peeling for gaps can end up |
12193 | with the scalar epilogue running for more than VF-1 iterations. */ |
12194 | && !main_vinfo->peeling_for_alignment |
12195 | && !main_vinfo->peeling_for_gaps) |
12196 | { |
12197 | unsigned int bound; |
12198 | poly_uint64 main_iters |
12199 | = upper_bound (LOOP_VINFO_VECT_FACTOR (main_vinfo), |
12200 | LOOP_VINFO_COST_MODEL_THRESHOLD (main_vinfo)); |
12201 | main_iters |
12202 | = upper_bound (a: main_iters, |
12203 | LOOP_VINFO_VERSIONING_THRESHOLD (main_vinfo)); |
12204 | if (can_div_away_from_zero_p (a: main_iters, |
12205 | LOOP_VINFO_VECT_FACTOR (loop_vinfo), |
12206 | quotient: &bound)) |
12207 | loop->nb_iterations_upper_bound |
12208 | = wi::umin (x: (bound_wide_int) (bound - 1), |
12209 | y: loop->nb_iterations_upper_bound); |
12210 | } |
12211 | } |
12212 | if (loop->any_likely_upper_bound) |
12213 | loop->nb_iterations_likely_upper_bound |
12214 | = (final_iter_may_be_partial |
12215 | ? wi::udiv_ceil (x: loop->nb_iterations_likely_upper_bound |
12216 | + bias_for_lowest, y: lowest_vf) - 1 |
12217 | : wi::udiv_floor (x: loop->nb_iterations_likely_upper_bound |
12218 | + bias_for_lowest, y: lowest_vf) - 1); |
12219 | if (loop->any_estimate) |
12220 | loop->nb_iterations_estimate |
12221 | = (final_iter_may_be_partial |
12222 | ? wi::udiv_ceil (x: loop->nb_iterations_estimate + bias_for_assumed, |
12223 | y: assumed_vf) - 1 |
12224 | : wi::udiv_floor (x: loop->nb_iterations_estimate + bias_for_assumed, |
12225 | y: assumed_vf) - 1); |
12226 | scale_profile_for_vect_loop (loop, LOOP_VINFO_IV_EXIT (loop_vinfo), |
12227 | vf: assumed_vf, flat); |
12228 | |
12229 | if (dump_enabled_p ()) |
12230 | { |
12231 | if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)) |
12232 | { |
12233 | dump_printf_loc (MSG_NOTE, vect_location, |
12234 | "LOOP VECTORIZED\n" ); |
12235 | if (loop->inner) |
12236 | dump_printf_loc (MSG_NOTE, vect_location, |
12237 | "OUTER LOOP VECTORIZED\n" ); |
12238 | dump_printf (MSG_NOTE, "\n" ); |
12239 | } |
12240 | else |
12241 | dump_printf_loc (MSG_NOTE, vect_location, |
12242 | "LOOP EPILOGUE VECTORIZED (MODE=%s)\n" , |
12243 | GET_MODE_NAME (loop_vinfo->vector_mode)); |
12244 | } |
12245 | |
12246 | /* Loops vectorized with a variable factor won't benefit from |
12247 | unrolling/peeling. */ |
12248 | if (!vf.is_constant ()) |
12249 | { |
12250 | loop->unroll = 1; |
12251 | if (dump_enabled_p ()) |
12252 | dump_printf_loc (MSG_NOTE, vect_location, "Disabling unrolling due to" |
12253 | " variable-length vectorization factor\n" ); |
12254 | } |
12255 | /* Free SLP instances here because otherwise stmt reference counting |
12256 | won't work. */ |
12257 | slp_instance instance; |
12258 | FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance) |
12259 | vect_free_slp_instance (instance); |
12260 | LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release (); |
12261 | /* Clear-up safelen field since its value is invalid after vectorization |
12262 | since vectorized loop can have loop-carried dependencies. */ |
12263 | loop->safelen = 0; |
12264 | |
12265 | if (epilogue) |
12266 | { |
12267 | update_epilogue_loop_vinfo (epilogue, advance); |
12268 | |
12269 | epilogue->simduid = loop->simduid; |
12270 | epilogue->force_vectorize = loop->force_vectorize; |
12271 | epilogue->dont_vectorize = false; |
12272 | } |
12273 | |
12274 | return epilogue; |
12275 | } |
12276 | |
12277 | /* The code below is trying to perform simple optimization - revert |
12278 | if-conversion for masked stores, i.e. if the mask of a store is zero |
12279 | do not perform it and all stored value producers also if possible. |
12280 | For example, |
12281 | for (i=0; i<n; i++) |
12282 | if (c[i]) |
12283 | { |
12284 | p1[i] += 1; |
12285 | p2[i] = p3[i] +2; |
12286 | } |
12287 | this transformation will produce the following semi-hammock: |
12288 | |
12289 | if (!mask__ifc__42.18_165 == { 0, 0, 0, 0, 0, 0, 0, 0 }) |
12290 | { |
12291 | vect__11.19_170 = MASK_LOAD (vectp_p1.20_168, 0B, mask__ifc__42.18_165); |
12292 | vect__12.22_172 = vect__11.19_170 + vect_cst__171; |
12293 | MASK_STORE (vectp_p1.23_175, 0B, mask__ifc__42.18_165, vect__12.22_172); |
12294 | vect__18.25_182 = MASK_LOAD (vectp_p3.26_180, 0B, mask__ifc__42.18_165); |
12295 | vect__19.28_184 = vect__18.25_182 + vect_cst__183; |
12296 | MASK_STORE (vectp_p2.29_187, 0B, mask__ifc__42.18_165, vect__19.28_184); |
12297 | } |
12298 | */ |
12299 | |
void
optimize_mask_stores (class loop *loop)
{
  basic_block *bbs = get_loop_body (loop);
  unsigned nbbs = loop->num_nodes;
  unsigned i;
  basic_block bb;
  class loop *bb_loop;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  auto_vec<gimple *> worklist;
  auto_purge_vect_location sentinel;

  vect_location = find_loop_location (loop);
  /* Pick up all masked stores in loop if any.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi);
	   gsi_next (i: &gsi))
	{
	  stmt = gsi_stmt (i: gsi);
	  if (gimple_call_internal_p (gs: stmt, fn: IFN_MASK_STORE))
	    worklist.safe_push (obj: stmt);
	}
    }

  free (ptr: bbs);
  if (worklist.is_empty ())
    return;

  /* Loop has masked stores.  Process each one: guard it (and, where
     possible, the statements producing its stored value) by a test of
     the mask against zero.  */
  while (!worklist.is_empty ())
    {
      gimple *last, *last_store;
      edge e, efalse;
      tree mask;
      basic_block store_bb, join_bb;
      gimple_stmt_iterator gsi_to;
      tree vdef, new_vdef;
      gphi *phi;
      tree vectype;
      tree zero;

      last = worklist.pop ();
      mask = gimple_call_arg (gs: last, index: 2);
      bb = gimple_bb (g: last);
      /* Create then_bb and if-then structure in CFG, then_bb belongs to
	 the same loop as if_bb.  It could be different to LOOP when two
	 level loop-nest is vectorized and mask_store belongs to the inner
	 one.  */
      e = split_block (bb, last);
      bb_loop = bb->loop_father;
      gcc_assert (loop == bb_loop || flow_loop_nested_p (loop, bb_loop));
      join_bb = e->dest;
      store_bb = create_empty_bb (bb);
      add_bb_to_loop (store_bb, bb_loop);
      e->flags = EDGE_TRUE_VALUE;
      efalse = make_edge (bb, store_bb, EDGE_FALSE_VALUE);
      /* Put STORE_BB to likely part.  */
      efalse->probability = profile_probability::likely ();
      e->probability = efalse->probability.invert ();
      store_bb->count = efalse->count ();
      make_single_succ_edge (store_bb, join_bb, EDGE_FALLTHRU);
      if (dom_info_available_p (CDI_DOMINATORS))
	set_immediate_dominator (CDI_DOMINATORS, store_bb, bb);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Create new block %d to sink mask stores." ,
			 store_bb->index);
      /* Create vector comparison with boolean result.  */
      vectype = TREE_TYPE (mask);
      zero = build_zero_cst (vectype);
      stmt = gimple_build_cond (EQ_EXPR, mask, zero, NULL_TREE, NULL_TREE);
      gsi = gsi_last_bb (bb);
      gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
      /* Create new PHI node for vdef of the last masked store:
	 .MEM_2 = VDEF <.MEM_1>
	 will be converted to
	 .MEM.3 = VDEF <.MEM_1>
	 and new PHI node will be created in join bb
	 .MEM_2 = PHI <.MEM_1, .MEM_3>
	 */
      vdef = gimple_vdef (g: last);
      new_vdef = make_ssa_name (var: gimple_vop (cfun), stmt: last);
      gimple_set_vdef (g: last, vdef: new_vdef);
      phi = create_phi_node (vdef, join_bb);
      add_phi_arg (phi, new_vdef, EDGE_SUCC (store_bb, 0), UNKNOWN_LOCATION);

      /* Put all masked stores with the same mask to STORE_BB if possible.  */
      while (true)
	{
	  gimple_stmt_iterator gsi_from;
	  gimple *stmt1 = NULL;

	  /* Move masked store to STORE_BB.  */
	  last_store = last;
	  gsi = gsi_for_stmt (last);
	  gsi_from = gsi;
	  /* Shift GSI to the previous stmt for further traversal.  */
	  gsi_prev (i: &gsi);
	  gsi_to = gsi_start_bb (bb: store_bb);
	  gsi_move_before (&gsi_from, &gsi_to);
	  /* Setup GSI_TO to the non-empty block start.  */
	  gsi_to = gsi_start_bb (bb: store_bb);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "Move stmt to created bb\n%G" , last);
	  /* Move all stored value producers if possible.  Walk backwards
	     from the store; each producer is only moved when every
	     non-debug use is already inside STORE_BB.  */
	  while (!gsi_end_p (i: gsi))
	    {
	      tree lhs;
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      bool res;

	      /* Skip debug statements.  */
	      if (is_gimple_debug (gs: gsi_stmt (i: gsi)))
		{
		  gsi_prev (i: &gsi);
		  continue;
		}
	      stmt1 = gsi_stmt (i: gsi);
	      /* Do not consider statements writing to memory or having
		 volatile operand.  */
	      if (gimple_vdef (g: stmt1)
		  || gimple_has_volatile_ops (stmt: stmt1))
		break;
	      gsi_from = gsi;
	      gsi_prev (i: &gsi);
	      lhs = gimple_get_lhs (stmt1);
	      if (!lhs)
		break;

	      /* LHS of vectorized stmt must be SSA_NAME.  */
	      if (TREE_CODE (lhs) != SSA_NAME)
		break;

	      if (!VECTOR_TYPE_P (TREE_TYPE (lhs)))
		{
		  /* Remove dead scalar statement.  */
		  if (has_zero_uses (var: lhs))
		    {
		      gsi_remove (&gsi_from, true);
		      continue;
		    }
		}

	      /* Check that LHS does not have uses outside of STORE_BB.  */
	      res = true;
	      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
		{
		  gimple *use_stmt;
		  use_stmt = USE_STMT (use_p);
		  if (is_gimple_debug (gs: use_stmt))
		    continue;
		  if (gimple_bb (g: use_stmt) != store_bb)
		    {
		      res = false;
		      break;
		    }
		}
	      if (!res)
		break;

	      /* A producer reading memory must read the same memory state
		 as the store we are sinking, otherwise moving it past
		 intervening stores would be wrong.  */
	      if (gimple_vuse (g: stmt1)
		  && gimple_vuse (g: stmt1) != gimple_vuse (g: last_store))
		break;

	      /* Can move STMT1 to STORE_BB.  */
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "Move stmt to created bb\n%G" , stmt1);
	      gsi_move_before (&gsi_from, &gsi_to);
	      /* Shift GSI_TO for further insertion.  */
	      gsi_prev (i: &gsi_to);
	    }
	  /* Put other masked stores with the same mask to STORE_BB.
	     Only chain the next worklist entry when it is the stmt we
	     stopped at, i.e. immediately adjacent, with the same mask.  */
	  if (worklist.is_empty ()
	      || gimple_call_arg (gs: worklist.last (), index: 2) != mask
	      || worklist.last () != stmt1)
	    break;
	  last = worklist.pop ();
	}
      /* Close the virtual-operand PHI: the value on the skip edge is the
	 memory state before the first sunk store.  */
      add_phi_arg (phi, gimple_vuse (g: last_store), e, UNKNOWN_LOCATION);
    }
}
12487 | |
12488 | /* Decide whether it is possible to use a zero-based induction variable |
12489 | when vectorizing LOOP_VINFO with partial vectors. If it is, return |
12490 | the value that the induction variable must be able to hold in order |
12491 | to ensure that the rgroups eventually have no active vector elements. |
12492 | Return -1 otherwise. */ |
12493 | |
12494 | widest_int |
12495 | vect_iv_limit_for_partial_vectors (loop_vec_info loop_vinfo) |
12496 | { |
12497 | tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); |
12498 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
12499 | unsigned HOST_WIDE_INT max_vf = vect_max_vf (loop_vinfo); |
12500 | |
12501 | /* Calculate the value that the induction variable must be able |
12502 | to hit in order to ensure that we end the loop with an all-false mask. |
12503 | This involves adding the maximum number of inactive trailing scalar |
12504 | iterations. */ |
12505 | widest_int iv_limit = -1; |
12506 | if (max_loop_iterations (loop, &iv_limit)) |
12507 | { |
12508 | if (niters_skip) |
12509 | { |
12510 | /* Add the maximum number of skipped iterations to the |
12511 | maximum iteration count. */ |
12512 | if (TREE_CODE (niters_skip) == INTEGER_CST) |
12513 | iv_limit += wi::to_widest (t: niters_skip); |
12514 | else |
12515 | iv_limit += max_vf - 1; |
12516 | } |
12517 | else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)) |
12518 | /* Make a conservatively-correct assumption. */ |
12519 | iv_limit += max_vf - 1; |
12520 | |
12521 | /* IV_LIMIT is the maximum number of latch iterations, which is also |
12522 | the maximum in-range IV value. Round this value down to the previous |
12523 | vector alignment boundary and then add an extra full iteration. */ |
12524 | poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
12525 | iv_limit = (iv_limit & -(int) known_alignment (a: vf)) + max_vf; |
12526 | } |
12527 | return iv_limit; |
12528 | } |
12529 | |
12530 | /* For the given rgroup_controls RGC, check whether an induction variable |
12531 | would ever hit a value that produces a set of all-false masks or zero |
12532 | lengths before wrapping around. Return true if it's possible to wrap |
12533 | around before hitting the desirable value, otherwise return false. */ |
12534 | |
12535 | bool |
12536 | vect_rgroup_iv_might_wrap_p (loop_vec_info loop_vinfo, rgroup_controls *rgc) |
12537 | { |
12538 | widest_int iv_limit = vect_iv_limit_for_partial_vectors (loop_vinfo); |
12539 | |
12540 | if (iv_limit == -1) |
12541 | return true; |
12542 | |
12543 | tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo); |
12544 | unsigned int compare_precision = TYPE_PRECISION (compare_type); |
12545 | unsigned nitems = rgc->max_nscalars_per_iter * rgc->factor; |
12546 | |
12547 | if (wi::min_precision (x: iv_limit * nitems, sgn: UNSIGNED) > compare_precision) |
12548 | return true; |
12549 | |
12550 | return false; |
12551 | } |
12552 | |