/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2024 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "explow.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-range.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"
#include "regs.h"
#include "attribs.h"
#include "optabs-libfuncs.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (class _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

static unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind,
		  stmt_vec_info stmt_info, slp_tree node,
		  tree vectype, int misalign,
		  enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_scatter_store;

  stmt_info_for_cost si
    = { count, kind, where, stmt_info, node, vectype, misalign };
  body_cost_vec->safe_push (si);

  return (unsigned)
    (builtin_vectorization_cost (kind, vectype, misalign) * count);
}

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  tree vectype, int misalign,
		  enum vect_cost_model_location where)
{
  return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
			   vectype, misalign, where);
}

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, slp_tree node,
		  tree vectype, int misalign,
		  enum vect_cost_model_location where)
{
  return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
			   vectype, misalign, where);
}

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind,
		  enum vect_cost_model_location where)
{
  gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
	      || kind == scalar_stmt);
  return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
			   NULL_TREE, 0, where);
}
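
/* For example (an informal sketch, not an additional code path): a caller
   that wants to account for COUNT unaligned vector loads in the loop body
   with misalignment MISALIGN would use the stmt_info overload:

     inside_cost += record_stmt_cost (cost_vec, count, unaligned_load,
				      stmt_info, vectype, misalign,
				      vect_body);

   The static worker above reclassifies KIND to vector_gather_load or
   vector_scatter_store when STMT_INFO describes a gather/scatter access.  */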

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT_INFO and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (vec_info *vinfo,
		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT_INFO.  */

static void
write_vector_array (vec_info *vinfo,
		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    tree vect, tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Add a clobber of variable VAR to the vectorization of STMT_INFO.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
		       gimple_stmt_iterator *gsi, tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
		    enum vect_relevant relevant, bool live_p)
{
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "mark relevant %d, live %d: %G", relevant, live_p,
		     stmt_info->stmt);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern. don't mark"
			 " relevant/live.\n");

      stmt_vec_info old_stmt_info = stmt_info;
      stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);

      if (live_p && relevant == vect_unused_in_scope)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "vec_stmt_relevant_p: forcing live pattern stmt "
			     "relevant.\n");
	  relevant = vect_used_only_live;
	}

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "mark relevant %d, live %d: %G", relevant, live_p,
			 stmt_info->stmt);
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt_info);
}


/* Function is_simple_and_all_uses_invariant

   Return true if STMT_INFO is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
				  loop_vec_info loop_vinfo)
{
  tree op;
  ssa_op_iter iter;

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }
  return true;
}

/* Function vect_stmt_relevant_p.

   Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
   is "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (other than the exit condition).
   - it is an induction and the loop has multiple exits.

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  if (is_ctrl_stmt (stmt)
      && LOOP_VINFO_LOOP_IV_COND (loop_vinfo) != stmt
      && (!loop->inner || gimple_bb (stmt)->loop_father == loop))
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt_info->stmt)
	&& !gimple_clobber_p (stmt_info->stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form).  */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);

	      *live_p = true;
	    }
	}
    }

  /* Check if it's an induction in a loop with multiple exits.  In this case
     there will be a use after peeling, which is needed for the alternate
     exit.  */
  if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
      && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: induction forced for "
			 "early break.\n");
      *live_p = true;
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
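
/* For illustration: in a loop such as

     for (i = 0; i < n; i++)
       sum += a[i];

   the statement updating SUM has no vdef and is not a control stmt, but
   its result is used after the loop through the loop-closed exit phi, so
   vect_stmt_relevant_p sets *LIVE_P; a store such as b[i] = x would
   instead be marked relevant because it has a vdef.  An informal sketch
   of the rules implemented above.  */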


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT_INFO.  Check if USE is
   used in STMT_INFO for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
{
  tree operand;

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || !gimple_assign_copy_p (assign))
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (call && gimple_call_internal_p (call))
	{
	  internal_fn ifn = gimple_call_internal_fn (call);
	  int mask_index = internal_fn_mask_index (ifn);
	  if (mask_index >= 0
	      && use == gimple_call_arg (call, mask_index))
	    return true;
	  int stored_value_index = internal_fn_stored_value_index (ifn);
	  if (stored_value_index >= 0
	      && use == gimple_call_arg (call, stored_value_index))
	    return true;
	  if (internal_gather_scatter_fn_p (ifn)
	      && use == gimple_call_arg (call, 1))
	    return true;
	}
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (assign);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
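
/* For example, given

     _1 = a[i_2];
     b[i_2] = _1;

   the use of i_2 in either statement only indexes an array, so the
   function returns false for it, whereas the use of _1 in the store is
   the stored value and the function returns true.  An informal example;
   the precise rules (including masks and stored values of internal
   calls) are coded above.  */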


/*
   Function process_use.

   Inputs:
   - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
       STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
     indexing), which does not need to be directly vectorized, then the
     liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
     we skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
     "relevant" will be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static opt_result
process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
	     bool force)
{
  stmt_vec_info dstmt_vinfo;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
    return opt_result::success ();

  if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
    return opt_result::failure_at (stmt_vinfo->stmt,
				   "not vectorized:"
				   " unsupported use in stmt.\n");

  if (!dstmt_vinfo)
    return opt_result::success ();

  basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
  basic_block bb = gimple_bb (stmt_vinfo->stmt);

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
     We have to force the stmt live since the epilogue loop needs it to
     continue computing the reduction.  */
  if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
      return opt_result::success ();
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = dstmt_vinfo
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = dstmt_vinfo
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
		      || STMT_VINFO_DEF_TYPE (stmt_vinfo)
			 == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	case vect_used_only_live:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
	   && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
				      loop_latch_edge (bb->loop_father))
	       == use))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "induction value on backedge.\n");
      return opt_result::success ();
    }


  vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
  return opt_result::success ();
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

opt_result
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i;
  basic_block bb;
  bool live_p;
  enum vect_relevant relevant;

  DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");

  auto_vec<stmt_vec_info, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
			     phi_info->stmt);

	  if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi_info, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  if (is_gimple_debug (gsi_stmt (si)))
	    continue;
	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "init: stmt relevant? %G", stmt_info->stmt);

	  if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt_vec_info stmt_vinfo = worklist.pop ();
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "worklist: examine stmt: %G", stmt_vinfo->stmt);

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant according to the relevance property
	 of STMT.  */
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
	 propagated as is to the DEF_STMTs of its USEs.

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the relevance to vect_used_by_reduction.
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
	{
	case vect_reduction_def:
	  gcc_assert (relevant != vect_unused_in_scope);
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of reduction.\n");
	  break;

	case vect_nested_cycle:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_outer_by_reduction
	      && relevant != vect_used_in_outer)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
	  break;

	case vect_double_reduction_def:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (assign);
	      tree op = gimple_assign_rhs1 (assign);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  opt_result res
		    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
				   loop_vinfo, relevant, &worklist, false);
		  if (!res)
		    return res;
		  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
				     loop_vinfo, relevant, &worklist, false);
		  if (!res)
		    return res;
		  i = 2;
		}
	      for (; i < gimple_num_ops (assign); i++)
		{
		  op = gimple_op (assign, i);
		  if (TREE_CODE (op) == SSA_NAME)
		    {
		      opt_result res
			= process_use (stmt_vinfo, op, loop_vinfo, relevant,
				       &worklist, false);
		      if (!res)
			return res;
		    }
		}
	    }
	  else if (gcond *cond = dyn_cast <gcond *> (stmt_vinfo->stmt))
	    {
	      tree_code rhs_code = gimple_cond_code (cond);
	      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
	      opt_result res
		= process_use (stmt_vinfo, gimple_cond_lhs (cond),
			       loop_vinfo, relevant, &worklist, false);
	      if (!res)
		return res;
	      res = process_use (stmt_vinfo, gimple_cond_rhs (cond),
				 loop_vinfo, relevant, &worklist, false);
	      if (!res)
		return res;
	    }
	  else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (call); i++)
		{
		  tree arg = gimple_call_arg (call, i);
		  opt_result res
		    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
				   &worklist, false);
		  if (!res)
		    return res;
		}
	    }
	  else
	    gcc_unreachable ();
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    opt_result res
	      = process_use (stmt_vinfo, op, loop_vinfo, relevant,
			     &worklist, false);
	    if (!res)
	      return res;
	  }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
	{
	  gather_scatter_info gs_info;
	  if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
	    gcc_unreachable ();
	  opt_result res
	    = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
			   &worklist, true);
	  if (!res)
	    {
	      if (fatal)
		*fatal = false;
	      return res;
	    }
	}
    } /* while worklist */

  return opt_result::success ();
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (vec_info *,
			stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			int ndts,
			slp_tree node,
			stmt_vector_for_cost *cost_vec,
			vect_cost_for_stmt kind = vector_stmt)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ??? Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  if (!node)
    /* Cost the "broadcast" of a scalar operand in to a vector operand.
       Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
       cost model.  */
    for (int i = 0; i < ndts; i++)
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
				   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Model cost for type demotion and promotion operations.  PWR is
   normally zero for single-step promotions and demotions.  It will be
   one if two-step promotion/demotion is required, and so on.  NCOPIES
   is the number of vector results (and thus number of instructions)
   for the narrowest end of the operation chain.  Each additional
   step doubles the number of instructions required.  If WIDEN_ARITH
   is true the stmt is doing widening arithmetic.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt,
				    unsigned int ncopies, int pwr,
				    stmt_vector_for_cost *cost_vec,
				    bool widen_arith)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      inside_cost += record_stmt_cost (cost_vec, ncopies,
				       widen_arith
				       ? vector_stmt : vec_promote_demote,
				       stmt_info, 0, vect_body);
      ncopies *= 2;
    }

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
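
/* As a worked example of the above: a two-step promotion/demotion
   (PWR = 1) with NCOPIES = 2 at the narrow end records 2 + 4 = 6
   vec_promote_demote stmts in the loop body, plus one prologue
   vector_stmt for each constant or external operand.  Illustrative
   arithmetic only.  */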

/* Returns true if the current function returns DECL.  */

static bool
cfun_returns (tree decl)
{
  edge_iterator ei;
  edge e;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      greturn *ret = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src));
      if (!ret)
	continue;
      if (gimple_return_retval (ret) == decl)
	return true;
      /* We often end up with an aggregate copy to the result decl,
	 handle that case as well.  First skip intermediate clobbers
	 though.  */
      gimple *def = ret;
      do
	{
	  def = SSA_NAME_DEF_STMT (gimple_vuse (def));
	}
      while (gimple_clobber_p (def));
      if (is_a <gassign *> (def)
	  && gimple_assign_lhs (def) == gimple_return_retval (ret)
	  && gimple_assign_rhs1 (def) == decl)
	return true;
    }
  return false;
}

/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
		     dr_alignment_support alignment_support_scheme,
		     int misalignment,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  misalignment, vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
		    dr_alignment_support alignment_support_scheme,
		    int misalignment,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  misalignment, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
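
/* As a worked example of the above: a dr_explicit_realign load with
   NCOPIES = 2 accumulates 4 vector_load and 2 vec_perm stmts in the
   loop body, plus one vector_stmt when the target provides
   builtin_mask_for_load.  Illustrative arithmetic only.  */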

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT_VINFO.  */

static void
vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
		    gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
  else
    vinfo->insert_on_entry (stmt_vinfo, new_stmt);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created new init_stmt: %G", new_stmt);
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   a vector type, a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT_INFO.  */

tree
vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
		  gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push something to an SSA name with initial
     value VAL.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (VECTOR_TYPE_P (type));
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else
	    {
	      gimple_seq stmts = NULL;
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
				    TREE_TYPE (type), val);
	      else
		/* ??? Condition vectorization expects us to do
		   promotion of invariant/external defs.  */
		val = gimple_convert (&stmts, TREE_TYPE (type), val);
	      for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
		   !gsi_end_p (gsi2); )
		{
		  init_stmt = gsi_stmt (gsi2);
		  gsi_remove (&gsi2, false);
		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
		}
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
  return new_temp;
}
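
/* For example, requesting an initializer of 3 for a four-element integer
   vector type emits (sketch) in the loop preheader when GSI is NULL:

     cst_1 = { 3, 3, 3, 3 };

   and returns cst_1 for use as a vector operand.  */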


/* Function vect_get_vec_defs_for_operand.

   OP is an operand in STMT_VINFO.  This function returns a vector of
   NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

void
vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
			       unsigned ncopies,
			       tree op, vec<tree> *vec_oprnds, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_get_vec_defs_for_operand: %T\n", op);

  stmt_vec_info def_stmt_info;
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
				      &def_stmt_info, &def_stmt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt = %G", def_stmt);

  vec_oprnds->create (ncopies);
  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
	vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
	vector_type = truth_type_for (stmt_vectype);
      else
	vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));

      gcc_assert (vector_type);
      tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
      while (ncopies--)
	vec_oprnds->quick_push (vop);
    }
  else
    {
      def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
      gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
      for (unsigned i = 0; i < ncopies; ++i)
	vec_oprnds->quick_push (gimple_get_lhs
				  (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
    }
}
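
/* For example (informal sketch): for the loop-invariant operand X in
   Y = X + A[I] and NCOPIES = 2, a single broadcast of X is created via
   vect_init_vector in the preheader and that one vector def is pushed
   twice into VEC_OPRNDS; for an operand defined in the loop, the LHSs of
   the NCOPIES vector stmts of the (possibly pattern) defining statement
   are used instead.  */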


/* Get vectorized definitions for OP0 and OP1.  */

void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
		   unsigned ncopies,
		   tree op0, tree vectype0, vec<tree> *vec_oprnds0,
		   tree op1, tree vectype1, vec<tree> *vec_oprnds1,
		   tree op2, tree vectype2, vec<tree> *vec_oprnds2,
		   tree op3, tree vectype3, vec<tree> *vec_oprnds3)
{
  if (slp_node)
    {
      if (op0)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
      if (op1)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
      if (op2)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
      if (op3)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
    }
  else
    {
      if (op0)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op0, vec_oprnds0, vectype0);
      if (op1)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op1, vec_oprnds1, vectype1);
      if (op2)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op2, vec_oprnds2, vectype2);
      if (op3)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op3, vec_oprnds3, vectype3);
    }
}

void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
		   unsigned ncopies,
		   tree op0, vec<tree> *vec_oprnds0,
		   tree op1, vec<tree> *vec_oprnds1,
		   tree op2, vec<tree> *vec_oprnds2,
		   tree op3, vec<tree> *vec_oprnds3)
{
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		     op0, NULL_TREE, vec_oprnds0,
		     op1, NULL_TREE, vec_oprnds1,
		     op2, NULL_TREE, vec_oprnds2,
		     op3, NULL_TREE, vec_oprnds3);
}

/* Helper function called by vect_finish_replace_stmt and
   vect_finish_stmt_generation.  Set the location of the new
   statement and create and return a stmt_vec_info for it.  */

static void
vect_finish_stmt_generation_1 (vec_info *,
			       stmt_vec_info stmt_info, gimple *vec_stmt)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);

  if (stmt_info)
    {
      gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));

      /* While EH edges will generally prevent vectorization, stmt might
	 e.g. be in a must-not-throw region.  Ensure newly created stmts
	 that could throw are part of the same region.  */
      int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
      if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
	add_stmt_to_eh_lp (vec_stmt, lp_nr);
    }
  else
    gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
}

/* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
   which sets the same scalar result as STMT_INFO did.  Create and return a
   stmt_vec_info for VEC_STMT.  */

void
vect_finish_replace_stmt (vec_info *vinfo,
			  stmt_vec_info stmt_info, gimple *vec_stmt)
{
  gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
  gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));

  gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
  gsi_replace (&gsi, vec_stmt, true);

  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}

/* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
   before *GSI.  Create and return a stmt_vec_info for VEC_STMT.  */

void
vect_finish_stmt_generation (vec_info *vinfo,
			     stmt_vec_info stmt_info, gimple *vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
	{
	  tree vdef = gimple_vdef (at_stmt);
	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
	  gimple_set_modified (vec_stmt, true);
	  /* If we have an SSA vuse and insert a store, update virtual
	     SSA form to avoid triggering the renamer.  Do so only
	     if we can easily see all uses - which is what almost always
	     happens with the way vectorized stmts are inserted.  */
	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
	      && ((is_gimple_assign (vec_stmt)
		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
		  || (is_gimple_call (vec_stmt)
		      && (!(gimple_call_flags (vec_stmt)
			    & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
			  || (gimple_call_lhs (vec_stmt)
			      && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
	    {
	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
	      gimple_set_vdef (vec_stmt, new_vdef);
	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
	    }
	}
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}

/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
				tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
	{
	  bool same_size_p = TYPE_SIZE (vectype_in) == TYPE_SIZE (vectype_out);
	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);

	  /* The type sizes of both VECTYPE_IN and VECTYPE_OUT must be
	     exactly the same when VECTYPE_OUT does not participate in the
	     optab query; there is no size restriction when VECTYPE_OUT
	     is part of the optab query.  */
	  if (type0 != vectype_out && type1 != vectype_out && !same_size_p)
	    return IFN_LAST;

	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
					      OPTIMIZE_FOR_SPEED))
	    return ifn;
	}
    }
  return IFN_LAST;
}
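
/* For example, a call to sqrtf maps (via associated_internal_fn) to
   IFN_SQRT; with VECTYPE_OUT == VECTYPE_IN == V4SF the function returns
   IFN_SQRT if direct_internal_fn_supported_p confirms target support,
   and IFN_LAST otherwise.  An informal example of the lookup above.  */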
1472 | |
1473 | |
1474 | static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info, |
1475 | gimple_stmt_iterator *); |
1476 | |
1477 | /* Check whether a load or store statement in the loop described by |
1478 | LOOP_VINFO is possible in a loop using partial vectors. This is |
1479 | testing whether the vectorizer pass has the appropriate support, |
1480 | as well as whether the target does. |
1481 | |
1482 | VLS_TYPE says whether the statement is a load or store and VECTYPE |
1483 | is the type of the vector being loaded or stored. SLP_NODE is the SLP |
1484 | node that contains the statement, or null if none. MEMORY_ACCESS_TYPE |
1485 | says how the load or store is going to be implemented and GROUP_SIZE |
1486 | is the number of load or store statements in the containing group. |
1487 | If the access is a gather load or scatter store, GS_INFO describes |
1488 | its arguments. If the load or store is conditional, SCALAR_MASK is the |
1489 | condition under which it occurs. |
1490 | |
1491 | Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial |
1492 | vectors is not supported, otherwise record the required rgroup control |
1493 | types. */ |
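/* For instance, a conditional contiguous V16QI load on a target that
   provides length-controlled loads records a loop length via
   vect_record_loop_len; failing that, a target with IFN_MASK_LOAD
   support for the mode records a loop mask via vect_record_loop_mask.  */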
1494 | |
1495 | static void |
1496 | check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype, |
1497 | slp_tree slp_node, |
1498 | vec_load_store_type vls_type, |
1499 | int group_size, |
1500 | vect_memory_access_type |
1501 | memory_access_type, |
1502 | gather_scatter_info *gs_info, |
1503 | tree scalar_mask) |
1504 | { |
1505 | /* Invariant loads need no special support. */ |
1506 | if (memory_access_type == VMAT_INVARIANT) |
1507 | return; |
1508 | |
1509 | unsigned int nvectors; |
1510 | if (slp_node) |
1511 | nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); |
1512 | else |
1513 | nvectors = vect_get_num_copies (loop_vinfo, vectype); |
1514 | |
1515 | vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); |
1516 | vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo); |
1517 | machine_mode vecmode = TYPE_MODE (vectype); |
1518 | bool is_load = (vls_type == VLS_LOAD); |
1519 | if (memory_access_type == VMAT_LOAD_STORE_LANES) |
1520 | { |
1521 | internal_fn ifn |
1522 | = (is_load ? vect_load_lanes_supported (vectype, group_size, true) |
1523 | : vect_store_lanes_supported (vectype, group_size, true)); |
1524 | if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES) |
1525 | vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1); |
1526 | else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES) |
1527 | vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, |
1528 | scalar_mask); |
1529 | else |
1530 | { |
1531 | if (dump_enabled_p ()) |
1532 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1533 | "can't operate on partial vectors because" |
1534 | " the target doesn't have an appropriate" |
1535 | " load/store-lanes instruction.\n" ); |
1536 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; |
1537 | } |
1538 | return; |
1539 | } |
1540 | |
1541 | if (memory_access_type == VMAT_GATHER_SCATTER) |
1542 | { |
1543 | internal_fn ifn = (is_load |
1544 | ? IFN_MASK_GATHER_LOAD |
1545 | : IFN_MASK_SCATTER_STORE); |
1546 | internal_fn len_ifn = (is_load |
1547 | ? IFN_MASK_LEN_GATHER_LOAD |
1548 | : IFN_MASK_LEN_SCATTER_STORE); |
1549 | if (internal_gather_scatter_fn_supported_p (len_ifn, vectype, |
1550 | gs_info->memory_type, |
1551 | gs_info->offset_vectype, |
1552 | gs_info->scale)) |
1553 | vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1); |
1554 | else if (internal_gather_scatter_fn_supported_p (ifn, vectype, |
1555 | gs_info->memory_type, |
1556 | gs_info->offset_vectype, |
1557 | gs_info->scale)) |
1558 | vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, |
1559 | scalar_mask); |
1560 | else |
1561 | { |
1562 | if (dump_enabled_p ()) |
1563 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1564 | "can't operate on partial vectors because" |
1565 | " the target doesn't have an appropriate" |
1566 | " gather load or scatter store instruction.\n" ); |
1567 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; |
1568 | } |
1569 | return; |
1570 | } |
1571 | |
1572 | if (memory_access_type != VMAT_CONTIGUOUS |
1573 | && memory_access_type != VMAT_CONTIGUOUS_PERMUTE) |
1574 | { |
1575 | /* Element X of the data must come from iteration i * VF + X of the |
1576 | scalar loop. We need more work to support other mappings. */ |
1577 | if (dump_enabled_p ()) |
1578 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1579 | "can't operate on partial vectors because an" |
1580 | " access isn't contiguous.\n" ); |
1581 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; |
1582 | return; |
1583 | } |
1584 | |
1585 | if (!VECTOR_MODE_P (vecmode)) |
1586 | { |
1587 | if (dump_enabled_p ()) |
1588 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1589 | "can't operate on partial vectors when emulating" |
1590 | " vector operations.\n" ); |
1591 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; |
1592 | return; |
1593 | } |
1594 | |
1595 | /* We might load more scalars than we need for permuting SLP loads. |
1596 | We checked in get_group_load_store_type that the extra elements |
1597 | don't leak into a new vector. */ |
1598 | auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits) |
1599 | { |
1600 | unsigned int nvectors; |
    if (can_div_away_from_zero_p (size, nunits, &nvectors))
1602 | return nvectors; |
1603 | gcc_unreachable (); |
1604 | }; |
1605 | |
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1607 | poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
1608 | machine_mode mask_mode; |
1609 | machine_mode vmode; |
1610 | bool using_partial_vectors_p = false; |
  if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1612 | { |
1613 | nvectors = group_memory_nvectors (group_size * vf, nunits); |
1614 | unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode); |
1615 | vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor); |
1616 | using_partial_vectors_p = true; |
1617 | } |
  else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1619 | && can_vec_mask_load_store_p (vecmode, mask_mode, is_load)) |
1620 | { |
1621 | nvectors = group_memory_nvectors (group_size * vf, nunits); |
1622 | vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask); |
1623 | using_partial_vectors_p = true; |
1624 | } |
1625 | |
1626 | if (!using_partial_vectors_p) |
1627 | { |
1628 | if (dump_enabled_p ()) |
1629 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1630 | "can't operate on partial vectors because the" |
1631 | " target doesn't have the appropriate partial" |
1632 | " vectorization load or store.\n" ); |
1633 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; |
1634 | } |
1635 | } |
1636 | |
1637 | /* Return the mask input to a masked load or store. VEC_MASK is the vectorized |
1638 | form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask |
1639 | that needs to be applied to all loads and stores in a vectorized loop. |
1640 | Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked, |
1641 | otherwise return VEC_MASK & LOOP_MASK. |
1642 | |
1643 | MASK_TYPE is the type of both masks. If new statements are needed, |
1644 | insert them before GSI. */ |
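/* For example, given a loop mask loop_mask_6 and a vectorized scalar
   mask mask__5, this emits

     vec_mask_and_7 = mask__5 & loop_mask_6;

   before GSI and returns vec_mask_and_7 (the SSA names here are purely
   illustrative).  */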
1645 | |
1646 | static tree |
1647 | prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask, |
1648 | tree vec_mask, gimple_stmt_iterator *gsi) |
1649 | { |
1650 | gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask))); |
1651 | if (!loop_mask) |
1652 | return vec_mask; |
1653 | |
1654 | gcc_assert (TREE_TYPE (loop_mask) == mask_type); |
1655 | |
  if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1657 | return vec_mask; |
1658 | |
  tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1660 | gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR, |
1661 | vec_mask, loop_mask); |
1662 | |
1663 | gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT); |
1664 | return and_res; |
1665 | } |
1666 | |
1667 | /* Determine whether we can use a gather load or scatter store to vectorize |
1668 | strided load or store STMT_INFO by truncating the current offset to a |
1669 | smaller width. We need to be able to construct an offset vector: |
1670 | |
1671 | { 0, X, X*2, X*3, ... } |
1672 | |
1673 | without loss of precision, where X is STMT_INFO's DR_STEP. |
1674 | |
1675 | Return true if this is possible, describing the gather load or scatter |
1676 | store in GS_INFO. MASKED_P is true if the load or store is conditional. */ |
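/* A worked example with hypothetical numbers: if the maximum VF is 16,
   COUNT starts as 15.  With a 4-byte element and DR_STEP = 20, trying
   SCALE = 4 gives X = 5, so the largest offset needed is 15 * 5 = 75.
   That requires 7 bits, which is rounded up to an 8-bit unsigned
   offset type; the truncation succeeds if the target supports the
   gather or scatter with such an offset.  */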
1677 | |
1678 | static bool |
1679 | vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info, |
1680 | loop_vec_info loop_vinfo, bool masked_p, |
1681 | gather_scatter_info *gs_info) |
1682 | { |
1683 | dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); |
1684 | data_reference *dr = dr_info->dr; |
1685 | tree step = DR_STEP (dr); |
1686 | if (TREE_CODE (step) != INTEGER_CST) |
1687 | { |
1688 | /* ??? Perhaps we could use range information here? */ |
1689 | if (dump_enabled_p ()) |
1690 | dump_printf_loc (MSG_NOTE, vect_location, |
1691 | "cannot truncate variable step.\n" ); |
1692 | return false; |
1693 | } |
1694 | |
1695 | /* Get the number of bits in an element. */ |
1696 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
1697 | scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype)); |
  unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1699 | |
1700 | /* Set COUNT to the upper limit on the number of elements - 1. |
1701 | Start with the maximum vectorization factor. */ |
1702 | unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1; |
1703 | |
1704 | /* Try lowering COUNT to the number of scalar latch iterations. */ |
1705 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
1706 | widest_int max_iters; |
1707 | if (max_loop_iterations (loop, &max_iters) |
1708 | && max_iters < count) |
1709 | count = max_iters.to_shwi (); |
1710 | |
1711 | /* Try scales of 1 and the element size. */ |
1712 | int scales[] = { 1, vect_get_scalar_dr_size (dr_info) }; |
1713 | wi::overflow_type overflow = wi::OVF_NONE; |
1714 | for (int i = 0; i < 2; ++i) |
1715 | { |
1716 | int scale = scales[i]; |
1717 | widest_int factor; |
      if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1719 | continue; |
1720 | |
1721 | /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */ |
      widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1723 | if (overflow) |
1724 | continue; |
1725 | signop sign = range >= 0 ? UNSIGNED : SIGNED; |
      unsigned int min_offset_bits = wi::min_precision (range, sign);
1727 | |
1728 | /* Find the narrowest viable offset type. */ |
      unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1730 | tree offset_type = build_nonstandard_integer_type (offset_bits, |
1731 | sign == UNSIGNED); |
1732 | |
1733 | /* See whether the target supports the operation with an offset |
1734 | no narrower than OFFSET_TYPE. */ |
1735 | tree memory_type = TREE_TYPE (DR_REF (dr)); |
1736 | if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p, |
1737 | vectype, memory_type, offset_type, scale, |
1738 | &gs_info->ifn, &gs_info->offset_vectype) |
1739 | || gs_info->ifn == IFN_LAST) |
1740 | continue; |
1741 | |
1742 | gs_info->decl = NULL_TREE; |
1743 | /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET, |
1744 | but we don't need to store that here. */ |
1745 | gs_info->base = NULL_TREE; |
1746 | gs_info->element_type = TREE_TYPE (vectype); |
1747 | gs_info->offset = fold_convert (offset_type, step); |
1748 | gs_info->offset_dt = vect_constant_def; |
1749 | gs_info->scale = scale; |
1750 | gs_info->memory_type = memory_type; |
1751 | return true; |
1752 | } |
1753 | |
1754 | if (overflow && dump_enabled_p ()) |
1755 | dump_printf_loc (MSG_NOTE, vect_location, |
1756 | "truncating gather/scatter offset to %d bits" |
1757 | " might change its value.\n" , element_bits); |
1758 | |
1759 | return false; |
1760 | } |
1761 | |
1762 | /* Return true if we can use gather/scatter internal functions to |
1763 | vectorize STMT_INFO, which is a grouped or strided load or store. |
1764 | MASKED_P is true if load or store is conditional. When returning |
1765 | true, fill in GS_INFO with the information required to perform the |
1766 | operation. */ |
1767 | |
1768 | static bool |
1769 | vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info, |
1770 | loop_vec_info loop_vinfo, bool masked_p, |
1771 | gather_scatter_info *gs_info) |
1772 | { |
1773 | if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info) |
1774 | || gs_info->ifn == IFN_LAST) |
1775 | return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo, |
1776 | masked_p, gs_info); |
1777 | |
1778 | tree old_offset_type = TREE_TYPE (gs_info->offset); |
1779 | tree new_offset_type = TREE_TYPE (gs_info->offset_vectype); |
1780 | |
1781 | gcc_assert (TYPE_PRECISION (new_offset_type) |
1782 | >= TYPE_PRECISION (old_offset_type)); |
1783 | gs_info->offset = fold_convert (new_offset_type, gs_info->offset); |
1784 | |
1785 | if (dump_enabled_p ()) |
1786 | dump_printf_loc (MSG_NOTE, vect_location, |
1787 | "using gather/scatter for strided/grouped access," |
1788 | " scale = %d\n" , gs_info->scale); |
1789 | |
1790 | return true; |
1791 | } |
1792 | |
1793 | /* STMT_INFO is a non-strided load or store, meaning that it accesses |
1794 | elements with a known constant step. Return -1 if that step |
1795 | is negative, 0 if it is zero, and 1 if it is greater than zero. */ |
1796 | |
1797 | static int |
1798 | compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info) |
1799 | { |
1800 | dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); |
  return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
                               size_zero_node);
1803 | } |
1804 | |
1805 | /* If the target supports a permute mask that reverses the elements in |
1806 | a vector of type VECTYPE, return that mask, otherwise return null. */ |
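/* For example, for V4SI the mask encodes the permutation
   { 3, 2, 1, 0 }.  */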
1807 | |
1808 | tree |
1809 | perm_mask_for_reverse (tree vectype) |
1810 | { |
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1812 | |
1813 | /* The encoding has a single stepped pattern. */ |
1814 | vec_perm_builder sel (nunits, 1, 3); |
1815 | for (int i = 0; i < 3; ++i) |
    sel.quick_push (nunits - 1 - i);
1817 | |
1818 | vec_perm_indices indices (sel, 1, nunits); |
1819 | if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype), |
1820 | indices)) |
1821 | return NULL_TREE; |
1822 | return vect_gen_perm_mask_checked (vectype, indices); |
1823 | } |
1824 | |
1825 | /* A subroutine of get_load_store_type, with a subset of the same |
1826 | arguments. Handle the case where STMT_INFO is a load or store that |
1827 | accesses consecutive elements with a negative step. Sets *POFFSET |
1828 | to the offset to be applied to the DR for the first access. */ |
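/* For example, for a V4SI access the first vector load must start
   3 elements (12 bytes) before the DR's address, so *POFFSET is
   set to -12.  */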
1829 | |
1830 | static vect_memory_access_type |
1831 | get_negative_load_store_type (vec_info *vinfo, |
1832 | stmt_vec_info stmt_info, tree vectype, |
1833 | vec_load_store_type vls_type, |
1834 | unsigned int ncopies, poly_int64 *poffset) |
1835 | { |
1836 | dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); |
1837 | dr_alignment_support alignment_support_scheme; |
1838 | |
1839 | if (ncopies > 1) |
1840 | { |
1841 | if (dump_enabled_p ()) |
1842 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1843 | "multiple types with negative step.\n" ); |
1844 | return VMAT_ELEMENTWISE; |
1845 | } |
1846 | |
1847 | /* For backward running DRs the first access in vectype actually is |
1848 | N-1 elements before the address of the DR. */ |
  *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
1850 | * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))); |
1851 | |
  int misalignment = dr_misalignment (dr_info, vectype, *poffset);
1853 | alignment_support_scheme |
1854 | = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment); |
1855 | if (alignment_support_scheme != dr_aligned |
1856 | && alignment_support_scheme != dr_unaligned_supported) |
1857 | { |
1858 | if (dump_enabled_p ()) |
1859 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1860 | "negative step but alignment required.\n" ); |
1861 | *poffset = 0; |
1862 | return VMAT_ELEMENTWISE; |
1863 | } |
1864 | |
1865 | if (vls_type == VLS_STORE_INVARIANT) |
1866 | { |
1867 | if (dump_enabled_p ()) |
1868 | dump_printf_loc (MSG_NOTE, vect_location, |
1869 | "negative step with invariant source;" |
1870 | " no permute needed.\n" ); |
1871 | return VMAT_CONTIGUOUS_DOWN; |
1872 | } |
1873 | |
1874 | if (!perm_mask_for_reverse (vectype)) |
1875 | { |
1876 | if (dump_enabled_p ()) |
1877 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1878 | "negative step and reversing not supported.\n" ); |
1879 | *poffset = 0; |
1880 | return VMAT_ELEMENTWISE; |
1881 | } |
1882 | |
1883 | return VMAT_CONTIGUOUS_REVERSE; |
1884 | } |
1885 | |
1886 | /* STMT_INFO is either a masked or unconditional store. Return the value |
1887 | being stored. */ |
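/* For example, for a masked store .MASK_STORE (addr, align, mask, value)
   this returns the VALUE argument, whose stored-value index is 3.  */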
1888 | |
1889 | tree |
1890 | vect_get_store_rhs (stmt_vec_info stmt_info) |
1891 | { |
  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
1893 | { |
1894 | gcc_assert (gimple_assign_single_p (assign)); |
      return gimple_assign_rhs1 (assign);
1896 | } |
  if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
    {
      internal_fn ifn = gimple_call_internal_fn (call);
      int index = internal_fn_stored_value_index (ifn);
      gcc_assert (index >= 0);
      return gimple_call_arg (call, index);
1903 | } |
1904 | gcc_unreachable (); |
1905 | } |
1906 | |
/* Function VECTOR_VECTOR_COMPOSITION_TYPE

   This function returns a vector type which can be composed of NELTS
   pieces, whose type is recorded in PTYPE.  VTYPE should be a vector type
   and has the same vector size as the return vector.  It first checks
   whether the target supports a pieces-size vector mode for the
   construction; if not, it then checks for a pieces-size scalar mode.
   It returns NULL_TREE if no suitable composition can be found.

   For example, for (vtype=V16QI, nelts=4), we can probably get:
     - V16QI with PTYPE V4QI.
     - V4SI with PTYPE SI.
     - NULL_TREE. */
1920 | |
1921 | static tree |
1922 | vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype) |
1923 | { |
1924 | gcc_assert (VECTOR_TYPE_P (vtype)); |
1925 | gcc_assert (known_gt (nelts, 0U)); |
1926 | |
1927 | machine_mode vmode = TYPE_MODE (vtype); |
1928 | if (!VECTOR_MODE_P (vmode)) |
1929 | return NULL_TREE; |
1930 | |
1931 | /* When we are asked to compose the vector from its components let |
1932 | that happen directly. */ |
1933 | if (known_eq (TYPE_VECTOR_SUBPARTS (vtype), nelts)) |
1934 | { |
1935 | *ptype = TREE_TYPE (vtype); |
1936 | return vtype; |
1937 | } |
1938 | |
  poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
1940 | unsigned int pbsize; |
  if (constant_multiple_p (vbsize, nelts, &pbsize))
1942 | { |
1943 | /* First check if vec_init optab supports construction from |
1944 | vector pieces directly. */ |
1945 | scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype)); |
      poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
      machine_mode rmode;
      if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
          && (convert_optab_handler (vec_init_optab, vmode, rmode)
1950 | != CODE_FOR_nothing)) |
1951 | { |
1952 | *ptype = build_vector_type (TREE_TYPE (vtype), inelts); |
1953 | return vtype; |
1954 | } |
1955 | |
1956 | /* Otherwise check if exists an integer type of the same piece size and |
1957 | if vec_init optab supports construction from it directly. */ |
      if (int_mode_for_size (pbsize, 0).exists (&elmode)
          && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
          && (convert_optab_handler (vec_init_optab, rmode, elmode)
1961 | != CODE_FOR_nothing)) |
1962 | { |
1963 | *ptype = build_nonstandard_integer_type (pbsize, 1); |
1964 | return build_vector_type (*ptype, nelts); |
1965 | } |
1966 | } |
1967 | |
1968 | return NULL_TREE; |
1969 | } |
1970 | |
1971 | /* A subroutine of get_load_store_type, with a subset of the same |
1972 | arguments. Handle the case where STMT_INFO is part of a grouped load |
1973 | or store. |
1974 | |
1975 | For stores, the statements in the group are all consecutive |
1976 | and there is no gap at the end. For loads, the statements in the |
1977 | group might not be consecutive; there can be gaps between statements |
1978 | as well as at the end. */ |
1979 | |
1980 | static bool |
1981 | get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, |
1982 | tree vectype, slp_tree slp_node, |
1983 | bool masked_p, vec_load_store_type vls_type, |
1984 | vect_memory_access_type *memory_access_type, |
1985 | poly_int64 *poffset, |
1986 | dr_alignment_support *alignment_support_scheme, |
1987 | int *misalignment, |
1988 | gather_scatter_info *gs_info, |
1989 | internal_fn *lanes_ifn) |
1990 | { |
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1992 | class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL; |
1993 | stmt_vec_info first_stmt_info; |
1994 | unsigned int group_size; |
1995 | unsigned HOST_WIDE_INT gap; |
1996 | if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) |
1997 | { |
1998 | first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); |
1999 | group_size = DR_GROUP_SIZE (first_stmt_info); |
2000 | gap = DR_GROUP_GAP (first_stmt_info); |
2001 | } |
2002 | else |
2003 | { |
2004 | first_stmt_info = stmt_info; |
2005 | group_size = 1; |
2006 | gap = 0; |
2007 | } |
2008 | dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info); |
2009 | bool single_element_p = (stmt_info == first_stmt_info |
2010 | && !DR_GROUP_NEXT_ELEMENT (stmt_info)); |
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2012 | |
2013 | /* True if the vectorized statements would access beyond the last |
2014 | statement in the group. */ |
2015 | bool overrun_p = false; |
2016 | |
2017 | /* True if we can cope with such overrun by peeling for gaps, so that |
2018 | there is at least one final scalar iteration after the vector loop. */ |
2019 | bool can_overrun_p = (!masked_p |
2020 | && vls_type == VLS_LOAD |
2021 | && loop_vinfo |
2022 | && !loop->inner); |
2023 | |
2024 | /* There can only be a gap at the end of the group if the stride is |
2025 | known at compile time. */ |
2026 | gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0); |
2027 | |
2028 | /* Stores can't yet have gaps. */ |
2029 | gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0); |
2030 | |
2031 | if (slp_node) |
2032 | { |
2033 | /* For SLP vectorization we directly vectorize a subchain |
2034 | without permutation. */ |
2035 | if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()) |
2036 | first_dr_info |
2037 | = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]); |
2038 | if (STMT_VINFO_STRIDED_P (first_stmt_info)) |
2039 | { |
2040 | /* Try to use consecutive accesses of DR_GROUP_SIZE elements, |
2041 | separated by the stride, until we have a complete vector. |
2042 | Fall back to scalar accesses if that isn't possible. */ |
          if (multiple_p (nunits, group_size))
2044 | *memory_access_type = VMAT_STRIDED_SLP; |
2045 | else |
2046 | *memory_access_type = VMAT_ELEMENTWISE; |
2047 | } |
2048 | else |
2049 | { |
2050 | overrun_p = loop_vinfo && gap != 0; |
2051 | if (overrun_p && vls_type != VLS_LOAD) |
2052 | { |
2053 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2054 | "Grouped store with gaps requires" |
2055 | " non-consecutive accesses\n" ); |
2056 | return false; |
2057 | } |
2058 | /* An overrun is fine if the trailing elements are smaller |
2059 | than the alignment boundary B. Every vector access will |
2060 | be a multiple of B and so we are guaranteed to access a |
2061 | non-gap element in the same B-sized block. */ |
2062 | if (overrun_p |
              && gap < (vect_known_alignment_in_bytes (first_dr_info,
                                                       vectype)
                        / vect_get_scalar_dr_size (first_dr_info)))
2066 | overrun_p = false; |
2067 | |
2068 | /* If the gap splits the vector in half and the target |
2069 | can do half-vector operations avoid the epilogue peeling |
2070 | by simply loading half of the vector only. Usually |
2071 | the construction with an upper zero half will be elided. */ |
2072 | dr_alignment_support alss; |
          int misalign = dr_misalignment (first_dr_info, vectype);
2074 | tree half_vtype; |
2075 | if (overrun_p |
2076 | && !masked_p |
2077 | && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info, |
2078 | vectype, misalign))) |
2079 | == dr_aligned |
2080 | || alss == dr_unaligned_supported) |
2081 | && known_eq (nunits, (group_size - gap) * 2) |
2082 | && known_eq (nunits, group_size) |
              && (vector_vector_composition_type (vectype, 2, &half_vtype)
2084 | != NULL_TREE)) |
2085 | overrun_p = false; |
2086 | |
2087 | if (overrun_p && !can_overrun_p) |
2088 | { |
2089 | if (dump_enabled_p ()) |
2090 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2091 | "Peeling for outer loop is not supported\n" ); |
2092 | return false; |
2093 | } |
2094 | int cmp = compare_step_with_zero (vinfo, stmt_info); |
2095 | if (cmp < 0) |
2096 | { |
2097 | if (single_element_p) |
2098 | /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is |
2099 | only correct for single element "interleaving" SLP. */ |
2100 | *memory_access_type = get_negative_load_store_type |
                (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2102 | else |
2103 | { |
2104 | /* Try to use consecutive accesses of DR_GROUP_SIZE elements, |
2105 | separated by the stride, until we have a complete vector. |
2106 | Fall back to scalar accesses if that isn't possible. */ |
                  if (multiple_p (nunits, group_size))
2108 | *memory_access_type = VMAT_STRIDED_SLP; |
2109 | else |
2110 | *memory_access_type = VMAT_ELEMENTWISE; |
2111 | } |
2112 | } |
2113 | else if (cmp == 0 && loop_vinfo) |
2114 | { |
2115 | gcc_assert (vls_type == VLS_LOAD); |
2116 | *memory_access_type = VMAT_INVARIANT; |
2117 | /* Invariant accesses perform only component accesses, alignment |
2118 | is irrelevant for them. */ |
2119 | *alignment_support_scheme = dr_unaligned_supported; |
2120 | } |
2121 | else |
2122 | *memory_access_type = VMAT_CONTIGUOUS; |
2123 | |
2124 | /* When we have a contiguous access across loop iterations |
2125 | but the access in the loop doesn't cover the full vector |
2126 | we can end up with no gap recorded but still excess |
2127 | elements accessed, see PR103116. Make sure we peel for |
2128 | gaps if necessary and sufficient and give up if not. |
2129 | |
2130 | If there is a combination of the access not covering the full |
2131 | vector and a gap recorded then we may need to peel twice. */ |
2132 | if (loop_vinfo |
2133 | && *memory_access_type == VMAT_CONTIGUOUS |
2134 | && SLP_TREE_LOAD_PERMUTATION (slp_node).exists () |
          && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
                          nunits))
2137 | { |
2138 | unsigned HOST_WIDE_INT cnunits, cvf; |
2139 | if (!can_overrun_p |
                  || !nunits.is_constant (&cnunits)
                  || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
2142 | /* Peeling for gaps assumes that a single scalar iteration |
2143 | is enough to make sure the last vector iteration doesn't |
2144 | access excess elements. |
2145 | ??? Enhancements include peeling multiple iterations |
2146 | or using masked loads with a static mask. */ |
2147 | || (group_size * cvf) % cnunits + group_size - gap < cnunits) |
2148 | { |
2149 | if (dump_enabled_p ()) |
2150 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2151 | "peeling for gaps insufficient for " |
2152 | "access\n" ); |
2153 | return false; |
2154 | } |
2155 | overrun_p = true; |
2156 | } |
2157 | } |
2158 | } |
2159 | else |
2160 | { |
2161 | /* We can always handle this case using elementwise accesses, |
2162 | but see if something more efficient is available. */ |
2163 | *memory_access_type = VMAT_ELEMENTWISE; |
2164 | |
2165 | /* If there is a gap at the end of the group then these optimizations |
2166 | would access excess elements in the last iteration. */ |
2167 | bool would_overrun_p = (gap != 0); |
2168 | /* An overrun is fine if the trailing elements are smaller than the |
2169 | alignment boundary B. Every vector access will be a multiple of B |
2170 | and so we are guaranteed to access a non-gap element in the |
2171 | same B-sized block. */ |
2172 | if (would_overrun_p |
2173 | && !masked_p |
          && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
                    / vect_get_scalar_dr_size (first_dr_info)))
2176 | would_overrun_p = false; |
2177 | |
2178 | if (!STMT_VINFO_STRIDED_P (first_stmt_info) |
2179 | && (can_overrun_p || !would_overrun_p) |
2180 | && compare_step_with_zero (vinfo, stmt_info) > 0) |
2181 | { |
2182 | /* First cope with the degenerate case of a single-element |
2183 | vector. */ |
2184 | if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U)) |
2185 | ; |
2186 | |
2187 | else |
2188 | { |
2189 | /* Otherwise try using LOAD/STORE_LANES. */ |
2190 | *lanes_ifn |
2191 | = vls_type == VLS_LOAD |
2192 | ? vect_load_lanes_supported (vectype, group_size, masked_p) |
2193 | : vect_store_lanes_supported (vectype, group_size, |
2194 | masked_p); |
2195 | if (*lanes_ifn != IFN_LAST) |
2196 | { |
2197 | *memory_access_type = VMAT_LOAD_STORE_LANES; |
2198 | overrun_p = would_overrun_p; |
2199 | } |
2200 | |
2201 | /* If that fails, try using permuting loads. */ |
2202 | else if (vls_type == VLS_LOAD |
2203 | ? vect_grouped_load_supported (vectype, |
2204 | single_element_p, |
2205 | group_size) |
2206 | : vect_grouped_store_supported (vectype, group_size)) |
2207 | { |
2208 | *memory_access_type = VMAT_CONTIGUOUS_PERMUTE; |
2209 | overrun_p = would_overrun_p; |
2210 | } |
2211 | } |
2212 | } |
2213 | |
      /* As a last resort, try using a gather load or scatter store.
2215 | |
2216 | ??? Although the code can handle all group sizes correctly, |
2217 | it probably isn't a win to use separate strided accesses based |
2218 | on nearby locations. Or, even if it's a win over scalar code, |
2219 | it might not be a win over vectorizing at a lower VF, if that |
2220 | allows us to use contiguous accesses. */ |
2221 | if (*memory_access_type == VMAT_ELEMENTWISE |
2222 | && single_element_p |
2223 | && loop_vinfo |
2224 | && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo, |
2225 | masked_p, gs_info)) |
2226 | *memory_access_type = VMAT_GATHER_SCATTER; |
2227 | } |
2228 | |
2229 | if (*memory_access_type == VMAT_GATHER_SCATTER |
2230 | || *memory_access_type == VMAT_ELEMENTWISE) |
2231 | { |
2232 | *alignment_support_scheme = dr_unaligned_supported; |
2233 | *misalignment = DR_MISALIGNMENT_UNKNOWN; |
2234 | } |
2235 | else |
2236 | { |
      *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2238 | *alignment_support_scheme |
2239 | = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype, |
2240 | *misalignment); |
2241 | } |
2242 | |
2243 | if (vls_type != VLS_LOAD && first_stmt_info == stmt_info) |
2244 | { |
2245 | /* STMT is the leader of the group. Check the operands of all the |
2246 | stmts of the group. */ |
2247 | stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info); |
2248 | while (next_stmt_info) |
2249 | { |
          tree op = vect_get_store_rhs (next_stmt_info);
2251 | enum vect_def_type dt; |
2252 | if (!vect_is_simple_use (op, vinfo, &dt)) |
2253 | { |
2254 | if (dump_enabled_p ()) |
2255 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2256 | "use not simple.\n" ); |
2257 | return false; |
2258 | } |
2259 | next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); |
2260 | } |
2261 | } |
2262 | |
2263 | if (overrun_p) |
2264 | { |
2265 | gcc_assert (can_overrun_p); |
2266 | if (dump_enabled_p ()) |
2267 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2268 | "Data access with gaps requires scalar " |
2269 | "epilogue loop\n" ); |
2270 | LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true; |
2271 | } |
2272 | |
2273 | return true; |
2274 | } |
2275 | |
2276 | /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true |
2277 | if there is a memory access type that the vectorized form can use, |
2278 | storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers |
2279 | or scatters, fill in GS_INFO accordingly. In addition |
2280 | *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if |
2281 | the target does not support the alignment scheme. *MISALIGNMENT |
2282 | is set according to the alignment of the access (including |
2283 | DR_MISALIGNMENT_UNKNOWN when it is unknown). |
2284 | |
2285 | SLP says whether we're performing SLP rather than loop vectorization. |
2286 | MASKED_P is true if the statement is conditional on a vectorized mask. |
2287 | VECTYPE is the vector type that the vectorized statements will use. |
2288 | NCOPIES is the number of vector statements that will be needed. */ |
2289 | |
2290 | static bool |
2291 | get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, |
2292 | tree vectype, slp_tree slp_node, |
2293 | bool masked_p, vec_load_store_type vls_type, |
2294 | unsigned int ncopies, |
2295 | vect_memory_access_type *memory_access_type, |
2296 | poly_int64 *poffset, |
2297 | dr_alignment_support *alignment_support_scheme, |
2298 | int *misalignment, |
2299 | gather_scatter_info *gs_info, |
2300 | internal_fn *lanes_ifn) |
2301 | { |
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2304 | *misalignment = DR_MISALIGNMENT_UNKNOWN; |
2305 | *poffset = 0; |
2306 | if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) |
2307 | { |
2308 | *memory_access_type = VMAT_GATHER_SCATTER; |
2309 | if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)) |
2310 | gcc_unreachable (); |
2311 | /* When using internal functions, we rely on pattern recognition |
2312 | to convert the type of the offset to the type that the target |
2313 | requires, with the result being a call to an internal function. |
2314 | If that failed for some reason (e.g. because another pattern |
2315 | took priority), just handle cases in which the offset already |
2316 | has the right type. */ |
2317 | else if (gs_info->ifn != IFN_LAST |
           && !is_gimple_call (stmt_info->stmt)
2319 | && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset), |
2320 | TREE_TYPE (gs_info->offset_vectype))) |
2321 | { |
2322 | if (dump_enabled_p ()) |
2323 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2324 | "%s offset requires a conversion\n" , |
2325 | vls_type == VLS_LOAD ? "gather" : "scatter" ); |
2326 | return false; |
2327 | } |
2328 | else if (!vect_is_simple_use (gs_info->offset, vinfo, |
2329 | &gs_info->offset_dt, |
2330 | &gs_info->offset_vectype)) |
2331 | { |
2332 | if (dump_enabled_p ()) |
2333 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2334 | "%s index use not simple.\n" , |
2335 | vls_type == VLS_LOAD ? "gather" : "scatter" ); |
2336 | return false; |
2337 | } |
2338 | else if (gs_info->ifn == IFN_LAST && !gs_info->decl) |
2339 | { |
          if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
              || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
              || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
                                         (gs_info->offset_vectype),
                                       TYPE_VECTOR_SUBPARTS (vectype)))
2345 | { |
2346 | if (dump_enabled_p ()) |
2347 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2348 | "unsupported vector types for emulated " |
2349 | "gather.\n" ); |
2350 | return false; |
2351 | } |
2352 | } |
2353 | /* Gather-scatter accesses perform only component accesses, alignment |
2354 | is irrelevant for them. */ |
2355 | *alignment_support_scheme = dr_unaligned_supported; |
2356 | } |
2357 | else if (STMT_VINFO_GROUPED_ACCESS (stmt_info) || slp_node) |
2358 | { |
2359 | if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node, |
2360 | masked_p, |
2361 | vls_type, memory_access_type, poffset, |
2362 | alignment_support_scheme, |
2363 | misalignment, gs_info, lanes_ifn)) |
2364 | return false; |
2365 | } |
2366 | else if (STMT_VINFO_STRIDED_P (stmt_info)) |
2367 | { |
2368 | gcc_assert (!slp_node); |
2369 | if (loop_vinfo |
2370 | && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo, |
2371 | masked_p, gs_info)) |
2372 | *memory_access_type = VMAT_GATHER_SCATTER; |
2373 | else |
2374 | *memory_access_type = VMAT_ELEMENTWISE; |
2375 | /* Alignment is irrelevant here. */ |
2376 | *alignment_support_scheme = dr_unaligned_supported; |
2377 | } |
2378 | else |
2379 | { |
2380 | int cmp = compare_step_with_zero (vinfo, stmt_info); |
2381 | if (cmp == 0) |
2382 | { |
2383 | gcc_assert (vls_type == VLS_LOAD); |
2384 | *memory_access_type = VMAT_INVARIANT; |
2385 | /* Invariant accesses perform only component accesses, alignment |
2386 | is irrelevant for them. */ |
2387 | *alignment_support_scheme = dr_unaligned_supported; |
2388 | } |
2389 | else |
2390 | { |
2391 | if (cmp < 0) |
2392 | *memory_access_type = get_negative_load_store_type |
2393 | (vinfo, stmt_info, vectype, vls_type, ncopies, poffset); |
2394 | else |
2395 | *memory_access_type = VMAT_CONTIGUOUS; |
          *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
                                           vectype, *poffset);
2398 | *alignment_support_scheme |
2399 | = vect_supportable_dr_alignment (vinfo, |
2400 | STMT_VINFO_DR_INFO (stmt_info), |
2401 | vectype, *misalignment); |
2402 | } |
2403 | } |
2404 | |
2405 | if ((*memory_access_type == VMAT_ELEMENTWISE |
2406 | || *memory_access_type == VMAT_STRIDED_SLP) |
2407 | && !nunits.is_constant ()) |
2408 | { |
2409 | if (dump_enabled_p ()) |
2410 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2411 | "Not using elementwise accesses due to variable " |
2412 | "vectorization factor.\n" ); |
2413 | return false; |
2414 | } |
2415 | |
2416 | if (*alignment_support_scheme == dr_unaligned_unsupported) |
2417 | { |
2418 | if (dump_enabled_p ()) |
2419 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2420 | "unsupported unaligned access\n" ); |
2421 | return false; |
2422 | } |
2423 | |
2424 | /* FIXME: At the moment the cost model seems to underestimate the |
2425 | cost of using elementwise accesses. This check preserves the |
2426 | traditional behavior until that can be fixed. */ |
2427 | stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); |
2428 | if (!first_stmt_info) |
2429 | first_stmt_info = stmt_info; |
2430 | if (*memory_access_type == VMAT_ELEMENTWISE |
2431 | && !STMT_VINFO_STRIDED_P (first_stmt_info) |
2432 | && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info) |
2433 | && !DR_GROUP_NEXT_ELEMENT (stmt_info) |
2434 | && !pow2p_hwi (DR_GROUP_SIZE (stmt_info)))) |
2435 | { |
2436 | if (dump_enabled_p ()) |
2437 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2438 | "not falling back to elementwise accesses\n" ); |
2439 | return false; |
2440 | } |
2441 | return true; |
2442 | } |
2443 | |
2444 | /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing |
2445 | conditional operation STMT_INFO. When returning true, store the mask |
2446 | in *MASK, the type of its definition in *MASK_DT_OUT, the type of the |
2447 | vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding |
2448 | to the mask in *MASK_NODE if MASK_NODE is not NULL. */ |
2449 | |
2450 | static bool |
2451 | vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info, |
2452 | slp_tree slp_node, unsigned mask_index, |
2453 | tree *mask, slp_tree *mask_node, |
2454 | vect_def_type *mask_dt_out, tree *mask_vectype_out) |
2455 | { |
2456 | enum vect_def_type mask_dt; |
2457 | tree mask_vectype; |
2458 | slp_tree mask_node_1; |
2459 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index, |
2460 | mask, &mask_node_1, &mask_dt, &mask_vectype)) |
2461 | { |
2462 | if (dump_enabled_p ()) |
2463 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2464 | "mask use not simple.\n" ); |
2465 | return false; |
2466 | } |
2467 | |
2468 | if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask))) |
2469 | { |
2470 | if (dump_enabled_p ()) |
2471 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2472 | "mask argument is not a boolean.\n" ); |
2473 | return false; |
2474 | } |
2475 | |
2476 | /* If the caller is not prepared for adjusting an external/constant |
2477 | SLP mask vector type fail. */ |
2478 | if (slp_node |
2479 | && !mask_node |
2480 | && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def) |
2481 | { |
2482 | if (dump_enabled_p ()) |
2483 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2484 | "SLP mask argument is not vectorized.\n" ); |
2485 | return false; |
2486 | } |
2487 | |
2488 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
2489 | if (!mask_vectype) |
2490 | mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype), |
2491 | mask_node_1); |
2492 | |
2493 | if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)) |
2494 | { |
2495 | if (dump_enabled_p ()) |
2496 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2497 | "could not find an appropriate vector mask type.\n" ); |
2498 | return false; |
2499 | } |
2500 | |
  if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
                TYPE_VECTOR_SUBPARTS (vectype)))
2503 | { |
2504 | if (dump_enabled_p ()) |
2505 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2506 | "vector mask type %T" |
2507 | " does not match vector data type %T.\n" , |
2508 | mask_vectype, vectype); |
2509 | |
2510 | return false; |
2511 | } |
2512 | |
2513 | *mask_dt_out = mask_dt; |
2514 | *mask_vectype_out = mask_vectype; |
2515 | if (mask_node) |
2516 | *mask_node = mask_node_1; |
2517 | return true; |
2518 | } |
2519 | |
2520 | /* Return true if stored value is suitable for vectorizing store |
2521 | statement STMT_INFO. When returning true, store the scalar stored |
2522 | in *RHS and *RHS_NODE, the type of the definition in *RHS_DT_OUT, |
2523 | the type of the vectorized store value in |
2524 | *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */ |
2525 | |
2526 | static bool |
2527 | vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info, |
2528 | slp_tree slp_node, tree *rhs, slp_tree *rhs_node, |
2529 | vect_def_type *rhs_dt_out, tree *rhs_vectype_out, |
2530 | vec_load_store_type *vls_type_out) |
2531 | { |
2532 | int op_no = 0; |
  if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
    {
      if (gimple_call_internal_p (call)
          && internal_store_fn_p (gimple_call_internal_fn (call)))
        op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
    }
  if (slp_node)
    op_no = vect_slp_child_index_for_operand
              (stmt_info->stmt, op_no, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
2542 | |
2543 | enum vect_def_type rhs_dt; |
2544 | tree rhs_vectype; |
2545 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no, |
2546 | rhs, rhs_node, &rhs_dt, &rhs_vectype)) |
2547 | { |
2548 | if (dump_enabled_p ()) |
2549 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2550 | "use not simple.\n" ); |
2551 | return false; |
2552 | } |
2553 | |
2554 | /* In the case this is a store from a constant make sure |
2555 | native_encode_expr can handle it. */ |
2556 | if (CONSTANT_CLASS_P (*rhs) && native_encode_expr (*rhs, NULL, 64) == 0) |
2557 | { |
2558 | if (dump_enabled_p ()) |
2559 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2560 | "cannot encode constant as a byte sequence.\n" ); |
2561 | return false; |
2562 | } |
2563 | |
2564 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
2565 | if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype)) |
2566 | { |
2567 | if (dump_enabled_p ()) |
2568 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2569 | "incompatible vector types.\n" ); |
2570 | return false; |
2571 | } |
2572 | |
2573 | *rhs_dt_out = rhs_dt; |
2574 | *rhs_vectype_out = rhs_vectype; |
2575 | if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def) |
2576 | *vls_type_out = VLS_STORE_INVARIANT; |
2577 | else |
2578 | *vls_type_out = VLS_STORE; |
2579 | return true; |
2580 | } |
2581 | |
2582 | /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO. |
2583 | Note that we support masks with floating-point type, in which case the |
2584 | floats are interpreted as a bitmask. */ |
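/* For example, for a V2DF mask each element is the double whose bit
   pattern is all ones (a NaN); only the bits matter to targets that
   accept such masks.  */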
2585 | |
2586 | static tree |
2587 | vect_build_all_ones_mask (vec_info *vinfo, |
2588 | stmt_vec_info stmt_info, tree masktype) |
2589 | { |
2590 | if (TREE_CODE (masktype) == INTEGER_TYPE) |
2591 | return build_int_cst (masktype, -1); |
2592 | else if (VECTOR_BOOLEAN_TYPE_P (masktype) |
2593 | || TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE) |
2594 | { |
2595 | tree mask = build_int_cst (TREE_TYPE (masktype), -1); |
2596 | mask = build_vector_from_val (masktype, mask); |
      return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2598 | } |
2599 | else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype))) |
2600 | { |
2601 | REAL_VALUE_TYPE r; |
2602 | long tmp[6]; |
2603 | for (int j = 0; j < 6; ++j) |
2604 | tmp[j] = -1; |
2605 | real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype))); |
2606 | tree mask = build_real (TREE_TYPE (masktype), r); |
2607 | mask = build_vector_from_val (masktype, mask); |
      return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2609 | } |
2610 | gcc_unreachable (); |
2611 | } |
2612 | |
2613 | /* Build an all-zero merge value of type VECTYPE while vectorizing |
2614 | STMT_INFO as a gather load. */ |
2615 | |
2616 | static tree |
2617 | vect_build_zero_merge_argument (vec_info *vinfo, |
2618 | stmt_vec_info stmt_info, tree vectype) |
2619 | { |
2620 | tree merge; |
2621 | if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE) |
2622 | merge = build_int_cst (TREE_TYPE (vectype), 0); |
2623 | else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype))) |
2624 | { |
2625 | REAL_VALUE_TYPE r; |
2626 | long tmp[6]; |
2627 | for (int j = 0; j < 6; ++j) |
2628 | tmp[j] = 0; |
2629 | real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype))); |
2630 | merge = build_real (TREE_TYPE (vectype), r); |
2631 | } |
2632 | else |
2633 | gcc_unreachable (); |
2634 | merge = build_vector_from_val (vectype, merge); |
  return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2636 | } |
2637 | |
2638 | /* Build a gather load call while vectorizing STMT_INFO. Insert new |
2639 | instructions before GSI and add them to VEC_STMT. GS_INFO describes |
2640 | the gather load operation. If the load is conditional, MASK is the |
2641 | vectorized condition, otherwise MASK is null. PTR is the base |
2642 | pointer and OFFSET is the vectorized offset. */ |
2643 | |
2644 | static gimple * |
2645 | vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info, |
2646 | gimple_stmt_iterator *gsi, |
2647 | gather_scatter_info *gs_info, |
2648 | tree ptr, tree offset, tree mask) |
2649 | { |
2650 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
2651 | tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl)); |
2652 | tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl)); |
2653 | tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); |
2654 | /* ptrtype */ arglist = TREE_CHAIN (arglist); |
2655 | tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); |
2656 | tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); |
2657 | tree scaletype = TREE_VALUE (arglist); |
2658 | tree var; |
2659 | gcc_checking_assert (types_compatible_p (srctype, rettype) |
2660 | && (!mask |
2661 | || TREE_CODE (masktype) == INTEGER_TYPE |
2662 | || types_compatible_p (srctype, masktype))); |
2663 | |
2664 | tree op = offset; |
2665 | if (!useless_type_conversion_p (idxtype, TREE_TYPE (op))) |
2666 | { |
2667 | gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)), |
2668 | TYPE_VECTOR_SUBPARTS (idxtype))); |
2669 | var = vect_get_new_ssa_name (idxtype, vect_simple_var); |
2670 | op = build1 (VIEW_CONVERT_EXPR, idxtype, op); |
2671 | gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op); |
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2673 | op = var; |
2674 | } |
2675 | |
2676 | tree src_op = NULL_TREE; |
2677 | tree mask_op = NULL_TREE; |
2678 | if (mask) |
2679 | { |
2680 | if (!useless_type_conversion_p (masktype, TREE_TYPE (mask))) |
2681 | { |
2682 | tree utype, optype = TREE_TYPE (mask); |
2683 | if (VECTOR_TYPE_P (masktype) |
2684 | || TYPE_MODE (masktype) == TYPE_MODE (optype)) |
2685 | utype = masktype; |
2686 | else |
2687 | utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1); |
2688 | var = vect_get_new_ssa_name (utype, vect_scalar_var); |
2689 | tree mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask); |
2690 | gassign *new_stmt |
2691 | = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg); |
          vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2693 | mask_arg = var; |
2694 | if (!useless_type_conversion_p (masktype, utype)) |
2695 | { |
2696 | gcc_assert (TYPE_PRECISION (utype) |
2697 | <= TYPE_PRECISION (masktype)); |
2698 | var = vect_get_new_ssa_name (masktype, vect_scalar_var); |
2699 | new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg); |
              vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2701 | mask_arg = var; |
2702 | } |
2703 | src_op = build_zero_cst (srctype); |
2704 | mask_op = mask_arg; |
2705 | } |
2706 | else |
2707 | { |
2708 | src_op = mask; |
2709 | mask_op = mask; |
2710 | } |
2711 | } |
2712 | else |
2713 | { |
      src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2715 | mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype); |
2716 | } |
2717 | |
2718 | tree scale = build_int_cst (scaletype, gs_info->scale); |
2719 | gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op, |
2720 | mask_op, scale); |
2721 | |
2722 | if (!useless_type_conversion_p (vectype, rettype)) |
2723 | { |
2724 | gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype), |
2725 | TYPE_VECTOR_SUBPARTS (rettype))); |
2726 | op = vect_get_new_ssa_name (rettype, vect_simple_var); |
      gimple_call_set_lhs (new_stmt, op);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2729 | op = build1 (VIEW_CONVERT_EXPR, vectype, op); |
2730 | new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR, op); |
2731 | } |
2732 | |
2733 | return new_stmt; |
2734 | } |
2735 | |
2736 | /* Build a scatter store call while vectorizing STMT_INFO. Insert new |
2737 | instructions before GSI. GS_INFO describes the scatter store operation. |
2738 | PTR is the base pointer, OFFSET the vectorized offsets and OPRND the |
2739 | vectorized data to store. |
2740 | If the store is conditional, MASK is the vectorized condition, otherwise |
2741 | MASK is null. */ |
2742 | |
2743 | static gimple * |
2744 | vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info, |
2745 | gimple_stmt_iterator *gsi, |
2746 | gather_scatter_info *gs_info, |
2747 | tree ptr, tree offset, tree oprnd, tree mask) |
2748 | { |
2749 | tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl)); |
2750 | tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl)); |
2751 | /* tree ptrtype = TREE_VALUE (arglist); */ arglist = TREE_CHAIN (arglist); |
2752 | tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); |
2753 | tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); |
2754 | tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); |
2755 | tree scaletype = TREE_VALUE (arglist); |
2756 | gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE |
2757 | && TREE_CODE (rettype) == VOID_TYPE); |
2758 | |
2759 | tree mask_arg = NULL_TREE; |
2760 | if (mask) |
2761 | { |
2762 | mask_arg = mask; |
2763 | tree optype = TREE_TYPE (mask_arg); |
2764 | tree utype; |
2765 | if (TYPE_MODE (masktype) == TYPE_MODE (optype)) |
2766 | utype = masktype; |
2767 | else |
2768 | utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1); |
2769 | tree var = vect_get_new_ssa_name (utype, vect_scalar_var); |
2770 | mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg); |
2771 | gassign *new_stmt |
2772 | = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg); |
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2774 | mask_arg = var; |
2775 | if (!useless_type_conversion_p (masktype, utype)) |
2776 | { |
2777 | gcc_assert (TYPE_PRECISION (utype) <= TYPE_PRECISION (masktype)); |
2778 | tree var = vect_get_new_ssa_name (masktype, vect_scalar_var); |
2779 | new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg); |
          vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2781 | mask_arg = var; |
2782 | } |
2783 | } |
2784 | else |
2785 | { |
2786 | mask_arg = build_int_cst (masktype, -1); |
      mask_arg = vect_init_vector (vinfo, stmt_info, mask_arg, masktype, NULL);
2788 | } |
2789 | |
2790 | tree src = oprnd; |
2791 | if (!useless_type_conversion_p (srctype, TREE_TYPE (src))) |
2792 | { |
2793 | gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)), |
2794 | TYPE_VECTOR_SUBPARTS (srctype))); |
2795 | tree var = vect_get_new_ssa_name (srctype, vect_simple_var); |
2796 | src = build1 (VIEW_CONVERT_EXPR, srctype, src); |
2797 | gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src); |
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2799 | src = var; |
2800 | } |
2801 | |
2802 | tree op = offset; |
2803 | if (!useless_type_conversion_p (idxtype, TREE_TYPE (op))) |
2804 | { |
2805 | gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)), |
2806 | TYPE_VECTOR_SUBPARTS (idxtype))); |
2807 | tree var = vect_get_new_ssa_name (idxtype, vect_simple_var); |
2808 | op = build1 (VIEW_CONVERT_EXPR, idxtype, op); |
2809 | gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op); |
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2811 | op = var; |
2812 | } |
2813 | |
2814 | tree scale = build_int_cst (scaletype, gs_info->scale); |
2815 | gcall *new_stmt |
2816 | = gimple_build_call (gs_info->decl, 5, ptr, mask_arg, op, src, scale); |
2817 | return new_stmt; |
2818 | } |
2819 | |
2820 | /* Prepare the base and offset in GS_INFO for vectorization. |
2821 | Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET |
2822 | to the vectorized offset argument for the first copy of STMT_INFO. |
2823 | STMT_INFO is the statement described by GS_INFO and LOOP is the |
2824 | containing loop. */ |
2825 | |
2826 | static void |
2827 | vect_get_gather_scatter_ops (loop_vec_info loop_vinfo, |
2828 | class loop *loop, stmt_vec_info stmt_info, |
2829 | slp_tree slp_node, gather_scatter_info *gs_info, |
2830 | tree *dataref_ptr, vec<tree> *vec_offset) |
2831 | { |
2832 | gimple_seq stmts = NULL; |
2833 | *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE); |
2834 | if (stmts != NULL) |
2835 | { |
2836 | basic_block new_bb; |
2837 | edge pe = loop_preheader_edge (loop); |
2838 | new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); |
2839 | gcc_assert (!new_bb); |
2840 | } |
2841 | if (slp_node) |
2842 | vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset); |
2843 | else |
2844 | { |
2845 | unsigned ncopies |
        = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
      vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
                                     gs_info->offset, vec_offset,
                                     gs_info->offset_vectype);
2850 | } |
2851 | } |
2852 | |
2853 | /* Prepare to implement a grouped or strided load or store using |
2854 | the gather load or scatter store operation described by GS_INFO. |
2855 | STMT_INFO is the load or store statement. |
2856 | |
2857 | Set *DATAREF_BUMP to the amount that should be added to the base |
2858 | address after each copy of the vectorized statement. Set *VEC_OFFSET |
2859 | to an invariant offset vector in which element I has the value |
2860 | I * DR_STEP / SCALE. */ |
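/* For example, with DR_STEP = 16 and SCALE = 4 the invariant offset
   vector is { 0, 4, 8, 12, ... }.  */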
2861 | |
2862 | static void |
2863 | vect_get_strided_load_store_ops (stmt_vec_info stmt_info, |
2864 | loop_vec_info loop_vinfo, |
2865 | gimple_stmt_iterator *gsi, |
2866 | gather_scatter_info *gs_info, |
2867 | tree *dataref_bump, tree *vec_offset, |
2868 | vec_loop_lens *loop_lens) |
2869 | { |
2870 | struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); |
2871 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
2872 | |
2873 | if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)) |
2874 | { |
2875 | /* _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]); |
2876 | ivtmp_8 = _31 * 16 (step in bytes); |
2877 | .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... ); |
2878 | vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */ |
2879 | tree loop_len |
2880 | = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0); |
2881 | tree tmp |
2882 | = fold_build2 (MULT_EXPR, sizetype, |
2883 | fold_convert (sizetype, unshare_expr (DR_STEP (dr))), |
2884 | loop_len); |
2885 | *dataref_bump = force_gimple_operand_gsi (gsi, tmp, true, NULL_TREE, true, |
2886 | GSI_SAME_STMT); |
2887 | } |
2888 | else |
2889 | { |
2890 | tree bump |
2891 | = size_binop (MULT_EXPR, |
2892 | fold_convert (sizetype, unshare_expr (DR_STEP (dr))), |
2893 | size_int (TYPE_VECTOR_SUBPARTS (vectype))); |
2894 | *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump); |
2895 | } |
2896 | |
2897 | /* The offset given in GS_INFO can have pointer type, so use the element |
2898 | type of the vector instead. */ |
2899 | tree offset_type = TREE_TYPE (gs_info->offset_vectype); |
2900 | |
2901 | /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */ |
2902 | tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)), |
2903 | ssize_int (gs_info->scale)); |
2904 | step = fold_convert (offset_type, step); |
2905 | |
2906 | /* Create {0, X, X*2, X*3, ...}. */ |
2907 | tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype, |
2908 | build_zero_cst (offset_type), step); |
2909 | *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset); |
2910 | } |
2911 | |
/* Prepare the pointer IVs which need to be updated by a variable amount.
   Such a variable amount is the outcome of .SELECT_VL.  In this case,
   each iteration may process a flexible number of elements, as long as
   that number is <= VF elements.

   Return the data-reference pointer increment computed from the
   .SELECT_VL result.  If new statements are needed, insert them
   before GSI.  */
2919 | |
2920 | static tree |
2921 | vect_get_loop_variant_data_ptr_increment ( |
2922 | vec_info *vinfo, tree aggr_type, gimple_stmt_iterator *gsi, |
2923 | vec_loop_lens *loop_lens, dr_vec_info *dr_info, |
2924 | vect_memory_access_type memory_access_type) |
2925 | { |
  loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
2927 | tree step = vect_dr_behavior (vinfo, dr_info)->step; |
2928 | |
  /* Gather/scatter accesses never reach here.  */
  gcc_assert (memory_access_type != VMAT_GATHER_SCATTER);

  /* When the target supports the .SELECT_VL pattern, we dynamically
     adjust the memory address using the .SELECT_VL result.

     The result of .SELECT_VL is the number of elements to be processed
     in each iteration.  So the memory address adjustment operation
     should be:

       addr = addr + .SELECT_VL (ARG..) * step;
   */
2941 | tree loop_len |
2942 | = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0); |
2943 | tree len_type = TREE_TYPE (loop_len); |
  /* Since the outcome of .SELECT_VL is a count of elements, scale it by
     the step in bytes before using it to bump the variable-amount
     address pointer IV.  */
  tree tmp = fold_build2 (MULT_EXPR, len_type, loop_len,
			  wide_int_to_tree (len_type, wi::to_widest (step)));
  tree bump = make_temp_ssa_name (len_type, NULL, "ivtmp");
  gassign *assign = gimple_build_assign (bump, tmp);
  gsi_insert_before (gsi, assign, GSI_SAME_STMT);
2952 | return bump; |
2953 | } |
2954 | |
2955 | /* Return the amount that should be added to a vector pointer to move |
2956 | to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference |
2957 | being vectorized and MEMORY_ACCESS_TYPE describes the type of |
2958 | vectorization. */ |
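
/* For instance (illustrative): for a contiguous access with AGGR_TYPE
   V4SI the increment is TYPE_SIZE_UNIT (V4SI), i.e. 16 bytes, and if
   DR_STEP is negative (a backward access) the result is negated.  */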
2959 | |
2960 | static tree |
2961 | vect_get_data_ptr_increment (vec_info *vinfo, gimple_stmt_iterator *gsi, |
2962 | dr_vec_info *dr_info, tree aggr_type, |
2963 | vect_memory_access_type memory_access_type, |
2964 | vec_loop_lens *loop_lens = nullptr) |
2965 | { |
2966 | if (memory_access_type == VMAT_INVARIANT) |
2967 | return size_zero_node; |
2968 | |
  loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
2970 | if (loop_vinfo && LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)) |
2971 | return vect_get_loop_variant_data_ptr_increment (vinfo, aggr_type, gsi, |
2972 | loop_lens, dr_info, |
2973 | memory_access_type); |
2974 | |
2975 | tree iv_step = TYPE_SIZE_UNIT (aggr_type); |
2976 | tree step = vect_dr_behavior (vinfo, dr_info)->step; |
2977 | if (tree_int_cst_sgn (step) == -1) |
2978 | iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step); |
2979 | return iv_step; |
2980 | } |
2981 | |
2982 | /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */ |
2983 | |
2984 | static bool |
2985 | vectorizable_bswap (vec_info *vinfo, |
2986 | stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, |
2987 | gimple **vec_stmt, slp_tree slp_node, |
2988 | slp_tree *slp_op, |
2989 | tree vectype_in, stmt_vector_for_cost *cost_vec) |
2990 | { |
2991 | tree op, vectype; |
  gcall *stmt = as_a <gcall *> (stmt_info->stmt);
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  unsigned ncopies;

  op = gimple_call_arg (stmt, 0);
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2999 | |
3000 | /* Multiple types in SLP are handled by creating the appropriate number of |
3001 | vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in |
3002 | case of SLP. */ |
3003 | if (slp_node) |
3004 | ncopies = 1; |
3005 | else |
3006 | ncopies = vect_get_num_copies (loop_vinfo, vectype); |
3007 | |
3008 | gcc_assert (ncopies >= 1); |
3009 | |
3010 | if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype)) |
3011 | { |
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "mismatched vector sizes %T and %T\n",
			 vectype_in, vectype);
3016 | return false; |
3017 | } |
3018 | |
3019 | tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in); |
3020 | if (! char_vectype) |
3021 | return false; |
3022 | |
  poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
  unsigned word_bytes;
  if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3026 | return false; |
3027 | |
3028 | /* The encoding uses one stepped pattern for each byte in the word. */ |
  vec_perm_builder elts (num_bytes, word_bytes, 3);
  for (unsigned i = 0; i < 3; ++i)
    for (unsigned j = 0; j < word_bytes; ++j)
      elts.quick_push ((i + 1) * word_bytes - j - 1);
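
  /* Worked example (illustrative): for __builtin_bswap32 on V4SI viewed
     as V16QI, word_bytes == 4, so the pushes above produce
     { 3, 2, 1, 0,  7, 6, 5, 4,  11, 10, 9, 8 }, which the stepped
     encoding extends with 15, 14, 13, 12, i.e. the byte-reversing
     permutation of each 32-bit word.  */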
3033 | |
3034 | vec_perm_indices indices (elts, 1, num_bytes); |
3035 | machine_mode vmode = TYPE_MODE (char_vectype); |
3036 | if (!can_vec_perm_const_p (vmode, vmode, indices)) |
3037 | return false; |
3038 | |
3039 | if (! vec_stmt) |
3040 | { |
3041 | if (slp_node |
3042 | && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in)) |
3043 | { |
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "incompatible vector types for invariants\n");
	  return false;
	}

      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_bswap");
      record_stmt_cost (cost_vec,
			1, vector_stmt, stmt_info, 0, vect_prologue);
      record_stmt_cost (cost_vec,
			slp_node
			? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
			vec_perm, stmt_info, 0, vect_body);
3058 | return true; |
3059 | } |
3060 | |
3061 | tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices); |
3062 | |
3063 | /* Transform. */ |
  vec<tree> vec_oprnds = vNULL;
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		     op, &vec_oprnds);
  /* Arguments are ready.  Create the new vector stmt.  */
3068 | unsigned i; |
3069 | tree vop; |
  FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
    {
      gimple *new_stmt;
      tree tem = make_ssa_name (char_vectype);
      new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
						   char_vectype, vop));
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
      tree tem2 = make_ssa_name (char_vectype);
      new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
				      tem, tem, bswap_vconst);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
      tem = make_ssa_name (vectype);
      new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
						   vectype, tem2));
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
      if (slp_node)
	slp_node->push_vec_def (new_stmt);
      else
	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
    }
3089 | } |
3090 | |
3091 | if (!slp_node) |
3092 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
3093 | |
3094 | vec_oprnds.release (); |
3095 | return true; |
3096 | } |
3097 | |
3098 | /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have |
3099 | integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT |
3100 | in a single step. On success, store the binary pack code in |
3101 | *CONVERT_CODE. */ |
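
/* For example (illustrative): narrowing V4DI inputs to V8SI results in
   a single step would set *CONVERT_CODE to a pack operation such as
   VEC_PACK_TRUNC_EXPR, combining two V4DI vectors into one V8SI.  */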
3102 | |
3103 | static bool |
3104 | simple_integer_narrowing (tree vectype_out, tree vectype_in, |
3105 | code_helper *convert_code) |
3106 | { |
3107 | if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out)) |
3108 | || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in))) |
3109 | return false; |
3110 | |
3111 | code_helper code; |
3112 | int multi_step_cvt = 0; |
3113 | auto_vec <tree, 8> interm_types; |
3114 | if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in, |
3115 | &code, &multi_step_cvt, &interm_types) |
3116 | || multi_step_cvt) |
3117 | return false; |
3118 | |
3119 | *convert_code = code; |
3120 | return true; |
3121 | } |
3122 | |
3123 | /* Function vectorizable_call. |
3124 | |
3125 | Check if STMT_INFO performs a function call that can be vectorized. |
3126 | If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized |
3127 | stmt to replace it, put it in VEC_STMT, and insert it at GSI. |
3128 | Return true if STMT_INFO is vectorizable in this way. */ |
3129 | |
3130 | static bool |
3131 | vectorizable_call (vec_info *vinfo, |
3132 | stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, |
3133 | gimple **vec_stmt, slp_tree slp_node, |
3134 | stmt_vector_for_cost *cost_vec) |
3135 | { |
3136 | gcall *stmt; |
3137 | tree vec_dest; |
3138 | tree scalar_dest; |
3139 | tree op; |
3140 | tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE; |
3141 | tree vectype_out, vectype_in; |
3142 | poly_uint64 nunits_in; |
3143 | poly_uint64 nunits_out; |
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3146 | tree fndecl, new_temp, rhs_type; |
3147 | enum vect_def_type dt[4] |
3148 | = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type, |
3149 | vect_unknown_def_type }; |
3150 | tree vectypes[ARRAY_SIZE (dt)] = {}; |
3151 | slp_tree slp_op[ARRAY_SIZE (dt)] = {}; |
3152 | int ndts = ARRAY_SIZE (dt); |
3153 | int ncopies, j; |
3154 | auto_vec<tree, 8> vargs; |
3155 | enum { NARROW, NONE, WIDEN } modifier; |
3156 | size_t i, nargs; |
3157 | tree lhs; |
3158 | tree clz_ctz_arg1 = NULL_TREE; |
3159 | |
3160 | if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) |
3161 | return false; |
3162 | |
3163 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def |
3164 | && ! vec_stmt) |
3165 | return false; |
3166 | |
  /* Is STMT_INFO a vectorizable call?  */
  stmt = dyn_cast <gcall *> (stmt_info->stmt);
  if (!stmt)
    return false;

  if (gimple_call_internal_p (stmt)
      && (internal_load_fn_p (gimple_call_internal_fn (stmt))
	  || internal_store_fn_p (gimple_call_internal_fn (stmt))))
    /* Handled by vectorizable_load and vectorizable_store.  */
    return false;
3177 | |
3178 | if (gimple_call_lhs (gs: stmt) == NULL_TREE |
3179 | || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME) |
3180 | return false; |
3181 | |
3182 | gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt)); |
3183 | |
3184 | vectype_out = STMT_VINFO_VECTYPE (stmt_info); |
3185 | |
3186 | /* Process function arguments. */ |
3187 | rhs_type = NULL_TREE; |
3188 | vectype_in = NULL_TREE; |
  nargs = gimple_call_num_args (stmt);
3190 | |
  /* Bail out if the function has more than four arguments; we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  Calls with no arguments are not handled either.  */
3194 | if (nargs == 0 || nargs > 4) |
3195 | return false; |
3196 | |
3197 | /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */ |
3198 | combined_fn cfn = gimple_call_combined_fn (stmt); |
3199 | if (cfn == CFN_GOMP_SIMD_LANE) |
3200 | { |
3201 | nargs = 0; |
3202 | rhs_type = unsigned_type_node; |
3203 | } |
  /* Similarly pretend IFN_CLZ and IFN_CTZ only have one argument; the
     second argument merely says whether the operation is well-defined at
     zero and what value should be returned for it.  */
  if ((cfn == CFN_CLZ || cfn == CFN_CTZ) && nargs == 2)
    {
      nargs = 1;
      clz_ctz_arg1 = gimple_call_arg (stmt, 1);
    }
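
  /* E.g. (illustrative): for .CTZ (x_1, 32) the trailing 32 means CTZ is
     defined at zero and yields 32 there; only x_1 is vectorized and the
     constant is appended to the vector call unchanged.  */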
3212 | |
3213 | int mask_opno = -1; |
  if (internal_fn_p (cfn))
    mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3216 | |
3217 | for (i = 0; i < nargs; i++) |
3218 | { |
3219 | if ((int) i == mask_opno) |
3220 | { |
	  if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
				       &op, &slp_op[i], &dt[i], &vectypes[i]))
3223 | return false; |
3224 | continue; |
3225 | } |
3226 | |
3227 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, |
3228 | i, &op, &slp_op[i], &dt[i], &vectypes[i])) |
3229 | { |
3230 | if (dump_enabled_p ()) |
3231 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3232 | "use not simple.\n" ); |
3233 | return false; |
3234 | } |
3235 | |
3236 | /* We can only handle calls with arguments of the same type. */ |
      if (rhs_type
	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3239 | { |
3240 | if (dump_enabled_p ()) |
3241 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3242 | "argument types differ.\n" ); |
3243 | return false; |
3244 | } |
3245 | if (!rhs_type) |
3246 | rhs_type = TREE_TYPE (op); |
3247 | |
3248 | if (!vectype_in) |
3249 | vectype_in = vectypes[i]; |
      else if (vectypes[i]
	       && !types_compatible_p (vectypes[i], vectype_in))
3252 | { |
3253 | if (dump_enabled_p ()) |
3254 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3255 | "argument vector types differ.\n" ); |
3256 | return false; |
3257 | } |
3258 | } |
3259 | /* If all arguments are external or constant defs, infer the vector type |
3260 | from the scalar type. */ |
3261 | if (!vectype_in) |
3262 | vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node); |
3263 | if (vec_stmt) |
3264 | gcc_assert (vectype_in); |
3265 | if (!vectype_in) |
3266 | { |
3267 | if (dump_enabled_p ()) |
3268 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3269 | "no vectype for scalar type %T\n" , rhs_type); |
3270 | |
3271 | return false; |
3272 | } |
3273 | |
3274 | if (VECTOR_BOOLEAN_TYPE_P (vectype_out) |
3275 | != VECTOR_BOOLEAN_TYPE_P (vectype_in)) |
3276 | { |
3277 | if (dump_enabled_p ()) |
3278 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3279 | "mixed mask and nonmask vector types\n" ); |
3280 | return false; |
3281 | } |
3282 | |
3283 | if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out)) |
3284 | { |
3285 | if (dump_enabled_p ()) |
3286 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3287 | "use emulated vector type for call\n" ); |
3288 | return false; |
3289 | } |
3290 | |
  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3294 | if (known_eq (nunits_in * 2, nunits_out)) |
3295 | modifier = NARROW; |
3296 | else if (known_eq (nunits_out, nunits_in)) |
3297 | modifier = NONE; |
3298 | else if (known_eq (nunits_out * 2, nunits_in)) |
3299 | modifier = WIDEN; |
3300 | else |
3301 | return false; |
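
  /* E.g. (illustrative): a call consuming V4DF and producing V8SI has
     nunits_in == 4 and nunits_out == 8 and is a NARROW; equal lane
     counts give NONE, and the converse case gives WIDEN.  */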
3302 | |
  /* We only handle functions that do not read or clobber memory.  */
  if (gimple_vuse (stmt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "function reads from or writes to memory.\n");
      return false;
    }
3311 | |
3312 | /* For now, we only vectorize functions if a target specific builtin |
3313 | is available. TODO -- in some cases, it might be profitable to |
3314 | insert the calls for pieces of the vector, in order to be able |
3315 | to vectorize other operations in the loop. */ |
3316 | fndecl = NULL_TREE; |
3317 | internal_fn ifn = IFN_LAST; |
  tree callee = gimple_call_fndecl (stmt);
3319 | |
3320 | /* First try using an internal function. */ |
3321 | code_helper convert_code = MAX_TREE_CODES; |
3322 | if (cfn != CFN_LAST |
3323 | && (modifier == NONE |
3324 | || (modifier == NARROW |
	  && simple_integer_narrowing (vectype_out, vectype_in,
				       &convert_code)))
    ifn = vectorizable_internal_function (cfn, callee, vectype_out,
					  vectype_in);
3329 | |
3330 | /* If that fails, try asking for a target-specific built-in function. */ |
3331 | if (ifn == IFN_LAST) |
3332 | { |
3333 | if (cfn != CFN_LAST) |
3334 | fndecl = targetm.vectorize.builtin_vectorized_function |
3335 | (cfn, vectype_out, vectype_in); |
      else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3337 | fndecl = targetm.vectorize.builtin_md_vectorized_function |
3338 | (callee, vectype_out, vectype_in); |
3339 | } |
3340 | |
3341 | if (ifn == IFN_LAST && !fndecl) |
3342 | { |
3343 | if (cfn == CFN_GOMP_SIMD_LANE |
3344 | && !slp_node |
3345 | && loop_vinfo |
3346 | && LOOP_VINFO_LOOP (loop_vinfo)->simduid |
3347 | && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME |
3348 | && LOOP_VINFO_LOOP (loop_vinfo)->simduid |
3349 | == SSA_NAME_VAR (gimple_call_arg (stmt, 0))) |
3350 | { |
3351 | /* We can handle IFN_GOMP_SIMD_LANE by returning a |
3352 | { 0, 1, 2, ... vf - 1 } vector. */ |
3353 | gcc_assert (nargs == 0); |
3354 | } |
3355 | else if (modifier == NONE |
3356 | && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16) |
3357 | || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32) |
3358 | || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64) |
3359 | || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128))) |
3360 | return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node, |
3361 | slp_op, vectype_in, cost_vec); |
3362 | else |
3363 | { |
3364 | if (dump_enabled_p ()) |
3365 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3366 | "function is not vectorizable.\n" ); |
3367 | return false; |
3368 | } |
3369 | } |
3370 | |
3371 | if (slp_node) |
3372 | ncopies = 1; |
  else if (modifier == NARROW && ifn == IFN_LAST)
    ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3377 | |
3378 | /* Sanity check: make sure that at least one copy of the vectorized stmt |
3379 | needs to be generated. */ |
3380 | gcc_assert (ncopies >= 1); |
3381 | |
3382 | int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info); |
3383 | internal_fn cond_fn = get_conditional_internal_fn (ifn); |
3384 | internal_fn cond_len_fn = get_len_internal_fn (ifn); |
3385 | int len_opno = internal_fn_len_index (cond_len_fn); |
3386 | vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL); |
3387 | vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL); |
3388 | if (!vec_stmt) /* transformation not required. */ |
3389 | { |
3390 | if (slp_node) |
3391 | for (i = 0; i < nargs; ++i) |
3392 | if (!vect_maybe_update_slp_op_vectype (slp_op[i], |
3393 | vectypes[i] |
3394 | ? vectypes[i] : vectype_in)) |
3395 | { |
3396 | if (dump_enabled_p ()) |
3397 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3398 | "incompatible vector types for invariants\n" ); |
3399 | return false; |
3400 | } |
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_call");
      vect_model_simple_cost (vinfo, stmt_info,
			      ncopies, dt, ndts, slp_node, cost_vec);
      if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
	record_stmt_cost (cost_vec, ncopies / 2,
			  vec_promote_demote, stmt_info, 0, vect_body);
3408 | |
3409 | if (loop_vinfo |
3410 | && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) |
3411 | && (reduc_idx >= 0 || mask_opno >= 0)) |
3412 | { |
3413 | if (reduc_idx >= 0 |
3414 | && (cond_fn == IFN_LAST |
3415 | || !direct_internal_fn_supported_p (cond_fn, vectype_out, |
3416 | OPTIMIZE_FOR_SPEED)) |
3417 | && (cond_len_fn == IFN_LAST |
3418 | || !direct_internal_fn_supported_p (cond_len_fn, vectype_out, |
3419 | OPTIMIZE_FOR_SPEED))) |
3420 | { |
3421 | if (dump_enabled_p ()) |
3422 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3423 | "can't use a fully-masked loop because no" |
3424 | " conditional operation is available.\n" ); |
3425 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; |
3426 | } |
3427 | else |
3428 | { |
3429 | unsigned int nvectors |
3430 | = (slp_node |
3431 | ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) |
3432 | : ncopies); |
3433 | tree scalar_mask = NULL_TREE; |
3434 | if (mask_opno >= 0) |
	    scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3436 | if (cond_len_fn != IFN_LAST |
3437 | && direct_internal_fn_supported_p (cond_len_fn, vectype_out, |
3438 | OPTIMIZE_FOR_SPEED)) |
3439 | vect_record_loop_len (loop_vinfo, lens, nvectors, vectype_out, |
3440 | 1); |
3441 | else |
3442 | vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out, |
3443 | scalar_mask); |
3444 | } |
3445 | } |
3446 | return true; |
3447 | } |
3448 | |
3449 | /* Transform. */ |
3450 | |
3451 | if (dump_enabled_p ()) |
3452 | dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n" ); |
3453 | |
3454 | /* Handle def. */ |
  scalar_dest = gimple_call_lhs (stmt);
3456 | vec_dest = vect_create_destination_var (scalar_dest, vectype_out); |
3457 | |
3458 | bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); |
3459 | bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo); |
3460 | unsigned int vect_nargs = nargs; |
3461 | if (len_loop_p) |
3462 | { |
3463 | if (len_opno >= 0) |
3464 | { |
3465 | ifn = cond_len_fn; |
	  /* COND_* -> COND_LEN_* takes 2 extra arguments: LEN, BIAS.  */
3467 | vect_nargs += 2; |
3468 | } |
3469 | else if (reduc_idx >= 0) |
3470 | gcc_unreachable (); |
3471 | } |
3472 | else if (masked_loop_p && reduc_idx >= 0) |
3473 | { |
3474 | ifn = cond_fn; |
3475 | vect_nargs += 2; |
3476 | } |
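
  /* E.g. (illustrative): IFN_COND_ADD (mask, a, b, fallback) becomes
     IFN_COND_LEN_ADD (mask, a, b, fallback, len, bias), with LEN and
     BIAS stored at positions len_opno and len_opno + 1 below.  */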
3477 | if (clz_ctz_arg1) |
3478 | ++vect_nargs; |
3479 | |
3480 | if (modifier == NONE || ifn != IFN_LAST) |
3481 | { |
3482 | tree prev_res = NULL_TREE; |
      vargs.safe_grow (vect_nargs, true);
3484 | auto_vec<vec<tree> > vec_defs (nargs); |
3485 | for (j = 0; j < ncopies; ++j) |
3486 | { |
3487 | /* Build argument list for the vectorized call. */ |
3488 | if (slp_node) |
3489 | { |
3490 | vec<tree> vec_oprnds0; |
3491 | |
3492 | vect_get_slp_defs (vinfo, slp_node, &vec_defs); |
3493 | vec_oprnds0 = vec_defs[0]; |
3494 | |
3495 | /* Arguments are ready. Create the new vector stmt. */ |
3496 | FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0) |
3497 | { |
3498 | int varg = 0; |
3499 | if (masked_loop_p && reduc_idx >= 0) |
3500 | { |
3501 | unsigned int vec_num = vec_oprnds0.length (); |
3502 | /* Always true for SLP. */ |
3503 | gcc_assert (ncopies == 1); |
3504 | vargs[varg++] = vect_get_loop_mask (loop_vinfo, |
3505 | gsi, masks, vec_num, |
3506 | vectype_out, i); |
3507 | } |
3508 | size_t k; |
3509 | for (k = 0; k < nargs; k++) |
3510 | { |
3511 | vec<tree> vec_oprndsk = vec_defs[k]; |
3512 | vargs[varg++] = vec_oprndsk[i]; |
3513 | } |
3514 | if (masked_loop_p && reduc_idx >= 0) |
3515 | vargs[varg++] = vargs[reduc_idx + 1]; |
3516 | if (clz_ctz_arg1) |
3517 | vargs[varg++] = clz_ctz_arg1; |
3518 | |
3519 | gimple *new_stmt; |
3520 | if (modifier == NARROW) |
3521 | { |
3522 | /* We don't define any narrowing conditional functions |
3523 | at present. */ |
3524 | gcc_assert (mask_opno < 0); |
		      tree half_res = make_ssa_name (vectype_in);
		      gcall *call
			= gimple_build_call_internal_vec (ifn, vargs);
		      gimple_call_set_lhs (call, half_res);
		      gimple_call_set_nothrow (call, true);
		      vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3531 | if ((i & 1) == 0) |
3532 | { |
3533 | prev_res = half_res; |
3534 | continue; |
3535 | } |
		      new_temp = make_ssa_name (vec_dest);
		      new_stmt = vect_gimple_build (new_temp, convert_code,
						    prev_res, half_res);
		      vect_finish_stmt_generation (vinfo, stmt_info,
						   new_stmt, gsi);
3541 | } |
3542 | else |
3543 | { |
3544 | if (len_opno >= 0 && len_loop_p) |
3545 | { |
3546 | unsigned int vec_num = vec_oprnds0.length (); |
3547 | /* Always true for SLP. */ |
3548 | gcc_assert (ncopies == 1); |
3549 | tree len |
3550 | = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num, |
3551 | vectype_out, i, 1); |
3552 | signed char biasval |
3553 | = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); |
3554 | tree bias = build_int_cst (intQI_type_node, biasval); |
3555 | vargs[len_opno] = len; |
3556 | vargs[len_opno + 1] = bias; |
3557 | } |
3558 | else if (mask_opno >= 0 && masked_loop_p) |
3559 | { |
3560 | unsigned int vec_num = vec_oprnds0.length (); |
3561 | /* Always true for SLP. */ |
3562 | gcc_assert (ncopies == 1); |
3563 | tree mask = vect_get_loop_mask (loop_vinfo, |
3564 | gsi, masks, vec_num, |
3565 | vectype_out, i); |
		      vargs[mask_opno] = prepare_vec_mask
			(loop_vinfo, TREE_TYPE (mask), mask,
			 vargs[mask_opno], gsi);
3569 | } |
3570 | |
		  gcall *call;
		  if (ifn != IFN_LAST)
		    call = gimple_build_call_internal_vec (ifn, vargs);
		  else
		    call = gimple_build_call_vec (fndecl, vargs);
		  new_temp = make_ssa_name (vec_dest, call);
		  gimple_call_set_lhs (call, new_temp);
		  gimple_call_set_nothrow (call, true);
		  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
		  new_stmt = call;
		}
	      slp_node->push_vec_def (new_stmt);
3583 | } |
3584 | continue; |
3585 | } |
3586 | |
3587 | int varg = 0; |
3588 | if (masked_loop_p && reduc_idx >= 0) |
3589 | vargs[varg++] = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies, |
3590 | vectype_out, j); |
3591 | for (i = 0; i < nargs; i++) |
3592 | { |
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		{
		  vec_defs.quick_push (vNULL);
		  vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
						 op, &vec_defs[i],
						 vectypes[i]);
		}
	      vargs[varg++] = vec_defs[i][j];
3602 | } |
3603 | if (masked_loop_p && reduc_idx >= 0) |
3604 | vargs[varg++] = vargs[reduc_idx + 1]; |
3605 | if (clz_ctz_arg1) |
3606 | vargs[varg++] = clz_ctz_arg1; |
3607 | |
3608 | if (len_opno >= 0 && len_loop_p) |
3609 | { |
3610 | tree len = vect_get_loop_len (loop_vinfo, gsi, lens, ncopies, |
3611 | vectype_out, j, 1); |
3612 | signed char biasval |
3613 | = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); |
3614 | tree bias = build_int_cst (intQI_type_node, biasval); |
3615 | vargs[len_opno] = len; |
3616 | vargs[len_opno + 1] = bias; |
3617 | } |
3618 | else if (mask_opno >= 0 && masked_loop_p) |
3619 | { |
3620 | tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies, |
3621 | vectype_out, j); |
	      vargs[mask_opno]
		= prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
				    vargs[mask_opno], gsi);
3625 | } |
3626 | |
3627 | gimple *new_stmt; |
3628 | if (cfn == CFN_GOMP_SIMD_LANE) |
3629 | { |
	      tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
	      tree new_var
		= vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
	      gimple *init_stmt = gimple_build_assign (new_var, cst);
	      vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
	      new_temp = make_ssa_name (vec_dest);
	      new_stmt = gimple_build_assign (new_temp, new_var);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3638 | } |
3639 | else if (modifier == NARROW) |
3640 | { |
3641 | /* We don't define any narrowing conditional functions at |
3642 | present. */ |
3643 | gcc_assert (mask_opno < 0); |
	      tree half_res = make_ssa_name (vectype_in);
	      gcall *call = gimple_build_call_internal_vec (ifn, vargs);
	      gimple_call_set_lhs (call, half_res);
	      gimple_call_set_nothrow (call, true);
	      vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3649 | if ((j & 1) == 0) |
3650 | { |
3651 | prev_res = half_res; |
3652 | continue; |
3653 | } |
	      new_temp = make_ssa_name (vec_dest);
	      new_stmt = vect_gimple_build (new_temp, convert_code, prev_res,
					    half_res);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3658 | } |
3659 | else |
3660 | { |
3661 | gcall *call; |
3662 | if (ifn != IFN_LAST) |
3663 | call = gimple_build_call_internal_vec (ifn, vargs); |
3664 | else |
3665 | call = gimple_build_call_vec (fndecl, vargs); |
	      new_temp = make_ssa_name (vec_dest, call);
	      gimple_call_set_lhs (call, new_temp);
	      gimple_call_set_nothrow (call, true);
	      vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
	      new_stmt = call;
	    }

	  if (j == (modifier == NARROW ? 1 : 0))
	    *vec_stmt = new_stmt;
	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3676 | } |
3677 | for (i = 0; i < nargs; i++) |
3678 | { |
3679 | vec<tree> vec_oprndsi = vec_defs[i]; |
3680 | vec_oprndsi.release (); |
3681 | } |
3682 | } |
3683 | else if (modifier == NARROW) |
3684 | { |
3685 | auto_vec<vec<tree> > vec_defs (nargs); |
3686 | /* We don't define any narrowing conditional functions at present. */ |
3687 | gcc_assert (mask_opno < 0); |
3688 | for (j = 0; j < ncopies; ++j) |
3689 | { |
3690 | /* Build argument list for the vectorized call. */ |
	  if (j == 0)
	    vargs.create (nargs * 2);
	  else
	    vargs.truncate (0);
3695 | |
3696 | if (slp_node) |
3697 | { |
3698 | vec<tree> vec_oprnds0; |
3699 | |
3700 | vect_get_slp_defs (vinfo, slp_node, &vec_defs); |
3701 | vec_oprnds0 = vec_defs[0]; |
3702 | |
3703 | /* Arguments are ready. Create the new vector stmt. */ |
	      for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
		{
		  size_t k;
		  vargs.truncate (0);
		  for (k = 0; k < nargs; k++)
		    {
		      vec<tree> vec_oprndsk = vec_defs[k];
		      vargs.quick_push (vec_oprndsk[i]);
		      vargs.quick_push (vec_oprndsk[i + 1]);
		    }
		  gcall *call;
		  if (ifn != IFN_LAST)
		    call = gimple_build_call_internal_vec (ifn, vargs);
		  else
		    call = gimple_build_call_vec (fndecl, vargs);
		  new_temp = make_ssa_name (vec_dest, call);
		  gimple_call_set_lhs (call, new_temp);
		  gimple_call_set_nothrow (call, true);
		  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
		  slp_node->push_vec_def (call);
		}
3725 | continue; |
3726 | } |
3727 | |
	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		{
		  vec_defs.quick_push (vNULL);
		  vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
						 op, &vec_defs[i], vectypes[i]);
		}
	      vec_oprnd0 = vec_defs[i][2 * j];
	      vec_oprnd1 = vec_defs[i][2 * j + 1];

	      vargs.quick_push (vec_oprnd0);
	      vargs.quick_push (vec_oprnd1);
	    }

	  gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_call_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3750 | } |
3751 | |
3752 | if (!slp_node) |
3753 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
3754 | |
3755 | for (i = 0; i < nargs; i++) |
3756 | { |
3757 | vec<tree> vec_oprndsi = vec_defs[i]; |
3758 | vec_oprndsi.release (); |
3759 | } |
3760 | } |
3761 | else |
3762 | /* No current target implements this case. */ |
3763 | return false; |
3764 | |
3765 | vargs.release (); |
3766 | |
3767 | /* The call in STMT might prevent it from being removed in dce. |
3768 | We however cannot remove it here, due to the way the ssa name |
3769 | it defines is mapped to the new definition. So just replace |
3770 | rhs of the statement with something harmless. */ |
3771 | |
3772 | if (slp_node) |
3773 | return true; |
3774 | |
3775 | stmt_info = vect_orig_stmt (stmt_info); |
3776 | lhs = gimple_get_lhs (stmt_info->stmt); |
3777 | |
3778 | gassign *new_stmt |
3779 | = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs))); |
3780 | vinfo->replace_stmt (gsi, stmt_info, new_stmt); |
3781 | |
3782 | return true; |
3783 | } |
3784 | |
3785 | |
3786 | struct simd_call_arg_info |
3787 | { |
3788 | tree vectype; |
3789 | tree op; |
3790 | HOST_WIDE_INT linear_step; |
3791 | enum vect_def_type dt; |
3792 | unsigned int align; |
3793 | bool simd_lane_linear; |
3794 | }; |
3795 | |
3796 | /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME, |
3797 | is linear within simd lane (but not within whole loop), note it in |
3798 | *ARGINFO. */ |
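
/* For example (illustrative gimple):

     _2 = GOMP_SIMD_LANE (simduid.0_1);
     _3 = _2 * 8;
     op_4 = &a + _3;

   is linear within a simd lane, with base &a and linear_step 8.  */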
3799 | |
3800 | static void |
3801 | vect_simd_lane_linear (tree op, class loop *loop, |
3802 | struct simd_call_arg_info *arginfo) |
3803 | { |
3804 | gimple *def_stmt = SSA_NAME_DEF_STMT (op); |
3805 | |
  if (!is_gimple_assign (def_stmt)
      || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
      || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
    return;

  tree base = gimple_assign_rhs1 (def_stmt);
  HOST_WIDE_INT linear_step = 0;
  tree v = gimple_assign_rhs2 (def_stmt);
3814 | while (TREE_CODE (v) == SSA_NAME) |
3815 | { |
3816 | tree t; |
      tree t;
      def_stmt = SSA_NAME_DEF_STMT (v);
      if (is_gimple_assign (def_stmt))
	switch (gimple_assign_rhs_code (def_stmt))
	  {
	  case PLUS_EXPR:
	    t = gimple_assign_rhs2 (def_stmt);
	    if (linear_step || TREE_CODE (t) != INTEGER_CST)
	      return;
	    base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
	    v = gimple_assign_rhs1 (def_stmt);
	    continue;
	  case MULT_EXPR:
	    t = gimple_assign_rhs2 (def_stmt);
	    if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
	      return;
	    linear_step = tree_to_shwi (t);
	    v = gimple_assign_rhs1 (def_stmt);
	    continue;
	  CASE_CONVERT:
	    t = gimple_assign_rhs1 (def_stmt);
	    if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
		|| (TYPE_PRECISION (TREE_TYPE (v))
		    < TYPE_PRECISION (TREE_TYPE (t))))
	      return;
	    if (!linear_step)
	      linear_step = 1;
	    v = t;
	    continue;
	  default:
	    return;
	  }
      else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3849 | && loop->simduid |
3850 | && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME |
3851 | && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0)) |
3852 | == loop->simduid)) |
3853 | { |
3854 | if (!linear_step) |
3855 | linear_step = 1; |
3856 | arginfo->linear_step = linear_step; |
3857 | arginfo->op = base; |
3858 | arginfo->simd_lane_linear = true; |
3859 | return; |
3860 | } |
3861 | } |
3862 | } |
3863 | |
3864 | /* Function vectorizable_simd_clone_call. |
3865 | |
3866 | Check if STMT_INFO performs a function call that can be vectorized |
3867 | by calling a simd clone of the function. |
3868 | If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized |
3869 | stmt to replace it, put it in VEC_STMT, and insert it at GSI. |
3870 | Return true if STMT_INFO is vectorizable in this way. */ |
3871 | |
3872 | static bool |
3873 | vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, |
3874 | gimple_stmt_iterator *gsi, |
3875 | gimple **vec_stmt, slp_tree slp_node, |
3876 | stmt_vector_for_cost *) |
3877 | { |
3878 | tree vec_dest; |
3879 | tree scalar_dest; |
3880 | tree op, type; |
3881 | tree vec_oprnd0 = NULL_TREE; |
3882 | tree vectype; |
3883 | poly_uint64 nunits; |
3884 | loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo); |
3885 | bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo); |
3886 | class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL; |
3887 | tree fndecl, new_temp; |
3888 | int ncopies, j; |
3889 | auto_vec<simd_call_arg_info> arginfo; |
3890 | vec<tree> vargs = vNULL; |
3891 | size_t i, nargs; |
3892 | tree lhs, rtype, ratype; |
3893 | vec<constructor_elt, va_gc> *ret_ctor_elts = NULL; |
3894 | int masked_call_offset = 0; |
3895 | |
  /* Is STMT a vectorizable call?  */
  gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
  if (!stmt)
    return false;

  fndecl = gimple_call_fndecl (stmt);
  if (fndecl == NULL_TREE
      && gimple_call_internal_p (stmt, IFN_MASK_CALL))
    {
      fndecl = gimple_call_arg (stmt, 0);
      gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
      fndecl = TREE_OPERAND (fndecl, 0);
      gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
      masked_call_offset = 1;
    }
3911 | if (fndecl == NULL_TREE) |
3912 | return false; |
3913 | |
  struct cgraph_node *node = cgraph_node::get (fndecl);
3915 | if (node == NULL || node->simd_clones == NULL) |
3916 | return false; |
3917 | |
3918 | if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) |
3919 | return false; |
3920 | |
3921 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def |
3922 | && ! vec_stmt) |
3923 | return false; |
3924 | |
3925 | if (gimple_call_lhs (gs: stmt) |
3926 | && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME) |
3927 | return false; |
3928 | |
3929 | gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt)); |
3930 | |
3931 | vectype = STMT_VINFO_VECTYPE (stmt_info); |
3932 | |
3933 | if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info)) |
3934 | return false; |
3935 | |
3936 | /* Process function arguments. */ |
  nargs = gimple_call_num_args (stmt) - masked_call_offset;
3938 | |
3939 | /* Bail out if the function has zero arguments. */ |
3940 | if (nargs == 0) |
3941 | return false; |
3942 | |
3943 | vec<tree>& simd_clone_info = (slp_node ? SLP_TREE_SIMD_CLONE_INFO (slp_node) |
3944 | : STMT_VINFO_SIMD_CLONE_INFO (stmt_info)); |
  arginfo.reserve (nargs, true);
  auto_vec<slp_tree> slp_op;
  slp_op.safe_grow_cleared (nargs);
3948 | |
3949 | for (i = 0; i < nargs; i++) |
3950 | { |
3951 | simd_call_arg_info thisarginfo; |
3952 | affine_iv iv; |
3953 | |
3954 | thisarginfo.linear_step = 0; |
3955 | thisarginfo.align = 0; |
3956 | thisarginfo.op = NULL_TREE; |
3957 | thisarginfo.simd_lane_linear = false; |
3958 | |
3959 | int op_no = i + masked_call_offset; |
3960 | if (slp_node) |
	op_no = vect_slp_child_index_for_operand (stmt, op_no, false);
3962 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, |
3963 | op_no, &op, &slp_op[i], |
3964 | &thisarginfo.dt, &thisarginfo.vectype) |
3965 | || thisarginfo.dt == vect_uninitialized_def) |
3966 | { |
3967 | if (dump_enabled_p ()) |
3968 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3969 | "use not simple.\n" ); |
3970 | return false; |
3971 | } |
3972 | |
3973 | if (thisarginfo.dt == vect_constant_def |
3974 | || thisarginfo.dt == vect_external_def) |
3975 | { |
3976 | /* With SLP we determine the vector type of constants/externals |
3977 | at analysis time, handling conflicts via |
3978 | vect_maybe_update_slp_op_vectype. At transform time |
3979 | we have a vector type recorded for SLP. */ |
3980 | gcc_assert (!vec_stmt |
3981 | || !slp_node |
3982 | || thisarginfo.vectype != NULL_TREE); |
3983 | if (!vec_stmt) |
3984 | thisarginfo.vectype = get_vectype_for_scalar_type (vinfo, |
3985 | TREE_TYPE (op), |
3986 | slp_node); |
3987 | } |
3988 | else |
3989 | gcc_assert (thisarginfo.vectype != NULL_TREE); |
3990 | |
3991 | /* For linear arguments, the analyze phase should have saved |
3992 | the base and step in {STMT_VINFO,SLP_TREE}_SIMD_CLONE_INFO. */ |
3993 | if (i * 3 + 4 <= simd_clone_info.length () |
3994 | && simd_clone_info[i * 3 + 2]) |
3995 | { |
3996 | gcc_assert (vec_stmt); |
3997 | thisarginfo.linear_step = tree_to_shwi (simd_clone_info[i * 3 + 2]); |
3998 | thisarginfo.op = simd_clone_info[i * 3 + 1]; |
3999 | thisarginfo.simd_lane_linear |
4000 | = (simd_clone_info[i * 3 + 3] == boolean_true_node); |
4001 | /* If loop has been peeled for alignment, we need to adjust it. */ |
4002 | tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo); |
4003 | tree n2 = LOOP_VINFO_NITERS (loop_vinfo); |
4004 | if (n1 != n2 && !thisarginfo.simd_lane_linear) |
4005 | { |
4006 | tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2); |
4007 | tree step = simd_clone_info[i * 3 + 2]; |
4008 | tree opt = TREE_TYPE (thisarginfo.op); |
4009 | bias = fold_convert (TREE_TYPE (step), bias); |
4010 | bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step); |
4011 | thisarginfo.op |
4012 | = fold_build2 (POINTER_TYPE_P (opt) |
4013 | ? POINTER_PLUS_EXPR : PLUS_EXPR, opt, |
4014 | thisarginfo.op, bias); |
4015 | } |
4016 | } |
4017 | else if (!vec_stmt |
4018 | && thisarginfo.dt != vect_constant_def |
4019 | && thisarginfo.dt != vect_external_def |
4020 | && loop_vinfo |
4021 | && TREE_CODE (op) == SSA_NAME |
4022 | && simple_iv (loop, loop_containing_stmt (stmt), op, |
4023 | &iv, false) |
4024 | && tree_fits_shwi_p (iv.step)) |
4025 | { |
4026 | thisarginfo.linear_step = tree_to_shwi (iv.step); |
4027 | thisarginfo.op = iv.base; |
4028 | } |
4029 | else if ((thisarginfo.dt == vect_constant_def |
4030 | || thisarginfo.dt == vect_external_def) |
4031 | && POINTER_TYPE_P (TREE_TYPE (op))) |
4032 | thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT; |
4033 | /* Addresses of array elements indexed by GOMP_SIMD_LANE are |
4034 | linear too. */ |
4035 | if (POINTER_TYPE_P (TREE_TYPE (op)) |
4036 | && !thisarginfo.linear_step |
4037 | && !vec_stmt |
4038 | && thisarginfo.dt != vect_constant_def |
4039 | && thisarginfo.dt != vect_external_def |
4040 | && loop_vinfo |
4041 | && TREE_CODE (op) == SSA_NAME) |
	vect_simd_lane_linear (op, loop, &thisarginfo);
4043 | |
      arginfo.quick_push (thisarginfo);
4045 | } |
4046 | |
4047 | poly_uint64 vf = loop_vinfo ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1; |
4048 | unsigned group_size = slp_node ? SLP_TREE_LANES (slp_node) : 1; |
4049 | unsigned int badness = 0; |
4050 | struct cgraph_node *bestn = NULL; |
4051 | if (simd_clone_info.exists ()) |
    bestn = cgraph_node::get (simd_clone_info[0]);
4053 | else |
4054 | for (struct cgraph_node *n = node->simd_clones; n != NULL; |
4055 | n = n->simdclone->next_clone) |
4056 | { |
4057 | unsigned int this_badness = 0; |
4058 | unsigned int num_calls; |
	/* The number of arguments in the call and the number of parameters in
	   the simdclone should match.  However, when the simdclone is
	   'inbranch', it could have one more parameter than nargs when using
	   an inbranch simdclone for a non-inbranch call, either in a
	   non-masked loop using an all-true constant mask, or inside a masked
	   loop using its mask.  */
4065 | size_t simd_nargs = n->simdclone->nargs; |
4066 | if (!masked_call_offset && n->simdclone->inbranch) |
4067 | simd_nargs--; |
	if (!constant_multiple_p (vf * group_size, n->simdclone->simdlen,
				  &num_calls)
4070 | || (!n->simdclone->inbranch && (masked_call_offset > 0)) |
4071 | || (nargs != simd_nargs)) |
4072 | continue; |
4073 | if (num_calls != 1) |
	this_badness += floor_log2 (num_calls) * 4096;
4075 | if (n->simdclone->inbranch) |
4076 | this_badness += 8192; |
4077 | int target_badness = targetm.simd_clone.usable (n); |
4078 | if (target_badness < 0) |
4079 | continue; |
4080 | this_badness += target_badness * 512; |
4081 | for (i = 0; i < nargs; i++) |
4082 | { |
4083 | switch (n->simdclone->args[i].arg_type) |
4084 | { |
4085 | case SIMD_CLONE_ARG_TYPE_VECTOR: |
4086 | if (!useless_type_conversion_p |
4087 | (n->simdclone->args[i].orig_type, |
4088 | TREE_TYPE (gimple_call_arg (stmt, |
4089 | i + masked_call_offset)))) |
4090 | i = -1; |
4091 | else if (arginfo[i].dt == vect_constant_def |
4092 | || arginfo[i].dt == vect_external_def |
4093 | || arginfo[i].linear_step) |
4094 | this_badness += 64; |
4095 | break; |
4096 | case SIMD_CLONE_ARG_TYPE_UNIFORM: |
4097 | if (arginfo[i].dt != vect_constant_def |
4098 | && arginfo[i].dt != vect_external_def) |
4099 | i = -1; |
4100 | break; |
4101 | case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP: |
4102 | case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP: |
4103 | if (arginfo[i].dt == vect_constant_def |
4104 | || arginfo[i].dt == vect_external_def |
4105 | || (arginfo[i].linear_step |
4106 | != n->simdclone->args[i].linear_step)) |
4107 | i = -1; |
4108 | break; |
4109 | case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP: |
4110 | case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP: |
4111 | case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP: |
4112 | case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP: |
4113 | case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP: |
4114 | case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP: |
4115 | /* FORNOW */ |
4116 | i = -1; |
4117 | break; |
4118 | case SIMD_CLONE_ARG_TYPE_MASK: |
4119 | /* While we can create a traditional data vector from |
4120 | an incoming integer mode mask we have no good way to |
4121 | force generate an integer mode mask from a traditional |
4122 | boolean vector input. */ |
4123 | if (SCALAR_INT_MODE_P (n->simdclone->mask_mode) |
4124 | && !SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype))) |
4125 | i = -1; |
4126 | else if (!SCALAR_INT_MODE_P (n->simdclone->mask_mode) |
4127 | && SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype))) |
4128 | this_badness += 2048; |
4129 | break; |
4130 | } |
4131 | if (i == (size_t) -1) |
4132 | break; |
4133 | if (n->simdclone->args[i].alignment > arginfo[i].align) |
4134 | { |
4135 | i = -1; |
4136 | break; |
4137 | } |
	  if (arginfo[i].align)
	    this_badness += (exact_log2 (arginfo[i].align)
			     - exact_log2 (n->simdclone->args[i].alignment));
4141 | } |
4142 | if (i == (size_t) -1) |
4143 | continue; |
4144 | if (masked_call_offset == 0 |
4145 | && n->simdclone->inbranch |
4146 | && n->simdclone->nargs > nargs) |
4147 | { |
4148 | gcc_assert (n->simdclone->args[n->simdclone->nargs - 1].arg_type == |
4149 | SIMD_CLONE_ARG_TYPE_MASK); |
	  /* Penalize using a masked SIMD clone in a non-masked loop when the
	     call is not in a branch, as we'd have to construct an all-true
	     mask.  */
4152 | if (!loop_vinfo || !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) |
4153 | this_badness += 64; |
4154 | } |
4155 | if (bestn == NULL || this_badness < badness) |
4156 | { |
4157 | bestn = n; |
4158 | badness = this_badness; |
4159 | } |
4160 | } |
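
  /* E.g. (illustrative): with vf * group_size == 8, a clone with
     simdlen 4 needs num_calls == 2 and gets a floor_log2 (2) * 4096
     == 4096 penalty, an inbranch clone a further 8192, so an
     exactly-matching non-inbranch clone is preferred.  */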
4161 | |
4162 | if (bestn == NULL) |
4163 | return false; |
4164 | |
4165 | unsigned int num_mask_args = 0; |
4166 | if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode)) |
4167 | for (i = 0; i < nargs; i++) |
4168 | if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK) |
4169 | num_mask_args++; |
4170 | |
4171 | for (i = 0; i < nargs; i++) |
4172 | { |
4173 | if ((arginfo[i].dt == vect_constant_def |
4174 | || arginfo[i].dt == vect_external_def) |
4175 | && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR) |
4176 | { |
4177 | tree arg_type = TREE_TYPE (gimple_call_arg (stmt, |
4178 | i + masked_call_offset)); |
4179 | arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type, |
4180 | slp_node); |
	  if (arginfo[i].vectype == NULL
	      || !constant_multiple_p (bestn->simdclone->simdlen,
				       TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
	    return false;
4185 | } |
4186 | |
4187 | if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR |
4188 | && VECTOR_BOOLEAN_TYPE_P (bestn->simdclone->args[i].vector_type)) |
4189 | { |
4190 | if (dump_enabled_p ()) |
4191 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
4192 | "vector mask arguments are not supported.\n" ); |
4193 | return false; |
4194 | } |
4195 | |
4196 | if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK) |
4197 | { |
4198 | tree clone_arg_vectype = bestn->simdclone->args[i].vector_type; |
4199 | if (bestn->simdclone->mask_mode == VOIDmode) |
4200 | { |
	      if (maybe_ne (TYPE_VECTOR_SUBPARTS (clone_arg_vectype),
			    TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4203 | { |
4204 | /* FORNOW we only have partial support for vector-type masks |
4205 | that can't hold all of simdlen. */ |
4206 | if (dump_enabled_p ()) |
4207 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, |
4208 | vect_location, |
4209 | "in-branch vector clones are not yet" |
4210 | " supported for mismatched vector sizes.\n" ); |
4211 | return false; |
4212 | } |
4213 | if (!expand_vec_cond_expr_p (clone_arg_vectype, |
4214 | arginfo[i].vectype, ERROR_MARK)) |
4215 | { |
4216 | if (dump_enabled_p ()) |
4217 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, |
4218 | vect_location, |
4219 | "cannot compute mask argument for" |
4220 | " in-branch vector clones.\n" ); |
4221 | return false; |
4222 | } |
4223 | } |
4224 | else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode)) |
4225 | { |
4226 | if (!SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)) |
		  || maybe_ne (exact_div (bestn->simdclone->simdlen,
					  num_mask_args),
			       TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4230 | { |
4231 | /* FORNOW we only have partial support for integer-type masks |
4232 | that represent the same number of lanes as the |
4233 | vectorized mask inputs. */ |
4234 | if (dump_enabled_p ()) |
4235 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, |
4236 | vect_location, |
4237 | "in-branch vector clones are not yet " |
4238 | "supported for mismatched vector sizes.\n" ); |
4239 | return false; |
4240 | } |
4241 | } |
4242 | else |
4243 | { |
4244 | if (dump_enabled_p ()) |
4245 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, |
4246 | vect_location, |
4247 | "in-branch vector clones not supported" |
4248 | " on this target.\n" ); |
4249 | return false; |
4250 | } |
4251 | } |
4252 | } |
4253 | |
4254 | fndecl = bestn->decl; |
4255 | nunits = bestn->simdclone->simdlen; |
4256 | if (slp_node) |
4257 | ncopies = vector_unroll_factor (vf * group_size, nunits); |
4258 | else |
4259 | ncopies = vector_unroll_factor (vf, nunits); |
4260 | |
4261 | /* If the function isn't const, only allow it in simd loops where user |
4262 | has asserted that at least nunits consecutive iterations can be |
4263 | performed using SIMD instructions. */ |
  if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
      && gimple_vuse (stmt))
4266 | return false; |
4267 | |
4268 | /* Sanity check: make sure that at least one copy of the vectorized stmt |
4269 | needs to be generated. */ |
4270 | gcc_assert (ncopies >= 1); |
4271 | |
4272 | if (!vec_stmt) /* transformation not required. */ |
4273 | { |
4274 | if (slp_node) |
4275 | for (unsigned i = 0; i < nargs; ++i) |
4276 | if (!vect_maybe_update_slp_op_vectype (slp_op[i], arginfo[i].vectype)) |
4277 | { |
4278 | if (dump_enabled_p ()) |
4279 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
4280 | "incompatible vector types for invariants\n" ); |
4281 | return false; |
4282 | } |
      /* When the original call is pure or const but the SIMD ABI dictates
	 an aggregate return we will have to use a virtual definition and
	 in a loop eventually even need to add a virtual PHI.  That's
	 not straightforward, so allow this to be fixed up via renaming.  */
      if (gimple_call_lhs (stmt)
	  && !gimple_vdef (stmt)
	  && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
	vinfo->any_known_not_updated_vssa = true;
4291 | /* ??? For SLP code-gen we end up inserting after the last |
4292 | vector argument def rather than at the original call position |
4293 | so automagic virtual operand updating doesn't work. */ |
      if (gimple_vuse (stmt) && slp_node)
4295 | vinfo->any_known_not_updated_vssa = true; |
      simd_clone_info.safe_push (bestn->decl);
4297 | for (i = 0; i < bestn->simdclone->nargs; i++) |
4298 | { |
4299 | switch (bestn->simdclone->args[i].arg_type) |
4300 | { |
4301 | default: |
4302 | continue; |
4303 | case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP: |
4304 | case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP: |
	    {
	      simd_clone_info.safe_grow_cleared (i * 3 + 1, true);
	      simd_clone_info.safe_push (arginfo[i].op);
	      tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
			 ? size_type_node : TREE_TYPE (arginfo[i].op);
	      tree ls = build_int_cst (lst, arginfo[i].linear_step);
	      simd_clone_info.safe_push (ls);
	      tree sll = arginfo[i].simd_lane_linear
			 ? boolean_true_node : boolean_false_node;
	      simd_clone_info.safe_push (sll);
	    }
4316 | break; |
4317 | case SIMD_CLONE_ARG_TYPE_MASK: |
4318 | if (loop_vinfo |
4319 | && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) |
4320 | vect_record_loop_mask (loop_vinfo, |
4321 | &LOOP_VINFO_MASKS (loop_vinfo), |
4322 | ncopies, vectype, op); |
4323 | |
4324 | break; |
4325 | } |
4326 | } |
4327 | |
4328 | if (!bestn->simdclone->inbranch && loop_vinfo) |
4329 | { |
4330 | if (dump_enabled_p () |
4331 | && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) |
4332 | dump_printf_loc (MSG_NOTE, vect_location, |
4333 | "can't use a fully-masked loop because a" |
4334 | " non-masked simd clone was selected.\n" ); |
4335 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; |
4336 | } |
4337 | |
4338 | STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type; |
      DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4340 | /* vect_model_simple_cost (vinfo, stmt_info, ncopies, |
4341 | dt, slp_node, cost_vec); */ |
4342 | return true; |
4343 | } |
4344 | |
4345 | /* Transform. */ |
4346 | |
4347 | if (dump_enabled_p ()) |
4348 | dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n" ); |
4349 | |
4350 | /* Handle def. */ |
  scalar_dest = gimple_call_lhs (stmt);
4352 | vec_dest = NULL_TREE; |
4353 | rtype = NULL_TREE; |
4354 | ratype = NULL_TREE; |
4355 | if (scalar_dest) |
4356 | { |
4357 | vec_dest = vect_create_destination_var (scalar_dest, vectype); |
4358 | rtype = TREE_TYPE (TREE_TYPE (fndecl)); |
4359 | if (TREE_CODE (rtype) == ARRAY_TYPE) |
4360 | { |
4361 | ratype = rtype; |
4362 | rtype = TREE_TYPE (ratype); |
4363 | } |
4364 | } |
4365 | |
4366 | auto_vec<vec<tree> > vec_oprnds; |
4367 | auto_vec<unsigned> vec_oprnds_i; |
4368 | vec_oprnds_i.safe_grow_cleared (len: nargs, exact: true); |
4369 | if (slp_node) |
4370 | { |
4371 | vec_oprnds.reserve_exact (nelems: nargs); |
4372 | vect_get_slp_defs (vinfo, slp_node, &vec_oprnds); |
4373 | } |
4374 | else |
4375 | vec_oprnds.safe_grow_cleared (len: nargs, exact: true); |
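  /* The loop below emits one call to the SIMD clone per copy.  For each
     copy the argument list VARGS is rebuilt from the vectorized (or
     scalar) definitions of the scalar arguments, splitting or
     concatenating vector defs whenever the clone's vector types have a
     different number of lanes than the caller's vector type.  */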
4376 | for (j = 0; j < ncopies; ++j) |
4377 | { |
4378 | poly_uint64 callee_nelements; |
4379 | poly_uint64 caller_nelements; |
4380 | /* Build argument list for the vectorized call. */ |
4381 | if (j == 0) |
4382 | vargs.create (nelems: nargs); |
4383 | else |
4384 | vargs.truncate (size: 0); |
4385 | |
4386 | for (i = 0; i < nargs; i++) |
4387 | { |
4388 | unsigned int k, l, m, o; |
4389 | tree atype; |
4390 | op = gimple_call_arg (gs: stmt, index: i + masked_call_offset); |
4391 | switch (bestn->simdclone->args[i].arg_type) |
4392 | { |
4393 | case SIMD_CLONE_ARG_TYPE_VECTOR: |
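	    /* Vector arguments may need re-chunking: when the clone
	       expects fewer lanes per vector than the caller provides,
	       extract sub-vectors with BIT_FIELD_REFs; when it expects
	       more, combine several caller defs with a CONSTRUCTOR (or
	       view-convert a single def).  */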
4394 | atype = bestn->simdclone->args[i].vector_type; |
4395 | caller_nelements = TYPE_VECTOR_SUBPARTS (node: arginfo[i].vectype); |
4396 | callee_nelements = TYPE_VECTOR_SUBPARTS (node: atype); |
4397 | o = vector_unroll_factor (nunits, callee_nelements); |
4398 | for (m = j * o; m < (j + 1) * o; m++) |
4399 | { |
4400 | if (known_lt (callee_nelements, caller_nelements)) |
4401 | { |
4402 | poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype)); |
4403 | if (!constant_multiple_p (a: caller_nelements, |
4404 | b: callee_nelements, multiple: &k)) |
4405 | gcc_unreachable (); |
4406 | |
4407 | gcc_assert ((k & (k - 1)) == 0); |
4408 | if (m == 0) |
4409 | { |
4410 | if (!slp_node) |
4411 | vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info, |
4412 | ncopies: ncopies * o / k, op, |
4413 | vec_oprnds: &vec_oprnds[i]); |
4414 | vec_oprnds_i[i] = 0; |
4415 | vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++]; |
4416 | } |
4417 | else |
4418 | { |
4419 | vec_oprnd0 = arginfo[i].op; |
4420 | if ((m & (k - 1)) == 0) |
4421 | vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++]; |
4422 | } |
4423 | arginfo[i].op = vec_oprnd0; |
4424 | vec_oprnd0 |
4425 | = build3 (BIT_FIELD_REF, atype, vec_oprnd0, |
4426 | bitsize_int (prec), |
4427 | bitsize_int ((m & (k - 1)) * prec)); |
4428 | gassign *new_stmt |
4429 | = gimple_build_assign (make_ssa_name (var: atype), |
4430 | vec_oprnd0); |
4431 | vect_finish_stmt_generation (vinfo, stmt_info, |
4432 | vec_stmt: new_stmt, gsi); |
4433 | vargs.safe_push (obj: gimple_assign_lhs (gs: new_stmt)); |
4434 | } |
4435 | else |
4436 | { |
4437 | if (!constant_multiple_p (a: callee_nelements, |
4438 | b: caller_nelements, multiple: &k)) |
4439 | gcc_unreachable (); |
4440 | gcc_assert ((k & (k - 1)) == 0); |
4441 | vec<constructor_elt, va_gc> *ctor_elts; |
4442 | if (k != 1) |
4443 | vec_alloc (v&: ctor_elts, nelems: k); |
4444 | else |
4445 | ctor_elts = NULL; |
4446 | for (l = 0; l < k; l++) |
4447 | { |
4448 | if (m == 0 && l == 0) |
4449 | { |
4450 | if (!slp_node) |
4451 | vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info, |
4452 | ncopies: k * o * ncopies, |
4453 | op, |
4454 | vec_oprnds: &vec_oprnds[i]); |
4455 | vec_oprnds_i[i] = 0; |
4456 | vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++]; |
4457 | } |
4458 | else |
4459 | vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++]; |
4460 | arginfo[i].op = vec_oprnd0; |
4461 | if (k == 1) |
4462 | break; |
4463 | CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, |
4464 | vec_oprnd0); |
4465 | } |
4466 | if (k == 1) |
4467 | if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0), |
4468 | atype)) |
4469 | { |
4470 | vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, atype, |
4471 | vec_oprnd0); |
4472 | gassign *new_stmt |
4473 | = gimple_build_assign (make_ssa_name (var: atype), |
4474 | vec_oprnd0); |
4475 | vect_finish_stmt_generation (vinfo, stmt_info, |
4476 | vec_stmt: new_stmt, gsi); |
4477 | vargs.safe_push (obj: gimple_get_lhs (new_stmt)); |
4478 | } |
4479 | else |
4480 | vargs.safe_push (obj: vec_oprnd0); |
4481 | else |
4482 | { |
4483 | vec_oprnd0 = build_constructor (atype, ctor_elts); |
4484 | gassign *new_stmt |
4485 | = gimple_build_assign (make_ssa_name (var: atype), |
4486 | vec_oprnd0); |
4487 | vect_finish_stmt_generation (vinfo, stmt_info, |
4488 | vec_stmt: new_stmt, gsi); |
4489 | vargs.safe_push (obj: gimple_assign_lhs (gs: new_stmt)); |
4490 | } |
4491 | } |
4492 | } |
4493 | break; |
4494 | case SIMD_CLONE_ARG_TYPE_MASK: |
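	    /* Mask arguments come in two flavours: with a VOIDmode mask
	       mode the clone takes a data vector of ones and zeros built
	       with a VEC_COND_EXPR; with a scalar integer mask mode the
	       boolean mask vector is view-converted (and, if needed,
	       NOP-converted) to that integer type.  */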
4495 | if (bestn->simdclone->mask_mode == VOIDmode) |
4496 | { |
4497 | atype = bestn->simdclone->args[i].vector_type; |
4498 | tree elt_type = TREE_TYPE (atype); |
4499 | tree one = fold_convert (elt_type, integer_one_node); |
4500 | tree zero = fold_convert (elt_type, integer_zero_node); |
4501 | callee_nelements = TYPE_VECTOR_SUBPARTS (node: atype); |
4502 | caller_nelements = TYPE_VECTOR_SUBPARTS (node: arginfo[i].vectype); |
4503 | o = vector_unroll_factor (nunits, callee_nelements); |
4504 | for (m = j * o; m < (j + 1) * o; m++) |
4505 | { |
4506 | if (maybe_lt (a: callee_nelements, b: caller_nelements)) |
4507 | { |
4508 | /* The mask type has fewer elements than simdlen. */ |
4509 | |
4510 | /* FORNOW */ |
4511 | gcc_unreachable (); |
4512 | } |
4513 | else if (known_eq (callee_nelements, caller_nelements)) |
4514 | { |
4515 | /* The SIMD clone function has the same number of |
4516 | elements as the current function. */ |
4517 | if (m == 0) |
4518 | { |
4519 | if (!slp_node) |
4520 | vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info, |
4521 | ncopies: o * ncopies, |
4522 | op, |
4523 | vec_oprnds: &vec_oprnds[i]); |
4524 | vec_oprnds_i[i] = 0; |
4525 | } |
4526 | vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++]; |
4527 | if (loop_vinfo |
4528 | && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) |
4529 | { |
4530 | vec_loop_masks *loop_masks |
4531 | = &LOOP_VINFO_MASKS (loop_vinfo); |
4532 | tree loop_mask |
4533 | = vect_get_loop_mask (loop_vinfo, gsi, |
4534 | loop_masks, ncopies, |
4535 | vectype, j); |
4536 | vec_oprnd0 |
4537 | = prepare_vec_mask (loop_vinfo, |
4538 | TREE_TYPE (loop_mask), |
4539 | loop_mask, vec_mask: vec_oprnd0, |
4540 | gsi); |
4541 | loop_vinfo->vec_cond_masked_set.add (k: { vec_oprnd0, |
4542 | loop_mask }); |
4543 | |
4544 | } |
4545 | vec_oprnd0 |
4546 | = build3 (VEC_COND_EXPR, atype, vec_oprnd0, |
4547 | build_vector_from_val (atype, one), |
4548 | build_vector_from_val (atype, zero)); |
4549 | gassign *new_stmt |
4550 | = gimple_build_assign (make_ssa_name (var: atype), |
4551 | vec_oprnd0); |
4552 | vect_finish_stmt_generation (vinfo, stmt_info, |
4553 | vec_stmt: new_stmt, gsi); |
4554 | vargs.safe_push (obj: gimple_assign_lhs (gs: new_stmt)); |
4555 | } |
4556 | else |
4557 | { |
4558 | /* The mask type has more elements than simdlen. */ |
4559 | |
4560 | /* FORNOW */ |
4561 | gcc_unreachable (); |
4562 | } |
4563 | } |
4564 | } |
4565 | else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode)) |
4566 | { |
4567 | atype = bestn->simdclone->args[i].vector_type; |
4568 | /* Guess the number of lanes represented by atype. */ |
4569 | poly_uint64 atype_subparts |
4570 | = exact_div (a: bestn->simdclone->simdlen, |
4571 | b: num_mask_args); |
4572 | o = vector_unroll_factor (nunits, atype_subparts); |
4573 | for (m = j * o; m < (j + 1) * o; m++) |
4574 | { |
4575 | if (m == 0) |
4576 | { |
4577 | if (!slp_node) |
4578 | vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info, |
4579 | ncopies: o * ncopies, |
4580 | op, |
4581 | vec_oprnds: &vec_oprnds[i]); |
4582 | vec_oprnds_i[i] = 0; |
4583 | } |
4584 | if (maybe_lt (a: atype_subparts, |
4585 | b: TYPE_VECTOR_SUBPARTS (node: arginfo[i].vectype))) |
4586 | { |
4587 | /* The mask argument has fewer elements than the |
4588 | input vector. */ |
4589 | /* FORNOW */ |
4590 | gcc_unreachable (); |
4591 | } |
4592 | else if (known_eq (atype_subparts, |
4593 | TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))) |
4594 | { |
4595 | /* The vector mask argument matches the input |
4596 | in the number of lanes, but not necessarily |
4597 | in the mode. */ |
4598 | vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++]; |
4599 | tree st = lang_hooks.types.type_for_mode |
4600 | (TYPE_MODE (TREE_TYPE (vec_oprnd0)), 1); |
4601 | vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, st, |
4602 | vec_oprnd0); |
4603 | gassign *new_stmt |
4604 | = gimple_build_assign (make_ssa_name (var: st), |
4605 | vec_oprnd0); |
4606 | vect_finish_stmt_generation (vinfo, stmt_info, |
4607 | vec_stmt: new_stmt, gsi); |
4608 | if (!types_compatible_p (type1: atype, type2: st)) |
4609 | { |
4610 | new_stmt |
4611 | = gimple_build_assign (make_ssa_name (var: atype), |
4612 | NOP_EXPR, |
4613 | gimple_assign_lhs |
4614 | (gs: new_stmt)); |
4615 | vect_finish_stmt_generation (vinfo, stmt_info, |
4616 | vec_stmt: new_stmt, gsi); |
4617 | } |
4618 | vargs.safe_push (obj: gimple_assign_lhs (gs: new_stmt)); |
4619 | } |
4620 | else |
4621 | { |
4622 | /* The mask argument has more elements than the |
4623 | input vector. */ |
4624 | /* FORNOW */ |
4625 | gcc_unreachable (); |
4626 | } |
4627 | } |
4628 | } |
4629 | else |
4630 | gcc_unreachable (); |
4631 | break; |
4632 | case SIMD_CLONE_ARG_TYPE_UNIFORM: |
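	  /* Uniform arguments are passed to the clone unchanged, as a
	     single scalar shared by all lanes.  */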
4633 | vargs.safe_push (obj: op); |
4634 | break; |
4635 | case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP: |
4636 | case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP: |
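	  /* Linear arguments advance by LINEAR_STEP per scalar lane.  For
	     the first copy in a loop we materialize the value with a PHI
	     that is bumped by LINEAR_STEP * NCOPIES * NUNITS on the latch
	     edge; later copies just add their J * NUNITS * LINEAR_STEP
	     offset to that PHI result.  */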
4637 | if (j == 0) |
4638 | { |
4639 | gimple_seq stmts; |
4640 | arginfo[i].op |
4641 | = force_gimple_operand (unshare_expr (arginfo[i].op), |
4642 | &stmts, true, NULL_TREE); |
4643 | if (stmts != NULL) |
4644 | { |
4645 | basic_block new_bb; |
4646 | edge pe = loop_preheader_edge (loop); |
4647 | new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); |
4648 | gcc_assert (!new_bb); |
4649 | } |
4650 | if (arginfo[i].simd_lane_linear) |
4651 | { |
4652 | vargs.safe_push (obj: arginfo[i].op); |
4653 | break; |
4654 | } |
4655 | tree phi_res = copy_ssa_name (var: op); |
4656 | gphi *new_phi = create_phi_node (phi_res, loop->header); |
4657 | add_phi_arg (new_phi, arginfo[i].op, |
4658 | loop_preheader_edge (loop), UNKNOWN_LOCATION); |
4659 | enum tree_code code |
4660 | = POINTER_TYPE_P (TREE_TYPE (op)) |
4661 | ? POINTER_PLUS_EXPR : PLUS_EXPR; |
4662 | tree type = POINTER_TYPE_P (TREE_TYPE (op)) |
4663 | ? sizetype : TREE_TYPE (op); |
4664 | poly_widest_int cst |
4665 | = wi::mul (a: bestn->simdclone->args[i].linear_step, |
4666 | b: ncopies * nunits); |
4667 | tree tcst = wide_int_to_tree (type, cst); |
4668 | tree phi_arg = copy_ssa_name (var: op); |
4669 | gassign *new_stmt |
4670 | = gimple_build_assign (phi_arg, code, phi_res, tcst); |
4671 | gimple_stmt_iterator si = gsi_after_labels (bb: loop->header); |
4672 | gsi_insert_after (&si, new_stmt, GSI_NEW_STMT); |
4673 | add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop), |
4674 | UNKNOWN_LOCATION); |
4675 | arginfo[i].op = phi_res; |
4676 | vargs.safe_push (obj: phi_res); |
4677 | } |
4678 | else |
4679 | { |
4680 | enum tree_code code |
4681 | = POINTER_TYPE_P (TREE_TYPE (op)) |
4682 | ? POINTER_PLUS_EXPR : PLUS_EXPR; |
4683 | tree type = POINTER_TYPE_P (TREE_TYPE (op)) |
4684 | ? sizetype : TREE_TYPE (op); |
4685 | poly_widest_int cst |
4686 | = wi::mul (a: bestn->simdclone->args[i].linear_step, |
4687 | b: j * nunits); |
4688 | tree tcst = wide_int_to_tree (type, cst); |
4689 | new_temp = make_ssa_name (TREE_TYPE (op)); |
4690 | gassign *new_stmt |
4691 | = gimple_build_assign (new_temp, code, |
4692 | arginfo[i].op, tcst); |
4693 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
4694 | vargs.safe_push (obj: new_temp); |
4695 | } |
4696 | break; |
4697 | case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP: |
4698 | case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP: |
4699 | case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP: |
4700 | case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP: |
4701 | case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP: |
4702 | case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP: |
4703 | default: |
4704 | gcc_unreachable (); |
4705 | } |
4706 | } |
4707 | |
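	  /* If an in-branch clone was selected for a call that is not
	     itself in a branch, the clone still expects a trailing mask
	     argument: use the loop mask when the loop is fully masked and
	     an all-ones mask otherwise.  */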
4708 | if (masked_call_offset == 0 |
4709 | && bestn->simdclone->inbranch |
4710 | && bestn->simdclone->nargs > nargs) |
4711 | { |
4712 | unsigned long m, o; |
4713 | size_t mask_i = bestn->simdclone->nargs - 1; |
4714 | tree mask; |
4715 | gcc_assert (bestn->simdclone->args[mask_i].arg_type == |
4716 | SIMD_CLONE_ARG_TYPE_MASK); |
4717 | |
4718 | tree masktype = bestn->simdclone->args[mask_i].vector_type; |
4719 | callee_nelements = TYPE_VECTOR_SUBPARTS (node: masktype); |
4720 | o = vector_unroll_factor (nunits, callee_nelements); |
4721 | for (m = j * o; m < (j + 1) * o; m++) |
4722 | { |
4723 | if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) |
4724 | { |
4725 | vec_loop_masks *loop_masks = &LOOP_VINFO_MASKS (loop_vinfo); |
4726 | mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks, |
4727 | ncopies, vectype, j); |
4728 | } |
4729 | else |
4730 | mask = vect_build_all_ones_mask (vinfo, stmt_info, masktype); |
4731 | |
4732 | gassign *new_stmt; |
4733 | if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode)) |
4734 | { |
4735 | /* This means we are dealing with integer mask modes. |
4736 | First convert to an integer type with the same size as |
4737 | the current vector type. */ |
4738 | unsigned HOST_WIDE_INT intermediate_size |
4739 | = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (mask))); |
4740 | tree mid_int_type = |
4741 | build_nonstandard_integer_type (intermediate_size, 1); |
4742 | mask = build1 (VIEW_CONVERT_EXPR, mid_int_type, mask); |
4743 | new_stmt |
4744 | = gimple_build_assign (make_ssa_name (var: mid_int_type), |
4745 | mask); |
4746 | gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); |
4747 | /* Then zero-extend to the mask mode. */ |
4748 | mask = fold_build1 (NOP_EXPR, masktype, |
4749 | gimple_get_lhs (new_stmt)); |
4750 | } |
4751 | else if (bestn->simdclone->mask_mode == VOIDmode) |
4752 | { |
4753 | tree one = fold_convert (TREE_TYPE (masktype), |
4754 | integer_one_node); |
4755 | tree zero = fold_convert (TREE_TYPE (masktype), |
4756 | integer_zero_node); |
4757 | mask = build3 (VEC_COND_EXPR, masktype, mask, |
4758 | build_vector_from_val (masktype, one), |
4759 | build_vector_from_val (masktype, zero)); |
4760 | } |
4761 | else |
4762 | gcc_unreachable (); |
4763 | |
4764 | new_stmt = gimple_build_assign (make_ssa_name (var: masktype), mask); |
4765 | vect_finish_stmt_generation (vinfo, stmt_info, |
4766 | vec_stmt: new_stmt, gsi); |
4767 | mask = gimple_assign_lhs (gs: new_stmt); |
4768 | vargs.safe_push (obj: mask); |
4769 | } |
4770 | } |
4771 | |
4772 | gcall *new_call = gimple_build_call_vec (fndecl, vargs); |
4773 | if (vec_dest) |
4774 | { |
4775 | gcc_assert (ratype |
4776 | || known_eq (TYPE_VECTOR_SUBPARTS (rtype), nunits)); |
4777 | if (ratype) |
4778 | new_temp = create_tmp_var (ratype); |
4779 | else if (useless_type_conversion_p (vectype, rtype)) |
4780 | new_temp = make_ssa_name (var: vec_dest, stmt: new_call); |
4781 | else |
4782 | new_temp = make_ssa_name (var: rtype, stmt: new_call); |
4783 | gimple_call_set_lhs (gs: new_call, lhs: new_temp); |
4784 | } |
4785 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_call, gsi); |
4786 | gimple *new_stmt = new_call; |
4787 | |
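      /* Post-process the result.  The clone may return its value in an
	 array (RATYPE) or in a vector type whose number of lanes differs
	 from VECTYPE; below we split such results with BIT_FIELD_REFs or
	 MEM_REFs, or accumulate several of them in a CONSTRUCTOR.  */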
4788 | if (vec_dest) |
4789 | { |
4790 | if (!multiple_p (a: TYPE_VECTOR_SUBPARTS (node: vectype), b: nunits)) |
4791 | { |
4792 | unsigned int k, l; |
4793 | poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype)); |
4794 | poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype)); |
4795 | k = vector_unroll_factor (nunits, |
4796 | TYPE_VECTOR_SUBPARTS (vectype)); |
4797 | gcc_assert ((k & (k - 1)) == 0); |
4798 | for (l = 0; l < k; l++) |
4799 | { |
4800 | tree t; |
4801 | if (ratype) |
4802 | { |
4803 | t = build_fold_addr_expr (new_temp); |
4804 | t = build2 (MEM_REF, vectype, t, |
4805 | build_int_cst (TREE_TYPE (t), l * bytes)); |
4806 | } |
4807 | else |
4808 | t = build3 (BIT_FIELD_REF, vectype, new_temp, |
4809 | bitsize_int (prec), bitsize_int (l * prec)); |
4810 | new_stmt = gimple_build_assign (make_ssa_name (var: vectype), t); |
4811 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
4812 | |
4813 | if (j == 0 && l == 0) |
4814 | *vec_stmt = new_stmt; |
4815 | if (slp_node) |
4816 | SLP_TREE_VEC_DEFS (slp_node) |
4817 | .quick_push (obj: gimple_assign_lhs (gs: new_stmt)); |
4818 | else |
4819 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt); |
4820 | } |
4821 | |
4822 | if (ratype) |
4823 | vect_clobber_variable (vinfo, stmt_info, gsi, var: new_temp); |
4824 | continue; |
4825 | } |
4826 | else if (!multiple_p (a: nunits, b: TYPE_VECTOR_SUBPARTS (node: vectype))) |
4827 | { |
4828 | unsigned int k; |
4829 | if (!constant_multiple_p (a: TYPE_VECTOR_SUBPARTS (node: vectype), |
4830 | b: TYPE_VECTOR_SUBPARTS (node: rtype), multiple: &k)) |
4831 | gcc_unreachable (); |
4832 | gcc_assert ((k & (k - 1)) == 0); |
4833 | if ((j & (k - 1)) == 0) |
4834 | vec_alloc (v&: ret_ctor_elts, nelems: k); |
4835 | if (ratype) |
4836 | { |
4837 | unsigned int m, o; |
4838 | o = vector_unroll_factor (nunits, |
4839 | TYPE_VECTOR_SUBPARTS (rtype)); |
4840 | for (m = 0; m < o; m++) |
4841 | { |
4842 | tree tem = build4 (ARRAY_REF, rtype, new_temp, |
4843 | size_int (m), NULL_TREE, NULL_TREE); |
4844 | new_stmt = gimple_build_assign (make_ssa_name (var: rtype), |
4845 | tem); |
4846 | vect_finish_stmt_generation (vinfo, stmt_info, |
4847 | vec_stmt: new_stmt, gsi); |
4848 | CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, |
4849 | gimple_assign_lhs (new_stmt)); |
4850 | } |
4851 | vect_clobber_variable (vinfo, stmt_info, gsi, var: new_temp); |
4852 | } |
4853 | else |
4854 | CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp); |
4855 | if ((j & (k - 1)) != k - 1) |
4856 | continue; |
4857 | vec_oprnd0 = build_constructor (vectype, ret_ctor_elts); |
4858 | new_stmt |
4859 | = gimple_build_assign (make_ssa_name (var: vec_dest), vec_oprnd0); |
4860 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
4861 | |
4862 | if ((unsigned) j == k - 1) |
4863 | *vec_stmt = new_stmt; |
4864 | if (slp_node) |
4865 | SLP_TREE_VEC_DEFS (slp_node) |
4866 | .quick_push (obj: gimple_assign_lhs (gs: new_stmt)); |
4867 | else |
4868 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt); |
4869 | continue; |
4870 | } |
4871 | else if (ratype) |
4872 | { |
4873 | tree t = build_fold_addr_expr (new_temp); |
4874 | t = build2 (MEM_REF, vectype, t, |
4875 | build_int_cst (TREE_TYPE (t), 0)); |
4876 | new_stmt = gimple_build_assign (make_ssa_name (var: vec_dest), t); |
4877 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
4878 | vect_clobber_variable (vinfo, stmt_info, gsi, var: new_temp); |
4879 | } |
4880 | else if (!useless_type_conversion_p (vectype, rtype)) |
4881 | { |
4882 | vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp); |
4883 | new_stmt |
4884 | = gimple_build_assign (make_ssa_name (var: vec_dest), vec_oprnd0); |
4885 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
4886 | } |
4887 | } |
4888 | |
4889 | if (j == 0) |
4890 | *vec_stmt = new_stmt; |
4891 | if (slp_node) |
4892 | SLP_TREE_VEC_DEFS (slp_node).quick_push (obj: gimple_get_lhs (new_stmt)); |
4893 | else |
4894 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt); |
4895 | } |
4896 | |
4897 | for (i = 0; i < nargs; ++i) |
4898 | { |
4899 | vec<tree> oprndsi = vec_oprnds[i]; |
4900 | oprndsi.release (); |
4901 | } |
4902 | vargs.release (); |
4903 | |
4904 | /* Mark the clone as no longer being a candidate for GC. */ |
4905 | bestn->gc_candidate = false; |
4906 | |
4907 | /* The call in STMT might prevent it from being removed in dce. |
4908 | We however cannot remove it here, due to the way the ssa name |
     it defines is mapped to the new definition.  So just replace
     the rhs of the statement with something harmless.  */
4911 | |
4912 | if (slp_node) |
4913 | return true; |
4914 | |
4915 | gimple *new_stmt; |
4916 | if (scalar_dest) |
4917 | { |
4918 | type = TREE_TYPE (scalar_dest); |
4919 | lhs = gimple_call_lhs (gs: vect_orig_stmt (stmt_info)->stmt); |
4920 | new_stmt = gimple_build_assign (lhs, build_zero_cst (type)); |
4921 | } |
4922 | else |
4923 | new_stmt = gimple_build_nop (); |
4924 | vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt); |
4925 | unlink_stmt_vdef (stmt); |
4926 | |
4927 | return true; |
4928 | } |
4929 | |
4930 | |
4931 | /* Function vect_gen_widened_results_half |
4932 | |
   Create a vector stmt whose code, type, number of arguments, and result
   variable are CH, OP_TYPE, and VEC_DEST, and whose arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
   When CH wraps an internal function rather than a tree code, a call to
   that function is built instead of an assignment.
   STMT_INFO is the original scalar stmt that we are vectorizing.  */
4939 | |
4940 | static gimple * |
4941 | vect_gen_widened_results_half (vec_info *vinfo, code_helper ch, |
4942 | tree vec_oprnd0, tree vec_oprnd1, int op_type, |
4943 | tree vec_dest, gimple_stmt_iterator *gsi, |
4944 | stmt_vec_info stmt_info) |
4945 | { |
4946 | gimple *new_stmt; |
4947 | tree new_temp; |
4948 | |
4949 | /* Generate half of the widened result: */ |
4950 | if (op_type != binary_op) |
4951 | vec_oprnd1 = NULL; |
4952 | new_stmt = vect_gimple_build (vec_dest, ch, vec_oprnd0, vec_oprnd1); |
4953 | new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt); |
4954 | gimple_set_lhs (new_stmt, new_temp); |
4955 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
4956 | |
4957 | return new_stmt; |
4958 | } |
4959 | |
4960 | |
4961 | /* Create vectorized demotion statements for vector operands from VEC_OPRNDS. |
4962 | For multi-step conversions store the resulting vectors and call the function |
   recursively.  When NARROW_SRC_P is true, there is still a conversion
   after narrowing, so don't store the vectors in the SLP_NODE or in the
   vector info of the scalar statement (or in the STMT_VINFO_RELATED_STMT
   chain).  */
4966 | |
4967 | static void |
4968 | vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds, |
4969 | int multi_step_cvt, |
4970 | stmt_vec_info stmt_info, |
4971 | vec<tree> &vec_dsts, |
4972 | gimple_stmt_iterator *gsi, |
4973 | slp_tree slp_node, code_helper code, |
4974 | bool narrow_src_p) |
4975 | { |
4976 | unsigned int i; |
4977 | tree vop0, vop1, new_tmp, vec_dest; |
4978 | |
4979 | vec_dest = vec_dsts.pop (); |
4980 | |
4981 | for (i = 0; i < vec_oprnds->length (); i += 2) |
4982 | { |
4983 | /* Create demotion operation. */ |
4984 | vop0 = (*vec_oprnds)[i]; |
4985 | vop1 = (*vec_oprnds)[i + 1]; |
4986 | gimple *new_stmt = vect_gimple_build (vec_dest, code, vop0, vop1); |
4987 | new_tmp = make_ssa_name (var: vec_dest, stmt: new_stmt); |
4988 | gimple_set_lhs (new_stmt, new_tmp); |
4989 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
4990 | if (multi_step_cvt || narrow_src_p) |
4991 | /* Store the resulting vector for next recursive call, |
4992 | or return the resulting vector_tmp for NARROW FLOAT_EXPR. */ |
4993 | (*vec_oprnds)[i/2] = new_tmp; |
4994 | else |
4995 | { |
4996 | /* This is the last step of the conversion sequence. Store the |
4997 | vectors in SLP_NODE or in vector info of the scalar statement |
4998 | (or in STMT_VINFO_RELATED_STMT chain). */ |
4999 | if (slp_node) |
5000 | slp_node->push_vec_def (def: new_stmt); |
5001 | else |
5002 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt); |
5003 | } |
5004 | } |
5005 | |
5006 | /* For multi-step demotion operations we first generate demotion operations |
5007 | from the source type to the intermediate types, and then combine the |
5008 | results (stored in VEC_OPRNDS) in demotion operation to the destination |
5009 | type. */ |
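  /* For example (a sketch): demoting four V4SI operands to V16QI with
     MULTI_STEP_CVT == 1 first packs them pairwise into two V8HI vectors;
     the recursive call then packs those into a single V16QI vector with
     VEC_PACK_TRUNC_EXPR.  */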
5010 | if (multi_step_cvt) |
5011 | { |
5012 | /* At each level of recursion we have half of the operands we had at the |
5013 | previous level. */ |
5014 | vec_oprnds->truncate (size: (i+1)/2); |
5015 | vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds, |
5016 | multi_step_cvt: multi_step_cvt - 1, |
5017 | stmt_info, vec_dsts, gsi, |
5018 | slp_node, code: VEC_PACK_TRUNC_EXPR, |
5019 | narrow_src_p); |
5020 | } |
5021 | |
5022 | vec_dsts.quick_push (obj: vec_dest); |
5023 | } |
5024 | |
5025 | |
5026 | /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0 |
5027 | and VEC_OPRNDS1, for a binary operation associated with scalar statement |
5028 | STMT_INFO. For multi-step conversions store the resulting vectors and |
5029 | call the function recursively. */ |
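/* For example (a sketch): promoting one V8HI operand to V4SI yields two
   V4SI result vectors per input, typically via the VEC_UNPACK_LO_EXPR /
   VEC_UNPACK_HI_EXPR (or widen-mult lo/hi) halves generated below.  */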
5030 | |
5031 | static void |
5032 | vect_create_vectorized_promotion_stmts (vec_info *vinfo, |
5033 | vec<tree> *vec_oprnds0, |
5034 | vec<tree> *vec_oprnds1, |
5035 | stmt_vec_info stmt_info, tree vec_dest, |
5036 | gimple_stmt_iterator *gsi, |
5037 | code_helper ch1, |
5038 | code_helper ch2, int op_type) |
5039 | { |
5040 | int i; |
5041 | tree vop0, vop1, new_tmp1, new_tmp2; |
5042 | gimple *new_stmt1, *new_stmt2; |
5043 | vec<tree> vec_tmp = vNULL; |
5044 | |
5045 | vec_tmp.create (nelems: vec_oprnds0->length () * 2); |
5046 | FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0) |
5047 | { |
5048 | if (op_type == binary_op) |
5049 | vop1 = (*vec_oprnds1)[i]; |
5050 | else |
5051 | vop1 = NULL_TREE; |
5052 | |
5053 | /* Generate the two halves of promotion operation. */ |
5054 | new_stmt1 = vect_gen_widened_results_half (vinfo, ch: ch1, vec_oprnd0: vop0, vec_oprnd1: vop1, |
5055 | op_type, vec_dest, gsi, |
5056 | stmt_info); |
5057 | new_stmt2 = vect_gen_widened_results_half (vinfo, ch: ch2, vec_oprnd0: vop0, vec_oprnd1: vop1, |
5058 | op_type, vec_dest, gsi, |
5059 | stmt_info); |
5060 | if (is_gimple_call (gs: new_stmt1)) |
5061 | { |
5062 | new_tmp1 = gimple_call_lhs (gs: new_stmt1); |
5063 | new_tmp2 = gimple_call_lhs (gs: new_stmt2); |
5064 | } |
5065 | else |
5066 | { |
5067 | new_tmp1 = gimple_assign_lhs (gs: new_stmt1); |
5068 | new_tmp2 = gimple_assign_lhs (gs: new_stmt2); |
5069 | } |
5070 | |
5071 | /* Store the results for the next step. */ |
5072 | vec_tmp.quick_push (obj: new_tmp1); |
5073 | vec_tmp.quick_push (obj: new_tmp2); |
5074 | } |
5075 | |
5076 | vec_oprnds0->release (); |
5077 | *vec_oprnds0 = vec_tmp; |
5078 | } |
5079 | |
5080 | /* Create vectorized promotion stmts for widening stmts using only half the |
5081 | potential vector size for input. */ |
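/* For example (a sketch): a widening operation whose input and output
   vectors have the same number of lanes NOP-converts each vector input
   to the wider element type and then applies the plain (non-widening)
   operation to the widened operands.  */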
5082 | static void |
5083 | vect_create_half_widening_stmts (vec_info *vinfo, |
5084 | vec<tree> *vec_oprnds0, |
5085 | vec<tree> *vec_oprnds1, |
5086 | stmt_vec_info stmt_info, tree vec_dest, |
5087 | gimple_stmt_iterator *gsi, |
5088 | code_helper code1, |
5089 | int op_type) |
5090 | { |
5091 | int i; |
5092 | tree vop0, vop1; |
5093 | gimple *new_stmt1; |
5094 | gimple *new_stmt2; |
5095 | gimple *new_stmt3; |
5096 | vec<tree> vec_tmp = vNULL; |
5097 | |
5098 | vec_tmp.create (nelems: vec_oprnds0->length ()); |
5099 | FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0) |
5100 | { |
5101 | tree new_tmp1, new_tmp2, new_tmp3, out_type; |
5102 | |
5103 | gcc_assert (op_type == binary_op); |
5104 | vop1 = (*vec_oprnds1)[i]; |
5105 | |
5106 | /* Widen the first vector input. */ |
5107 | out_type = TREE_TYPE (vec_dest); |
5108 | new_tmp1 = make_ssa_name (var: out_type); |
5109 | new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0); |
5110 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt1, gsi); |
5111 | if (VECTOR_TYPE_P (TREE_TYPE (vop1))) |
5112 | { |
5113 | /* Widen the second vector input. */ |
5114 | new_tmp2 = make_ssa_name (var: out_type); |
5115 | new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1); |
5116 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt2, gsi); |
	  /* Perform the operation with both vector inputs widened.  */
5118 | new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, new_tmp2); |
5119 | } |
5120 | else |
5121 | { |
	  /* Perform the operation with the single vector input widened.  */
5123 | new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, vop1); |
5124 | } |
5125 | |
5126 | new_tmp3 = make_ssa_name (var: vec_dest, stmt: new_stmt3); |
5127 | gimple_assign_set_lhs (gs: new_stmt3, lhs: new_tmp3); |
5128 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt3, gsi); |
5129 | |
5130 | /* Store the results for the next step. */ |
5131 | vec_tmp.quick_push (obj: new_tmp3); |
5132 | } |
5133 | |
5134 | vec_oprnds0->release (); |
5135 | *vec_oprnds0 = vec_tmp; |
5136 | } |
5137 | |
5138 | |
5139 | /* Check if STMT_INFO performs a conversion operation that can be vectorized. |
5140 | If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized |
5141 | stmt to replace it, put it in VEC_STMT, and insert it at GSI. |
5142 | Return true if STMT_INFO is vectorizable in this way. */ |
5143 | |
5144 | static bool |
5145 | vectorizable_conversion (vec_info *vinfo, |
5146 | stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, |
5147 | gimple **vec_stmt, slp_tree slp_node, |
5148 | stmt_vector_for_cost *cost_vec) |
5149 | { |
5150 | tree vec_dest, cvt_op = NULL_TREE; |
5151 | tree scalar_dest; |
5152 | tree op0, op1 = NULL_TREE; |
5153 | loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo); |
5154 | tree_code tc1, tc2; |
5155 | code_helper code, code1, code2; |
5156 | code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK; |
5157 | tree new_temp; |
5158 | enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; |
5159 | int ndts = 2; |
5160 | poly_uint64 nunits_in; |
5161 | poly_uint64 nunits_out; |
5162 | tree vectype_out, vectype_in; |
5163 | int ncopies, i; |
5164 | tree lhs_type, rhs_type; |
  /* For conversions between floating point and integer there are two
     NARROW cases.  NARROW_SRC is for FLOAT_EXPR and means
     integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
     This is safe when the range of the source integer fits into the
     lower precision.  NARROW_DST is for FIX_TRUNC_EXPR and means
     floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> integer.
     For other conversions NARROW_DST is used by default when there is
     narrowing.  */
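  /* For example (a sketch): converting two V2DI operands to one V4SF
     result as NARROW_SRC first packs the two V2DI vectors into one V4SI
     (valid when the known value range of the source fits in 32 bits)
     and only then applies FLOAT_EXPR to produce the V4SF.  */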
5173 | enum { NARROW_SRC, NARROW_DST, NONE, WIDEN } modifier; |
5174 | vec<tree> vec_oprnds0 = vNULL; |
5175 | vec<tree> vec_oprnds1 = vNULL; |
5176 | tree vop0; |
5177 | bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo); |
5178 | int multi_step_cvt = 0; |
5179 | vec<tree> interm_types = vNULL; |
5180 | tree intermediate_type, cvt_type = NULL_TREE; |
5181 | int op_type; |
5182 | unsigned short fltsz; |
5183 | |
5184 | /* Is STMT a vectorizable conversion? */ |
5185 | |
5186 | if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) |
5187 | return false; |
5188 | |
5189 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def |
5190 | && ! vec_stmt) |
5191 | return false; |
5192 | |
5193 | gimple* stmt = stmt_info->stmt; |
5194 | if (!(is_gimple_assign (gs: stmt) || is_gimple_call (gs: stmt))) |
5195 | return false; |
5196 | |
5197 | if (gimple_get_lhs (stmt) == NULL_TREE |
5198 | || TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME) |
5199 | return false; |
5203 | |
5204 | if (is_gimple_assign (gs: stmt)) |
5205 | { |
5206 | code = gimple_assign_rhs_code (gs: stmt); |
5207 | op_type = TREE_CODE_LENGTH ((tree_code) code); |
5208 | } |
5209 | else if (gimple_call_internal_p (gs: stmt)) |
5210 | { |
5211 | code = gimple_call_internal_fn (gs: stmt); |
5212 | op_type = gimple_call_num_args (gs: stmt); |
5213 | } |
5214 | else |
5215 | return false; |
5216 | |
5217 | bool widen_arith = (code == WIDEN_MULT_EXPR |
5218 | || code == WIDEN_LSHIFT_EXPR |
5219 | || widening_fn_p (code)); |
5220 | |
5221 | if (!widen_arith |
5222 | && !CONVERT_EXPR_CODE_P (code) |
5223 | && code != FIX_TRUNC_EXPR |
5224 | && code != FLOAT_EXPR) |
5225 | return false; |
5226 | |
5227 | /* Check types of lhs and rhs. */ |
5228 | scalar_dest = gimple_get_lhs (stmt); |
5229 | lhs_type = TREE_TYPE (scalar_dest); |
5230 | vectype_out = STMT_VINFO_VECTYPE (stmt_info); |
5231 | |
5232 | /* Check the operands of the operation. */ |
5233 | slp_tree slp_op0, slp_op1 = NULL; |
5234 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, |
5235 | 0, &op0, &slp_op0, &dt[0], &vectype_in)) |
5236 | { |
5237 | if (dump_enabled_p ()) |
5238 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
5239 | "use not simple.\n" ); |
5240 | return false; |
5241 | } |
5242 | |
5243 | rhs_type = TREE_TYPE (op0); |
5244 | if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR) |
5245 | && !((INTEGRAL_TYPE_P (lhs_type) |
5246 | && INTEGRAL_TYPE_P (rhs_type)) |
5247 | || (SCALAR_FLOAT_TYPE_P (lhs_type) |
5248 | && SCALAR_FLOAT_TYPE_P (rhs_type)))) |
5249 | return false; |
5250 | |
5251 | if (!VECTOR_BOOLEAN_TYPE_P (vectype_out) |
5252 | && ((INTEGRAL_TYPE_P (lhs_type) |
5253 | && !type_has_mode_precision_p (t: lhs_type)) |
5254 | || (INTEGRAL_TYPE_P (rhs_type) |
5255 | && !type_has_mode_precision_p (t: rhs_type)))) |
5256 | { |
5257 | if (dump_enabled_p ()) |
5258 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
5259 | "type conversion to/from bit-precision unsupported." |
5260 | "\n" ); |
5261 | return false; |
5262 | } |
5263 | |
5264 | if (op_type == binary_op) |
5265 | { |
5266 | gcc_assert (code == WIDEN_MULT_EXPR |
5267 | || code == WIDEN_LSHIFT_EXPR |
5268 | || widening_fn_p (code)); |
5269 | |
5270 | op1 = is_gimple_assign (gs: stmt) ? gimple_assign_rhs2 (gs: stmt) : |
5271 | gimple_call_arg (gs: stmt, index: 0); |
5272 | tree vectype1_in; |
5273 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, |
5274 | &op1, &slp_op1, &dt[1], &vectype1_in)) |
5275 | { |
5276 | if (dump_enabled_p ()) |
5277 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
5278 | "use not simple.\n" ); |
5279 | return false; |
5280 | } |
5281 | /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of |
5282 | OP1. */ |
5283 | if (!vectype_in) |
5284 | vectype_in = vectype1_in; |
5285 | } |
5286 | |
5287 | /* If op0 is an external or constant def, infer the vector type |
5288 | from the scalar type. */ |
5289 | if (!vectype_in) |
5290 | vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node); |
5291 | if (vec_stmt) |
5292 | gcc_assert (vectype_in); |
5293 | if (!vectype_in) |
5294 | { |
5295 | if (dump_enabled_p ()) |
5296 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
5297 | "no vectype for scalar type %T\n" , rhs_type); |
5298 | |
5299 | return false; |
5300 | } |
5301 | |
5302 | if (VECTOR_BOOLEAN_TYPE_P (vectype_out) |
5303 | && !VECTOR_BOOLEAN_TYPE_P (vectype_in)) |
5304 | { |
5305 | if (dump_enabled_p ()) |
5306 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
5307 | "can't convert between boolean and non " |
5308 | "boolean vectors %T\n" , rhs_type); |
5309 | |
5310 | return false; |
5311 | } |
5312 | |
5313 | nunits_in = TYPE_VECTOR_SUBPARTS (node: vectype_in); |
5314 | nunits_out = TYPE_VECTOR_SUBPARTS (node: vectype_out); |
  if (known_eq (nunits_out, nunits_in))
    {
      if (widen_arith)
	modifier = WIDEN;
      else
	modifier = NONE;
    }
5320 | else if (multiple_p (a: nunits_out, b: nunits_in)) |
5321 | modifier = NARROW_DST; |
5322 | else |
5323 | { |
5324 | gcc_checking_assert (multiple_p (nunits_in, nunits_out)); |
5325 | modifier = WIDEN; |
5326 | } |
5327 | |
5328 | /* Multiple types in SLP are handled by creating the appropriate number of |
5329 | vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in |
5330 | case of SLP. */ |
5331 | if (slp_node) |
5332 | ncopies = 1; |
5333 | else if (modifier == NARROW_DST) |
5334 | ncopies = vect_get_num_copies (loop_vinfo, vectype: vectype_out); |
5335 | else |
5336 | ncopies = vect_get_num_copies (loop_vinfo, vectype: vectype_in); |
5337 | |
5338 | /* Sanity check: make sure that at least one copy of the vectorized stmt |
5339 | needs to be generated. */ |
5340 | gcc_assert (ncopies >= 1); |
5341 | |
5342 | bool found_mode = false; |
5343 | scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type); |
5344 | scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type); |
5345 | opt_scalar_mode rhs_mode_iter; |
5346 | |
5347 | /* Supportable by target? */ |
5348 | switch (modifier) |
5349 | { |
5350 | case NONE: |
5351 | if (code != FIX_TRUNC_EXPR |
5352 | && code != FLOAT_EXPR |
5353 | && !CONVERT_EXPR_CODE_P (code)) |
5354 | return false; |
5355 | gcc_assert (code.is_tree_code ()); |
5356 | if (supportable_convert_operation ((tree_code) code, vectype_out, |
5357 | vectype_in, &tc1)) |
5358 | { |
5359 | code1 = tc1; |
5360 | break; |
5361 | } |
5362 | |
5363 | /* For conversions between float and integer types try whether |
5364 | we can use intermediate signed integer types to support the |
5365 | conversion. */ |
5366 | if (GET_MODE_SIZE (mode: lhs_mode) != GET_MODE_SIZE (mode: rhs_mode) |
5367 | && (code == FLOAT_EXPR || |
5368 | (code == FIX_TRUNC_EXPR && !flag_trapping_math))) |
5369 | { |
5370 | bool demotion = GET_MODE_SIZE (mode: rhs_mode) > GET_MODE_SIZE (mode: lhs_mode); |
5371 | bool float_expr_p = code == FLOAT_EXPR; |
5372 | unsigned short target_size; |
5373 | scalar_mode intermediate_mode; |
5374 | if (demotion) |
5375 | { |
5376 | intermediate_mode = lhs_mode; |
5377 | target_size = GET_MODE_SIZE (mode: rhs_mode); |
5378 | } |
5379 | else |
5380 | { |
5381 | target_size = GET_MODE_SIZE (mode: lhs_mode); |
5382 | if (!int_mode_for_size |
5383 | (size: GET_MODE_BITSIZE (mode: rhs_mode), limit: 0).exists (mode: &intermediate_mode)) |
5384 | goto unsupported; |
5385 | } |
5386 | code1 = float_expr_p ? code : NOP_EXPR; |
5387 | codecvt1 = float_expr_p ? NOP_EXPR : code; |
5388 | opt_scalar_mode mode_iter; |
5389 | FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode) |
5390 | { |
5391 | intermediate_mode = mode_iter.require (); |
5392 | |
5393 | if (GET_MODE_SIZE (mode: intermediate_mode) > target_size) |
5394 | break; |
5395 | |
5396 | scalar_mode cvt_mode; |
5397 | if (!int_mode_for_size |
5398 | (size: GET_MODE_BITSIZE (mode: intermediate_mode), limit: 0).exists (mode: &cvt_mode)) |
5399 | break; |
5400 | |
5401 | cvt_type = build_nonstandard_integer_type |
5402 | (GET_MODE_BITSIZE (mode: cvt_mode), 0); |
5403 | |
5404 | /* Check if the intermediate type can hold OP0's range. |
5405 | When converting from float to integer this is not necessary |
5406 | because values that do not fit the (smaller) target type are |
5407 | unspecified anyway. */ |
5408 | if (demotion && float_expr_p) |
5409 | { |
5410 | wide_int op_min_value, op_max_value; |
5411 | if (!vect_get_range_info (op0, &op_min_value, &op_max_value)) |
5412 | break; |
5413 | |
5414 | if (cvt_type == NULL_TREE |
5415 | || (wi::min_precision (x: op_max_value, sgn: SIGNED) |
5416 | > TYPE_PRECISION (cvt_type)) |
5417 | || (wi::min_precision (x: op_min_value, sgn: SIGNED) |
5418 | > TYPE_PRECISION (cvt_type))) |
5419 | continue; |
5420 | } |
5421 | |
5422 | cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node); |
	      /* This should only happen for SLP, as long as the loop
		 vectorizer only supports same-sized vectors.  */
5425 | if (cvt_type == NULL_TREE |
5426 | || maybe_ne (a: TYPE_VECTOR_SUBPARTS (node: cvt_type), b: nunits_in) |
5427 | || !supportable_convert_operation ((tree_code) code1, |
5428 | vectype_out, |
5429 | cvt_type, &tc1) |
5430 | || !supportable_convert_operation ((tree_code) codecvt1, |
5431 | cvt_type, |
5432 | vectype_in, &tc2)) |
5433 | continue; |
5434 | |
5435 | found_mode = true; |
5436 | break; |
5437 | } |
5438 | |
5439 | if (found_mode) |
5440 | { |
5441 | multi_step_cvt++; |
5442 | interm_types.safe_push (obj: cvt_type); |
5443 | cvt_type = NULL_TREE; |
5444 | code1 = tc1; |
5445 | codecvt1 = tc2; |
5446 | break; |
5447 | } |
5448 | } |
5449 | /* FALLTHRU */ |
5450 | unsupported: |
5451 | if (dump_enabled_p ()) |
5452 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
5453 | "conversion not supported by target.\n" ); |
5454 | return false; |
5455 | |
5456 | case WIDEN: |
5457 | if (known_eq (nunits_in, nunits_out)) |
5458 | { |
5459 | if (!(code.is_tree_code () |
5460 | && supportable_half_widening_operation ((tree_code) code, |
5461 | vectype_out, vectype_in, |
5462 | &tc1))) |
5463 | goto unsupported; |
5464 | code1 = tc1; |
5465 | gcc_assert (!(multi_step_cvt && op_type == binary_op)); |
5466 | break; |
5467 | } |
5468 | if (supportable_widening_operation (vinfo, code, stmt_info, |
5469 | vectype_out, vectype_in, &code1, |
5470 | &code2, &multi_step_cvt, |
5471 | &interm_types)) |
5472 | { |
5473 | /* Binary widening operation can only be supported directly by the |
5474 | architecture. */ |
5475 | gcc_assert (!(multi_step_cvt && op_type == binary_op)); |
5476 | break; |
5477 | } |
5478 | |
5479 | if (code != FLOAT_EXPR |
5480 | || GET_MODE_SIZE (mode: lhs_mode) <= GET_MODE_SIZE (mode: rhs_mode)) |
5481 | goto unsupported; |
5482 | |
5483 | fltsz = GET_MODE_SIZE (mode: lhs_mode); |
5484 | FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode) |
5485 | { |
5486 | rhs_mode = rhs_mode_iter.require (); |
5487 | if (GET_MODE_SIZE (mode: rhs_mode) > fltsz) |
5488 | break; |
5489 | |
5490 | cvt_type |
5491 | = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode: rhs_mode), 0); |
5492 | cvt_type = get_same_sized_vectype (cvt_type, vectype_in); |
5493 | if (cvt_type == NULL_TREE) |
5494 | goto unsupported; |
5495 | |
5496 | if (GET_MODE_SIZE (mode: rhs_mode) == fltsz) |
5497 | { |
5498 | tc1 = ERROR_MARK; |
5499 | gcc_assert (code.is_tree_code ()); |
5500 | if (!supportable_convert_operation ((tree_code) code, vectype_out, |
5501 | cvt_type, &tc1)) |
5502 | goto unsupported; |
5503 | codecvt1 = tc1; |
5504 | } |
5505 | else if (!supportable_widening_operation (vinfo, code, |
5506 | stmt_info, vectype_out, |
5507 | cvt_type, &codecvt1, |
5508 | &codecvt2, &multi_step_cvt, |
5509 | &interm_types)) |
5510 | continue; |
5511 | else |
5512 | gcc_assert (multi_step_cvt == 0); |
5513 | |
5514 | if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info, |
5515 | cvt_type, |
5516 | vectype_in, &code1, |
5517 | &code2, &multi_step_cvt, |
5518 | &interm_types)) |
5519 | { |
5520 | found_mode = true; |
5521 | break; |
5522 | } |
5523 | } |
5524 | |
5525 | if (!found_mode) |
5526 | goto unsupported; |
5527 | |
5528 | if (GET_MODE_SIZE (mode: rhs_mode) == fltsz) |
5529 | codecvt2 = ERROR_MARK; |
5530 | else |
5531 | { |
5532 | multi_step_cvt++; |
5533 | interm_types.safe_push (obj: cvt_type); |
5534 | cvt_type = NULL_TREE; |
5535 | } |
5536 | break; |
5537 | |
5538 | case NARROW_DST: |
5539 | gcc_assert (op_type == unary_op); |
5540 | if (supportable_narrowing_operation (code, vectype_out, vectype_in, |
5541 | &code1, &multi_step_cvt, |
5542 | &interm_types)) |
5543 | break; |
5544 | |
5545 | if (GET_MODE_SIZE (mode: lhs_mode) >= GET_MODE_SIZE (mode: rhs_mode)) |
5546 | goto unsupported; |
5547 | |
5548 | if (code == FIX_TRUNC_EXPR) |
5549 | { |
5550 | cvt_type |
5551 | = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode: rhs_mode), 0); |
5552 | cvt_type = get_same_sized_vectype (cvt_type, vectype_in); |
5553 | if (cvt_type == NULL_TREE) |
5554 | goto unsupported; |
5555 | if (supportable_convert_operation ((tree_code) code, cvt_type, vectype_in, |
5556 | &tc1)) |
5557 | codecvt1 = tc1; |
5558 | else |
5559 | goto unsupported; |
5560 | if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type, |
5561 | &code1, &multi_step_cvt, |
5562 | &interm_types)) |
5563 | break; |
5564 | } |
      /* If op0 can be represented with a low-precision integer,
	 truncate it to cvt_type and then do the FLOAT_EXPR.  */
5567 | else if (code == FLOAT_EXPR) |
5568 | { |
5569 | wide_int op_min_value, op_max_value; |
5570 | if (!vect_get_range_info (op0, &op_min_value, &op_max_value)) |
5571 | goto unsupported; |
5572 | |
5573 | cvt_type |
5574 | = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode: lhs_mode), 0); |
5575 | if (cvt_type == NULL_TREE |
5576 | || (wi::min_precision (x: op_max_value, sgn: SIGNED) |
5577 | > TYPE_PRECISION (cvt_type)) |
5578 | || (wi::min_precision (x: op_min_value, sgn: SIGNED) |
5579 | > TYPE_PRECISION (cvt_type))) |
5580 | goto unsupported; |
5581 | |
5582 | cvt_type = get_same_sized_vectype (cvt_type, vectype_out); |
5583 | if (cvt_type == NULL_TREE) |
5584 | goto unsupported; |
5585 | if (!supportable_narrowing_operation (NOP_EXPR, cvt_type, vectype_in, |
5586 | &code1, &multi_step_cvt, |
5587 | &interm_types)) |
5588 | goto unsupported; |
5589 | if (supportable_convert_operation ((tree_code) code, vectype_out, |
5590 | cvt_type, &tc1)) |
5591 | { |
5592 | codecvt1 = tc1; |
5593 | modifier = NARROW_SRC; |
5594 | break; |
5595 | } |
5596 | } |
5597 | |
5598 | goto unsupported; |
5599 | |
5600 | default: |
5601 | gcc_unreachable (); |
5602 | } |
5603 | |
5604 | if (!vec_stmt) /* transformation not required. */ |
5605 | { |
5606 | if (slp_node |
5607 | && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in) |
5608 | || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in))) |
5609 | { |
5610 | if (dump_enabled_p ()) |
5611 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
5612 | "incompatible vector types for invariants\n" ); |
5613 | return false; |
5614 | } |
5615 | DUMP_VECT_SCOPE ("vectorizable_conversion" ); |
5616 | if (modifier == NONE) |
5617 | { |
5618 | STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type; |
5619 | vect_model_simple_cost (vinfo, stmt_info, |
5620 | ncopies: ncopies * (1 + multi_step_cvt), |
5621 | dt, ndts, node: slp_node, cost_vec); |
5622 | } |
5623 | else if (modifier == NARROW_SRC || modifier == NARROW_DST) |
5624 | { |
5625 | STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type; |
5626 | /* The final packing step produces one vector result per copy. */ |
5627 | unsigned int nvectors |
5628 | = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies); |
5629 | vect_model_promotion_demotion_cost (stmt_info, dt, ncopies: nvectors, |
5630 | pwr: multi_step_cvt, cost_vec, |
5631 | widen_arith); |
5632 | } |
5633 | else |
5634 | { |
5635 | STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type; |
5636 | /* The initial unpacking step produces two vector results |
5637 | per copy. MULTI_STEP_CVT is 0 for a single conversion, |
5638 | so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */ |
5639 | unsigned int nvectors |
5640 | = (slp_node |
5641 | ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt |
5642 | : ncopies * 2); |
5643 | vect_model_promotion_demotion_cost (stmt_info, dt, ncopies: nvectors, |
5644 | pwr: multi_step_cvt, cost_vec, |
5645 | widen_arith); |
5646 | } |
5647 | interm_types.release (); |
5648 | return true; |
5649 | } |
5650 | |
5651 | /* Transform. */ |
5652 | if (dump_enabled_p ()) |
5653 | dump_printf_loc (MSG_NOTE, vect_location, |
5654 | "transform conversion. ncopies = %d.\n" , ncopies); |
5655 | |
5656 | if (op_type == binary_op) |
5657 | { |
5658 | if (CONSTANT_CLASS_P (op0)) |
5659 | op0 = fold_convert (TREE_TYPE (op1), op0); |
5660 | else if (CONSTANT_CLASS_P (op1)) |
5661 | op1 = fold_convert (TREE_TYPE (op0), op1); |
5662 | } |
5663 | |
5664 | /* In case of multi-step conversion, we first generate conversion operations |
5665 | to the intermediate types, and then from that types to the final one. |
5666 | We create vector destinations for the intermediate type (TYPES) received |
5667 | from supportable_*_operation, and store them in the correct order |
5668 | for future use in vect_create_vectorized_*_stmts (). */ |
5669 | auto_vec<tree> vec_dsts (multi_step_cvt + 1); |
5670 | bool widen_or_narrow_float_p |
5671 | = cvt_type && (modifier == WIDEN || modifier == NARROW_SRC); |
5672 | vec_dest = vect_create_destination_var (scalar_dest, |
5673 | widen_or_narrow_float_p |
5674 | ? cvt_type : vectype_out); |
5675 | vec_dsts.quick_push (obj: vec_dest); |
5676 | |
5677 | if (multi_step_cvt) |
5678 | { |
5679 | for (i = interm_types.length () - 1; |
5680 | interm_types.iterate (ix: i, ptr: &intermediate_type); i--) |
5681 | { |
5682 | vec_dest = vect_create_destination_var (scalar_dest, |
5683 | intermediate_type); |
5684 | vec_dsts.quick_push (obj: vec_dest); |
5685 | } |
5686 | } |
5687 | |
5688 | if (cvt_type) |
5689 | vec_dest = vect_create_destination_var (scalar_dest, |
5690 | widen_or_narrow_float_p |
5691 | ? vectype_out : cvt_type); |
5692 | |
5693 | int ninputs = 1; |
5694 | if (!slp_node) |
5695 | { |
5696 | if (modifier == WIDEN) |
5697 | ; |
5698 | else if (modifier == NARROW_SRC || modifier == NARROW_DST) |
5699 | { |
5700 | if (multi_step_cvt) |
5701 | ninputs = vect_pow2 (x: multi_step_cvt); |
5702 | ninputs *= 2; |
5703 | } |
5704 | } |
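  /* Each demotion step consumes two input vectors per output vector, so
     in the non-SLP narrowing case each copy fetches 2 * 2^MULTI_STEP_CVT
     input defs; widening and simple conversions fetch one def per copy.  */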
5705 | |
5706 | switch (modifier) |
5707 | { |
5708 | case NONE: |
5709 | vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, |
5710 | op0, vectype0: vectype_in, vec_oprnds0: &vec_oprnds0); |
5711 | /* vec_dest is intermediate type operand when multi_step_cvt. */ |
5712 | if (multi_step_cvt) |
5713 | { |
5714 | cvt_op = vec_dest; |
5715 | vec_dest = vec_dsts[0]; |
5716 | } |
5717 | |
5718 | FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) |
5719 | { |
5720 | /* Arguments are ready, create the new vector stmt. */ |
5721 | gimple* new_stmt; |
5722 | if (multi_step_cvt) |
5723 | { |
5724 | gcc_assert (multi_step_cvt == 1); |
5725 | new_stmt = vect_gimple_build (cvt_op, codecvt1, vop0); |
5726 | new_temp = make_ssa_name (var: cvt_op, stmt: new_stmt); |
5727 | gimple_assign_set_lhs (gs: new_stmt, lhs: new_temp); |
5728 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
5729 | vop0 = new_temp; |
5730 | } |
5731 | new_stmt = vect_gimple_build (vec_dest, code1, vop0); |
5732 | new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt); |
5733 | gimple_set_lhs (new_stmt, new_temp); |
5734 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
5735 | |
5736 | if (slp_node) |
5737 | slp_node->push_vec_def (def: new_stmt); |
5738 | else |
5739 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt); |
5740 | } |
5741 | break; |
5742 | |
5743 | case WIDEN: |
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt, i.e. we need to "unroll"
	 the vector stmt by a factor of VF/nunits.  */
5748 | vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies: ncopies * ninputs, |
5749 | op0, vectype0: vectype_in, vec_oprnds0: &vec_oprnds0, |
5750 | op1: code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1, |
5751 | vectype1: vectype_in, vec_oprnds1: &vec_oprnds1); |
5752 | if (code == WIDEN_LSHIFT_EXPR) |
5753 | { |
5754 | int oprnds_size = vec_oprnds0.length (); |
5755 | vec_oprnds1.create (nelems: oprnds_size); |
5756 | for (i = 0; i < oprnds_size; ++i) |
5757 | vec_oprnds1.quick_push (obj: op1); |
5758 | } |
5759 | /* Arguments are ready. Create the new vector stmts. */ |
5760 | for (i = multi_step_cvt; i >= 0; i--) |
5761 | { |
5762 | tree this_dest = vec_dsts[i]; |
5763 | code_helper c1 = code1, c2 = code2; |
5764 | if (i == 0 && codecvt2 != ERROR_MARK) |
5765 | { |
5766 | c1 = codecvt1; |
5767 | c2 = codecvt2; |
5768 | } |
5769 | if (known_eq (nunits_out, nunits_in)) |
5770 | vect_create_half_widening_stmts (vinfo, vec_oprnds0: &vec_oprnds0, vec_oprnds1: &vec_oprnds1, |
5771 | stmt_info, vec_dest: this_dest, gsi, code1: c1, |
5772 | op_type); |
5773 | else |
5774 | vect_create_vectorized_promotion_stmts (vinfo, vec_oprnds0: &vec_oprnds0, |
5775 | vec_oprnds1: &vec_oprnds1, stmt_info, |
5776 | vec_dest: this_dest, gsi, |
5777 | ch1: c1, ch2: c2, op_type); |
5778 | } |
5779 | |
5780 | FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) |
5781 | { |
5782 | gimple *new_stmt; |
5783 | if (cvt_type) |
5784 | { |
5785 | new_temp = make_ssa_name (var: vec_dest); |
5786 | new_stmt = vect_gimple_build (new_temp, codecvt1, vop0); |
5787 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
5788 | } |
5789 | else |
5790 | new_stmt = SSA_NAME_DEF_STMT (vop0); |
5791 | |
5792 | if (slp_node) |
5793 | slp_node->push_vec_def (def: new_stmt); |
5794 | else |
5795 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt); |
5796 | } |
5797 | break; |
5798 | |
5799 | case NARROW_SRC: |
5800 | case NARROW_DST: |
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt, i.e. we need to "unroll"
	 the vector stmt by a factor of VF/nunits.  */
5805 | vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies: ncopies * ninputs, |
5806 | op0, vectype0: vectype_in, vec_oprnds0: &vec_oprnds0); |
5807 | /* Arguments are ready. Create the new vector stmts. */ |
5808 | if (cvt_type && modifier == NARROW_DST) |
5809 | FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) |
5810 | { |
5811 | new_temp = make_ssa_name (var: vec_dest); |
5812 | gimple *new_stmt = vect_gimple_build (new_temp, codecvt1, vop0); |
5813 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
5814 | vec_oprnds0[i] = new_temp; |
5815 | } |
5816 | |
5817 | vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds: &vec_oprnds0, |
5818 | multi_step_cvt, |
5819 | stmt_info, vec_dsts, gsi, |
5820 | slp_node, code: code1, |
5821 | narrow_src_p: modifier == NARROW_SRC); |
5822 | /* After demoting op0 to cvt_type, convert it to dest. */ |
5823 | if (cvt_type && code == FLOAT_EXPR) |
5824 | { |
5825 | for (unsigned int i = 0; i != vec_oprnds0.length() / 2; i++) |
5826 | { |
5827 | /* Arguments are ready, create the new vector stmt. */ |
5828 | gcc_assert (TREE_CODE_LENGTH ((tree_code) codecvt1) == unary_op); |
5829 | gimple *new_stmt |
5830 | = vect_gimple_build (vec_dest, codecvt1, vec_oprnds0[i]); |
5831 | new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt); |
5832 | gimple_set_lhs (new_stmt, new_temp); |
5833 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
5834 | |
5835 | /* This is the last step of the conversion sequence. Store the |
5836 | vectors in SLP_NODE or in vector info of the scalar statement |
5837 | (or in STMT_VINFO_RELATED_STMT chain). */ |
5838 | if (slp_node) |
5839 | slp_node->push_vec_def (def: new_stmt); |
5840 | else |
5841 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt); |
5842 | } |
5843 | } |
5844 | break; |
5845 | } |
5846 | if (!slp_node) |
5847 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
5848 | |
5849 | vec_oprnds0.release (); |
5850 | vec_oprnds1.release (); |
5851 | interm_types.release (); |
5852 | |
5853 | return true; |
5854 | } |
5855 | |
5856 | /* Return true if we can assume from the scalar form of STMT_INFO that |
5857 | neither the scalar nor the vector forms will generate code. STMT_INFO |
5858 | is known not to involve a data reference. */ |
5859 | |
5860 | bool |
5861 | vect_nop_conversion_p (stmt_vec_info stmt_info) |
5862 | { |
5863 | gassign *stmt = dyn_cast <gassign *> (p: stmt_info->stmt); |
5864 | if (!stmt) |
5865 | return false; |
5866 | |
5867 | tree lhs = gimple_assign_lhs (gs: stmt); |
5868 | tree_code code = gimple_assign_rhs_code (gs: stmt); |
5869 | tree rhs = gimple_assign_rhs1 (gs: stmt); |
5870 | |
5871 | if (code == SSA_NAME || code == VIEW_CONVERT_EXPR) |
5872 | return true; |
5873 | |
5874 | if (CONVERT_EXPR_CODE_P (code)) |
5875 | return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)); |
5876 | |
5877 | return false; |
5878 | } |
5879 | |
5880 | /* Function vectorizable_assignment. |
5881 | |
5882 | Check if STMT_INFO performs an assignment (copy) that can be vectorized. |
5883 | If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized |
5884 | stmt to replace it, put it in VEC_STMT, and insert it at GSI. |
5885 | Return true if STMT_INFO is vectorizable in this way. */ |
5886 | |
5887 | static bool |
5888 | vectorizable_assignment (vec_info *vinfo, |
5889 | stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, |
5890 | gimple **vec_stmt, slp_tree slp_node, |
5891 | stmt_vector_for_cost *cost_vec) |
5892 | { |
5893 | tree vec_dest; |
5894 | tree scalar_dest; |
5895 | tree op; |
5896 | loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo); |
5897 | tree new_temp; |
5898 | enum vect_def_type dt[1] = {vect_unknown_def_type}; |
5899 | int ndts = 1; |
5900 | int ncopies; |
5901 | int i; |
5902 | vec<tree> vec_oprnds = vNULL; |
5903 | tree vop; |
5904 | bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo); |
5905 | enum tree_code code; |
5906 | tree vectype_in; |
5907 | |
5908 | if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) |
5909 | return false; |
5910 | |
5911 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def |
5912 | && ! vec_stmt) |
5913 | return false; |
5914 | |
/* Is this a vectorizable assignment? */
5916 | gassign *stmt = dyn_cast <gassign *> (p: stmt_info->stmt); |
5917 | if (!stmt) |
5918 | return false; |
5919 | |
5920 | scalar_dest = gimple_assign_lhs (gs: stmt); |
5921 | if (TREE_CODE (scalar_dest) != SSA_NAME) |
5922 | return false; |
5923 | |
5924 | if (STMT_VINFO_DATA_REF (stmt_info)) |
5925 | return false; |
5926 | |
5927 | code = gimple_assign_rhs_code (gs: stmt); |
5928 | if (!(gimple_assign_single_p (gs: stmt) |
5929 | || code == PAREN_EXPR |
5930 | || CONVERT_EXPR_CODE_P (code))) |
5931 | return false; |
5932 | |
5933 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
5934 | poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (node: vectype); |
5935 | |
5936 | /* Multiple types in SLP are handled by creating the appropriate number of |
5937 | vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in |
5938 | case of SLP. */ |
5939 | if (slp_node) |
5940 | ncopies = 1; |
5941 | else |
5942 | ncopies = vect_get_num_copies (loop_vinfo, vectype); |
5943 | |
5944 | gcc_assert (ncopies >= 1); |
5945 | |
5946 | slp_tree slp_op; |
5947 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op, |
5948 | &dt[0], &vectype_in)) |
5949 | { |
5950 | if (dump_enabled_p ()) |
5951 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
5952 | "use not simple.\n" ); |
5953 | return false; |
5954 | } |
5955 | if (!vectype_in) |
5956 | vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node); |
5957 | |
5958 | /* We can handle NOP_EXPR conversions that do not change the number |
5959 | of elements or the vector size. */ |
5960 | if ((CONVERT_EXPR_CODE_P (code) |
5961 | || code == VIEW_CONVERT_EXPR) |
5962 | && (!vectype_in |
5963 | || maybe_ne (a: TYPE_VECTOR_SUBPARTS (node: vectype_in), b: nunits) |
5964 | || maybe_ne (a: GET_MODE_SIZE (TYPE_MODE (vectype)), |
5965 | b: GET_MODE_SIZE (TYPE_MODE (vectype_in))))) |
5966 | return false; |
5967 | |
5968 | if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in)) |
5969 | { |
5970 | if (dump_enabled_p ()) |
5971 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
5972 | "can't convert between boolean and non " |
5973 | "boolean vectors %T\n" , TREE_TYPE (op)); |
5974 | |
5975 | return false; |
5976 | } |
5977 | |
5978 | /* We do not handle bit-precision changes. */ |
5979 | if ((CONVERT_EXPR_CODE_P (code) |
5980 | || code == VIEW_CONVERT_EXPR) |
5981 | && ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) |
5982 | && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))) |
5983 | || (INTEGRAL_TYPE_P (TREE_TYPE (op)) |
5984 | && !type_has_mode_precision_p (TREE_TYPE (op)))) |
5985 | /* But a conversion that does not change the bit-pattern is ok. */ |
5986 | && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) |
5987 | && INTEGRAL_TYPE_P (TREE_TYPE (op)) |
5988 | && (((TYPE_PRECISION (TREE_TYPE (scalar_dest)) |
5989 | > TYPE_PRECISION (TREE_TYPE (op))) |
5990 | && TYPE_UNSIGNED (TREE_TYPE (op))) |
5991 | || (TYPE_PRECISION (TREE_TYPE (scalar_dest)) |
5992 | == TYPE_PRECISION (TREE_TYPE (op)))))) |
5993 | { |
5994 | if (dump_enabled_p ()) |
5995 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
5996 | "type conversion to/from bit-precision " |
5997 | "unsupported.\n" ); |
5998 | return false; |
5999 | } |
6000 | |
6001 | if (!vec_stmt) /* transformation not required. */ |
6002 | { |
6003 | if (slp_node |
6004 | && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in)) |
6005 | { |
6006 | if (dump_enabled_p ()) |
6007 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6008 | "incompatible vector types for invariants\n" ); |
6009 | return false; |
6010 | } |
6011 | STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; |
6012 | DUMP_VECT_SCOPE ("vectorizable_assignment" ); |
6013 | if (!vect_nop_conversion_p (stmt_info)) |
6014 | vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, node: slp_node, |
6015 | cost_vec); |
6016 | return true; |
6017 | } |
6018 | |
6019 | /* Transform. */ |
6020 | if (dump_enabled_p ()) |
6021 | dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n" ); |
6022 | |
6023 | /* Handle def. */ |
6024 | vec_dest = vect_create_destination_var (scalar_dest, vectype); |
6025 | |
6026 | /* Handle use. */ |
6027 | vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op0: op, vec_oprnds0: &vec_oprnds); |
6028 | |
/* Arguments are ready. Create the new vector stmt. */
6030 | FOR_EACH_VEC_ELT (vec_oprnds, i, vop) |
6031 | { |
6032 | if (CONVERT_EXPR_CODE_P (code) |
6033 | || code == VIEW_CONVERT_EXPR) |
6034 | vop = build1 (VIEW_CONVERT_EXPR, vectype, vop); |
6035 | gassign *new_stmt = gimple_build_assign (vec_dest, vop); |
6036 | new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt); |
6037 | gimple_assign_set_lhs (gs: new_stmt, lhs: new_temp); |
6038 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
6039 | if (slp_node) |
6040 | slp_node->push_vec_def (def: new_stmt); |
6041 | else |
6042 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt); |
6043 | } |
6044 | if (!slp_node) |
6045 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
6046 | |
6047 | vec_oprnds.release (); |
6048 | return true; |
6049 | } |
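
/* Illustrative example (an explanatory addition, not from the original
sources): for "uns_var = (unsigned int) int_var" both vector types have
the same number of lanes and the same size, so each copy above is
emitted as a plain assignment of VIEW_CONVERT_EXPR <vectype> (vop),
i.e. a no-op reinterpretation rather than a real conversion. */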
6050 | |
6051 | |
6052 | /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE |
6053 | either as shift by a scalar or by a vector. */ |
6054 | |
6055 | bool |
6056 | vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type) |
6057 | { |
6059 | machine_mode vec_mode; |
6060 | optab optab; |
6061 | int icode; |
6062 | tree vectype; |
6063 | |
6064 | vectype = get_vectype_for_scalar_type (vinfo, scalar_type); |
6065 | if (!vectype) |
6066 | return false; |
6067 | |
6068 | optab = optab_for_tree_code (code, vectype, optab_scalar); |
6069 | if (!optab |
6070 | || optab_handler (op: optab, TYPE_MODE (vectype)) == CODE_FOR_nothing) |
6071 | { |
6072 | optab = optab_for_tree_code (code, vectype, optab_vector); |
6073 | if (!optab |
6074 | || (optab_handler (op: optab, TYPE_MODE (vectype)) |
6075 | == CODE_FOR_nothing)) |
6076 | return false; |
6077 | } |
6078 | |
6079 | vec_mode = TYPE_MODE (vectype); |
6080 | icode = (int) optab_handler (op: optab, mode: vec_mode); |
6081 | if (icode == CODE_FOR_nothing) |
6082 | return false; |
6083 | |
6084 | return true; |
6085 | } |
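
/* Illustrative use (a sketch, not from the original sources): a caller
such as a pattern recognizer that wants to emit "x >> n" for some
SCALAR_TYPE can guard the transform with

if (vect_supportable_shift (vinfo, RSHIFT_EXPR, scalar_type))
... emit the shift ...

relying on the vector/scalar to vector/vector optab fallback
implemented above. */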
6086 | |
6087 | |
6088 | /* Function vectorizable_shift. |
6089 | |
6090 | Check if STMT_INFO performs a shift operation that can be vectorized. |
6091 | If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized |
6092 | stmt to replace it, put it in VEC_STMT, and insert it at GSI. |
6093 | Return true if STMT_INFO is vectorizable in this way. */ |
6094 | |
6095 | static bool |
6096 | vectorizable_shift (vec_info *vinfo, |
6097 | stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, |
6098 | gimple **vec_stmt, slp_tree slp_node, |
6099 | stmt_vector_for_cost *cost_vec) |
6100 | { |
6101 | tree vec_dest; |
6102 | tree scalar_dest; |
6103 | tree op0, op1 = NULL; |
6104 | tree vec_oprnd1 = NULL_TREE; |
6105 | tree vectype; |
6106 | loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo); |
6107 | enum tree_code code; |
6108 | machine_mode vec_mode; |
6109 | tree new_temp; |
6110 | optab optab; |
6111 | int icode; |
6112 | machine_mode optab_op2_mode; |
6113 | enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; |
6114 | int ndts = 2; |
6115 | poly_uint64 nunits_in; |
6116 | poly_uint64 nunits_out; |
6117 | tree vectype_out; |
6118 | tree op1_vectype; |
6119 | int ncopies; |
6120 | int i; |
6121 | vec<tree> vec_oprnds0 = vNULL; |
6122 | vec<tree> vec_oprnds1 = vNULL; |
6123 | tree vop0, vop1; |
6124 | unsigned int k; |
6125 | bool scalar_shift_arg = true; |
6126 | bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo); |
6127 | bool incompatible_op1_vectype_p = false; |
6128 | |
6129 | if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) |
6130 | return false; |
6131 | |
6132 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def |
6133 | && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle |
6134 | && ! vec_stmt) |
6135 | return false; |
6136 | |
/* Is STMT a vectorizable shift operation? */
6138 | gassign *stmt = dyn_cast <gassign *> (p: stmt_info->stmt); |
6139 | if (!stmt) |
6140 | return false; |
6141 | |
6142 | if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME) |
6143 | return false; |
6144 | |
6145 | code = gimple_assign_rhs_code (gs: stmt); |
6146 | |
6147 | if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR |
6148 | || code == RROTATE_EXPR)) |
6149 | return false; |
6150 | |
6151 | scalar_dest = gimple_assign_lhs (gs: stmt); |
6152 | vectype_out = STMT_VINFO_VECTYPE (stmt_info); |
6153 | if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))) |
6154 | { |
6155 | if (dump_enabled_p ()) |
6156 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6157 | "bit-precision shifts not supported.\n" ); |
6158 | return false; |
6159 | } |
6160 | |
6161 | slp_tree slp_op0; |
6162 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, |
6163 | 0, &op0, &slp_op0, &dt[0], &vectype)) |
6164 | { |
6165 | if (dump_enabled_p ()) |
6166 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6167 | "use not simple.\n" ); |
6168 | return false; |
6169 | } |
6170 | /* If op0 is an external or constant def, infer the vector type |
6171 | from the scalar type. */ |
6172 | if (!vectype) |
6173 | vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node); |
6174 | if (vec_stmt) |
6175 | gcc_assert (vectype); |
6176 | if (!vectype) |
6177 | { |
6178 | if (dump_enabled_p ()) |
6179 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6180 | "no vectype for scalar type\n" ); |
6181 | return false; |
6182 | } |
6183 | |
6184 | nunits_out = TYPE_VECTOR_SUBPARTS (node: vectype_out); |
6185 | nunits_in = TYPE_VECTOR_SUBPARTS (node: vectype); |
6186 | if (maybe_ne (a: nunits_out, b: nunits_in)) |
6187 | return false; |
6188 | |
6189 | stmt_vec_info op1_def_stmt_info; |
6190 | slp_tree slp_op1; |
6191 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1, |
6192 | &dt[1], &op1_vectype, &op1_def_stmt_info)) |
6193 | { |
6194 | if (dump_enabled_p ()) |
6195 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6196 | "use not simple.\n" ); |
6197 | return false; |
6198 | } |
6199 | |
6200 | /* Multiple types in SLP are handled by creating the appropriate number of |
6201 | vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in |
6202 | case of SLP. */ |
6203 | if (slp_node) |
6204 | ncopies = 1; |
6205 | else |
6206 | ncopies = vect_get_num_copies (loop_vinfo, vectype); |
6207 | |
6208 | gcc_assert (ncopies >= 1); |
6209 | |
/* Determine whether the shift amount is a vector or a scalar. If the
shift/rotate amount is a vector, use the vector/vector shift optabs. */
6212 | |
6213 | if ((dt[1] == vect_internal_def |
6214 | || dt[1] == vect_induction_def |
6215 | || dt[1] == vect_nested_cycle) |
6216 | && !slp_node) |
6217 | scalar_shift_arg = false; |
6218 | else if (dt[1] == vect_constant_def |
6219 | || dt[1] == vect_external_def |
6220 | || dt[1] == vect_internal_def) |
6221 | { |
/* In SLP, we need to check whether the shift count is the same in
all scalar stmts; in loops, a constant or invariant count is
always a scalar shift. */
6225 | if (slp_node) |
6226 | { |
6227 | vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node); |
6228 | stmt_vec_info slpstmt_info; |
6229 | |
6230 | FOR_EACH_VEC_ELT (stmts, k, slpstmt_info) |
6231 | { |
6232 | gassign *slpstmt = as_a <gassign *> (p: slpstmt_info->stmt); |
6233 | if (!operand_equal_p (gimple_assign_rhs2 (gs: slpstmt), op1, flags: 0)) |
6234 | scalar_shift_arg = false; |
6235 | } |
6236 | |
6237 | /* For internal SLP defs we have to make sure we see scalar stmts |
6238 | for all vector elements. |
6239 | ??? For different vectors we could resort to a different |
6240 | scalar shift operand but code-generation below simply always |
6241 | takes the first. */ |
6242 | if (dt[1] == vect_internal_def |
6243 | && maybe_ne (a: nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node), |
6244 | b: stmts.length ())) |
6245 | scalar_shift_arg = false; |
6246 | } |
6247 | |
6248 | /* If the shift amount is computed by a pattern stmt we cannot |
6249 | use the scalar amount directly thus give up and use a vector |
6250 | shift. */ |
6251 | if (op1_def_stmt_info && is_pattern_stmt_p (stmt_info: op1_def_stmt_info)) |
6252 | scalar_shift_arg = false; |
6253 | } |
6254 | else |
6255 | { |
6256 | if (dump_enabled_p ()) |
6257 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6258 | "operand mode requires invariant argument.\n" ); |
6259 | return false; |
6260 | } |
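
/* For example (an explanatory addition): in "a[i] << s_1" with a
loop-invariant s_1 the count stays a scalar shift argument, while in
"a[i] << b[i]" the count is a vect_internal_def with a potentially
different value per lane, so scalar_shift_arg is false and the
vector/vector optab must be used. */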
6261 | |
6262 | /* Vector shifted by vector. */ |
6263 | bool was_scalar_shift_arg = scalar_shift_arg; |
6264 | if (!scalar_shift_arg) |
6265 | { |
6266 | optab = optab_for_tree_code (code, vectype, optab_vector); |
6267 | if (dump_enabled_p ()) |
6268 | dump_printf_loc (MSG_NOTE, vect_location, |
6269 | "vector/vector shift/rotate found.\n" ); |
6270 | |
6271 | if (!op1_vectype) |
6272 | op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1), |
6273 | slp_op1); |
6274 | incompatible_op1_vectype_p |
6275 | = (op1_vectype == NULL_TREE |
6276 | || maybe_ne (a: TYPE_VECTOR_SUBPARTS (node: op1_vectype), |
6277 | b: TYPE_VECTOR_SUBPARTS (node: vectype)) |
6278 | || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype)); |
6279 | if (incompatible_op1_vectype_p |
6280 | && (!slp_node |
6281 | || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def |
6282 | || slp_op1->refcnt != 1)) |
6283 | { |
6284 | if (dump_enabled_p ()) |
6285 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6286 | "unusable type for last operand in" |
6287 | " vector/vector shift/rotate.\n" ); |
6288 | return false; |
6289 | } |
6290 | } |
6291 | /* See if the machine has a vector shifted by scalar insn and if not |
6292 | then see if it has a vector shifted by vector insn. */ |
6293 | else |
6294 | { |
6295 | optab = optab_for_tree_code (code, vectype, optab_scalar); |
6296 | if (optab |
6297 | && optab_handler (op: optab, TYPE_MODE (vectype)) != CODE_FOR_nothing) |
6298 | { |
6299 | if (dump_enabled_p ()) |
6300 | dump_printf_loc (MSG_NOTE, vect_location, |
6301 | "vector/scalar shift/rotate found.\n" ); |
6302 | } |
6303 | else |
6304 | { |
6305 | optab = optab_for_tree_code (code, vectype, optab_vector); |
6306 | if (optab |
6307 | && (optab_handler (op: optab, TYPE_MODE (vectype)) |
6308 | != CODE_FOR_nothing)) |
6309 | { |
6310 | scalar_shift_arg = false; |
6311 | |
6312 | if (dump_enabled_p ()) |
6313 | dump_printf_loc (MSG_NOTE, vect_location, |
6314 | "vector/vector shift/rotate found.\n" ); |
6315 | |
6316 | if (!op1_vectype) |
6317 | op1_vectype = get_vectype_for_scalar_type (vinfo, |
6318 | TREE_TYPE (op1), |
6319 | slp_op1); |
6320 | |
/* Unlike the other binary operators, shifts/rotates have
the rhs being int rather than the same type as the lhs,
so make sure the scalar has the right type when we are
dealing with vectors of long long/long/short/char. */
6325 | incompatible_op1_vectype_p |
6326 | = (!op1_vectype |
6327 | || !tree_nop_conversion_p (TREE_TYPE (vectype), |
6328 | TREE_TYPE (op1))); |
6329 | if (incompatible_op1_vectype_p |
6330 | && dt[1] == vect_internal_def) |
6331 | { |
6332 | if (dump_enabled_p ()) |
6333 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6334 | "unusable type for last operand in" |
6335 | " vector/vector shift/rotate.\n" ); |
6336 | return false; |
6337 | } |
6338 | } |
6339 | } |
6340 | } |
6341 | |
6342 | /* Supportable by target? */ |
6343 | if (!optab) |
6344 | { |
6345 | if (dump_enabled_p ()) |
6346 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6347 | "no optab.\n" ); |
6348 | return false; |
6349 | } |
6350 | vec_mode = TYPE_MODE (vectype); |
6351 | icode = (int) optab_handler (op: optab, mode: vec_mode); |
6352 | if (icode == CODE_FOR_nothing) |
6353 | { |
6354 | if (dump_enabled_p ()) |
6355 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6356 | "op not supported by target.\n" ); |
6357 | return false; |
6358 | } |
/* Vector lowering cannot optimize vector shifts using word arithmetic. */
6360 | if (vect_emulated_vector_p (vectype)) |
6361 | return false; |
6362 | |
6363 | if (!vec_stmt) /* transformation not required. */ |
6364 | { |
6365 | if (slp_node |
6366 | && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype) |
6367 | || ((!scalar_shift_arg || dt[1] == vect_internal_def) |
6368 | && (!incompatible_op1_vectype_p |
6369 | || dt[1] == vect_constant_def) |
6370 | && !vect_maybe_update_slp_op_vectype |
6371 | (slp_op1, |
6372 | incompatible_op1_vectype_p ? vectype : op1_vectype)))) |
6373 | { |
6374 | if (dump_enabled_p ()) |
6375 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6376 | "incompatible vector types for invariants\n" ); |
6377 | return false; |
6378 | } |
6379 | /* Now adjust the constant shift amount in place. */ |
6380 | if (slp_node |
6381 | && incompatible_op1_vectype_p |
6382 | && dt[1] == vect_constant_def) |
6383 | { |
6384 | for (unsigned i = 0; |
6385 | i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i) |
6386 | { |
6387 | SLP_TREE_SCALAR_OPS (slp_op1)[i] |
6388 | = fold_convert (TREE_TYPE (vectype), |
6389 | SLP_TREE_SCALAR_OPS (slp_op1)[i]); |
6390 | gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i]) |
6391 | == INTEGER_CST)); |
6392 | } |
6393 | } |
6394 | STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type; |
6395 | DUMP_VECT_SCOPE ("vectorizable_shift" ); |
6396 | vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, |
6397 | ndts: scalar_shift_arg ? 1 : ndts, node: slp_node, cost_vec); |
6398 | return true; |
6399 | } |
6400 | |
6401 | /* Transform. */ |
6402 | |
6403 | if (dump_enabled_p ()) |
6404 | dump_printf_loc (MSG_NOTE, vect_location, |
6405 | "transform binary/unary operation.\n" ); |
6406 | |
6407 | if (incompatible_op1_vectype_p && !slp_node) |
6408 | { |
6409 | gcc_assert (!scalar_shift_arg && was_scalar_shift_arg); |
6410 | op1 = fold_convert (TREE_TYPE (vectype), op1); |
6411 | if (dt[1] != vect_constant_def) |
6412 | op1 = vect_init_vector (vinfo, stmt_info, val: op1, |
6413 | TREE_TYPE (vectype), NULL); |
6414 | } |
6415 | |
6416 | /* Handle def. */ |
6417 | vec_dest = vect_create_destination_var (scalar_dest, vectype); |
6418 | |
6419 | if (scalar_shift_arg && dt[1] != vect_internal_def) |
6420 | { |
6421 | /* Vector shl and shr insn patterns can be defined with scalar |
6422 | operand 2 (shift operand). In this case, use constant or loop |
6423 | invariant op1 directly, without extending it to vector mode |
6424 | first. */ |
6425 | optab_op2_mode = insn_data[icode].operand[2].mode; |
6426 | if (!VECTOR_MODE_P (optab_op2_mode)) |
6427 | { |
6428 | if (dump_enabled_p ()) |
6429 | dump_printf_loc (MSG_NOTE, vect_location, |
6430 | "operand 1 using scalar mode.\n" ); |
6431 | vec_oprnd1 = op1; |
6432 | vec_oprnds1.create (nelems: slp_node ? slp_node->vec_stmts_size : ncopies); |
6433 | vec_oprnds1.quick_push (obj: vec_oprnd1); |
6434 | /* Store vec_oprnd1 for every vector stmt to be created. |
6435 | We check during the analysis that all the shift arguments |
6436 | are the same. |
6437 | TODO: Allow different constants for different vector |
6438 | stmts generated for an SLP instance. */ |
6439 | for (k = 0; |
6440 | k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++) |
6441 | vec_oprnds1.quick_push (obj: vec_oprnd1); |
6442 | } |
6443 | } |
6444 | else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p) |
6445 | { |
6446 | if (was_scalar_shift_arg) |
6447 | { |
/* If the argument was the same in all lanes, create
the correctly typed vector shift amount directly. */
6450 | op1 = fold_convert (TREE_TYPE (vectype), op1); |
6451 | op1 = vect_init_vector (vinfo, stmt_info, val: op1, TREE_TYPE (vectype), |
6452 | gsi: !loop_vinfo ? gsi : NULL); |
6453 | vec_oprnd1 = vect_init_vector (vinfo, stmt_info, val: op1, type: vectype, |
6454 | gsi: !loop_vinfo ? gsi : NULL); |
6455 | vec_oprnds1.create (nelems: slp_node->vec_stmts_size); |
6456 | for (k = 0; k < slp_node->vec_stmts_size; k++) |
6457 | vec_oprnds1.quick_push (obj: vec_oprnd1); |
6458 | } |
6459 | else if (dt[1] == vect_constant_def) |
6460 | /* The constant shift amount has been adjusted in place. */ |
6461 | ; |
6462 | else |
6463 | gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype)); |
6464 | } |
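
/* Illustrative examples of the two setups above (an explanatory
addition): for "v << 3" on a target whose vector shift pattern takes
a scalar operand 2, the literal 3 is pushed once per copy as
vec_oprnd1; if only vector/vector shifts exist, the invariant count
is instead broadcast with vect_init_vector into a vector of counts. */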
6465 | |
/* vec_oprnd1 is available if operand 1 should be of a scalar type
(a special case for certain kinds of vector shifts); otherwise,
operand 1 should be of a vector type (the usual case). */
6469 | vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, |
6470 | op0, vec_oprnds0: &vec_oprnds0, |
6471 | op1: vec_oprnd1 ? NULL_TREE : op1, vec_oprnds1: &vec_oprnds1); |
6472 | |
6473 | /* Arguments are ready. Create the new vector stmt. */ |
6474 | FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) |
6475 | { |
6476 | /* For internal defs where we need to use a scalar shift arg |
6477 | extract the first lane. */ |
6478 | if (scalar_shift_arg && dt[1] == vect_internal_def) |
6479 | { |
6480 | vop1 = vec_oprnds1[0]; |
6481 | new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1))); |
6482 | gassign *new_stmt |
6483 | = gimple_build_assign (new_temp, |
6484 | build3 (BIT_FIELD_REF, TREE_TYPE (new_temp), |
6485 | vop1, |
6486 | TYPE_SIZE (TREE_TYPE (new_temp)), |
6487 | bitsize_zero_node)); |
6488 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
6489 | vop1 = new_temp; |
6490 | } |
6491 | else |
6492 | vop1 = vec_oprnds1[i]; |
6493 | gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1); |
6494 | new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt); |
6495 | gimple_assign_set_lhs (gs: new_stmt, lhs: new_temp); |
6496 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
6497 | if (slp_node) |
6498 | slp_node->push_vec_def (def: new_stmt); |
6499 | else |
6500 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt); |
6501 | } |
6502 | |
6503 | if (!slp_node) |
6504 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
6505 | |
6506 | vec_oprnds0.release (); |
6507 | vec_oprnds1.release (); |
6508 | |
6509 | return true; |
6510 | } |
6511 | |
6512 | /* Function vectorizable_operation. |
6513 | |
6514 | Check if STMT_INFO performs a binary, unary or ternary operation that can |
6515 | be vectorized. |
6516 | If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized |
6517 | stmt to replace it, put it in VEC_STMT, and insert it at GSI. |
6518 | Return true if STMT_INFO is vectorizable in this way. */ |
6519 | |
6520 | static bool |
6521 | vectorizable_operation (vec_info *vinfo, |
6522 | stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, |
6523 | gimple **vec_stmt, slp_tree slp_node, |
6524 | stmt_vector_for_cost *cost_vec) |
6525 | { |
6526 | tree vec_dest; |
6527 | tree scalar_dest; |
6528 | tree op0, op1 = NULL_TREE, op2 = NULL_TREE; |
6529 | tree vectype; |
6530 | loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo); |
6531 | enum tree_code code, orig_code; |
6532 | machine_mode vec_mode; |
6533 | tree new_temp; |
6534 | int op_type; |
6535 | optab optab; |
6536 | bool target_support_p; |
6537 | enum vect_def_type dt[3] |
6538 | = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type}; |
6539 | int ndts = 3; |
6540 | poly_uint64 nunits_in; |
6541 | poly_uint64 nunits_out; |
6542 | tree vectype_out; |
6543 | int ncopies, vec_num; |
6544 | int i; |
6545 | vec<tree> vec_oprnds0 = vNULL; |
6546 | vec<tree> vec_oprnds1 = vNULL; |
6547 | vec<tree> vec_oprnds2 = vNULL; |
6548 | tree vop0, vop1, vop2; |
6549 | bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo); |
6550 | |
6551 | if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) |
6552 | return false; |
6553 | |
6554 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def |
6555 | && ! vec_stmt) |
6556 | return false; |
6557 | |
6558 | /* Is STMT a vectorizable binary/unary operation? */ |
6559 | gassign *stmt = dyn_cast <gassign *> (p: stmt_info->stmt); |
6560 | if (!stmt) |
6561 | return false; |
6562 | |
6563 | /* Loads and stores are handled in vectorizable_{load,store}. */ |
6564 | if (STMT_VINFO_DATA_REF (stmt_info)) |
6565 | return false; |
6566 | |
6567 | orig_code = code = gimple_assign_rhs_code (gs: stmt); |
6568 | |
6569 | /* Shifts are handled in vectorizable_shift. */ |
6570 | if (code == LSHIFT_EXPR |
6571 | || code == RSHIFT_EXPR |
6572 | || code == LROTATE_EXPR |
6573 | || code == RROTATE_EXPR) |
6574 | return false; |
6575 | |
6576 | /* Comparisons are handled in vectorizable_comparison. */ |
6577 | if (TREE_CODE_CLASS (code) == tcc_comparison) |
6578 | return false; |
6579 | |
6580 | /* Conditions are handled in vectorizable_condition. */ |
6581 | if (code == COND_EXPR) |
6582 | return false; |
6583 | |
6584 | /* For pointer addition and subtraction, we should use the normal |
6585 | plus and minus for the vector operation. */ |
6586 | if (code == POINTER_PLUS_EXPR) |
6587 | code = PLUS_EXPR; |
6588 | if (code == POINTER_DIFF_EXPR) |
6589 | code = MINUS_EXPR; |
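
/* E.g. (an explanatory addition): "q_1 = p_2 + 4" (POINTER_PLUS_EXPR)
is vectorized as a vector PLUS_EXPR on the pointer lanes, and
"d_1 = p_2 - q_3" (POINTER_DIFF_EXPR) as a vector MINUS_EXPR; see the
POINTER_DIFF_EXPR handling further down for the sign view-conversion
of the result. */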
6590 | |
/* Support only unary, binary or ternary operations. */
6592 | op_type = TREE_CODE_LENGTH (code); |
6593 | if (op_type != unary_op && op_type != binary_op && op_type != ternary_op) |
6594 | { |
6595 | if (dump_enabled_p ()) |
6596 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6597 | "num. args = %d (not unary/binary/ternary op).\n" , |
6598 | op_type); |
6599 | return false; |
6600 | } |
6601 | |
6602 | scalar_dest = gimple_assign_lhs (gs: stmt); |
6603 | vectype_out = STMT_VINFO_VECTYPE (stmt_info); |
6604 | |
6605 | /* Most operations cannot handle bit-precision types without extra |
6606 | truncations. */ |
6607 | bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out); |
6608 | if (!mask_op_p |
6609 | && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)) |
/* Exceptions are the bitwise binary operations. */
6611 | && code != BIT_IOR_EXPR |
6612 | && code != BIT_XOR_EXPR |
6613 | && code != BIT_AND_EXPR) |
6614 | { |
6615 | if (dump_enabled_p ()) |
6616 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6617 | "bit-precision arithmetic not supported.\n" ); |
6618 | return false; |
6619 | } |
6620 | |
6621 | slp_tree slp_op0; |
6622 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, |
6623 | 0, &op0, &slp_op0, &dt[0], &vectype)) |
6624 | { |
6625 | if (dump_enabled_p ()) |
6626 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6627 | "use not simple.\n" ); |
6628 | return false; |
6629 | } |
6630 | bool is_invariant = (dt[0] == vect_external_def |
6631 | || dt[0] == vect_constant_def); |
6632 | /* If op0 is an external or constant def, infer the vector type |
6633 | from the scalar type. */ |
6634 | if (!vectype) |
6635 | { |
/* For a boolean type we cannot determine the vectype from an
invariant value (we don't know whether it is a vector
of booleans or a vector of integers). We use the output
vectype because operations on booleans don't change the
type. */
6641 | if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0))) |
6642 | { |
6643 | if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest))) |
6644 | { |
6645 | if (dump_enabled_p ()) |
6646 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6647 | "not supported operation on bool value.\n" ); |
6648 | return false; |
6649 | } |
6650 | vectype = vectype_out; |
6651 | } |
6652 | else |
6653 | vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), |
6654 | slp_node); |
6655 | } |
6656 | if (vec_stmt) |
6657 | gcc_assert (vectype); |
6658 | if (!vectype) |
6659 | { |
6660 | if (dump_enabled_p ()) |
6661 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6662 | "no vectype for scalar type %T\n" , |
6663 | TREE_TYPE (op0)); |
6664 | |
6665 | return false; |
6666 | } |
6667 | |
6668 | nunits_out = TYPE_VECTOR_SUBPARTS (node: vectype_out); |
6669 | nunits_in = TYPE_VECTOR_SUBPARTS (node: vectype); |
6670 | if (maybe_ne (a: nunits_out, b: nunits_in) |
6671 | || !tree_nop_conversion_p (TREE_TYPE (vectype_out), TREE_TYPE (vectype))) |
6672 | return false; |
6673 | |
6674 | tree vectype2 = NULL_TREE, vectype3 = NULL_TREE; |
6675 | slp_tree slp_op1 = NULL, slp_op2 = NULL; |
6676 | if (op_type == binary_op || op_type == ternary_op) |
6677 | { |
6678 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, |
6679 | 1, &op1, &slp_op1, &dt[1], &vectype2)) |
6680 | { |
6681 | if (dump_enabled_p ()) |
6682 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6683 | "use not simple.\n" ); |
6684 | return false; |
6685 | } |
6686 | is_invariant &= (dt[1] == vect_external_def |
6687 | || dt[1] == vect_constant_def); |
6688 | if (vectype2 |
6689 | && (maybe_ne (a: nunits_out, b: TYPE_VECTOR_SUBPARTS (node: vectype2)) |
6690 | || !tree_nop_conversion_p (TREE_TYPE (vectype_out), |
6691 | TREE_TYPE (vectype2)))) |
6692 | return false; |
6693 | } |
6694 | if (op_type == ternary_op) |
6695 | { |
6696 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, |
6697 | 2, &op2, &slp_op2, &dt[2], &vectype3)) |
6698 | { |
6699 | if (dump_enabled_p ()) |
6700 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6701 | "use not simple.\n" ); |
6702 | return false; |
6703 | } |
6704 | is_invariant &= (dt[2] == vect_external_def |
6705 | || dt[2] == vect_constant_def); |
6706 | if (vectype3 |
6707 | && (maybe_ne (a: nunits_out, b: TYPE_VECTOR_SUBPARTS (node: vectype3)) |
6708 | || !tree_nop_conversion_p (TREE_TYPE (vectype_out), |
6709 | TREE_TYPE (vectype3)))) |
6710 | return false; |
6711 | } |
6712 | |
6713 | /* Multiple types in SLP are handled by creating the appropriate number of |
6714 | vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in |
6715 | case of SLP. */ |
6716 | if (slp_node) |
6717 | { |
6718 | ncopies = 1; |
6719 | vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); |
6720 | } |
6721 | else |
6722 | { |
6723 | ncopies = vect_get_num_copies (loop_vinfo, vectype); |
6724 | vec_num = 1; |
6725 | } |
6726 | |
6727 | gcc_assert (ncopies >= 1); |
6728 | |
6729 | /* Reject attempts to combine mask types with nonmask types, e.g. if |
6730 | we have an AND between a (nonmask) boolean loaded from memory and |
6731 | a (mask) boolean result of a comparison. |
6732 | |
6733 | TODO: We could easily fix these cases up using pattern statements. */ |
6734 | if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p |
6735 | || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p) |
6736 | || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p)) |
6737 | { |
6738 | if (dump_enabled_p ()) |
6739 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6740 | "mixed mask and nonmask vector types\n" ); |
6741 | return false; |
6742 | } |
6743 | |
6744 | /* Supportable by target? */ |
6745 | |
6746 | vec_mode = TYPE_MODE (vectype); |
6747 | if (code == MULT_HIGHPART_EXPR) |
6748 | target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)); |
6749 | else |
6750 | { |
6751 | optab = optab_for_tree_code (code, vectype, optab_default); |
6752 | if (!optab) |
6753 | { |
6754 | if (dump_enabled_p ()) |
6755 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6756 | "no optab.\n" ); |
6757 | return false; |
6758 | } |
6759 | target_support_p = (optab_handler (op: optab, mode: vec_mode) != CODE_FOR_nothing |
6760 | || optab_libfunc (optab, vec_mode)); |
6761 | } |
6762 | |
6763 | bool using_emulated_vectors_p = vect_emulated_vector_p (vectype); |
6764 | if (!target_support_p || using_emulated_vectors_p) |
6765 | { |
6766 | if (dump_enabled_p ()) |
6767 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6768 | "op not supported by target.\n" ); |
/* When vec_mode is not a vector mode and we have verified that the
ops we do not have to lower (like AND) are natively supported, let
those through even when the mode isn't word_mode. For ops we do
have to lower, the lowering code assumes we are dealing with
word_mode. */
6774 | if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype)) |
6775 | || (((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR) |
6776 | || !target_support_p) |
6777 | && maybe_ne (a: GET_MODE_SIZE (mode: vec_mode), UNITS_PER_WORD)) |
6778 | /* Check only during analysis. */ |
6779 | || (!vec_stmt && !vect_can_vectorize_without_simd_p (code))) |
6780 | { |
6781 | if (dump_enabled_p ()) |
6782 | dump_printf (MSG_NOTE, "using word mode not possible.\n" ); |
6783 | return false; |
6784 | } |
6785 | if (dump_enabled_p ()) |
6786 | dump_printf_loc (MSG_NOTE, vect_location, |
6787 | "proceeding using word mode.\n" ); |
6788 | using_emulated_vectors_p = true; |
6789 | } |
6790 | |
6791 | int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info); |
6792 | vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL); |
6793 | vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL); |
6794 | internal_fn cond_fn = get_conditional_internal_fn (code); |
6795 | internal_fn cond_len_fn = get_conditional_len_internal_fn (code); |
6796 | |
6797 | /* If operating on inactive elements could generate spurious traps, |
6798 | we need to restrict the operation to active lanes. Note that this |
6799 | specifically doesn't apply to unhoisted invariants, since they |
6800 | operate on the same value for every lane. |
6801 | |
6802 | Similarly, if this operation is part of a reduction, a fully-masked |
6803 | loop should only change the active lanes of the reduction chain, |
6804 | keeping the inactive lanes as-is. */ |
6805 | bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt)) |
6806 | || reduc_idx >= 0); |
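
/* For instance (an explanatory addition): a trapping operation such as
a division may fault on inactive lanes (e.g. on a zero divisor left
there), so with partial vectors it must be restricted to the active
lanes, whereas an unhoisted invariant like "x_1 + y_2" computes the
same value in every lane and needs no masking. */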
6807 | |
6808 | if (!vec_stmt) /* transformation not required. */ |
6809 | { |
6810 | if (loop_vinfo |
6811 | && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) |
6812 | && mask_out_inactive) |
6813 | { |
6814 | if (cond_len_fn != IFN_LAST |
6815 | && direct_internal_fn_supported_p (cond_len_fn, vectype, |
6816 | OPTIMIZE_FOR_SPEED)) |
6817 | vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num, vectype, |
6818 | 1); |
6819 | else if (cond_fn != IFN_LAST |
6820 | && direct_internal_fn_supported_p (cond_fn, vectype, |
6821 | OPTIMIZE_FOR_SPEED)) |
6822 | vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num, |
6823 | vectype, NULL); |
6824 | else |
6825 | { |
6826 | if (dump_enabled_p ()) |
6827 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6828 | "can't use a fully-masked loop because no" |
6829 | " conditional operation is available.\n" ); |
6830 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; |
6831 | } |
6832 | } |
6833 | |
6834 | /* Put types on constant and invariant SLP children. */ |
6835 | if (slp_node |
6836 | && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype) |
6837 | || !vect_maybe_update_slp_op_vectype (slp_op1, vectype) |
6838 | || !vect_maybe_update_slp_op_vectype (slp_op2, vectype))) |
6839 | { |
6840 | if (dump_enabled_p ()) |
6841 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6842 | "incompatible vector types for invariants\n" ); |
6843 | return false; |
6844 | } |
6845 | |
6846 | STMT_VINFO_TYPE (stmt_info) = op_vec_info_type; |
6847 | DUMP_VECT_SCOPE ("vectorizable_operation" ); |
6848 | vect_model_simple_cost (vinfo, stmt_info, |
6849 | ncopies, dt, ndts, node: slp_node, cost_vec); |
6850 | if (using_emulated_vectors_p) |
6851 | { |
6852 | /* The above vect_model_simple_cost call handles constants |
6853 | in the prologue and (mis-)costs one of the stmts as |
6854 | vector stmt. See below for the actual lowering that will |
6855 | be applied. */ |
6856 | unsigned n |
6857 | = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies; |
6858 | switch (code) |
6859 | { |
6860 | case PLUS_EXPR: |
6861 | n *= 5; |
6862 | break; |
6863 | case MINUS_EXPR: |
6864 | n *= 6; |
6865 | break; |
6866 | case NEGATE_EXPR: |
6867 | n *= 4; |
6868 | break; |
6869 | default: |
6870 | /* Bit operations do not have extra cost and are accounted |
6871 | as vector stmt by vect_model_simple_cost. */ |
6872 | n = 0; |
6873 | break; |
6874 | } |
6875 | if (n != 0) |
6876 | { |
6877 | /* We also need to materialize two large constants. */ |
6878 | record_stmt_cost (body_cost_vec: cost_vec, count: 2, kind: scalar_stmt, stmt_info, |
6879 | misalign: 0, where: vect_prologue); |
6880 | record_stmt_cost (body_cost_vec: cost_vec, count: n, kind: scalar_stmt, stmt_info, |
6881 | misalign: 0, where: vect_body); |
6882 | } |
6883 | } |
6884 | return true; |
6885 | } |
6886 | |
6887 | /* Transform. */ |
6888 | |
6889 | if (dump_enabled_p ()) |
6890 | dump_printf_loc (MSG_NOTE, vect_location, |
6891 | "transform binary/unary operation.\n" ); |
6892 | |
6893 | bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); |
6894 | bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo); |
6895 | |
6896 | /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as |
6897 | vectors with unsigned elements, but the result is signed. So, we |
6898 | need to compute the MINUS_EXPR into vectype temporary and |
6899 | VIEW_CONVERT_EXPR it into the final vectype_out result. */ |
6900 | tree vec_cvt_dest = NULL_TREE; |
6901 | if (orig_code == POINTER_DIFF_EXPR) |
6902 | { |
6903 | vec_dest = vect_create_destination_var (scalar_dest, vectype); |
6904 | vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out); |
6905 | } |
6906 | /* Handle def. */ |
6907 | else |
6908 | vec_dest = vect_create_destination_var (scalar_dest, vectype_out); |
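
/* Illustrative sketch (not from the original sources) of the
POINTER_DIFF_EXPR case above: for "d_1 = p_2 - q_3" the pointer
operands are vectorized as vectors of unsigned elements, so we emit

vtmp = MINUS_EXPR <vp, vq>; in the unsigned VECTYPE
vd = VIEW_CONVERT_EXPR <vectype_out> (vtmp); signed result

with the conversion going through vec_cvt_dest at the bottom of the
transform loop below. */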
6909 | |
6910 | /* In case the vectorization factor (VF) is bigger than the number |
6911 | of elements that we can fit in a vectype (nunits), we have to generate |
more than one vector stmt - i.e., we need to "unroll" the
6913 | vector stmt by a factor VF/nunits. In doing so, we record a pointer |
6914 | from one copy of the vector stmt to the next, in the field |
6915 | STMT_VINFO_RELATED_STMT. This is necessary in order to allow following |
6916 | stages to find the correct vector defs to be used when vectorizing |
6917 | stmts that use the defs of the current stmt. The example below |
6918 | illustrates the vectorization process when VF=16 and nunits=4 (i.e., |
6919 | we need to create 4 vectorized stmts): |
6920 | |
6921 | before vectorization: |
6922 | RELATED_STMT VEC_STMT |
6923 | S1: x = memref - - |
6924 | S2: z = x + 1 - - |
6925 | |
6926 | step 1: vectorize stmt S1 (done in vectorizable_load. See more details |
6927 | there): |
6928 | RELATED_STMT VEC_STMT |
6929 | VS1_0: vx0 = memref0 VS1_1 - |
6930 | VS1_1: vx1 = memref1 VS1_2 - |
6931 | VS1_2: vx2 = memref2 VS1_3 - |
6932 | VS1_3: vx3 = memref3 - - |
6933 | S1: x = load - VS1_0 |
6934 | S2: z = x + 1 - - |
6935 | |
6936 | step2: vectorize stmt S2 (done here): |
6937 | To vectorize stmt S2 we first need to find the relevant vector |
6938 | def for the first operand 'x'. This is, as usual, obtained from |
6939 | the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt |
6940 | that defines 'x' (S1). This way we find the stmt VS1_0, and the |
6941 | relevant vector def 'vx0'. Having found 'vx0' we can generate |
6942 | the vector stmt VS2_0, and as usual, record it in the |
6943 | STMT_VINFO_VEC_STMT of stmt S2. |
6944 | When creating the second copy (VS2_1), we obtain the relevant vector |
6945 | def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of |
6946 | stmt VS1_0. This way we find the stmt VS1_1 and the relevant |
6947 | vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a |
6948 | pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0. |
6949 | Similarly when creating stmts VS2_2 and VS2_3. This is the resulting |
6950 | chain of stmts and pointers: |
6951 | RELATED_STMT VEC_STMT |
6952 | VS1_0: vx0 = memref0 VS1_1 - |
6953 | VS1_1: vx1 = memref1 VS1_2 - |
6954 | VS1_2: vx2 = memref2 VS1_3 - |
6955 | VS1_3: vx3 = memref3 - - |
6956 | S1: x = load - VS1_0 |
6957 | VS2_0: vz0 = vx0 + v1 VS2_1 - |
6958 | VS2_1: vz1 = vx1 + v1 VS2_2 - |
6959 | VS2_2: vz2 = vx2 + v1 VS2_3 - |
6960 | VS2_3: vz3 = vx3 + v1 - - |
6961 | S2: z = x + 1 - VS2_0 */ |
6962 | |
6963 | vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, |
6964 | op0, vec_oprnds0: &vec_oprnds0, op1, vec_oprnds1: &vec_oprnds1, op2, vec_oprnds2: &vec_oprnds2); |
6965 | /* Arguments are ready. Create the new vector stmt. */ |
6966 | FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) |
6967 | { |
6968 | gimple *new_stmt = NULL; |
6969 | vop1 = ((op_type == binary_op || op_type == ternary_op) |
6970 | ? vec_oprnds1[i] : NULL_TREE); |
6971 | vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE); |
6972 | if (using_emulated_vectors_p |
6973 | && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)) |
6974 | { |
6975 | /* Lower the operation. This follows vector lowering. */ |
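/* Illustrative sketch of the word-mode trick used below (an
explanatory addition, not from the original sources). For PLUS_EXPR
on 8-bit elements, low_bits is 0x7f7f...7f and high_bits is
0x8080...80, and the sequence computes

a_low = a & low_bits; clear each element's MSB
b_low = b & low_bits;
low = a_low + b_low; carries cannot cross element boundaries
signs = (a ^ b) & high_bits; carry-less sum of the MSBs
result = low ^ signs; fold the MSB bits back in

MINUS_EXPR and NEGATE_EXPR below use the same idea, with the sign
bits pre-set (a | high_bits, or high_bits - a_low) so that borrows
cannot propagate across elements either. */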
6976 | unsigned int width = vector_element_bits (vectype); |
6977 | tree inner_type = TREE_TYPE (vectype); |
6978 | tree word_type |
6979 | = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode: word_mode), 1); |
6980 | HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type)); |
6981 | tree low_bits = build_replicated_int_cst (word_type, width, max >> 1); |
6982 | tree high_bits |
6983 | = build_replicated_int_cst (word_type, width, max & ~(max >> 1)); |
6984 | tree wvop0 = make_ssa_name (var: word_type); |
6985 | new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR, |
6986 | build1 (VIEW_CONVERT_EXPR, |
6987 | word_type, vop0)); |
6988 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
6989 | tree result_low, signs; |
6990 | if (code == PLUS_EXPR || code == MINUS_EXPR) |
6991 | { |
6992 | tree wvop1 = make_ssa_name (var: word_type); |
6993 | new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR, |
6994 | build1 (VIEW_CONVERT_EXPR, |
6995 | word_type, vop1)); |
6996 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
6997 | signs = make_ssa_name (var: word_type); |
6998 | new_stmt = gimple_build_assign (signs, |
6999 | BIT_XOR_EXPR, wvop0, wvop1); |
7000 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
7001 | tree b_low = make_ssa_name (var: word_type); |
7002 | new_stmt = gimple_build_assign (b_low, |
7003 | BIT_AND_EXPR, wvop1, low_bits); |
7004 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
7005 | tree a_low = make_ssa_name (var: word_type); |
7006 | if (code == PLUS_EXPR) |
7007 | new_stmt = gimple_build_assign (a_low, |
7008 | BIT_AND_EXPR, wvop0, low_bits); |
7009 | else |
7010 | new_stmt = gimple_build_assign (a_low, |
7011 | BIT_IOR_EXPR, wvop0, high_bits); |
7012 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
7013 | if (code == MINUS_EXPR) |
7014 | { |
7015 | new_stmt = gimple_build_assign (NULL_TREE, |
7016 | BIT_NOT_EXPR, signs); |
7017 | signs = make_ssa_name (var: word_type); |
7018 | gimple_assign_set_lhs (gs: new_stmt, lhs: signs); |
7019 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
7020 | } |
7021 | new_stmt = gimple_build_assign (NULL_TREE, |
7022 | BIT_AND_EXPR, signs, high_bits); |
7023 | signs = make_ssa_name (var: word_type); |
7024 | gimple_assign_set_lhs (gs: new_stmt, lhs: signs); |
7025 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
7026 | result_low = make_ssa_name (var: word_type); |
7027 | new_stmt = gimple_build_assign (result_low, code, a_low, b_low); |
7028 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
7029 | } |
7030 | else |
7031 | { |
7032 | tree a_low = make_ssa_name (var: word_type); |
7033 | new_stmt = gimple_build_assign (a_low, |
7034 | BIT_AND_EXPR, wvop0, low_bits); |
7035 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
7036 | signs = make_ssa_name (var: word_type); |
7037 | new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0); |
7038 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
7039 | new_stmt = gimple_build_assign (NULL_TREE, |
7040 | BIT_AND_EXPR, signs, high_bits); |
7041 | signs = make_ssa_name (var: word_type); |
7042 | gimple_assign_set_lhs (gs: new_stmt, lhs: signs); |
7043 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
7044 | result_low = make_ssa_name (var: word_type); |
7045 | new_stmt = gimple_build_assign (result_low, |
7046 | MINUS_EXPR, high_bits, a_low); |
7047 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
7048 | } |
7049 | new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR, result_low, |
7050 | signs); |
7051 | result_low = make_ssa_name (var: word_type); |
7052 | gimple_assign_set_lhs (gs: new_stmt, lhs: result_low); |
7053 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
7054 | new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR, |
7055 | build1 (VIEW_CONVERT_EXPR, |
7056 | vectype, result_low)); |
7057 | new_temp = make_ssa_name (var: vectype); |
7058 | gimple_assign_set_lhs (gs: new_stmt, lhs: new_temp); |
7059 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
7060 | } |
7061 | else if ((masked_loop_p || len_loop_p) && mask_out_inactive) |
7062 | { |
7063 | tree mask; |
7064 | if (masked_loop_p) |
7065 | mask = vect_get_loop_mask (loop_vinfo, gsi, masks, |
7066 | vec_num * ncopies, vectype, i); |
7067 | else |
7068 | /* Dummy mask. */ |
7069 | mask = build_minus_one_cst (truth_type_for (vectype)); |
7070 | auto_vec<tree> vops (6); |
7071 | vops.quick_push (obj: mask); |
7072 | vops.quick_push (obj: vop0); |
7073 | if (vop1) |
7074 | vops.quick_push (obj: vop1); |
7075 | if (vop2) |
7076 | vops.quick_push (obj: vop2); |
7077 | if (reduc_idx >= 0) |
7078 | { |
7079 | /* Perform the operation on active elements only and take |
7080 | inactive elements from the reduction chain input. */ |
7081 | gcc_assert (!vop2); |
7082 | vops.quick_push (obj: reduc_idx == 1 ? vop1 : vop0); |
7083 | } |
7084 | else |
7085 | { |
7086 | auto else_value = targetm.preferred_else_value |
7087 | (cond_fn, vectype, vops.length () - 1, &vops[1]); |
7088 | vops.quick_push (obj: else_value); |
7089 | } |
7090 | if (len_loop_p) |
7091 | { |
7092 | tree len = vect_get_loop_len (loop_vinfo, gsi, lens, |
7093 | vec_num * ncopies, vectype, i, 1); |
7094 | signed char biasval |
7095 | = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); |
7096 | tree bias = build_int_cst (intQI_type_node, biasval); |
7097 | vops.quick_push (obj: len); |
7098 | vops.quick_push (obj: bias); |
7099 | } |
7100 | gcall *call |
7101 | = gimple_build_call_internal_vec (masked_loop_p ? cond_fn |
7102 | : cond_len_fn, |
7103 | vops); |
7104 | new_temp = make_ssa_name (var: vec_dest, stmt: call); |
7105 | gimple_call_set_lhs (gs: call, lhs: new_temp); |
7106 | gimple_call_set_nothrow (s: call, nothrow_p: true); |
7107 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi); |
7108 | new_stmt = call; |
7109 | } |
7110 | else |
7111 | { |
7112 | tree mask = NULL_TREE; |
/* When combining two masks, check whether either of them is elsewhere
combined with a loop mask; if that's the case, we can mark that the
new combined mask doesn't need to be combined with a loop mask again. */
7116 | if (masked_loop_p |
7117 | && code == BIT_AND_EXPR |
7118 | && VECTOR_BOOLEAN_TYPE_P (vectype)) |
7119 | { |
7120 | if (loop_vinfo->scalar_cond_masked_set.contains (k: { op0, |
7121 | ncopies})) |
7122 | { |
7123 | mask = vect_get_loop_mask (loop_vinfo, gsi, masks, |
7124 | vec_num * ncopies, vectype, i); |
7125 | |
7126 | vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), loop_mask: mask, |
7127 | vec_mask: vop0, gsi); |
7128 | } |
7129 | |
7130 | if (loop_vinfo->scalar_cond_masked_set.contains (k: { op1, |
7131 | ncopies })) |
7132 | { |
7133 | mask = vect_get_loop_mask (loop_vinfo, gsi, masks, |
7134 | vec_num * ncopies, vectype, i); |
7135 | |
7136 | vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), loop_mask: mask, |
7137 | vec_mask: vop1, gsi); |
7138 | } |
7139 | } |
7140 | |
7141 | new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2); |
7142 | new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt); |
7143 | gimple_assign_set_lhs (gs: new_stmt, lhs: new_temp); |
7144 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
7145 | if (using_emulated_vectors_p) |
7146 | suppress_warning (new_stmt, OPT_Wvector_operation_performance); |
7147 | |
7148 | /* Enter the combined value into the vector cond hash so we don't |
7149 | AND it with a loop mask again. */ |
7150 | if (mask) |
7151 | loop_vinfo->vec_cond_masked_set.add (k: { new_temp, mask }); |
7152 | } |
7153 | |
7154 | if (vec_cvt_dest) |
7155 | { |
7156 | new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp); |
7157 | new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR, |
7158 | new_temp); |
7159 | new_temp = make_ssa_name (var: vec_cvt_dest, stmt: new_stmt); |
7160 | gimple_assign_set_lhs (gs: new_stmt, lhs: new_temp); |
7161 | vect_finish_stmt_generation (vinfo, stmt_info, |
7162 | vec_stmt: new_stmt, gsi); |
7163 | } |
7164 | |
7165 | if (slp_node) |
7166 | slp_node->push_vec_def (def: new_stmt); |
7167 | else |
7168 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt); |
7169 | } |
7170 | |
7171 | if (!slp_node) |
7172 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
7173 | |
7174 | vec_oprnds0.release (); |
7175 | vec_oprnds1.release (); |
7176 | vec_oprnds2.release (); |
7177 | |
7178 | return true; |
7179 | } |
7180 | |
7181 | /* A helper function to ensure data reference DR_INFO's base alignment. */ |
7182 | |
7183 | static void |
7184 | ensure_base_align (dr_vec_info *dr_info) |
7185 | { |
/* Alignment is only analyzed for the first element of a DR group;
use that to determine the base alignment we need to enforce. */
7188 | if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt)) |
7189 | dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt)); |
7190 | |
7191 | gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED); |
7192 | |
7193 | if (dr_info->base_misaligned) |
7194 | { |
7195 | tree base_decl = dr_info->base_decl; |
7196 | |
/* We should only be able to increase the alignment of a base object
if we know what its new alignment should be at compile time. */
7199 | unsigned HOST_WIDE_INT align_base_to = |
7200 | DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT; |
7201 | |
7202 | if (decl_in_symtab_p (decl: base_decl)) |
7203 | symtab_node::get (decl: base_decl)->increase_alignment (align: align_base_to); |
7204 | else if (DECL_ALIGN (base_decl) < align_base_to) |
7205 | { |
7206 | SET_DECL_ALIGN (base_decl, align_base_to); |
7207 | DECL_USER_ALIGN (base_decl) = 1; |
7208 | } |
7209 | dr_info->base_misaligned = false; |
7210 | } |
7211 | } |
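
/* For instance (an explanatory addition): if a loop vectorized for a
16-byte vector mode accesses a local "int a[N]" whose decl carries
only 4-byte alignment while DR_TARGET_ALIGNMENT is 16 bytes, the
decl's alignment is raised to 16 and DECL_USER_ALIGN is set so later
passes do not lower it again. */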
7212 | |
7213 | |
7214 | /* Function get_group_alias_ptr_type. |
7215 | |
7216 | Return the alias type for the group starting at FIRST_STMT_INFO. */ |
7217 | |
7218 | static tree |
7219 | get_group_alias_ptr_type (stmt_vec_info first_stmt_info) |
7220 | { |
7221 | struct data_reference *first_dr, *next_dr; |
7222 | |
7223 | first_dr = STMT_VINFO_DATA_REF (first_stmt_info); |
7224 | stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info); |
7225 | while (next_stmt_info) |
7226 | { |
7227 | next_dr = STMT_VINFO_DATA_REF (next_stmt_info); |
7228 | if (get_alias_set (DR_REF (first_dr)) |
7229 | != get_alias_set (DR_REF (next_dr))) |
7230 | { |
7231 | if (dump_enabled_p ()) |
7232 | dump_printf_loc (MSG_NOTE, vect_location, |
7233 | "conflicting alias set types.\n" ); |
7234 | return ptr_type_node; |
7235 | } |
7236 | next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); |
7237 | } |
7238 | return reference_alias_ptr_type (DR_REF (first_dr)); |
7239 | } |
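
/* For instance (an explanatory addition): if a group interleaves
stores to an int field and a float field of the same struct, the
alias sets of the two DR_REFs differ and we fall back to
ptr_type_node, the pessimistic "may alias anything" pointer type. */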
7240 | |
7241 | |
7242 | /* Function scan_operand_equal_p. |
7243 | |
7244 | Helper function for check_scan_store. Compare two references |
7245 | with .GOMP_SIMD_LANE bases. */ |
7246 | |
7247 | static bool |
7248 | scan_operand_equal_p (tree ref1, tree ref2) |
7249 | { |
7250 | tree ref[2] = { ref1, ref2 }; |
7251 | poly_int64 bitsize[2], bitpos[2]; |
7252 | tree offset[2], base[2]; |
7253 | for (int i = 0; i < 2; ++i) |
7254 | { |
7255 | machine_mode mode; |
7256 | int unsignedp, reversep, volatilep = 0; |
7257 | base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i], |
7258 | &offset[i], &mode, &unsignedp, |
7259 | &reversep, &volatilep); |
7260 | if (reversep || volatilep || maybe_ne (a: bitpos[i], b: 0)) |
7261 | return false; |
7262 | if (TREE_CODE (base[i]) == MEM_REF |
7263 | && offset[i] == NULL_TREE |
7264 | && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME) |
7265 | { |
7266 | gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0)); |
7267 | if (is_gimple_assign (gs: def_stmt) |
7268 | && gimple_assign_rhs_code (gs: def_stmt) == POINTER_PLUS_EXPR |
7269 | && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR |
7270 | && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME) |
7271 | { |
7272 | if (maybe_ne (a: mem_ref_offset (base[i]), b: 0)) |
7273 | return false; |
7274 | base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0); |
7275 | offset[i] = gimple_assign_rhs2 (gs: def_stmt); |
7276 | } |
7277 | } |
7278 | } |
7279 | |
7280 | if (!operand_equal_p (base[0], base[1], flags: 0)) |
7281 | return false; |
7282 | if (maybe_ne (a: bitsize[0], b: bitsize[1])) |
7283 | return false; |
7284 | if (offset[0] != offset[1]) |
7285 | { |
7286 | if (!offset[0] || !offset[1]) |
7287 | return false; |
7288 | if (!operand_equal_p (offset[0], offset[1], flags: 0)) |
7289 | { |
7290 | tree step[2]; |
7291 | for (int i = 0; i < 2; ++i) |
7292 | { |
7293 | step[i] = integer_one_node; |
7294 | if (TREE_CODE (offset[i]) == SSA_NAME) |
7295 | { |
7296 | gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]); |
7297 | if (is_gimple_assign (gs: def_stmt) |
7298 | && gimple_assign_rhs_code (gs: def_stmt) == MULT_EXPR |
7299 | && (TREE_CODE (gimple_assign_rhs2 (def_stmt)) |
7300 | == INTEGER_CST)) |
7301 | { |
7302 | step[i] = gimple_assign_rhs2 (gs: def_stmt); |
7303 | offset[i] = gimple_assign_rhs1 (gs: def_stmt); |
7304 | } |
7305 | } |
7306 | else if (TREE_CODE (offset[i]) == MULT_EXPR) |
7307 | { |
7308 | step[i] = TREE_OPERAND (offset[i], 1); |
7309 | offset[i] = TREE_OPERAND (offset[i], 0); |
7310 | } |
7311 | tree rhs1 = NULL_TREE; |
7312 | if (TREE_CODE (offset[i]) == SSA_NAME) |
7313 | { |
7314 | gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]); |
7315 | if (gimple_assign_cast_p (s: def_stmt)) |
7316 | rhs1 = gimple_assign_rhs1 (gs: def_stmt); |
7317 | } |
7318 | else if (CONVERT_EXPR_P (offset[i])) |
7319 | rhs1 = TREE_OPERAND (offset[i], 0); |
7320 | if (rhs1 |
7321 | && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)) |
7322 | && INTEGRAL_TYPE_P (TREE_TYPE (offset[i])) |
7323 | && (TYPE_PRECISION (TREE_TYPE (offset[i])) |
7324 | >= TYPE_PRECISION (TREE_TYPE (rhs1)))) |
7325 | offset[i] = rhs1; |
7326 | } |
7327 | if (!operand_equal_p (offset[0], offset[1], flags: 0) |
7328 | || !operand_equal_p (step[0], step[1], flags: 0)) |
7329 | return false; |
7330 | } |
7331 | } |
7332 | return true; |
7333 | } |
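
/* E.g. (an explanatory addition): two lane accesses whose offsets are
"n_1 * 4" and "(sizetype) n_1 * 4" still compare equal here; the
MULT_EXPR step is split off into STEP[] and the widening cast on the
index is stripped before the final operand_equal_p checks. */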
7334 | |
7335 | |
7336 | enum scan_store_kind { |
7337 | /* Normal permutation. */ |
7338 | scan_store_kind_perm, |
7339 | |
7340 | /* Whole vector left shift permutation with zero init. */ |
7341 | scan_store_kind_lshift_zero, |
7342 | |
7343 | /* Whole vector left shift permutation and VEC_COND_EXPR. */ |
7344 | scan_store_kind_lshift_cond |
7345 | }; |
7346 | |
/* Function scan_store_can_perm_p.

Verify if we can perform the needed permutations or whole vector shifts.
Return -1 on failure, otherwise the exact log2 of vectype's nunits.
If USE_WHOLE_VECTOR is non-NULL, fill it with the enum scan_store_kind
operation to perform at each step. */
7353 | |
7354 | static int |
7355 | scan_store_can_perm_p (tree vectype, tree init, |
7356 | vec<enum scan_store_kind> *use_whole_vector = NULL) |
7357 | { |
7358 | enum machine_mode vec_mode = TYPE_MODE (vectype); |
7359 | unsigned HOST_WIDE_INT nunits; |
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
    return -1;
  int units_log2 = exact_log2 (nunits);
7363 | if (units_log2 <= 0) |
7364 | return -1; |
7365 | |
7366 | int i; |
7367 | enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm; |
7368 | for (i = 0; i <= units_log2; ++i) |
7369 | { |
7370 | unsigned HOST_WIDE_INT j, k; |
7371 | enum scan_store_kind kind = scan_store_kind_perm; |
7372 | vec_perm_builder sel (nunits, nunits, 1); |
      sel.quick_grow (nunits);
7374 | if (i == units_log2) |
7375 | { |
7376 | for (j = 0; j < nunits; ++j) |
7377 | sel[j] = nunits - 1; |
7378 | } |
7379 | else |
7380 | { |
7381 | for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j) |
7382 | sel[j] = j; |
7383 | for (k = 0; j < nunits; ++j, ++k) |
7384 | sel[j] = nunits + k; |
7385 | } |
7386 | vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits); |
7387 | if (!can_vec_perm_const_p (vec_mode, vec_mode, indices)) |
7388 | { |
7389 | if (i == units_log2) |
7390 | return -1; |
7391 | |
7392 | if (whole_vector_shift_kind == scan_store_kind_perm) |
7393 | { |
	      if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
7395 | return -1; |
7396 | whole_vector_shift_kind = scan_store_kind_lshift_zero; |
7397 | /* Whole vector shifts shift in zeros, so if init is all zero |
7398 | constant, there is no need to do anything further. */ |
7399 | if ((TREE_CODE (init) != INTEGER_CST |
7400 | && TREE_CODE (init) != REAL_CST) |
7401 | || !initializer_zerop (init)) |
7402 | { |
7403 | tree masktype = truth_type_for (vectype); |
7404 | if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST)) |
7405 | return -1; |
7406 | whole_vector_shift_kind = scan_store_kind_lshift_cond; |
7407 | } |
7408 | } |
7409 | kind = whole_vector_shift_kind; |
7410 | } |
7411 | if (use_whole_vector) |
7412 | { |
7413 | if (kind != scan_store_kind_perm && use_whole_vector->is_empty ()) |
	    use_whole_vector->safe_grow_cleared (i, true);
	  if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
	    use_whole_vector->safe_push (kind);
7417 | } |
7418 | } |
7419 | |
7420 | return units_log2; |
7421 | } |
7422 | |
7423 | |
7424 | /* Function check_scan_store. |
7425 | |
7426 | Check magic stores for #pragma omp scan {in,ex}clusive reductions. */ |
7427 | |
7428 | static bool |
7429 | check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype, |
7430 | enum vect_def_type rhs_dt, bool slp, tree mask, |
7431 | vect_memory_access_type memory_access_type) |
7432 | { |
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7434 | dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); |
7435 | tree ref_type; |
7436 | |
7437 | gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1); |
7438 | if (slp |
7439 | || mask |
7440 | || memory_access_type != VMAT_CONTIGUOUS |
7441 | || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR |
7442 | || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0)) |
7443 | || loop_vinfo == NULL |
7444 | || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) |
7445 | || STMT_VINFO_GROUPED_ACCESS (stmt_info) |
7446 | || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info)) |
7447 | || !integer_zerop (DR_INIT (dr_info->dr)) |
7448 | || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr))) |
7449 | || !alias_sets_conflict_p (get_alias_set (vectype), |
7450 | get_alias_set (TREE_TYPE (ref_type)))) |
7451 | { |
7452 | if (dump_enabled_p ()) |
7453 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7454 | "unsupported OpenMP scan store.\n" ); |
7455 | return false; |
7456 | } |
7457 | |
7458 | /* We need to pattern match code built by OpenMP lowering and simplified |
7459 | by following optimizations into something we can handle. |
7460 | #pragma omp simd reduction(inscan,+:r) |
7461 | for (...) |
7462 | { |
7463 | r += something (); |
7464 | #pragma omp scan inclusive (r) |
7465 | use (r); |
7466 | } |
7467 | shall have body with: |
7468 | // Initialization for input phase, store the reduction initializer: |
7469 | _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0); |
7470 | _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1); |
7471 | D.2042[_21] = 0; |
7472 | // Actual input phase: |
7473 | ... |
7474 | r.0_5 = D.2042[_20]; |
7475 | _6 = _4 + r.0_5; |
7476 | D.2042[_20] = _6; |
7477 | // Initialization for scan phase: |
7478 | _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2); |
7479 | _26 = D.2043[_25]; |
7480 | _27 = D.2042[_25]; |
7481 | _28 = _26 + _27; |
7482 | D.2043[_25] = _28; |
7483 | D.2042[_25] = _28; |
7484 | // Actual scan phase: |
7485 | ... |
7486 | r.1_8 = D.2042[_20]; |
7487 | ... |
7488 | The "omp simd array" variable D.2042 holds the privatized copy used |
7489 | inside of the loop and D.2043 is another one that holds copies of |
     the current original list item.  The separate GOMP_SIMD_LANE ifn
     kinds are there in order to allow optimizing the initializer store
     and combiner sequence, e.g. if it is originally some C++-ish user
     defined reduction, while still letting the vectorizer pattern
     recognize it and turn it into the appropriate vectorized scan.
7495 | |
7496 | For exclusive scan, this is slightly different: |
7497 | #pragma omp simd reduction(inscan,+:r) |
7498 | for (...) |
7499 | { |
7500 | use (r); |
7501 | #pragma omp scan exclusive (r) |
7502 | r += something (); |
7503 | } |
7504 | shall have body with: |
7505 | // Initialization for input phase, store the reduction initializer: |
7506 | _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0); |
7507 | _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1); |
7508 | D.2042[_21] = 0; |
7509 | // Actual input phase: |
7510 | ... |
7511 | r.0_5 = D.2042[_20]; |
7512 | _6 = _4 + r.0_5; |
7513 | D.2042[_20] = _6; |
7514 | // Initialization for scan phase: |
7515 | _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3); |
7516 | _26 = D.2043[_25]; |
7517 | D.2044[_25] = _26; |
7518 | _27 = D.2042[_25]; |
7519 | _28 = _26 + _27; |
7520 | D.2043[_25] = _28; |
7521 | // Actual scan phase: |
7522 | ... |
7523 | r.1_8 = D.2044[_20]; |
7524 | ... */ |
7525 | |
7526 | if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2) |
7527 | { |
7528 | /* Match the D.2042[_21] = 0; store above. Just require that |
7529 | it is a constant or external definition store. */ |
7530 | if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def) |
7531 | { |
7532 | fail_init: |
7533 | if (dump_enabled_p ()) |
7534 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7535 | "unsupported OpenMP scan initializer store.\n" ); |
7536 | return false; |
7537 | } |
7538 | |
7539 | if (! loop_vinfo->scan_map) |
7540 | loop_vinfo->scan_map = new hash_map<tree, tree>; |
7541 | tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0); |
      tree &cached = loop_vinfo->scan_map->get_or_insert (var);
7543 | if (cached) |
7544 | goto fail_init; |
7545 | cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info)); |
7546 | |
7547 | /* These stores can be vectorized normally. */ |
7548 | return true; |
7549 | } |
7550 | |
7551 | if (rhs_dt != vect_internal_def) |
7552 | { |
7553 | fail: |
7554 | if (dump_enabled_p ()) |
7555 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
7556 | "unsupported OpenMP scan combiner pattern.\n" ); |
7557 | return false; |
7558 | } |
7559 | |
7560 | gimple *stmt = STMT_VINFO_STMT (stmt_info); |
  tree rhs = gimple_assign_rhs1 (stmt);
7562 | if (TREE_CODE (rhs) != SSA_NAME) |
7563 | goto fail; |
7564 | |
7565 | gimple *other_store_stmt = NULL; |
7566 | tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0); |
7567 | bool inscan_var_store |
    = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7569 | |
7570 | if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4) |
7571 | { |
7572 | if (!inscan_var_store) |
7573 | { |
7574 | use_operand_p use_p; |
7575 | imm_use_iterator iter; |
7576 | FOR_EACH_IMM_USE_FAST (use_p, iter, rhs) |
7577 | { |
7578 | gimple *use_stmt = USE_STMT (use_p); |
	      if (use_stmt == stmt || is_gimple_debug (use_stmt))
		continue;
	      if (gimple_bb (use_stmt) != gimple_bb (stmt)
		  || !is_gimple_assign (use_stmt)
		  || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
7584 | || other_store_stmt |
7585 | || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME) |
7586 | goto fail; |
7587 | other_store_stmt = use_stmt; |
7588 | } |
7589 | if (other_store_stmt == NULL) |
7590 | goto fail; |
	  rhs = gimple_assign_lhs (other_store_stmt);
	  if (!single_imm_use (rhs, &use_p, &other_store_stmt))
7593 | goto fail; |
7594 | } |
7595 | } |
7596 | else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3) |
7597 | { |
7598 | use_operand_p use_p; |
7599 | imm_use_iterator iter; |
7600 | FOR_EACH_IMM_USE_FAST (use_p, iter, rhs) |
7601 | { |
7602 | gimple *use_stmt = USE_STMT (use_p); |
	  if (use_stmt == stmt || is_gimple_debug (use_stmt))
7604 | continue; |
7605 | if (other_store_stmt) |
7606 | goto fail; |
7607 | other_store_stmt = use_stmt; |
7608 | } |
7609 | } |
7610 | else |
7611 | goto fail; |
7612 | |
7613 | gimple *def_stmt = SSA_NAME_DEF_STMT (rhs); |
  if (gimple_bb (def_stmt) != gimple_bb (stmt)
      || !is_gimple_assign (def_stmt)
      || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
7617 | goto fail; |
7618 | |
  enum tree_code code = gimple_assign_rhs_code (def_stmt);
7620 | /* For pointer addition, we should use the normal plus for the vector |
7621 | operation. */ |
7622 | switch (code) |
7623 | { |
7624 | case POINTER_PLUS_EXPR: |
7625 | code = PLUS_EXPR; |
7626 | break; |
7627 | case MULT_HIGHPART_EXPR: |
7628 | goto fail; |
7629 | default: |
7630 | break; |
7631 | } |
7632 | if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code)) |
7633 | goto fail; |
7634 | |
  tree rhs1 = gimple_assign_rhs1 (def_stmt);
  tree rhs2 = gimple_assign_rhs2 (def_stmt);
7637 | if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME) |
7638 | goto fail; |
7639 | |
7640 | gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1); |
7641 | gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2); |
  if (gimple_bb (load1_stmt) != gimple_bb (stmt)
      || !gimple_assign_load_p (load1_stmt)
      || gimple_bb (load2_stmt) != gimple_bb (stmt)
      || !gimple_assign_load_p (load2_stmt))
7646 | goto fail; |
7647 | |
7648 | stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt); |
7649 | stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt); |
7650 | if (load1_stmt_info == NULL |
7651 | || load2_stmt_info == NULL |
7652 | || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info) |
7653 | != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)) |
7654 | || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info) |
7655 | != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))) |
7656 | goto fail; |
7657 | |
7658 | if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store) |
7659 | { |
7660 | dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info); |
7661 | if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR |
7662 | || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0))) |
7663 | goto fail; |
7664 | tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0); |
7665 | tree lrhs; |
      if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7667 | lrhs = rhs1; |
7668 | else |
7669 | lrhs = rhs2; |
7670 | use_operand_p use_p; |
7671 | imm_use_iterator iter; |
7672 | FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs) |
7673 | { |
7674 | gimple *use_stmt = USE_STMT (use_p); |
	  if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
7676 | continue; |
7677 | if (other_store_stmt) |
7678 | goto fail; |
7679 | other_store_stmt = use_stmt; |
7680 | } |
7681 | } |
7682 | |
7683 | if (other_store_stmt == NULL) |
7684 | goto fail; |
  if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
      || !gimple_store_p (other_store_stmt))
7687 | goto fail; |
7688 | |
7689 | stmt_vec_info other_store_stmt_info |
7690 | = loop_vinfo->lookup_stmt (other_store_stmt); |
7691 | if (other_store_stmt_info == NULL |
7692 | || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info) |
7693 | != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))) |
7694 | goto fail; |
7695 | |
7696 | gimple *stmt1 = stmt; |
7697 | gimple *stmt2 = other_store_stmt; |
7698 | if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store) |
    std::swap (stmt1, stmt2);
  if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
			    gimple_assign_rhs1 (load2_stmt)))
    {
      std::swap (rhs1, rhs2);
      std::swap (load1_stmt, load2_stmt);
      std::swap (load1_stmt_info, load2_stmt_info);
    }
  if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
			     gimple_assign_rhs1 (load1_stmt)))
7709 | goto fail; |
7710 | |
7711 | tree var3 = NULL_TREE; |
7712 | if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3 |
      && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
				gimple_assign_rhs1 (load2_stmt)))
7715 | goto fail; |
7716 | else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4) |
7717 | { |
7718 | dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info); |
7719 | if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR |
7720 | || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0))) |
7721 | goto fail; |
7722 | var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0); |
      if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
	  || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
	  || lookup_attribute ("omp simd inscan exclusive",
7726 | DECL_ATTRIBUTES (var3))) |
7727 | goto fail; |
7728 | } |
7729 | |
7730 | dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info); |
7731 | if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR |
7732 | || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0))) |
7733 | goto fail; |
7734 | |
7735 | tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0); |
7736 | tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0); |
  if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
      || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
      || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
	 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7741 | goto fail; |
7742 | |
  if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
    std::swap (var1, var2);
7745 | |
7746 | if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4) |
7747 | { |
      if (!lookup_attribute ("omp simd inscan exclusive",
7749 | DECL_ATTRIBUTES (var1))) |
7750 | goto fail; |
7751 | var1 = var3; |
7752 | } |
7753 | |
7754 | if (loop_vinfo->scan_map == NULL) |
7755 | goto fail; |
  tree *init = loop_vinfo->scan_map->get (var1);
7757 | if (init == NULL) |
7758 | goto fail; |
7759 | |
7760 | /* The IL is as expected, now check if we can actually vectorize it. |
7761 | Inclusive scan: |
7762 | _26 = D.2043[_25]; |
7763 | _27 = D.2042[_25]; |
7764 | _28 = _26 + _27; |
7765 | D.2043[_25] = _28; |
7766 | D.2042[_25] = _28; |
7767 | should be vectorized as (where _40 is the vectorized rhs |
7768 | from the D.2042[_21] = 0; store): |
7769 | _30 = MEM <vector(8) int> [(int *)&D.2043]; |
7770 | _31 = MEM <vector(8) int> [(int *)&D.2042]; |
7771 | _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>; |
7772 | _33 = _31 + _32; |
7773 | // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] }; |
7774 | _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>; |
7775 | _35 = _33 + _34; |
7776 | // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3], |
7777 | // _31[1]+.._31[4], ... _31[4]+.._31[7] }; |
7778 | _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>; |
7779 | _37 = _35 + _36; |
7780 | // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3], |
7781 | // _31[0]+.._31[4], ... _31[0]+.._31[7] }; |
7782 | _38 = _30 + _37; |
7783 | _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>; |
7784 | MEM <vector(8) int> [(int *)&D.2043] = _39; |
7785 | MEM <vector(8) int> [(int *)&D.2042] = _38; |
7786 | Exclusive scan: |
7787 | _26 = D.2043[_25]; |
7788 | D.2044[_25] = _26; |
7789 | _27 = D.2042[_25]; |
7790 | _28 = _26 + _27; |
7791 | D.2043[_25] = _28; |
7792 | should be vectorized as (where _40 is the vectorized rhs |
7793 | from the D.2042[_21] = 0; store): |
7794 | _30 = MEM <vector(8) int> [(int *)&D.2043]; |
7795 | _31 = MEM <vector(8) int> [(int *)&D.2042]; |
7796 | _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>; |
7797 | _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>; |
7798 | _34 = _32 + _33; |
7799 | // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3], |
7800 | // _31[3]+_31[4], ... _31[5]+.._31[6] }; |
7801 | _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>; |
7802 | _36 = _34 + _35; |
7803 | // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3], |
7804 | // _31[1]+.._31[4], ... _31[3]+.._31[6] }; |
7805 | _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>; |
7806 | _38 = _36 + _37; |
7807 | // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3], |
7808 | // _31[0]+.._31[4], ... _31[0]+.._31[6] }; |
7809 | _39 = _30 + _38; |
7810 | _50 = _31 + _39; |
7811 | _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>; |
7812 | MEM <vector(8) int> [(int *)&D.2044] = _39; |
7813 | MEM <vector(8) int> [(int *)&D.2042] = _51; */ |
7814 | enum machine_mode vec_mode = TYPE_MODE (vectype); |
7815 | optab optab = optab_for_tree_code (code, vectype, optab_default); |
  if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7817 | goto fail; |
7818 | |
  int units_log2 = scan_store_can_perm_p (vectype, *init);
7820 | if (units_log2 == -1) |
7821 | goto fail; |
7822 | |
7823 | return true; |
7824 | } |
7825 | |
7826 | |
7827 | /* Function vectorizable_scan_store. |
7828 | |
   Helper of vectorizable_store; takes the same arguments as
   vectorizable_store.
7830 | Handle only the transformation, checking is done in check_scan_store. */ |
7831 | |
7832 | static bool |
7833 | vectorizable_scan_store (vec_info *vinfo, |
7834 | stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, |
7835 | gimple **vec_stmt, int ncopies) |
7836 | { |
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7838 | dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); |
7839 | tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)); |
7840 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
7841 | |
7842 | if (dump_enabled_p ()) |
7843 | dump_printf_loc (MSG_NOTE, vect_location, |
7844 | "transform scan store. ncopies = %d\n" , ncopies); |
7845 | |
7846 | gimple *stmt = STMT_VINFO_STMT (stmt_info); |
  tree rhs = gimple_assign_rhs1 (stmt);
7848 | gcc_assert (TREE_CODE (rhs) == SSA_NAME); |
7849 | |
7850 | tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0); |
7851 | bool inscan_var_store |
    = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7853 | |
7854 | if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store) |
7855 | { |
7856 | use_operand_p use_p; |
7857 | imm_use_iterator iter; |
7858 | FOR_EACH_IMM_USE_FAST (use_p, iter, rhs) |
7859 | { |
7860 | gimple *use_stmt = USE_STMT (use_p); |
	  if (use_stmt == stmt || is_gimple_debug (use_stmt))
	    continue;
	  rhs = gimple_assign_lhs (use_stmt);
7864 | break; |
7865 | } |
7866 | } |
7867 | |
7868 | gimple *def_stmt = SSA_NAME_DEF_STMT (rhs); |
  enum tree_code code = gimple_assign_rhs_code (def_stmt);
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;
  gcc_assert (TREE_CODE_LENGTH (code) == binary_op
	      && commutative_tree_code (code));
  tree rhs1 = gimple_assign_rhs1 (def_stmt);
  tree rhs2 = gimple_assign_rhs2 (def_stmt);
7876 | gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME); |
7877 | gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1); |
7878 | gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2); |
7879 | stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt); |
7880 | stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt); |
7881 | dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info); |
7882 | dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info); |
7883 | tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0); |
7884 | tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0); |
7885 | |
  if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
    {
      std::swap (rhs1, rhs2);
      std::swap (var1, var2);
      std::swap (load1_dr_info, load2_dr_info);
7891 | } |
7892 | |
  tree *init = loop_vinfo->scan_map->get (var1);
7894 | gcc_assert (init); |
7895 | |
7896 | unsigned HOST_WIDE_INT nunits; |
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7898 | gcc_unreachable (); |
7899 | auto_vec<enum scan_store_kind, 16> use_whole_vector; |
  int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7901 | gcc_assert (units_log2 > 0); |
7902 | auto_vec<tree, 16> perms; |
  perms.quick_grow (units_log2 + 1);
7904 | tree zero_vec = NULL_TREE, masktype = NULL_TREE; |
7905 | for (int i = 0; i <= units_log2; ++i) |
7906 | { |
7907 | unsigned HOST_WIDE_INT j, k; |
7908 | vec_perm_builder sel (nunits, nunits, 1); |
      sel.quick_grow (nunits);
7910 | if (i == units_log2) |
7911 | for (j = 0; j < nunits; ++j) |
7912 | sel[j] = nunits - 1; |
7913 | else |
7914 | { |
7915 | for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j) |
7916 | sel[j] = j; |
7917 | for (k = 0; j < nunits; ++j, ++k) |
7918 | sel[j] = nunits + k; |
7919 | } |
7920 | vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits); |
7921 | if (!use_whole_vector.is_empty () |
7922 | && use_whole_vector[i] != scan_store_kind_perm) |
7923 | { |
7924 | if (zero_vec == NULL_TREE) |
7925 | zero_vec = build_zero_cst (vectype); |
7926 | if (masktype == NULL_TREE |
7927 | && use_whole_vector[i] == scan_store_kind_lshift_cond) |
7928 | masktype = truth_type_for (vectype); |
7929 | perms[i] = vect_gen_perm_mask_any (vectype, indices); |
7930 | } |
7931 | else |
7932 | perms[i] = vect_gen_perm_mask_checked (vectype, indices); |
7933 | } |
7934 | |
7935 | tree vec_oprnd1 = NULL_TREE; |
7936 | tree vec_oprnd2 = NULL_TREE; |
7937 | tree vec_oprnd3 = NULL_TREE; |
7938 | tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr); |
7939 | tree dataref_offset = build_int_cst (ref_type, 0); |
  tree bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info,
					   vectype, VMAT_CONTIGUOUS);
7942 | tree ldataref_ptr = NULL_TREE; |
7943 | tree orig = NULL_TREE; |
7944 | if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store) |
7945 | ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr); |
7946 | auto_vec<tree> vec_oprnds1; |
7947 | auto_vec<tree> vec_oprnds2; |
7948 | auto_vec<tree> vec_oprnds3; |
7949 | vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies, |
		     *init, &vec_oprnds1,
		     ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
		     rhs2, &vec_oprnds3);
7953 | for (int j = 0; j < ncopies; j++) |
7954 | { |
7955 | vec_oprnd1 = vec_oprnds1[j]; |
7956 | if (ldataref_ptr == NULL) |
7957 | vec_oprnd2 = vec_oprnds2[j]; |
7958 | vec_oprnd3 = vec_oprnds3[j]; |
7959 | if (j == 0) |
7960 | orig = vec_oprnd3; |
7961 | else if (!inscan_var_store) |
7962 | dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump); |
7963 | |
7964 | if (ldataref_ptr) |
7965 | { |
	  vec_oprnd2 = make_ssa_name (vectype);
7967 | tree data_ref = fold_build2 (MEM_REF, vectype, |
7968 | unshare_expr (ldataref_ptr), |
7969 | dataref_offset); |
7970 | vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr)); |
7971 | gimple *g = gimple_build_assign (vec_oprnd2, data_ref); |
	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7974 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
7975 | } |
7976 | |
7977 | tree v = vec_oprnd2; |
7978 | for (int i = 0; i < units_log2; ++i) |
7979 | { |
	  tree new_temp = make_ssa_name (vectype);
7981 | gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR, |
7982 | (zero_vec |
7983 | && (use_whole_vector[i] |
7984 | != scan_store_kind_perm)) |
7985 | ? zero_vec : vec_oprnd1, v, |
7986 | perms[i]); |
	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7989 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
7990 | |
7991 | if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond) |
7992 | { |
	      /* The whole vector shift shifted in zero bits; if *init is
		 not initializer_zerop, we need to replace those elements
		 with elements from vec_oprnd1.  */
7996 | tree_vector_builder vb (masktype, nunits, 1); |
7997 | for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k) |
		vb.quick_push (k < (HOST_WIDE_INT_1U << i)
			       ? boolean_false_node : boolean_true_node);
8000 | |
	      tree new_temp2 = make_ssa_name (vectype);
	      g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
				       new_temp, vec_oprnd1);
	      vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8007 | new_temp = new_temp2; |
8008 | } |
8009 | |
8010 | /* For exclusive scan, perform the perms[i] permutation once |
8011 | more. */ |
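	  /* That is, the same mask is replayed once more, yielding the pair
	     _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
	     _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
	     from the exclusive scan expansion documented in
	     check_scan_store.  */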
8012 | if (i == 0 |
8013 | && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 |
8014 | && v == vec_oprnd2) |
8015 | { |
8016 | v = new_temp; |
8017 | --i; |
8018 | continue; |
8019 | } |
8020 | |
	  tree new_temp2 = make_ssa_name (vectype);
	  g = gimple_build_assign (new_temp2, code, v, new_temp);
	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8025 | |
8026 | v = new_temp2; |
8027 | } |
8028 | |
      tree new_temp = make_ssa_name (vectype);
      gimple *g = gimple_build_assign (new_temp, code, orig, v);
      vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8033 | |
8034 | tree last_perm_arg = new_temp; |
8035 | /* For exclusive scan, new_temp computed above is the exclusive scan |
8036 | prefix sum. Turn it into inclusive prefix sum for the broadcast |
8037 | of the last element into orig. */ |
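      /* In the exclusive scan expansion documented in check_scan_store this
	 is the _50 = _31 + _39; statement, whose result then feeds the
	 { 7, ..., 7 } broadcast below.  */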
8038 | if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4) |
8039 | { |
	  last_perm_arg = make_ssa_name (vectype);
	  g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8044 | } |
8045 | |
      orig = make_ssa_name (vectype);
      g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
			       last_perm_arg, perms[units_log2]);
      vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8051 | |
8052 | if (!inscan_var_store) |
8053 | { |
8054 | tree data_ref = fold_build2 (MEM_REF, vectype, |
8055 | unshare_expr (dataref_ptr), |
8056 | dataref_offset); |
8057 | vect_copy_ref_info (data_ref, DR_REF (dr_info->dr)); |
8058 | g = gimple_build_assign (data_ref, new_temp); |
	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8061 | } |
8062 | } |
8063 | |
8064 | if (inscan_var_store) |
8065 | for (int j = 0; j < ncopies; j++) |
8066 | { |
8067 | if (j != 0) |
8068 | dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump); |
8069 | |
8070 | tree data_ref = fold_build2 (MEM_REF, vectype, |
8071 | unshare_expr (dataref_ptr), |
8072 | dataref_offset); |
8073 | vect_copy_ref_info (data_ref, DR_REF (dr_info->dr)); |
8074 | gimple *g = gimple_build_assign (data_ref, orig); |
	vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8077 | } |
8078 | return true; |
8079 | } |
8080 | |
8081 | |
8082 | /* Function vectorizable_store. |
8083 | |
8084 | Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure) |
8085 | that can be vectorized. |
8086 | If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized |
8087 | stmt to replace it, put it in VEC_STMT, and insert it at GSI. |
8088 | Return true if STMT_INFO is vectorizable in this way. */ |
8089 | |
8090 | static bool |
8091 | vectorizable_store (vec_info *vinfo, |
8092 | stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, |
8093 | gimple **vec_stmt, slp_tree slp_node, |
8094 | stmt_vector_for_cost *cost_vec) |
8095 | { |
8096 | tree data_ref; |
8097 | tree vec_oprnd = NULL_TREE; |
8098 | tree elem_type; |
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8100 | class loop *loop = NULL; |
8101 | machine_mode vec_mode; |
8102 | tree dummy; |
8103 | enum vect_def_type rhs_dt = vect_unknown_def_type; |
8104 | enum vect_def_type mask_dt = vect_unknown_def_type; |
8105 | tree dataref_ptr = NULL_TREE; |
8106 | tree dataref_offset = NULL_TREE; |
8107 | gimple *ptr_incr = NULL; |
8108 | int ncopies; |
8109 | int j; |
8110 | stmt_vec_info first_stmt_info; |
8111 | bool grouped_store; |
8112 | unsigned int group_size, i; |
8113 | bool slp = (slp_node != NULL); |
8114 | unsigned int vec_num; |
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8116 | tree aggr_type; |
8117 | gather_scatter_info gs_info; |
8118 | poly_uint64 vf; |
8119 | vec_load_store_type vls_type; |
8120 | tree ref_type; |
8121 | |
8122 | if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) |
8123 | return false; |
8124 | |
8125 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def |
8126 | && ! vec_stmt) |
8127 | return false; |
8128 | |
8129 | /* Is vectorizable store? */ |
8130 | |
8131 | tree mask = NULL_TREE, mask_vectype = NULL_TREE; |
8132 | slp_tree mask_node = NULL; |
  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8134 | { |
8135 | tree scalar_dest = gimple_assign_lhs (gs: assign); |
8136 | if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR |
8137 | && is_pattern_stmt_p (stmt_info)) |
8138 | scalar_dest = TREE_OPERAND (scalar_dest, 0); |
8139 | if (TREE_CODE (scalar_dest) != ARRAY_REF |
8140 | && TREE_CODE (scalar_dest) != BIT_FIELD_REF |
8141 | && TREE_CODE (scalar_dest) != INDIRECT_REF |
8142 | && TREE_CODE (scalar_dest) != COMPONENT_REF |
8143 | && TREE_CODE (scalar_dest) != IMAGPART_EXPR |
8144 | && TREE_CODE (scalar_dest) != REALPART_EXPR |
8145 | && TREE_CODE (scalar_dest) != MEM_REF) |
8146 | return false; |
8147 | } |
8148 | else |
8149 | { |
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (!call || !gimple_call_internal_p (call))
8152 | return false; |
8153 | |
      internal_fn ifn = gimple_call_internal_fn (call);
8155 | if (!internal_store_fn_p (ifn)) |
8156 | return false; |
8157 | |
8158 | int mask_index = internal_fn_mask_index (ifn); |
8159 | if (mask_index >= 0 && slp_node) |
8160 | mask_index = vect_slp_child_index_for_operand |
	  (call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8162 | if (mask_index >= 0 |
8163 | && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index, |
				      &mask, &mask_node, &mask_dt,
				      &mask_vectype))
8166 | return false; |
8167 | } |
8168 | |
8169 | /* Cannot have hybrid store SLP -- that would mean storing to the |
8170 | same location twice. */ |
8171 | gcc_assert (slp == PURE_SLP_STMT (stmt_info)); |
8172 | |
8173 | tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE; |
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8175 | |
8176 | if (loop_vinfo) |
8177 | { |
8178 | loop = LOOP_VINFO_LOOP (loop_vinfo); |
8179 | vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
8180 | } |
8181 | else |
8182 | vf = 1; |
8183 | |
8184 | /* Multiple types in SLP are handled by creating the appropriate number of |
8185 | vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in |
8186 | case of SLP. */ |
8187 | if (slp) |
8188 | ncopies = 1; |
8189 | else |
8190 | ncopies = vect_get_num_copies (loop_vinfo, vectype); |
8191 | |
8192 | gcc_assert (ncopies >= 1); |
8193 | |
8194 | /* FORNOW. This restriction should be relaxed. */ |
8195 | if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1) |
8196 | { |
8197 | if (dump_enabled_p ()) |
8198 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8199 | "multiple types in nested loop.\n" ); |
8200 | return false; |
8201 | } |
8202 | |
8203 | tree op; |
8204 | slp_tree op_node; |
8205 | if (!vect_check_store_rhs (vinfo, stmt_info, slp_node, |
			     &op, &op_node, &rhs_dt, &rhs_vectype, &vls_type))
8207 | return false; |
8208 | |
8209 | elem_type = TREE_TYPE (vectype); |
8210 | vec_mode = TYPE_MODE (vectype); |
8211 | |
8212 | if (!STMT_VINFO_DATA_REF (stmt_info)) |
8213 | return false; |
8214 | |
8215 | vect_memory_access_type memory_access_type; |
8216 | enum dr_alignment_support alignment_support_scheme; |
8217 | int misalignment; |
8218 | poly_int64 poffset; |
8219 | internal_fn lanes_ifn; |
  if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
			    ncopies, &memory_access_type, &poffset,
			    &alignment_support_scheme, &misalignment, &gs_info,
			    &lanes_ifn))
8224 | return false; |
8225 | |
8226 | if (mask) |
8227 | { |
8228 | if (memory_access_type == VMAT_CONTIGUOUS) |
8229 | { |
8230 | if (!VECTOR_MODE_P (vec_mode) |
8231 | || !can_vec_mask_load_store_p (vec_mode, |
8232 | TYPE_MODE (mask_vectype), false)) |
8233 | return false; |
8234 | } |
8235 | else if (memory_access_type != VMAT_LOAD_STORE_LANES |
8236 | && (memory_access_type != VMAT_GATHER_SCATTER |
8237 | || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype)))) |
8238 | { |
8239 | if (dump_enabled_p ()) |
8240 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8241 | "unsupported access type for masked store.\n" ); |
8242 | return false; |
8243 | } |
8244 | else if (memory_access_type == VMAT_GATHER_SCATTER |
8245 | && gs_info.ifn == IFN_LAST |
8246 | && !gs_info.decl) |
8247 | { |
8248 | if (dump_enabled_p ()) |
8249 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8250 | "unsupported masked emulated scatter.\n" ); |
8251 | return false; |
8252 | } |
8253 | } |
8254 | else |
8255 | { |
8256 | /* FORNOW. In some cases can vectorize even if data-type not supported |
8257 | (e.g. - array initialization with 0). */ |
      if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
8259 | return false; |
8260 | } |
8261 | |
8262 | dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL; |
8263 | grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info) |
8264 | && memory_access_type != VMAT_GATHER_SCATTER |
8265 | && (slp || memory_access_type != VMAT_CONTIGUOUS)); |
8266 | if (grouped_store) |
8267 | { |
8268 | first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); |
8269 | first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info); |
8270 | group_size = DR_GROUP_SIZE (first_stmt_info); |
8271 | } |
8272 | else |
8273 | { |
8274 | first_stmt_info = stmt_info; |
8275 | first_dr_info = dr_info; |
8276 | group_size = vec_num = 1; |
8277 | } |
8278 | |
8279 | if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt) |
8280 | { |
8281 | if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask, |
8282 | memory_access_type)) |
8283 | return false; |
8284 | } |
8285 | |
8286 | bool costing_p = !vec_stmt; |
8287 | if (costing_p) /* transformation not required. */ |
8288 | { |
8289 | STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type; |
8290 | |
8291 | if (loop_vinfo |
8292 | && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) |
8293 | check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node, |
8294 | vls_type, group_size, |
					    memory_access_type, &gs_info,
					    mask);
8297 | |
8298 | if (slp_node |
8299 | && (!vect_maybe_update_slp_op_vectype (op_node, vectype) |
8300 | || (mask |
8301 | && !vect_maybe_update_slp_op_vectype (mask_node, |
8302 | mask_vectype)))) |
8303 | { |
8304 | if (dump_enabled_p ()) |
8305 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
8306 | "incompatible vector types for invariants\n" ); |
8307 | return false; |
8308 | } |
8309 | |
8310 | if (dump_enabled_p () |
8311 | && memory_access_type != VMAT_ELEMENTWISE |
8312 | && memory_access_type != VMAT_GATHER_SCATTER |
8313 | && alignment_support_scheme != dr_aligned) |
8314 | dump_printf_loc (MSG_NOTE, vect_location, |
8315 | "Vectorizing an unaligned access.\n" ); |
8316 | |
8317 | STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; |
8318 | |
      /* As function vect_transform_stmt shows, for interleaving stores
	 the whole chain is vectorized when the last store in the chain
	 is reached; the other stores in the group are skipped.  So we
	 would like to cost only the last one here, but since getting at
	 the last is not trivial and costing the first one is equivalent,
	 we use the first one instead.  */
8325 | if (grouped_store |
8326 | && !slp |
8327 | && first_stmt_info != stmt_info) |
8328 | return true; |
8329 | } |
8330 | gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info)); |
8331 | |
8332 | /* Transform. */ |
8333 | |
8334 | ensure_base_align (dr_info); |
8335 | |
8336 | if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3) |
8337 | { |
8338 | gcc_assert (memory_access_type == VMAT_CONTIGUOUS); |
8339 | gcc_assert (!slp); |
8340 | if (costing_p) |
8341 | { |
8342 | unsigned int inside_cost = 0, prologue_cost = 0; |
8343 | if (vls_type == VLS_STORE_INVARIANT) |
	    prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					       stmt_info, 0, vect_prologue);
	  vect_get_store_cost (vinfo, stmt_info, ncopies,
			       alignment_support_scheme, misalignment,
			       &inside_cost, cost_vec);
8349 | |
8350 | if (dump_enabled_p ()) |
8351 | dump_printf_loc (MSG_NOTE, vect_location, |
8352 | "vect_model_store_cost: inside_cost = %d, " |
8353 | "prologue_cost = %d .\n" , |
8354 | inside_cost, prologue_cost); |
8355 | |
8356 | return true; |
8357 | } |
8358 | return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies); |
8359 | } |
8360 | |
8361 | if (grouped_store) |
8362 | { |
8363 | /* FORNOW */ |
8364 | gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info)); |
8365 | |
8366 | if (slp) |
8367 | { |
8368 | grouped_store = false; |
8369 | /* VEC_NUM is the number of vect stmts to be created for this |
8370 | group. */ |
8371 | vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); |
8372 | first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0]; |
8373 | gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info) |
8374 | == first_stmt_info); |
8375 | first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info); |
	  op = vect_get_store_rhs (first_stmt_info);
8377 | } |
8378 | else |
8379 | /* VEC_NUM is the number of vect stmts to be created for this |
8380 | group. */ |
8381 | vec_num = group_size; |
8382 | |
8383 | ref_type = get_group_alias_ptr_type (first_stmt_info); |
8384 | } |
8385 | else |
8386 | ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr)); |
8387 | |
8388 | if (!costing_p && dump_enabled_p ()) |
    dump_printf_loc (MSG_NOTE, vect_location, "transform store. ncopies = %d\n",
8390 | ncopies); |
8391 | |
8392 | /* Check if we need to update prologue cost for invariant, |
8393 | and update it accordingly if so. If it's not for |
8394 | interleaving store, we can just check vls_type; but if |
8395 | it's for interleaving store, need to check the def_type |
8396 | of the stored value since the current vls_type is just |
8397 | for first_stmt_info. */ |
8398 | auto update_prologue_cost = [&](unsigned *prologue_cost, tree store_rhs) |
8399 | { |
8400 | gcc_assert (costing_p); |
8401 | if (slp) |
8402 | return; |
8403 | if (grouped_store) |
8404 | { |
8405 | gcc_assert (store_rhs); |
8406 | enum vect_def_type cdt; |
8407 | gcc_assert (vect_is_simple_use (store_rhs, vinfo, &cdt)); |
8408 | if (cdt != vect_constant_def && cdt != vect_external_def) |
8409 | return; |
8410 | } |
8411 | else if (vls_type != VLS_STORE_INVARIANT) |
8412 | return; |
    *prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info,
					0, vect_prologue);
8415 | }; |
8416 | |
8417 | if (memory_access_type == VMAT_ELEMENTWISE |
8418 | || memory_access_type == VMAT_STRIDED_SLP) |
8419 | { |
8420 | unsigned inside_cost = 0, prologue_cost = 0; |
8421 | gimple_stmt_iterator incr_gsi; |
8422 | bool insert_after; |
8423 | gimple *incr; |
8424 | tree offvar; |
8425 | tree ivstep; |
8426 | tree running_off; |
8427 | tree stride_base, stride_step, alias_off; |
8428 | tree vec_oprnd = NULL_TREE; |
8429 | tree dr_offset; |
8430 | unsigned int g; |
8431 | /* Checked by get_load_store_type. */ |
8432 | unsigned int const_nunits = nunits.to_constant (); |
8433 | |
8434 | gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)); |
8435 | gcc_assert (!nested_in_vect_loop_p (loop, stmt_info)); |
8436 | |
      dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8438 | stride_base |
8439 | = fold_build_pointer_plus |
8440 | (DR_BASE_ADDRESS (first_dr_info->dr), |
8441 | size_binop (PLUS_EXPR, |
8442 | convert_to_ptrofftype (dr_offset), |
8443 | convert_to_ptrofftype (DR_INIT (first_dr_info->dr)))); |
8444 | stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr)); |
8445 | |
8446 | /* For a store with loop-invariant (but other than power-of-2) |
8447 | stride (i.e. not a grouped access) like so: |
8448 | |
8449 | for (i = 0; i < n; i += stride) |
8450 | array[i] = ...; |
8451 | |
8452 | we generate a new induction variable and new stores from |
8453 | the components of the (vectorized) rhs: |
8454 | |
8455 | for (j = 0; ; j += VF*stride) |
8456 | vectemp = ...; |
8457 | tmp1 = vectemp[0]; |
8458 | array[j] = tmp1; |
8459 | tmp2 = vectemp[1]; |
8460 | array[j + stride] = tmp2; |
8461 | ... |
8462 | */ |
8463 | |
8464 | unsigned nstores = const_nunits; |
8465 | unsigned lnel = 1; |
8466 | tree ltype = elem_type; |
8467 | tree lvectype = vectype; |
8468 | if (slp) |
8469 | { |
8470 | if (group_size < const_nunits |
8471 | && const_nunits % group_size == 0) |
8472 | { |
8473 | nstores = const_nunits / group_size; |
8474 | lnel = group_size; |
8475 | ltype = build_vector_type (elem_type, group_size); |
8476 | lvectype = vectype; |
8477 | |
8478 | /* First check if vec_extract optab doesn't support extraction |
8479 | of vector elts directly. */ |
8480 | scalar_mode elmode = SCALAR_TYPE_MODE (elem_type); |
8481 | machine_mode vmode; |
8482 | if (!VECTOR_MODE_P (TYPE_MODE (vectype)) |
8483 | || !related_vector_mode (TYPE_MODE (vectype), elmode, |
				       group_size).exists (&vmode)
	      || (convert_optab_handler (vec_extract_optab,
					 TYPE_MODE (vectype), vmode)
8487 | == CODE_FOR_nothing)) |
8488 | { |
8489 | /* Try to avoid emitting an extract of vector elements |
8490 | by performing the extracts using an integer type of the |
8491 | same size, extracting from a vector of those and then |
8492 | re-interpreting it as the original vector type if |
8493 | supported. */ |
8494 | unsigned lsize |
		= group_size * GET_MODE_BITSIZE (elmode);
8496 | unsigned int lnunits = const_nunits / group_size; |
8497 | /* If we can't construct such a vector fall back to |
8498 | element extracts from the original vector type and |
8499 | element size stores. */ |
	      if (int_mode_for_size (lsize, 0).exists (&elmode)
		  && VECTOR_MODE_P (TYPE_MODE (vectype))
		  && related_vector_mode (TYPE_MODE (vectype), elmode,
					  lnunits).exists (&vmode)
		  && (convert_optab_handler (vec_extract_optab,
					     vmode, elmode)
8506 | != CODE_FOR_nothing)) |
8507 | { |
8508 | nstores = lnunits; |
8509 | lnel = group_size; |
8510 | ltype = build_nonstandard_integer_type (lsize, 1); |
8511 | lvectype = build_vector_type (ltype, nstores); |
8512 | } |
8513 | /* Else fall back to vector extraction anyway. |
8514 | Fewer stores are more important than avoiding spilling |
8515 | of the vector we extract from. Compared to the |
8516 | construction case in vectorizable_load no store-forwarding |
8517 | issue exists here for reasonable archs. */ |
8518 | } |
8519 | } |
8520 | else if (group_size >= const_nunits |
8521 | && group_size % const_nunits == 0) |
8522 | { |
	      int mis_align = dr_misalignment (first_dr_info, vectype);
8524 | dr_alignment_support dr_align |
8525 | = vect_supportable_dr_alignment (vinfo, dr_info, vectype, |
8526 | mis_align); |
8527 | if (dr_align == dr_aligned |
8528 | || dr_align == dr_unaligned_supported) |
8529 | { |
8530 | nstores = 1; |
8531 | lnel = const_nunits; |
8532 | ltype = vectype; |
8533 | lvectype = vectype; |
8534 | alignment_support_scheme = dr_align; |
8535 | misalignment = mis_align; |
8536 | } |
8537 | } |
8538 | ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type)); |
8539 | ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); |
8540 | } |
8541 | |
8542 | if (!costing_p) |
8543 | { |
8544 | ivstep = stride_step; |
8545 | ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep, |
8546 | build_int_cst (TREE_TYPE (ivstep), vf)); |
8547 | |
8548 | standard_iv_increment_position (loop, &incr_gsi, &insert_after); |
8549 | |
8550 | stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base); |
8551 | ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep); |
8552 | create_iv (stride_base, PLUS_EXPR, ivstep, NULL, loop, &incr_gsi, |
8553 | insert_after, &offvar, NULL); |
	  incr = gsi_stmt (incr_gsi);
8555 | |
8556 | stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step); |
8557 | } |
8558 | |
8559 | alias_off = build_int_cst (ref_type, 0); |
8560 | stmt_vec_info next_stmt_info = first_stmt_info; |
8561 | auto_vec<tree> vec_oprnds; |
8562 | /* For costing some adjacent vector stores, we'd like to cost with |
8563 | the total number of them once instead of cost each one by one. */ |
8564 | unsigned int n_adjacent_stores = 0; |
8565 | for (g = 0; g < group_size; g++) |
8566 | { |
8567 | running_off = offvar; |
8568 | if (!costing_p) |
8569 | { |
8570 | if (g) |
8571 | { |
8572 | tree size = TYPE_SIZE_UNIT (ltype); |
8573 | tree pos |
8574 | = fold_build2 (MULT_EXPR, sizetype, size_int (g), size); |
		  tree newoff = copy_ssa_name (running_off, NULL);
8576 | incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR, |
8577 | running_off, pos); |
		  vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8579 | running_off = newoff; |
8580 | } |
8581 | } |
8582 | if (!slp) |
	    op = vect_get_store_rhs (next_stmt_info);
	  if (!costing_p)
	    vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies, op,
			       &vec_oprnds);
8587 | else |
8588 | update_prologue_cost (&prologue_cost, op); |
8589 | unsigned int group_el = 0; |
8590 | unsigned HOST_WIDE_INT |
8591 | elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype))); |
8592 | for (j = 0; j < ncopies; j++) |
8593 | { |
8594 | if (!costing_p) |
8595 | { |
8596 | vec_oprnd = vec_oprnds[j]; |
8597 | /* Pun the vector to extract from if necessary. */ |
8598 | if (lvectype != vectype) |
8599 | { |
		  tree tem = make_ssa_name (lvectype);
8601 | tree cvt |
8602 | = build1 (VIEW_CONVERT_EXPR, lvectype, vec_oprnd); |
8603 | gimple *pun = gimple_build_assign (tem, cvt); |
		  vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8605 | vec_oprnd = tem; |
8606 | } |
8607 | } |
8608 | for (i = 0; i < nstores; i++) |
8609 | { |
8610 | if (costing_p) |
8611 | { |
		      /* We only need a vector extraction when there is
			 more than one store.  */
8614 | if (nstores > 1) |
8615 | inside_cost |
			  += record_stmt_cost (cost_vec, 1, vec_to_scalar,
					       stmt_info, 0, vect_body);
		      /* Treat a single-lane vector type store as a scalar
			 store to avoid ICEs like PR110776.  */
8620 | if (VECTOR_TYPE_P (ltype) |
8621 | && known_ne (TYPE_VECTOR_SUBPARTS (ltype), 1U)) |
8622 | n_adjacent_stores++; |
8623 | else |
8624 | inside_cost |
			  += record_stmt_cost (cost_vec, 1, scalar_store,
					       stmt_info, 0, vect_body);
8627 | continue; |
8628 | } |
8629 | tree newref, newoff; |
8630 | gimple *incr, *assign; |
8631 | tree size = TYPE_SIZE (ltype); |
8632 | /* Extract the i'th component. */ |
8633 | tree pos = fold_build2 (MULT_EXPR, bitsizetype, |
8634 | bitsize_int (i), size); |
8635 | tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd, |
8636 | size, pos); |
8637 | |
8638 | elem = force_gimple_operand_gsi (gsi, elem, true, |
8639 | NULL_TREE, true, |
8640 | GSI_SAME_STMT); |
8641 | |
8642 | tree this_off = build_int_cst (TREE_TYPE (alias_off), |
8643 | group_el * elsz); |
8644 | newref = build2 (MEM_REF, ltype, |
8645 | running_off, this_off); |
8646 | vect_copy_ref_info (newref, DR_REF (first_dr_info->dr)); |
8647 | |
8648 | /* And store it to *running_off. */ |
8649 | assign = gimple_build_assign (newref, elem); |
	      vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8651 | |
8652 | group_el += lnel; |
8653 | if (! slp |
8654 | || group_el == group_size) |
8655 | { |
		  newoff = copy_ssa_name (running_off, NULL);
		  incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
					      running_off, stride_step);
		  vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8660 | |
8661 | running_off = newoff; |
8662 | group_el = 0; |
8663 | } |
8664 | if (g == group_size - 1 |
8665 | && !slp) |
8666 | { |
8667 | if (j == 0 && i == 0) |
8668 | *vec_stmt = assign; |
		  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8670 | } |
8671 | } |
8672 | } |
8673 | next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); |
	  vec_oprnds.truncate (0);
8675 | if (slp) |
8676 | break; |
8677 | } |
8678 | |
8679 | if (costing_p) |
8680 | { |
8681 | if (n_adjacent_stores > 0) |
	    vect_get_store_cost (vinfo, stmt_info, n_adjacent_stores,
				 alignment_support_scheme, misalignment,
				 &inside_cost, cost_vec);
8685 | if (dump_enabled_p ()) |
8686 | dump_printf_loc (MSG_NOTE, vect_location, |
8687 | "vect_model_store_cost: inside_cost = %d, " |
8688 | "prologue_cost = %d .\n" , |
8689 | inside_cost, prologue_cost); |
8690 | } |
8691 | |
8692 | return true; |
8693 | } |
8694 | |
8695 | gcc_assert (alignment_support_scheme); |
8696 | vec_loop_masks *loop_masks |
8697 | = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) |
8698 | ? &LOOP_VINFO_MASKS (loop_vinfo) |
8699 | : NULL); |
8700 | vec_loop_lens *loop_lens |
8701 | = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo) |
8702 | ? &LOOP_VINFO_LENS (loop_vinfo) |
8703 | : NULL); |
8704 | |
  /* The checks of vect_transform_stmt and vect_analyze_stmt would go here,
     but there are some differences.  We cannot enable both the lens and
     the masks during the transform, although both are allowed during
     analysis.  We shouldn't use the length-based approach if the loop is
     fully masked.  */
  if (cost_vec == NULL)
    /* The cost_vec is NULL during the transform.  */
8711 | gcc_assert ((!loop_lens || !loop_masks)); |
8712 | |
8713 | /* Targets with store-lane instructions must not require explicit |
8714 | realignment. vect_supportable_dr_alignment always returns either |
8715 | dr_aligned or dr_unaligned_supported for masked operations. */ |
8716 | gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES |
8717 | && !mask |
8718 | && !loop_masks) |
8719 | || alignment_support_scheme == dr_aligned |
8720 | || alignment_support_scheme == dr_unaligned_supported); |
8721 | |
8722 | tree offset = NULL_TREE; |
8723 | if (!known_eq (poffset, 0)) |
8724 | offset = size_int (poffset); |
8725 | |
8726 | tree bump; |
8727 | tree vec_offset = NULL_TREE; |
8728 | if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) |
8729 | { |
8730 | aggr_type = NULL_TREE; |
8731 | bump = NULL_TREE; |
8732 | } |
8733 | else if (memory_access_type == VMAT_GATHER_SCATTER) |
8734 | { |
8735 | aggr_type = elem_type; |
8736 | if (!costing_p) |
	vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
					 &bump, &vec_offset, loop_lens);
8739 | } |
8740 | else |
8741 | { |
8742 | if (memory_access_type == VMAT_LOAD_STORE_LANES) |
8743 | aggr_type = build_array_type_nelts (elem_type, vec_num * nunits); |
8744 | else |
8745 | aggr_type = vectype; |
8746 | bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type, |
8747 | memory_access_type, loop_lens); |
8748 | } |
8749 | |
8750 | if (mask && !costing_p) |
8751 | LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true; |
8752 | |
8753 | /* In case the vectorization factor (VF) is bigger than the number |
8754 | of elements that we can fit in a vectype (nunits), we have to generate |
     more than one vector stmt, i.e., we need to "unroll" the
8756 | vector stmt by a factor VF/nunits. */ |
8757 | |
8758 | /* In case of interleaving (non-unit grouped access): |
8759 | |
8760 | S1: &base + 2 = x2 |
8761 | S2: &base = x0 |
8762 | S3: &base + 1 = x1 |
8763 | S4: &base + 3 = x3 |
8764 | |
8765 | We create vectorized stores starting from base address (the access of the |
8766 | first stmt in the chain (S2 in the above example), when the last store stmt |
8767 | of the chain (S4) is reached: |
8768 | |
8769 | VS1: &base = vx2 |
8770 | VS2: &base + vec_size*1 = vx0 |
8771 | VS3: &base + vec_size*2 = vx1 |
8772 | VS4: &base + vec_size*3 = vx3 |
8773 | |
8774 | Then permutation statements are generated: |
8775 | |
8776 | VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} > |
8777 | VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} > |
8778 | ... |
8779 | |
8780 | And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts |
8781 | (the order of the data-refs in the output of vect_permute_store_chain |
8782 | corresponds to the order of scalar stmts in the interleaving chain - see |
8783 | the documentation of vect_permute_store_chain()). |
8784 | |
8785 | In case of both multiple types and interleaving, above vector stores and |
8786 | permutation stmts are created for every copy. The result vector stmts are |
8787 | put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding |
8788 | STMT_VINFO_RELATED_STMT for the next copies. |
8789 | */ |
8790 | |
8791 | auto_vec<tree> dr_chain (group_size); |
8792 | auto_vec<tree> vec_masks; |
8793 | tree vec_mask = NULL; |
8794 | auto_delete_vec<auto_vec<tree>> gvec_oprnds (group_size); |
8795 | for (i = 0; i < group_size; i++) |
    gvec_oprnds.quick_push (new auto_vec<tree> ());
8797 | |
8798 | if (memory_access_type == VMAT_LOAD_STORE_LANES) |
8799 | { |
8800 | gcc_assert (!slp && grouped_store); |
8801 | unsigned inside_cost = 0, prologue_cost = 0; |
8802 |       /* For costing some adjacent vector stores, we'd like to cost with |
8803 | 	 the total number of them once instead of costing each one by one. */ |
8804 | unsigned int n_adjacent_stores = 0; |
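      /* A sketch (operand types illustrative, unmasked case) of the gimple
	 this branch emits for a group of two V4SI vectors vx0/vx1:

	   vect_array[0] = vx0;
	   vect_array[1] = vx1;
	   MEM <int[8]> [(int *) ptr] = .STORE_LANES (vect_array);

	 which store-lane targets implement with an interleaving store
	 such as AArch64's st2.  */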
8805 | for (j = 0; j < ncopies; j++) |
8806 | { |
8807 | gimple *new_stmt; |
8808 | if (j == 0) |
8809 | { |
8810 | /* For interleaved stores we collect vectorized defs for all |
8811 | the stores in the group in DR_CHAIN. DR_CHAIN is then used |
8812 | as an input to vect_permute_store_chain(). */ |
8813 | stmt_vec_info next_stmt_info = first_stmt_info; |
8814 | for (i = 0; i < group_size; i++) |
8815 | { |
8816 | 	      /* Since gaps are not supported for interleaved stores, |
8817 | 		 DR_GROUP_SIZE is the exact number of stmts in the |
8818 | 		 chain. Therefore, NEXT_STMT_INFO can't be NULL. */ |
8819 | op = vect_get_store_rhs (stmt_info: next_stmt_info); |
8820 | if (costing_p) |
8821 | update_prologue_cost (&prologue_cost, op); |
8822 | else |
8823 | { |
8824 | vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: next_stmt_info, |
8825 | ncopies, op, |
8826 | vec_oprnds: gvec_oprnds[i]); |
8827 | vec_oprnd = (*gvec_oprnds[i])[0]; |
8828 | dr_chain.quick_push (obj: vec_oprnd); |
8829 | } |
8830 | next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); |
8831 | } |
8832 | |
8833 | if (!costing_p) |
8834 | { |
8835 | if (mask) |
8836 | { |
8837 | vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info, ncopies, |
8838 | op: mask, vec_oprnds: &vec_masks, |
8839 | vectype: mask_vectype); |
8840 | vec_mask = vec_masks[0]; |
8841 | } |
8842 | |
8843 | 	      /* We should have caught mismatched types earlier. */ |
8844 | gcc_assert ( |
8845 | useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd))); |
8846 | dataref_ptr |
8847 | = vect_create_data_ref_ptr (vinfo, first_stmt_info, |
8848 | aggr_type, NULL, offset, &dummy, |
8849 | gsi, &ptr_incr, false, bump); |
8850 | } |
8851 | } |
8852 | else if (!costing_p) |
8853 | { |
8854 | gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)); |
8855 | /* DR_CHAIN is then used as an input to |
8856 | vect_permute_store_chain(). */ |
8857 | for (i = 0; i < group_size; i++) |
8858 | { |
8859 | vec_oprnd = (*gvec_oprnds[i])[j]; |
8860 | dr_chain[i] = vec_oprnd; |
8861 | } |
8862 | if (mask) |
8863 | vec_mask = vec_masks[j]; |
8864 | dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi, |
8865 | stmt_info, bump); |
8866 | } |
8867 | |
8868 | if (costing_p) |
8869 | { |
8870 | n_adjacent_stores += vec_num; |
8871 | continue; |
8872 | } |
8873 | |
8874 | /* Get an array into which we can store the individual vectors. */ |
8875 | tree vec_array = create_vector_array (elem_type: vectype, nelems: vec_num); |
8876 | |
8877 | /* Invalidate the current contents of VEC_ARRAY. This should |
8878 | become an RTL clobber too, which prevents the vector registers |
8879 | from being upward-exposed. */ |
8880 | vect_clobber_variable (vinfo, stmt_info, gsi, var: vec_array); |
8881 | |
8882 | /* Store the individual vectors into the array. */ |
8883 | for (i = 0; i < vec_num; i++) |
8884 | { |
8885 | vec_oprnd = dr_chain[i]; |
8886 | write_vector_array (vinfo, stmt_info, gsi, vect: vec_oprnd, array: vec_array, |
8887 | n: i); |
8888 | } |
8889 | |
8890 | tree final_mask = NULL; |
8891 | tree final_len = NULL; |
8892 | tree bias = NULL; |
8893 | if (loop_masks) |
8894 | final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks, |
8895 | ncopies, vectype, j); |
8896 | if (vec_mask) |
8897 | final_mask = prepare_vec_mask (loop_vinfo, mask_type: mask_vectype, loop_mask: final_mask, |
8898 | vec_mask, gsi); |
8899 | |
8900 | if (lanes_ifn == IFN_MASK_LEN_STORE_LANES) |
8901 | { |
8902 | if (loop_lens) |
8903 | final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens, |
8904 | ncopies, vectype, j, 1); |
8905 | else |
8906 | final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype)); |
8907 | signed char biasval |
8908 | = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); |
8909 | bias = build_int_cst (intQI_type_node, biasval); |
8910 | if (!final_mask) |
8911 | { |
8912 | mask_vectype = truth_type_for (vectype); |
8913 | final_mask = build_minus_one_cst (mask_vectype); |
8914 | } |
8915 | } |
8916 | |
8917 | gcall *call; |
8918 | if (final_len && final_mask) |
8919 | { |
8920 | /* Emit: |
8921 | MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK, |
8922 | LEN, BIAS, VEC_ARRAY). */ |
8923 | unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype)); |
8924 | tree alias_ptr = build_int_cst (ref_type, align); |
8925 | call = gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES, 6, |
8926 | dataref_ptr, alias_ptr, |
8927 | final_mask, final_len, bias, |
8928 | vec_array); |
8929 | } |
8930 | else if (final_mask) |
8931 | { |
8932 | /* Emit: |
8933 | MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK, |
8934 | VEC_ARRAY). */ |
8935 | unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype)); |
8936 | tree alias_ptr = build_int_cst (ref_type, align); |
8937 | call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4, |
8938 | dataref_ptr, alias_ptr, |
8939 | final_mask, vec_array); |
8940 | } |
8941 | else |
8942 | { |
8943 | /* Emit: |
8944 | MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */ |
8945 | data_ref = create_array_ref (type: aggr_type, ptr: dataref_ptr, alias_ptr_type: ref_type); |
8946 | call = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array); |
8947 | gimple_call_set_lhs (gs: call, lhs: data_ref); |
8948 | } |
8949 | gimple_call_set_nothrow (s: call, nothrow_p: true); |
8950 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi); |
8951 | new_stmt = call; |
8952 | |
8953 | /* Record that VEC_ARRAY is now dead. */ |
8954 | vect_clobber_variable (vinfo, stmt_info, gsi, var: vec_array); |
8955 | if (j == 0) |
8956 | *vec_stmt = new_stmt; |
8957 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt); |
8958 | } |
8959 | |
8960 | if (costing_p) |
8961 | { |
8962 | if (n_adjacent_stores > 0) |
8963 | vect_get_store_cost (vinfo, stmt_info, ncopies: n_adjacent_stores, |
8964 | alignment_support_scheme, misalignment, |
8965 | inside_cost: &inside_cost, body_cost_vec: cost_vec); |
8966 | if (dump_enabled_p ()) |
8967 | dump_printf_loc (MSG_NOTE, vect_location, |
8968 | "vect_model_store_cost: inside_cost = %d, " |
8969 | 			 "prologue_cost = %d .\n", |
8970 | inside_cost, prologue_cost); |
8971 | } |
8972 | |
8973 | return true; |
8974 | } |
8975 | |
8976 | if (memory_access_type == VMAT_GATHER_SCATTER) |
8977 | { |
8978 | gcc_assert (!grouped_store); |
8979 | auto_vec<tree> vec_offsets; |
8980 | unsigned int inside_cost = 0, prologue_cost = 0; |
8981 | for (j = 0; j < ncopies; j++) |
8982 | { |
8983 | gimple *new_stmt; |
8984 | if (j == 0) |
8985 | { |
8986 | if (costing_p && vls_type == VLS_STORE_INVARIANT) |
8987 | prologue_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: scalar_to_vec, |
8988 | stmt_info, misalign: 0, where: vect_prologue); |
8989 | else if (!costing_p) |
8990 | { |
8991 | /* Since the store is not grouped, DR_GROUP_SIZE is 1, and |
8992 | DR_CHAIN is of size 1. */ |
8993 | gcc_assert (group_size == 1); |
8994 | if (slp_node) |
8995 | vect_get_slp_defs (op_node, gvec_oprnds[0]); |
8996 | else |
8997 | vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: first_stmt_info, |
8998 | ncopies, op, vec_oprnds: gvec_oprnds[0]); |
8999 | if (mask) |
9000 | { |
9001 | if (slp_node) |
9002 | vect_get_slp_defs (mask_node, &vec_masks); |
9003 | else |
9004 | vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info, |
9005 | ncopies, |
9006 | op: mask, vec_oprnds: &vec_masks, |
9007 | vectype: mask_vectype); |
9008 | } |
9009 | |
9010 | if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) |
9011 | vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info, |
9012 | slp_node, gs_info: &gs_info, |
9013 | dataref_ptr: &dataref_ptr, vec_offset: &vec_offsets); |
9014 | else |
9015 | dataref_ptr |
9016 | = vect_create_data_ref_ptr (vinfo, first_stmt_info, |
9017 | aggr_type, NULL, offset, |
9018 | &dummy, gsi, &ptr_incr, false, |
9019 | bump); |
9020 | } |
9021 | } |
9022 | else if (!costing_p) |
9023 | { |
9024 | gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)); |
9025 | if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info)) |
9026 | dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, |
9027 | gsi, stmt_info, bump); |
9028 | } |
9029 | |
9030 | new_stmt = NULL; |
9031 | for (i = 0; i < vec_num; ++i) |
9032 | { |
9033 | if (!costing_p) |
9034 | { |
9035 | vec_oprnd = (*gvec_oprnds[0])[vec_num * j + i]; |
9036 | if (mask) |
9037 | vec_mask = vec_masks[vec_num * j + i]; |
9038 | 		  /* We should have caught mismatched types earlier. */ |
9039 | gcc_assert (useless_type_conversion_p (vectype, |
9040 | TREE_TYPE (vec_oprnd))); |
9041 | } |
9042 | unsigned HOST_WIDE_INT align; |
9043 | tree final_mask = NULL_TREE; |
9044 | tree final_len = NULL_TREE; |
9045 | tree bias = NULL_TREE; |
9046 | if (!costing_p) |
9047 | { |
9048 | if (loop_masks) |
9049 | final_mask = vect_get_loop_mask (loop_vinfo, gsi, |
9050 | loop_masks, ncopies, |
9051 | vectype, j); |
9052 | if (vec_mask) |
9053 | final_mask = prepare_vec_mask (loop_vinfo, mask_type: mask_vectype, |
9054 | loop_mask: final_mask, vec_mask, gsi); |
9055 | } |
9056 | |
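	  /* Three strategies follow: an internal-fn scatter when the target
	     provides one, the legacy x86 builtin-decl path, and an emulated
	     scatter via scalar stores.  Semantically, for each active lane K
	     a scatter store performs roughly:

	       *(base + offset[K] * scale) = data[K];  */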
9057 | if (gs_info.ifn != IFN_LAST) |
9058 | { |
9059 | if (costing_p) |
9060 | { |
9061 | unsigned int cnunits = vect_nunits_for_cost (vec_type: vectype); |
9062 | inside_cost |
9063 | += record_stmt_cost (body_cost_vec: cost_vec, count: cnunits, kind: scalar_store, |
9064 | stmt_info, misalign: 0, where: vect_body); |
9065 | continue; |
9066 | } |
9067 | |
9068 | if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) |
9069 | vec_offset = vec_offsets[vec_num * j + i]; |
9070 | tree scale = size_int (gs_info.scale); |
9071 | |
9072 | if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE) |
9073 | { |
9074 | if (loop_lens) |
9075 | final_len = vect_get_loop_len (loop_vinfo, gsi, |
9076 | loop_lens, ncopies, |
9077 | vectype, j, 1); |
9078 | else |
9079 | final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype)); |
9080 | signed char biasval |
9081 | = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); |
9082 | bias = build_int_cst (intQI_type_node, biasval); |
9083 | if (!final_mask) |
9084 | { |
9085 | mask_vectype = truth_type_for (vectype); |
9086 | final_mask = build_minus_one_cst (mask_vectype); |
9087 | } |
9088 | } |
9089 | |
9090 | gcall *call; |
9091 | if (final_len && final_mask) |
9092 | call = gimple_build_call_internal |
9093 | (IFN_MASK_LEN_SCATTER_STORE, 7, dataref_ptr, |
9094 | vec_offset, scale, vec_oprnd, final_mask, |
9095 | final_len, bias); |
9096 | else if (final_mask) |
9097 | call = gimple_build_call_internal |
9098 | (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, |
9099 | vec_offset, scale, vec_oprnd, final_mask); |
9100 | else |
9101 | call = gimple_build_call_internal (IFN_SCATTER_STORE, 4, |
9102 | dataref_ptr, vec_offset, |
9103 | scale, vec_oprnd); |
9104 | gimple_call_set_nothrow (s: call, nothrow_p: true); |
9105 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi); |
9106 | new_stmt = call; |
9107 | } |
9108 | else if (gs_info.decl) |
9109 | { |
9110 | /* The builtin decls path for scatter is legacy, x86 only. */ |
9111 | gcc_assert (nunits.is_constant () |
9112 | && (!final_mask |
9113 | || SCALAR_INT_MODE_P |
9114 | (TYPE_MODE (TREE_TYPE (final_mask))))); |
9115 | if (costing_p) |
9116 | { |
9117 | unsigned int cnunits = vect_nunits_for_cost (vec_type: vectype); |
9118 | inside_cost |
9119 | += record_stmt_cost (body_cost_vec: cost_vec, count: cnunits, kind: scalar_store, |
9120 | stmt_info, misalign: 0, where: vect_body); |
9121 | continue; |
9122 | } |
9123 | poly_uint64 offset_nunits |
9124 | = TYPE_VECTOR_SUBPARTS (node: gs_info.offset_vectype); |
9125 | if (known_eq (nunits, offset_nunits)) |
9126 | { |
9127 | new_stmt = vect_build_one_scatter_store_call |
9128 | (vinfo, stmt_info, gsi, gs_info: &gs_info, |
9129 | ptr: dataref_ptr, offset: vec_offsets[vec_num * j + i], |
9130 | oprnd: vec_oprnd, mask: final_mask); |
9131 | vect_finish_stmt_generation (vinfo, stmt_info, |
9132 | vec_stmt: new_stmt, gsi); |
9133 | } |
9134 | else if (known_eq (nunits, offset_nunits * 2)) |
9135 | { |
9136 | 		      /* We have an offset vector with half the number of |
9137 | 			 lanes, but the builtins will store full vectype |
9138 | 			 data from the lower lanes. */ |
9139 | new_stmt = vect_build_one_scatter_store_call |
9140 | (vinfo, stmt_info, gsi, gs_info: &gs_info, |
9141 | ptr: dataref_ptr, |
9142 | offset: vec_offsets[2 * vec_num * j + 2 * i], |
9143 | oprnd: vec_oprnd, mask: final_mask); |
9144 | vect_finish_stmt_generation (vinfo, stmt_info, |
9145 | vec_stmt: new_stmt, gsi); |
9146 | int count = nunits.to_constant (); |
9147 | vec_perm_builder sel (count, count, 1); |
9148 | sel.quick_grow (len: count); |
9149 | for (int i = 0; i < count; ++i) |
9150 | sel[i] = i | (count / 2); |
9151 | vec_perm_indices indices (sel, 2, count); |
9152 | tree perm_mask |
9153 | = vect_gen_perm_mask_checked (vectype, indices); |
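		      /* E.g. for COUNT == 8 (illustrative) the selector
			 built above is {4, 5, 6, 7, 4, 5, 6, 7}, which moves
			 the upper half of VEC_OPRND into the lower lanes for
			 the second builtin call.  */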
9154 | new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR, |
9155 | vec_oprnd, vec_oprnd, |
9156 | perm_mask); |
9157 | vec_oprnd = make_ssa_name (var: vectype); |
9158 | gimple_set_lhs (new_stmt, vec_oprnd); |
9159 | vect_finish_stmt_generation (vinfo, stmt_info, |
9160 | vec_stmt: new_stmt, gsi); |
9161 | if (final_mask) |
9162 | { |
9163 | new_stmt = gimple_build_assign (NULL_TREE, |
9164 | VEC_UNPACK_HI_EXPR, |
9165 | final_mask); |
9166 | final_mask = make_ssa_name |
9167 | (var: truth_type_for (gs_info.offset_vectype)); |
9168 | gimple_set_lhs (new_stmt, final_mask); |
9169 | vect_finish_stmt_generation (vinfo, stmt_info, |
9170 | vec_stmt: new_stmt, gsi); |
9171 | } |
9172 | new_stmt = vect_build_one_scatter_store_call |
9173 | (vinfo, stmt_info, gsi, gs_info: &gs_info, |
9174 | ptr: dataref_ptr, |
9175 | offset: vec_offsets[2 * vec_num * j + 2 * i + 1], |
9176 | oprnd: vec_oprnd, mask: final_mask); |
9177 | vect_finish_stmt_generation (vinfo, stmt_info, |
9178 | vec_stmt: new_stmt, gsi); |
9179 | } |
9180 | else if (known_eq (nunits * 2, offset_nunits)) |
9181 | { |
9182 | 		      /* We have an offset vector with double the number of |
9183 | 			 lanes. Select the low/high part accordingly. */ |
9184 | vec_offset = vec_offsets[(vec_num * j + i) / 2]; |
9185 | if ((vec_num * j + i) & 1) |
9186 | { |
9187 | int count = offset_nunits.to_constant (); |
9188 | vec_perm_builder sel (count, count, 1); |
9189 | sel.quick_grow (len: count); |
9190 | for (int i = 0; i < count; ++i) |
9191 | sel[i] = i | (count / 2); |
9192 | vec_perm_indices indices (sel, 2, count); |
9193 | tree perm_mask = vect_gen_perm_mask_checked |
9194 | (TREE_TYPE (vec_offset), indices); |
9195 | new_stmt = gimple_build_assign (NULL_TREE, |
9196 | VEC_PERM_EXPR, |
9197 | vec_offset, |
9198 | vec_offset, |
9199 | perm_mask); |
9200 | vec_offset = make_ssa_name (TREE_TYPE (vec_offset)); |
9201 | gimple_set_lhs (new_stmt, vec_offset); |
9202 | vect_finish_stmt_generation (vinfo, stmt_info, |
9203 | vec_stmt: new_stmt, gsi); |
9204 | } |
9205 | new_stmt = vect_build_one_scatter_store_call |
9206 | (vinfo, stmt_info, gsi, gs_info: &gs_info, |
9207 | ptr: dataref_ptr, offset: vec_offset, |
9208 | oprnd: vec_oprnd, mask: final_mask); |
9209 | vect_finish_stmt_generation (vinfo, stmt_info, |
9210 | vec_stmt: new_stmt, gsi); |
9211 | } |
9212 | else |
9213 | gcc_unreachable (); |
9214 | } |
9215 | else |
9216 | { |
9217 | /* Emulated scatter. */ |
9218 | gcc_assert (!final_mask); |
9219 | if (costing_p) |
9220 | { |
9221 | unsigned int cnunits = vect_nunits_for_cost (vec_type: vectype); |
9222 | 		      /* For an emulated scatter, N offset vector element |
9223 | 			 extracts (we assume the scalar scaling and ptr + |
9224 | 			 offset add is consumed by the store). */ |
9225 | inside_cost |
9226 | += record_stmt_cost (body_cost_vec: cost_vec, count: cnunits, kind: vec_to_scalar, |
9227 | stmt_info, misalign: 0, where: vect_body); |
9228 | /* N scalar stores plus extracting the elements. */ |
9229 | inside_cost |
9230 | += record_stmt_cost (body_cost_vec: cost_vec, count: cnunits, kind: vec_to_scalar, |
9231 | stmt_info, misalign: 0, where: vect_body); |
9232 | inside_cost |
9233 | += record_stmt_cost (body_cost_vec: cost_vec, count: cnunits, kind: scalar_store, |
9234 | stmt_info, misalign: 0, where: vect_body); |
9235 | continue; |
9236 | } |
9237 | |
9238 | unsigned HOST_WIDE_INT const_nunits = nunits.to_constant (); |
9239 | unsigned HOST_WIDE_INT const_offset_nunits |
9240 | = TYPE_VECTOR_SUBPARTS (node: gs_info.offset_vectype).to_constant (); |
9241 | vec<constructor_elt, va_gc> *ctor_elts; |
9242 | vec_alloc (v&: ctor_elts, nelems: const_nunits); |
9243 | gimple_seq stmts = NULL; |
9244 | tree elt_type = TREE_TYPE (vectype); |
9245 | unsigned HOST_WIDE_INT elt_size |
9246 | = tree_to_uhwi (TYPE_SIZE (elt_type)); |
9247 | /* We support offset vectors with more elements |
9248 | than the data vector for now. */ |
9249 | unsigned HOST_WIDE_INT factor |
9250 | = const_offset_nunits / const_nunits; |
9251 | vec_offset = vec_offsets[(vec_num * j + i) / factor]; |
9252 | unsigned elt_offset |
9253 | = ((vec_num * j + i) % factor) * const_nunits; |
9254 | tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset)); |
9255 | tree scale = size_int (gs_info.scale); |
9256 | align = get_object_alignment (DR_REF (first_dr_info->dr)); |
9257 | tree ltype = build_aligned_type (TREE_TYPE (vectype), align); |
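		  /* For each element K the loop below emits roughly:

		       idx = BIT_FIELD_REF <vec_offset, ...>;
		       ptr = dataref_ptr + (sizetype) idx * scale;
		       elt = BIT_FIELD_REF <vec_oprnd, ...>;
		       *ptr = elt;  */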
9258 | for (unsigned k = 0; k < const_nunits; ++k) |
9259 | { |
9260 | /* Compute the offsetted pointer. */ |
9261 | tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type), |
9262 | bitsize_int (k + elt_offset)); |
9263 | tree idx |
9264 | = gimple_build (seq: &stmts, code: BIT_FIELD_REF, type: idx_type, |
9265 | ops: vec_offset, TYPE_SIZE (idx_type), ops: boff); |
9266 | idx = gimple_convert (seq: &stmts, sizetype, op: idx); |
9267 | idx = gimple_build (seq: &stmts, code: MULT_EXPR, sizetype, |
9268 | ops: idx, ops: scale); |
9269 | tree ptr |
9270 | = gimple_build (seq: &stmts, code: PLUS_EXPR, |
9271 | TREE_TYPE (dataref_ptr), |
9272 | ops: dataref_ptr, ops: idx); |
9273 | ptr = gimple_convert (seq: &stmts, ptr_type_node, op: ptr); |
9274 | /* Extract the element to be stored. */ |
9275 | tree elt |
9276 | = gimple_build (seq: &stmts, code: BIT_FIELD_REF, |
9277 | TREE_TYPE (vectype), |
9278 | ops: vec_oprnd, TYPE_SIZE (elt_type), |
9279 | bitsize_int (k * elt_size)); |
9280 | gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); |
9281 | stmts = NULL; |
9282 | tree ref |
9283 | = build2 (MEM_REF, ltype, ptr, |
9284 | build_int_cst (ref_type, 0)); |
9285 | new_stmt = gimple_build_assign (ref, elt); |
9286 | vect_finish_stmt_generation (vinfo, stmt_info, |
9287 | vec_stmt: new_stmt, gsi); |
9288 | } |
9289 | if (slp) |
9290 | slp_node->push_vec_def (def: new_stmt); |
9291 | } |
9292 | } |
9293 | if (!slp && !costing_p) |
9294 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt); |
9295 | } |
9296 | |
9297 | if (!slp && !costing_p) |
9298 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
9299 | |
9300 | if (costing_p && dump_enabled_p ()) |
9301 | dump_printf_loc (MSG_NOTE, vect_location, |
9302 | "vect_model_store_cost: inside_cost = %d, " |
9303 | 			 "prologue_cost = %d .\n", |
9304 | inside_cost, prologue_cost); |
9305 | |
9306 | return true; |
9307 | } |
9308 | |
9309 | gcc_assert (memory_access_type == VMAT_CONTIGUOUS |
9310 | || memory_access_type == VMAT_CONTIGUOUS_DOWN |
9311 | || memory_access_type == VMAT_CONTIGUOUS_PERMUTE |
9312 | || memory_access_type == VMAT_CONTIGUOUS_REVERSE); |
9313 | |
9314 | unsigned inside_cost = 0, prologue_cost = 0; |
9315 |   /* For costing some adjacent vector stores, we'd like to cost with |
9316 |      the total number of them once instead of costing each one by one. */ |
9317 | unsigned int n_adjacent_stores = 0; |
9318 | auto_vec<tree> result_chain (group_size); |
9319 | auto_vec<tree, 1> vec_oprnds; |
9320 | for (j = 0; j < ncopies; j++) |
9321 | { |
9322 | gimple *new_stmt; |
9323 | if (j == 0) |
9324 | { |
9325 | if (slp && !costing_p) |
9326 | { |
9327 | /* Get vectorized arguments for SLP_NODE. */ |
9328 | vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies: 1, op0: op, |
9329 | vec_oprnds0: &vec_oprnds, op1: mask, vec_oprnds1: &vec_masks); |
9330 | vec_oprnd = vec_oprnds[0]; |
9331 | if (mask) |
9332 | vec_mask = vec_masks[0]; |
9333 | } |
9334 | else |
9335 | { |
9336 | /* For interleaved stores we collect vectorized defs for all the |
9337 | stores in the group in DR_CHAIN. DR_CHAIN is then used as an |
9338 | input to vect_permute_store_chain(). |
9339 | |
9340 | If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN |
9341 | is of size 1. */ |
9342 | stmt_vec_info next_stmt_info = first_stmt_info; |
9343 | for (i = 0; i < group_size; i++) |
9344 | { |
9345 | 	      /* Since gaps are not supported for interleaved stores, |
9346 | 		 DR_GROUP_SIZE is the exact number of stmts in the chain. |
9347 | 		 Therefore, NEXT_STMT_INFO can't be NULL. If there is no |
9348 | 		 interleaving, DR_GROUP_SIZE is 1, and only one iteration |
9349 | 		 of the loop will be executed. */ |
9350 | op = vect_get_store_rhs (stmt_info: next_stmt_info); |
9351 | if (costing_p) |
9352 | update_prologue_cost (&prologue_cost, op); |
9353 | else |
9354 | { |
9355 | vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: next_stmt_info, |
9356 | ncopies, op, |
9357 | vec_oprnds: gvec_oprnds[i]); |
9358 | vec_oprnd = (*gvec_oprnds[i])[0]; |
9359 | dr_chain.quick_push (obj: vec_oprnd); |
9360 | } |
9361 | next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); |
9362 | } |
9363 | if (mask && !costing_p) |
9364 | { |
9365 | vect_get_vec_defs_for_operand (vinfo, stmt_vinfo: stmt_info, ncopies, |
9366 | op: mask, vec_oprnds: &vec_masks, |
9367 | vectype: mask_vectype); |
9368 | vec_mask = vec_masks[0]; |
9369 | } |
9370 | } |
9371 | |
9372 | 	  /* We should have caught mismatched types earlier. */ |
9373 | gcc_assert (costing_p |
9374 | || useless_type_conversion_p (vectype, |
9375 | TREE_TYPE (vec_oprnd))); |
9376 | bool simd_lane_access_p |
9377 | = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0; |
9378 | if (!costing_p |
9379 | && simd_lane_access_p |
9380 | && !loop_masks |
9381 | && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR |
9382 | && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0)) |
9383 | && integer_zerop (get_dr_vinfo_offset (vinfo, dr_info: first_dr_info)) |
9384 | && integer_zerop (DR_INIT (first_dr_info->dr)) |
9385 | && alias_sets_conflict_p (get_alias_set (aggr_type), |
9386 | get_alias_set (TREE_TYPE (ref_type)))) |
9387 | { |
9388 | dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr)); |
9389 | dataref_offset = build_int_cst (ref_type, 0); |
9390 | } |
9391 | else if (!costing_p) |
9392 | dataref_ptr |
9393 | = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type, |
9394 | simd_lane_access_p ? loop : NULL, |
9395 | offset, &dummy, gsi, &ptr_incr, |
9396 | simd_lane_access_p, bump); |
9397 | } |
9398 | else if (!costing_p) |
9399 | { |
9400 | gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)); |
9401 | /* DR_CHAIN is then used as an input to vect_permute_store_chain(). |
9402 | If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN is |
9403 | of size 1. */ |
9404 | for (i = 0; i < group_size; i++) |
9405 | { |
9406 | vec_oprnd = (*gvec_oprnds[i])[j]; |
9407 | dr_chain[i] = vec_oprnd; |
9408 | } |
9409 | if (mask) |
9410 | vec_mask = vec_masks[j]; |
9411 | if (dataref_offset) |
9412 | dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump); |
9413 | else |
9414 | dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi, |
9415 | stmt_info, bump); |
9416 | } |
9417 | |
9418 | new_stmt = NULL; |
9419 | if (grouped_store) |
9420 | { |
9421 | /* Permute. */ |
9422 | gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE); |
9423 | if (costing_p) |
9424 | { |
9425 | int group_size = DR_GROUP_SIZE (first_stmt_info); |
9426 | int nstmts = ceil_log2 (x: group_size) * group_size; |
9427 | inside_cost += record_stmt_cost (body_cost_vec: cost_vec, count: nstmts, kind: vec_perm, |
9428 | stmt_info, misalign: 0, where: vect_body); |
9429 | if (dump_enabled_p ()) |
9430 | dump_printf_loc (MSG_NOTE, vect_location, |
9431 | "vect_model_store_cost: " |
9432 | 				     "strided group_size = %d .\n", |
9433 | group_size); |
9434 | } |
9435 | else |
9436 | vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info, |
9437 | gsi, &result_chain); |
9438 | } |
9439 | |
9440 | stmt_vec_info next_stmt_info = first_stmt_info; |
9441 | for (i = 0; i < vec_num; i++) |
9442 | { |
9443 | if (!costing_p) |
9444 | { |
9445 | if (slp) |
9446 | vec_oprnd = vec_oprnds[i]; |
9447 | else if (grouped_store) |
9448 | /* For grouped stores vectorized defs are interleaved in |
9449 | vect_permute_store_chain(). */ |
9450 | vec_oprnd = result_chain[i]; |
9451 | } |
9452 | |
9453 | if (memory_access_type == VMAT_CONTIGUOUS_REVERSE) |
9454 | { |
9455 | if (costing_p) |
9456 | inside_cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: vec_perm, |
9457 | stmt_info, misalign: 0, where: vect_body); |
9458 | else |
9459 | { |
9460 | tree perm_mask = perm_mask_for_reverse (vectype); |
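		  /* E.g. for V4SI (illustrative) PERM_MASK is {3, 2, 1, 0},
		     reversing the lanes before the store to match the
		     downward-moving data reference.  */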
9461 | tree perm_dest = vect_create_destination_var ( |
9462 | vect_get_store_rhs (stmt_info), vectype); |
9463 | tree new_temp = make_ssa_name (var: perm_dest); |
9464 | |
9465 | /* Generate the permute statement. */ |
9466 | gimple *perm_stmt |
9467 | = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd, |
9468 | vec_oprnd, perm_mask); |
9469 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: perm_stmt, |
9470 | gsi); |
9471 | |
9472 | perm_stmt = SSA_NAME_DEF_STMT (new_temp); |
9473 | vec_oprnd = new_temp; |
9474 | } |
9475 | } |
9476 | |
9477 | if (costing_p) |
9478 | { |
9479 | n_adjacent_stores++; |
9480 | |
9481 | if (!slp) |
9482 | { |
9483 | next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); |
9484 | if (!next_stmt_info) |
9485 | break; |
9486 | } |
9487 | |
9488 | continue; |
9489 | } |
9490 | |
9491 | tree final_mask = NULL_TREE; |
9492 | tree final_len = NULL_TREE; |
9493 | tree bias = NULL_TREE; |
9494 | if (loop_masks) |
9495 | final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks, |
9496 | vec_num * ncopies, vectype, |
9497 | vec_num * j + i); |
9498 | if (slp && vec_mask) |
9499 | vec_mask = vec_masks[i]; |
9500 | if (vec_mask) |
9501 | final_mask = prepare_vec_mask (loop_vinfo, mask_type: mask_vectype, loop_mask: final_mask, |
9502 | vec_mask, gsi); |
9503 | |
9504 | if (i > 0) |
9505 | /* Bump the vector pointer. */ |
9506 | dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi, |
9507 | stmt_info, bump); |
9508 | |
9509 | unsigned misalign; |
9510 | unsigned HOST_WIDE_INT align; |
9511 | align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info)); |
9512 | if (alignment_support_scheme == dr_aligned) |
9513 | misalign = 0; |
9514 | else if (misalignment == DR_MISALIGNMENT_UNKNOWN) |
9515 | { |
9516 | align = dr_alignment (vect_dr_behavior (vinfo, dr_info: first_dr_info)); |
9517 | misalign = 0; |
9518 | } |
9519 | else |
9520 | misalign = misalignment; |
9521 | if (dataref_offset == NULL_TREE |
9522 | && TREE_CODE (dataref_ptr) == SSA_NAME) |
9523 | set_ptr_info_alignment (get_ptr_info (dataref_ptr), align, |
9524 | misalign); |
9525 | align = least_bit_hwi (x: misalign | align); |
9526 | |
9527 | 	  /* Compute the partial-access IFN when LOOP_LENS or FINAL_MASK is valid. */ |
9528 | machine_mode vmode = TYPE_MODE (vectype); |
9529 | machine_mode new_vmode = vmode; |
9530 | internal_fn partial_ifn = IFN_LAST; |
9531 | if (loop_lens) |
9532 | { |
9533 | opt_machine_mode new_ovmode |
9534 | = get_len_load_store_mode (vmode, false, &partial_ifn); |
9535 | new_vmode = new_ovmode.require (); |
9536 | unsigned factor |
9537 | = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode); |
9538 | final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens, |
9539 | vec_num * ncopies, vectype, |
9540 | vec_num * j + i, factor); |
9541 | } |
9542 | else if (final_mask) |
9543 | { |
9544 | if (!can_vec_mask_load_store_p ( |
9545 | vmode, TYPE_MODE (TREE_TYPE (final_mask)), false, |
9546 | &partial_ifn)) |
9547 | gcc_unreachable (); |
9548 | } |
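	  /* E.g. if the target only supports length-controlled stores of a
	     V4SI value (illustrative) wrapped as V16QI, NEW_VMODE differs
	     from VMODE and FACTOR is GET_MODE_UNIT_SIZE (V4SImode) == 4,
	     so the length is measured in bytes rather than in SI lanes.  */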
9549 | |
9550 | if (partial_ifn == IFN_MASK_LEN_STORE) |
9551 | { |
9552 | if (!final_len) |
9553 | { |
9554 | 	      /* Pass the full number of lanes as the 'len' argument of |
9555 | 		 MASK_LEN_STORE if LOOP_LENS is not in use. */ |
9556 | final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype)); |
9557 | } |
9558 | if (!final_mask) |
9559 | { |
9560 | 	      /* Pass an all-ones value as the 'mask' argument of |
9561 | 		 MASK_LEN_STORE if FINAL_MASK is not available. */ |
9562 | mask_vectype = truth_type_for (vectype); |
9563 | final_mask = build_minus_one_cst (mask_vectype); |
9564 | } |
9565 | } |
9566 | if (final_len) |
9567 | { |
9568 | signed char biasval |
9569 | = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); |
9570 | |
9571 | bias = build_int_cst (intQI_type_node, biasval); |
9572 | } |
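	  /* The target-reported bias is 0 or -1; the -1 variant accommodates
	     targets whose length-controlled accesses (e.g. s390's vstl)
	     encode the highest byte index rather than a byte count.  */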
9573 | |
9574 | /* Arguments are ready. Create the new vector stmt. */ |
9575 | if (final_len) |
9576 | { |
9577 | gcall *call; |
9578 | tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT); |
9579 | /* Need conversion if it's wrapped with VnQI. */ |
9580 | if (vmode != new_vmode) |
9581 | { |
9582 | tree new_vtype |
9583 | = build_vector_type_for_mode (unsigned_intQI_type_node, |
9584 | new_vmode); |
9585 | tree var = vect_get_new_ssa_name (new_vtype, vect_simple_var); |
9586 | vec_oprnd = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd); |
9587 | gassign *new_stmt |
9588 | = gimple_build_assign (var, VIEW_CONVERT_EXPR, vec_oprnd); |
9589 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
9590 | vec_oprnd = var; |
9591 | } |
9592 | |
9593 | if (partial_ifn == IFN_MASK_LEN_STORE) |
9594 | call = gimple_build_call_internal (IFN_MASK_LEN_STORE, 6, |
9595 | dataref_ptr, ptr, final_mask, |
9596 | final_len, bias, vec_oprnd); |
9597 | else |
9598 | call = gimple_build_call_internal (IFN_LEN_STORE, 5, |
9599 | dataref_ptr, ptr, final_len, |
9600 | bias, vec_oprnd); |
9601 | gimple_call_set_nothrow (s: call, nothrow_p: true); |
9602 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi); |
9603 | new_stmt = call; |
9604 | } |
9605 | else if (final_mask) |
9606 | { |
9607 | tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT); |
9608 | gcall *call |
9609 | = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr, |
9610 | ptr, final_mask, vec_oprnd); |
9611 | gimple_call_set_nothrow (s: call, nothrow_p: true); |
9612 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: call, gsi); |
9613 | new_stmt = call; |
9614 | } |
9615 | else |
9616 | { |
9617 | data_ref |
9618 | = fold_build2 (MEM_REF, vectype, dataref_ptr, |
9619 | dataref_offset ? dataref_offset |
9620 | : build_int_cst (ref_type, 0)); |
9621 | if (alignment_support_scheme == dr_aligned) |
9622 | ; |
9623 | else |
9624 | TREE_TYPE (data_ref) |
9625 | = build_aligned_type (TREE_TYPE (data_ref), |
9626 | align * BITS_PER_UNIT); |
9627 | vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr)); |
9628 | new_stmt = gimple_build_assign (data_ref, vec_oprnd); |
9629 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
9630 | } |
9631 | |
9632 | if (slp) |
9633 | continue; |
9634 | |
9635 | next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); |
9636 | if (!next_stmt_info) |
9637 | break; |
9638 | } |
9639 | if (!slp && !costing_p) |
9640 | { |
9641 | if (j == 0) |
9642 | *vec_stmt = new_stmt; |
9643 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt); |
9644 | } |
9645 | } |
9646 | |
9647 | if (costing_p) |
9648 | { |
9649 | if (n_adjacent_stores > 0) |
9650 | vect_get_store_cost (vinfo, stmt_info, ncopies: n_adjacent_stores, |
9651 | alignment_support_scheme, misalignment, |
9652 | inside_cost: &inside_cost, body_cost_vec: cost_vec); |
9653 | |
9654 |       /* When vectorizing a store into the function result, assign |
9655 | 	 a penalty if the function returns in a multi-register location. |
9656 | 	 In this case we assume we'll end up having to spill the |
9657 | 	 vector result and do piecewise loads as a conservative estimate. */ |
9658 | tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref); |
9659 | if (base |
9660 | && (TREE_CODE (base) == RESULT_DECL |
9661 | || (DECL_P (base) && cfun_returns (decl: base))) |
9662 | && !aggregate_value_p (base, cfun->decl)) |
9663 | { |
9664 | rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1); |
9665 | /* ??? Handle PARALLEL in some way. */ |
9666 | if (REG_P (reg)) |
9667 | { |
9668 | int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg)); |
9669 | 		  /* Assume that a single reg-reg move is possible and cheap, |
9670 | 		     and do not account for the vector-to-GPR move cost. */ |
9671 | if (nregs > 1) |
9672 | { |
9673 | /* Spill. */ |
9674 | prologue_cost |
9675 | += record_stmt_cost (body_cost_vec: cost_vec, count: ncopies, kind: vector_store, |
9676 | stmt_info, misalign: 0, where: vect_epilogue); |
9677 | /* Loads. */ |
9678 | prologue_cost |
9679 | += record_stmt_cost (body_cost_vec: cost_vec, count: ncopies * nregs, kind: scalar_load, |
9680 | stmt_info, misalign: 0, where: vect_epilogue); |
9681 | } |
9682 | } |
9683 | } |
9684 | if (dump_enabled_p ()) |
9685 | dump_printf_loc (MSG_NOTE, vect_location, |
9686 | "vect_model_store_cost: inside_cost = %d, " |
9687 | 			 "prologue_cost = %d .\n", |
9688 | inside_cost, prologue_cost); |
9689 | } |
9690 | |
9691 | return true; |
9692 | } |
9693 | |
9694 | /* Given a vector type VECTYPE, turns permutation SEL into the equivalent |
9695 | VECTOR_CST mask. No checks are made that the target platform supports the |
9696 | mask, so callers may wish to test can_vec_perm_const_p separately, or use |
9697 | vect_gen_perm_mask_checked. */ |
9698 | |
9699 | tree |
9700 | vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel) |
9701 | { |
9702 | tree mask_type; |
9703 | |
9704 | poly_uint64 nunits = sel.length (); |
9705 | gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype))); |
9706 | |
9707 | mask_type = build_vector_type (ssizetype, nunits); |
9708 | return vec_perm_indices_to_tree (mask_type, sel); |
9709 | } |
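/* As an illustration (types and lane counts hypothetical), reversing a
   four-lane vector:

     vec_perm_builder sel (4, 4, 1);
     for (unsigned int i = 0; i < 4; ++i)
       sel.quick_push (3 - i);
     tree mask = vect_gen_perm_mask_checked (vectype,
					     vec_perm_indices (sel, 1, 4));

   yields the VECTOR_CST {3, 2, 1, 0}, suitable as the selector operand of
   a VEC_PERM_EXPR on VECTYPE.  */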
9710 | |
9711 | /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p, |
9712 | i.e. that the target supports the pattern _for arbitrary input vectors_. */ |
9713 | |
9714 | tree |
9715 | vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel) |
9716 | { |
9717 | machine_mode vmode = TYPE_MODE (vectype); |
9718 | gcc_assert (can_vec_perm_const_p (vmode, vmode, sel)); |
9719 | return vect_gen_perm_mask_any (vectype, sel); |
9720 | } |
9721 | |
9722 | /* Given vector variables X and Y that were generated for the scalar |
9723 |    STMT_INFO, generate instructions to permute their elements using the |
9724 |    permutation mask MASK_VEC, insert them at *GSI and return the |
9725 |    permuted vector variable. */ |
9726 | |
9727 | static tree |
9728 | permute_vec_elements (vec_info *vinfo, |
9729 | tree x, tree y, tree mask_vec, stmt_vec_info stmt_info, |
9730 | gimple_stmt_iterator *gsi) |
9731 | { |
9732 | tree vectype = TREE_TYPE (x); |
9733 | tree perm_dest, data_ref; |
9734 | gimple *perm_stmt; |
9735 | |
9736 | tree scalar_dest = gimple_get_lhs (stmt_info->stmt); |
9737 | if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME) |
9738 | perm_dest = vect_create_destination_var (scalar_dest, vectype); |
9739 | else |
9740 | perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL); |
9741 | data_ref = make_ssa_name (var: perm_dest); |
9742 | |
9743 | /* Generate the permute statement. */ |
9744 | perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec); |
9745 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: perm_stmt, gsi); |
9746 | |
9747 | return data_ref; |
9748 | } |
9749 | |
9750 | /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP, |
9751 |    inserting them on the loop's preheader edge. Returns true if we were |
9752 |    successful in doing so (and thus STMT_INFO can then be moved), |
9753 |    otherwise returns false. HOIST_P indicates whether we actually want |
9754 |    to hoist the definitions; it is false when we are only costing. */ |
9755 | |
9756 | static bool |
9757 | hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop, bool hoist_p) |
9758 | { |
9759 | ssa_op_iter i; |
9760 | tree op; |
9761 | bool any = false; |
9762 | |
9763 | FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE) |
9764 | { |
9765 | gimple *def_stmt = SSA_NAME_DEF_STMT (op); |
9766 | if (!gimple_nop_p (g: def_stmt) |
9767 | && flow_bb_inside_loop_p (loop, gimple_bb (g: def_stmt))) |
9768 | { |
9769 | 	  /* Make sure we don't need to recurse. While we could handle |
9770 | 	     simple cases by recursing, for more complex use webs we don't |
9771 | 	     have an easy way to preserve the stmt order needed to fulfil |
9772 | 	     dependencies within them. */ |
9773 | tree op2; |
9774 | ssa_op_iter i2; |
9775 | if (gimple_code (g: def_stmt) == GIMPLE_PHI) |
9776 | return false; |
9777 | FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE) |
9778 | { |
9779 | gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2); |
9780 | if (!gimple_nop_p (g: def_stmt2) |
9781 | && flow_bb_inside_loop_p (loop, gimple_bb (g: def_stmt2))) |
9782 | return false; |
9783 | } |
9784 | any = true; |
9785 | } |
9786 | } |
9787 | |
9788 | if (!any) |
9789 | return true; |
9790 | |
9791 | if (!hoist_p) |
9792 | return true; |
9793 | |
9794 | FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE) |
9795 | { |
9796 | gimple *def_stmt = SSA_NAME_DEF_STMT (op); |
9797 | if (!gimple_nop_p (g: def_stmt) |
9798 | && flow_bb_inside_loop_p (loop, gimple_bb (g: def_stmt))) |
9799 | { |
9800 | gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt); |
9801 | gsi_remove (&gsi, false); |
9802 | gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt); |
9803 | } |
9804 | } |
9805 | |
9806 | return true; |
9807 | } |
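/* For instance (a hypothetical loop body), given

     _1 = a_5 + 4;
     x_2 = *_1;

   where both stmts are loop-invariant, hoist_defs_of_uses moves the
   definition of _1 to the preheader edge so that the invariant load
   itself can then be materialized there by vectorizable_load.  */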
9808 | |
9809 | /* vectorizable_load. |
9810 | |
9811 |    Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure) |
9812 | that can be vectorized. |
9813 | If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized |
9814 | stmt to replace it, put it in VEC_STMT, and insert it at GSI. |
9815 | Return true if STMT_INFO is vectorizable in this way. */ |
9816 | |
9817 | static bool |
9818 | vectorizable_load (vec_info *vinfo, |
9819 | stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, |
9820 | gimple **vec_stmt, slp_tree slp_node, |
9821 | stmt_vector_for_cost *cost_vec) |
9822 | { |
9823 | tree scalar_dest; |
9824 | tree vec_dest = NULL; |
9825 | tree data_ref = NULL; |
9826 | loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo); |
9827 | class loop *loop = NULL; |
9828 | class loop *containing_loop = gimple_bb (g: stmt_info->stmt)->loop_father; |
9829 | bool nested_in_vect_loop = false; |
9830 | tree elem_type; |
9831 | /* Avoid false positive uninitialized warning, see PR110652. */ |
9832 | tree new_temp = NULL_TREE; |
9833 | machine_mode mode; |
9834 | tree dummy; |
9835 | tree dataref_ptr = NULL_TREE; |
9836 | tree dataref_offset = NULL_TREE; |
9837 | gimple *ptr_incr = NULL; |
9838 | int ncopies; |
9839 | int i, j; |
9840 | unsigned int group_size; |
9841 | poly_uint64 group_gap_adj; |
9842 | tree msq = NULL_TREE, lsq; |
9843 | tree realignment_token = NULL_TREE; |
9844 | gphi *phi = NULL; |
9845 | vec<tree> dr_chain = vNULL; |
9846 | bool grouped_load = false; |
9847 | stmt_vec_info first_stmt_info; |
9848 | stmt_vec_info first_stmt_info_for_drptr = NULL; |
9849 | bool compute_in_loop = false; |
9850 | class loop *at_loop; |
9851 | int vec_num; |
9852 | bool slp = (slp_node != NULL); |
9853 | bool slp_perm = false; |
9854 | bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo); |
9855 | poly_uint64 vf; |
9856 | tree aggr_type; |
9857 | gather_scatter_info gs_info; |
9858 | tree ref_type; |
9859 | enum vect_def_type mask_dt = vect_unknown_def_type; |
9860 | |
9861 | if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) |
9862 | return false; |
9863 | |
9864 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def |
9865 | && ! vec_stmt) |
9866 | return false; |
9867 | |
9868 | if (!STMT_VINFO_DATA_REF (stmt_info)) |
9869 | return false; |
9870 | |
9871 | tree mask = NULL_TREE, mask_vectype = NULL_TREE; |
9872 | int mask_index = -1; |
9873 | slp_tree slp_op = NULL; |
9874 | if (gassign *assign = dyn_cast <gassign *> (p: stmt_info->stmt)) |
9875 | { |
9876 | scalar_dest = gimple_assign_lhs (gs: assign); |
9877 | if (TREE_CODE (scalar_dest) != SSA_NAME) |
9878 | return false; |
9879 | |
9880 | tree_code code = gimple_assign_rhs_code (gs: assign); |
9881 | if (code != ARRAY_REF |
9882 | && code != BIT_FIELD_REF |
9883 | && code != INDIRECT_REF |
9884 | && code != COMPONENT_REF |
9885 | && code != IMAGPART_EXPR |
9886 | && code != REALPART_EXPR |
9887 | && code != MEM_REF |
9888 | && TREE_CODE_CLASS (code) != tcc_declaration) |
9889 | return false; |
9890 | } |
9891 | else |
9892 | { |
9893 | gcall *call = dyn_cast <gcall *> (p: stmt_info->stmt); |
9894 | if (!call || !gimple_call_internal_p (gs: call)) |
9895 | return false; |
9896 | |
9897 | internal_fn ifn = gimple_call_internal_fn (gs: call); |
9898 | if (!internal_load_fn_p (ifn)) |
9899 | return false; |
9900 | |
9901 | scalar_dest = gimple_call_lhs (gs: call); |
9902 | if (!scalar_dest) |
9903 | return false; |
9904 | |
9905 | mask_index = internal_fn_mask_index (ifn); |
9906 | if (mask_index >= 0 && slp_node) |
9907 | mask_index = vect_slp_child_index_for_operand |
9908 | (call, op: mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info)); |
9909 | if (mask_index >= 0 |
9910 | && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index, |
9911 | mask: &mask, mask_node: &slp_op, mask_dt_out: &mask_dt, mask_vectype_out: &mask_vectype)) |
9912 | return false; |
9913 | } |
9914 | |
9915 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
9916 | poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (node: vectype); |
9917 | |
9918 | if (loop_vinfo) |
9919 | { |
9920 | loop = LOOP_VINFO_LOOP (loop_vinfo); |
9921 | nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info); |
9922 | vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
9923 | } |
9924 | else |
9925 | vf = 1; |
9926 | |
9927 | /* Multiple types in SLP are handled by creating the appropriate number of |
9928 | vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in |
9929 | case of SLP. */ |
9930 | if (slp) |
9931 | ncopies = 1; |
9932 | else |
9933 | ncopies = vect_get_num_copies (loop_vinfo, vectype); |
9934 | |
9935 | gcc_assert (ncopies >= 1); |
9936 | |
9937 | /* FORNOW. This restriction should be relaxed. */ |
9938 | if (nested_in_vect_loop && ncopies > 1) |
9939 | { |
9940 | if (dump_enabled_p ()) |
9941 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
9942 | 			 "multiple types in nested loop.\n"); |
9943 | return false; |
9944 | } |
9945 | |
9946 | /* Invalidate assumptions made by dependence analysis when vectorization |
9947 | on the unrolled body effectively re-orders stmts. */ |
9948 | if (ncopies > 1 |
9949 | && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0 |
9950 | && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo), |
9951 | STMT_VINFO_MIN_NEG_DIST (stmt_info))) |
9952 | { |
9953 | if (dump_enabled_p ()) |
9954 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
9955 | "cannot perform implicit CSE when unrolling " |
9956 | 			 "with negative dependence distance\n"); |
9957 | return false; |
9958 | } |
9959 | |
9960 | elem_type = TREE_TYPE (vectype); |
9961 | mode = TYPE_MODE (vectype); |
9962 | |
9963 |   /* FORNOW. In some cases we can vectorize even if the data type is |
9964 |      not supported (e.g. data copies). */ |
9965 | if (optab_handler (op: mov_optab, mode) == CODE_FOR_nothing) |
9966 | { |
9967 | if (dump_enabled_p ()) |
9968 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
9969 | 			 "Aligned load, but unsupported type.\n"); |
9970 | return false; |
9971 | } |
9972 | |
9973 | /* Check if the load is a part of an interleaving chain. */ |
9974 | if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) |
9975 | { |
9976 | grouped_load = true; |
9977 | /* FORNOW */ |
9978 | gcc_assert (!nested_in_vect_loop); |
9979 | gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info)); |
9980 | |
9981 | first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); |
9982 | group_size = DR_GROUP_SIZE (first_stmt_info); |
9983 | |
9984 | /* Refuse non-SLP vectorization of SLP-only groups. */ |
9985 | if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info)) |
9986 | { |
9987 | if (dump_enabled_p ()) |
9988 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
9989 | 			 "cannot vectorize load in non-SLP mode.\n"); |
9990 | return false; |
9991 | } |
9992 | |
9993 | /* Invalidate assumptions made by dependence analysis when vectorization |
9994 | on the unrolled body effectively re-orders stmts. */ |
9995 | if (!PURE_SLP_STMT (stmt_info) |
9996 | && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0 |
9997 | && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo), |
9998 | STMT_VINFO_MIN_NEG_DIST (stmt_info))) |
9999 | { |
10000 | if (dump_enabled_p ()) |
10001 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
10002 | "cannot perform implicit CSE when performing " |
10003 | 			 "group loads with negative dependence distance\n"); |
10004 | return false; |
10005 | } |
10006 | } |
10007 | else |
10008 | group_size = 1; |
10009 | |
10010 | if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()) |
10011 | { |
10012 | slp_perm = true; |
10013 | |
10014 | if (!loop_vinfo) |
10015 | { |
10016 | 	  /* In BB vectorization we may not actually use a loaded vector |
10017 | 	     that accesses elements in excess of DR_GROUP_SIZE. */ |
10018 | stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0]; |
10019 | group_info = DR_GROUP_FIRST_ELEMENT (group_info); |
10020 | unsigned HOST_WIDE_INT nunits; |
10021 | unsigned j, k, maxk = 0; |
10022 | FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k) |
10023 | if (k > maxk) |
10024 | maxk = k; |
10025 | tree vectype = SLP_TREE_VECTYPE (slp_node); |
10026 | if (!TYPE_VECTOR_SUBPARTS (node: vectype).is_constant (const_value: &nunits) |
10027 | || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1))) |
10028 | { |
10029 | if (dump_enabled_p ()) |
10030 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
10031 | "BB vectorization with gaps at the end of " |
10032 | 				 "a load is not supported\n"); |
10033 | return false; |
10034 | } |
10035 | } |
10036 | |
10037 | auto_vec<tree> tem; |
10038 | unsigned n_perms; |
10039 | if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf, |
10040 | true, &n_perms)) |
10041 | { |
10042 | if (dump_enabled_p ()) |
10043 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, |
10044 | vect_location, |
10045 | 			     "unsupported load permutation\n"); |
10046 | return false; |
10047 | } |
10048 | } |
10049 | |
10050 | vect_memory_access_type memory_access_type; |
10051 | enum dr_alignment_support alignment_support_scheme; |
10052 | int misalignment; |
10053 | poly_int64 poffset; |
10054 | internal_fn lanes_ifn; |
10055 | if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, masked_p: mask, vls_type: VLS_LOAD, |
10056 | ncopies, memory_access_type: &memory_access_type, poffset: &poffset, |
10057 | alignment_support_scheme: &alignment_support_scheme, misalignment: &misalignment, gs_info: &gs_info, |
10058 | lanes_ifn: &lanes_ifn)) |
10059 | return false; |
10060 | |
10061 | if (mask) |
10062 | { |
10063 | if (memory_access_type == VMAT_CONTIGUOUS) |
10064 | { |
10065 | machine_mode vec_mode = TYPE_MODE (vectype); |
10066 | if (!VECTOR_MODE_P (vec_mode) |
10067 | || !can_vec_mask_load_store_p (vec_mode, |
10068 | TYPE_MODE (mask_vectype), true)) |
10069 | return false; |
10070 | } |
10071 | else if (memory_access_type != VMAT_LOAD_STORE_LANES |
10072 | && memory_access_type != VMAT_GATHER_SCATTER) |
10073 | { |
10074 | if (dump_enabled_p ()) |
10075 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
10076 | 			     "unsupported access type for masked load.\n"); |
10077 | return false; |
10078 | } |
10079 | else if (memory_access_type == VMAT_GATHER_SCATTER |
10080 | && gs_info.ifn == IFN_LAST |
10081 | && !gs_info.decl) |
10082 | { |
10083 | if (dump_enabled_p ()) |
10084 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
10085 | 			     "unsupported masked emulated gather.\n"); |
10086 | return false; |
10087 | } |
10088 | else if (memory_access_type == VMAT_ELEMENTWISE |
10089 | || memory_access_type == VMAT_STRIDED_SLP) |
10090 | { |
10091 | if (dump_enabled_p ()) |
10092 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
10093 | 			     "unsupported masked strided access.\n"); |
10094 | return false; |
10095 | } |
10096 | } |
10097 | |
10098 | bool costing_p = !vec_stmt; |
10099 | |
10100 | if (costing_p) /* transformation not required. */ |
10101 | { |
10102 | if (slp_node |
10103 | && mask |
10104 | && !vect_maybe_update_slp_op_vectype (slp_op, |
10105 | mask_vectype)) |
10106 | { |
10107 | if (dump_enabled_p ()) |
10108 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
10109 | 			     "incompatible vector types for invariants\n"); |
10110 | return false; |
10111 | } |
10112 | |
10113 | if (!slp) |
10114 | STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type; |
10115 | |
10116 | if (loop_vinfo |
10117 | && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) |
10118 | check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node, |
10119 | vls_type: VLS_LOAD, group_size, |
10120 | memory_access_type, gs_info: &gs_info, |
10121 | scalar_mask: mask); |
10122 | |
10123 | if (dump_enabled_p () |
10124 | && memory_access_type != VMAT_ELEMENTWISE |
10125 | && memory_access_type != VMAT_GATHER_SCATTER |
10126 | && alignment_support_scheme != dr_aligned) |
10127 | dump_printf_loc (MSG_NOTE, vect_location, |
10128 | 			 "Vectorizing an unaligned access.\n"); |
10129 | |
10130 | if (memory_access_type == VMAT_LOAD_STORE_LANES) |
10131 | vinfo->any_known_not_updated_vssa = true; |
10132 | |
10133 | STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; |
10134 | } |
10135 | |
10136 | if (!slp) |
10137 | gcc_assert (memory_access_type |
10138 | == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info)); |
10139 | |
10140 | if (dump_enabled_p () && !costing_p) |
10141 | dump_printf_loc (MSG_NOTE, vect_location, |
10142 | 		     "transform load. ncopies = %d\n", ncopies); |
10143 | |
10144 | /* Transform. */ |
10145 | |
10146 | dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL; |
10147 | ensure_base_align (dr_info); |
10148 | |
10149 | if (memory_access_type == VMAT_INVARIANT) |
10150 | { |
10151 | gcc_assert (!grouped_load && !mask && !bb_vinfo); |
10152 | /* If we have versioned for aliasing or the loop doesn't |
10153 | have any data dependencies that would preclude this, |
10154 |        then we are sure this is a loop-invariant load and |
10155 |        thus we can insert it on the preheader edge. |
10156 |        TODO: hoist_defs_of_uses should ideally be computed |
10157 |        once at analysis time, remembered and reused at |
10158 |        transform time. */ |
10159 | bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) |
10160 | && !nested_in_vect_loop |
10161 | && hoist_defs_of_uses (stmt_info, loop, hoist_p: !costing_p)); |
10162 | if (costing_p) |
10163 | { |
10164 | enum vect_cost_model_location cost_loc |
10165 | = hoist_p ? vect_prologue : vect_body; |
10166 | unsigned int cost = record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: scalar_load, |
10167 | stmt_info, misalign: 0, where: cost_loc); |
10168 | cost += record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: scalar_to_vec, stmt_info, misalign: 0, |
10169 | where: cost_loc); |
10170 | unsigned int prologue_cost = hoist_p ? cost : 0; |
10171 | unsigned int inside_cost = hoist_p ? 0 : cost; |
10172 | if (dump_enabled_p ()) |
10173 | dump_printf_loc (MSG_NOTE, vect_location, |
10174 | "vect_model_load_cost: inside_cost = %d, " |
10175 | 			     "prologue_cost = %d .\n", |
10176 | inside_cost, prologue_cost); |
10177 | return true; |
10178 | } |
10179 | if (hoist_p) |
10180 | { |
10181 | gassign *stmt = as_a <gassign *> (p: stmt_info->stmt); |
10182 | if (dump_enabled_p ()) |
10183 | dump_printf_loc (MSG_NOTE, vect_location, |
10184 | "hoisting out of the vectorized loop: %G" , |
10185 | (gimple *) stmt); |
10186 | scalar_dest = copy_ssa_name (var: scalar_dest); |
10187 | tree rhs = unshare_expr (gimple_assign_rhs1 (gs: stmt)); |
10188 | edge pe = loop_preheader_edge (loop); |
10189 | gphi *vphi = get_virtual_phi (loop->header); |
10190 | tree vuse; |
10191 | if (vphi) |
10192 | vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe); |
10193 | else |
10194 | vuse = gimple_vuse (g: gsi_stmt (i: *gsi)); |
10195 | gimple *new_stmt = gimple_build_assign (scalar_dest, rhs); |
10196 | gimple_set_vuse (g: new_stmt, vuse); |
10197 | gsi_insert_on_edge_immediate (pe, new_stmt); |
10198 | } |
10199 | /* These copies are all equivalent. */ |
10200 | if (hoist_p) |
10201 | new_temp = vect_init_vector (vinfo, stmt_info, val: scalar_dest, |
10202 | type: vectype, NULL); |
10203 | else |
10204 | { |
10205 | gimple_stmt_iterator gsi2 = *gsi; |
10206 | gsi_next (i: &gsi2); |
10207 | new_temp = vect_init_vector (vinfo, stmt_info, val: scalar_dest, |
10208 | type: vectype, gsi: &gsi2); |
10209 | } |
10210 | gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp); |
10211 | if (slp) |
10212 | for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j) |
10213 | slp_node->push_vec_def (def: new_stmt); |
10214 | else |
10215 | { |
10216 | for (j = 0; j < ncopies; ++j) |
10217 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt); |
10218 | *vec_stmt = new_stmt; |
10219 | } |
10220 | return true; |
10221 | } |
10222 | |
10223 | if (memory_access_type == VMAT_ELEMENTWISE |
10224 | || memory_access_type == VMAT_STRIDED_SLP) |
10225 | { |
10226 | gimple_stmt_iterator incr_gsi; |
10227 | bool insert_after; |
10228 | tree offvar; |
10229 | tree ivstep; |
10230 | tree running_off; |
10231 | vec<constructor_elt, va_gc> *v = NULL; |
10232 | tree stride_base, stride_step, alias_off; |
10233 | /* Checked by get_load_store_type. */ |
10234 | unsigned int const_nunits = nunits.to_constant (); |
10235 | unsigned HOST_WIDE_INT cst_offset = 0; |
10236 | tree dr_offset; |
10237 | unsigned int inside_cost = 0; |
10238 | |
10239 | gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)); |
10240 | gcc_assert (!nested_in_vect_loop); |
10241 | |
10242 | if (grouped_load) |
10243 | { |
10244 | first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); |
10245 | first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info); |
10246 | } |
10247 | else |
10248 | { |
10249 | first_stmt_info = stmt_info; |
10250 | first_dr_info = dr_info; |
10251 | } |
10252 | |
10253 | if (slp && grouped_load) |
10254 | { |
10255 | group_size = DR_GROUP_SIZE (first_stmt_info); |
10256 | ref_type = get_group_alias_ptr_type (first_stmt_info); |
10257 | } |
10258 | else |
10259 | { |
10260 | if (grouped_load) |
10261 | cst_offset |
10262 | = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype))) |
10263 | * vect_get_place_in_interleaving_chain (stmt_info, |
10264 | first_stmt_info)); |
10265 | group_size = 1; |
10266 | ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)); |
10267 | } |
10268 | |
10269 | if (!costing_p) |
10270 | { |
10271 | dr_offset = get_dr_vinfo_offset (vinfo, dr_info: first_dr_info); |
10272 | stride_base = fold_build_pointer_plus ( |
10273 | DR_BASE_ADDRESS (first_dr_info->dr), |
10274 | size_binop (PLUS_EXPR, convert_to_ptrofftype (dr_offset), |
10275 | convert_to_ptrofftype (DR_INIT (first_dr_info->dr)))); |
10276 | stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr)); |
10277 | |
10278 | /* For a load with loop-invariant (but other than power-of-2) |
10279 | stride (i.e. not a grouped access) like so: |
10280 | |
10281 | for (i = 0; i < n; i += stride) |
10282 | ... = array[i]; |
10283 | |
10284 | we generate a new induction variable and new accesses to |
10285 | form a new vector (or vectors, depending on ncopies): |
10286 | |
10287 | for (j = 0; ; j += VF*stride) |
10288 | tmp1 = array[j]; |
10289 | tmp2 = array[j + stride]; |
10290 | ... |
10291 | vectemp = {tmp1, tmp2, ...} |
10292 | */ |
10293 | |
10294 | ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step, |
10295 | build_int_cst (TREE_TYPE (stride_step), vf)); |
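      /* The IV thus advances by VF scalar strides (in bytes) per iteration
	 of the vectorized loop; the individual element addresses below are
	 then formed, roughly, as RUNNING_OFF + group_el * elsz
	 (+ CST_OFFSET).  */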
10296 | |
10297 | standard_iv_increment_position (loop, &incr_gsi, &insert_after); |
10298 | |
10299 | stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base); |
10300 | ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep); |
10301 | create_iv (stride_base, PLUS_EXPR, ivstep, NULL, |
10302 | loop, &incr_gsi, insert_after, |
10303 | &offvar, NULL); |
10304 | |
10305 | stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step); |
10306 | } |
10307 | |
10308 | running_off = offvar; |
10309 | alias_off = build_int_cst (ref_type, 0); |
10310 | int nloads = const_nunits; |
10311 | int lnel = 1; |
10312 | tree ltype = TREE_TYPE (vectype); |
10313 | tree lvectype = vectype; |
10314 | auto_vec<tree> dr_chain; |
10315 | if (memory_access_type == VMAT_STRIDED_SLP) |
10316 | { |
10317 | if (group_size < const_nunits) |
10318 | { |
10319 | /* First check if vec_init optab supports construction from vector |
10320 | elts directly. Otherwise avoid emitting a constructor of |
10321 | vector elements by performing the loads using an integer type |
10322 | of the same size, constructing a vector of those and then |
10323 | re-interpreting it as the original vector type. This avoids a |
10324 | huge runtime penalty due to the general inability to perform |
10325 | store forwarding from smaller stores to a larger load. */ |
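	  /* For instance, V16QI loads with group_size 4 become four
	     SImode loads that build a V4SI, which is then
	     view-converted back to V16QI.  */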
10326 | tree ptype; |
10327 | tree vtype |
	    = vector_vector_composition_type (vectype,
					      const_nunits / group_size,
					      &ptype);
10331 | if (vtype != NULL_TREE) |
10332 | { |
10333 | nloads = const_nunits / group_size; |
10334 | lnel = group_size; |
10335 | lvectype = vtype; |
10336 | ltype = ptype; |
10337 | } |
10338 | } |
10339 | else |
10340 | { |
10341 | nloads = 1; |
10342 | lnel = const_nunits; |
10343 | ltype = vectype; |
10344 | } |
10345 | ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype))); |
10346 | } |
  /* For a single-element vectype, load the whole vector(1) scalar_type
     at once.  */
10348 | else if (nloads == 1) |
10349 | ltype = vectype; |
10350 | |
10351 | if (slp) |
10352 | { |
10353 | /* For SLP permutation support we need to load the whole group, |
10354 | not only the number of vector stmts the permutation result |
10355 | fits in. */ |
10356 | if (slp_perm) |
10357 | { |
10358 | /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for |
10359 | variable VF. */ |
10360 | unsigned int const_vf = vf.to_constant (); |
10361 | ncopies = CEIL (group_size * const_vf, const_nunits); |
	  dr_chain.create (ncopies);
10363 | } |
10364 | else |
10365 | ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); |
10366 | } |
10367 | unsigned int group_el = 0; |
10368 | unsigned HOST_WIDE_INT |
10369 | elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype))); |
10370 | unsigned int n_groups = 0; |
  /* When costing some adjacent vector loads, we'd like to cost them
     once using their total number instead of one by one.  */
10373 | unsigned int n_adjacent_loads = 0; |
10374 | for (j = 0; j < ncopies; j++) |
10375 | { |
10376 | if (nloads > 1 && !costing_p) |
	vec_alloc (v, nloads);
10378 | gimple *new_stmt = NULL; |
10379 | for (i = 0; i < nloads; i++) |
10380 | { |
10381 | if (costing_p) |
10382 | { |
10383 | /* For VMAT_ELEMENTWISE, just cost it as scalar_load to |
10384 | avoid ICE, see PR110776. */ |
10385 | if (VECTOR_TYPE_P (ltype) |
10386 | && memory_access_type != VMAT_ELEMENTWISE) |
10387 | n_adjacent_loads++; |
10388 | else |
		inside_cost += record_stmt_cost (cost_vec, 1, scalar_load,
						 stmt_info, 0, vect_body);
10391 | continue; |
10392 | } |
10393 | tree this_off = build_int_cst (TREE_TYPE (alias_off), |
10394 | group_el * elsz + cst_offset); |
10395 | tree data_ref = build2 (MEM_REF, ltype, running_off, this_off); |
10396 | vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr)); |
	  new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10399 | if (nloads > 1) |
10400 | CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, |
10401 | gimple_assign_lhs (new_stmt)); |
10402 | |
10403 | group_el += lnel; |
10404 | if (! slp |
10405 | || group_el == group_size) |
10406 | { |
10407 | n_groups++; |
10408 | /* When doing SLP make sure to not load elements from |
10409 | the next vector iteration, those will not be accessed |
10410 | so just use the last element again. See PR107451. */ |
10411 | if (!slp || known_lt (n_groups, vf)) |
10412 | { |
		  tree newoff = copy_ssa_name (running_off);
10414 | gimple *incr |
10415 | = gimple_build_assign (newoff, POINTER_PLUS_EXPR, |
10416 | running_off, stride_step); |
		  vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
10418 | running_off = newoff; |
10419 | } |
10420 | group_el = 0; |
10421 | } |
10422 | } |
10423 | |
10424 | if (nloads > 1) |
10425 | { |
10426 | if (costing_p) |
	    inside_cost += record_stmt_cost (cost_vec, 1, vec_construct,
					     stmt_info, 0, vect_body);
10429 | else |
10430 | { |
10431 | tree vec_inv = build_constructor (lvectype, v); |
	      new_temp = vect_init_vector (vinfo, stmt_info, vec_inv,
					   lvectype, gsi);
10434 | new_stmt = SSA_NAME_DEF_STMT (new_temp); |
10435 | if (lvectype != vectype) |
10436 | { |
		  new_stmt
		    = gimple_build_assign (make_ssa_name (vectype),
					   VIEW_CONVERT_EXPR,
					   build1 (VIEW_CONVERT_EXPR,
						   vectype, new_temp));
		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
					       gsi);
10444 | } |
10445 | } |
10446 | } |
10447 | |
10448 | if (!costing_p) |
10449 | { |
10450 | if (slp) |
10451 | { |
10452 | if (slp_perm) |
		dr_chain.quick_push (gimple_assign_lhs (new_stmt));
	      else
		slp_node->push_vec_def (new_stmt);
	    }
	  else
	    {
	      if (j == 0)
		*vec_stmt = new_stmt;
	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10462 | } |
10463 | } |
10464 | } |
10465 | if (slp_perm) |
10466 | { |
10467 | unsigned n_perms; |
10468 | if (costing_p) |
10469 | { |
10470 | unsigned n_loads; |
10471 | vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL, vf, |
10472 | true, &n_perms, &n_loads); |
	  inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
					   first_stmt_info, 0, vect_body);
10475 | } |
10476 | else |
10477 | vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf, |
10478 | false, &n_perms); |
10479 | } |
10480 | |
10481 | if (costing_p) |
10482 | { |
10483 | if (n_adjacent_loads > 0) |
	vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
			    alignment_support_scheme, misalignment, false,
			    &inside_cost, nullptr, cost_vec, cost_vec, true);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: inside_cost = %u, "
			 "prologue_cost = 0 .\n",
			 inside_cost);
10493 | } |
10494 | |
10495 | return true; |
10496 | } |
10497 | |
10498 | if (memory_access_type == VMAT_GATHER_SCATTER |
10499 | || (!slp && memory_access_type == VMAT_CONTIGUOUS)) |
10500 | grouped_load = false; |
10501 | |
10502 | if (grouped_load |
10503 | || (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())) |
10504 | { |
10505 | if (grouped_load) |
10506 | { |
10507 | first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); |
10508 | group_size = DR_GROUP_SIZE (first_stmt_info); |
10509 | } |
10510 | else |
10511 | { |
10512 | first_stmt_info = stmt_info; |
10513 | group_size = 1; |
10514 | } |
10515 | /* For SLP vectorization we directly vectorize a subchain |
10516 | without permutation. */ |
10517 | if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()) |
10518 | first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0]; |
10519 | /* For BB vectorization always use the first stmt to base |
10520 | the data ref pointer on. */ |
10521 | if (bb_vinfo) |
10522 | first_stmt_info_for_drptr |
10523 | = vect_find_first_scalar_stmt_in_slp (slp_node); |
10524 | |
10525 | /* Check if the chain of loads is already vectorized. */ |
10526 | if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists () |
10527 | /* For SLP we would need to copy over SLP_TREE_VEC_DEFS. |
10528 | ??? But we can only do so if there is exactly one |
10529 | as we have no way to get at the rest. Leave the CSE |
10530 | opportunity alone. |
10531 | ??? With the group load eventually participating |
10532 | in multiple different permutations (having multiple |
10533 | slp nodes which refer to the same group) the CSE |
10534 | is even wrong code. See PR56270. */ |
10535 | && !slp) |
10536 | { |
10537 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
10538 | return true; |
10539 | } |
10540 | first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info); |
10541 | group_gap_adj = 0; |
10542 | |
10543 | /* VEC_NUM is the number of vect stmts to be created for this group. */ |
10544 | if (slp) |
10545 | { |
10546 | grouped_load = false; |
10547 | /* If an SLP permutation is from N elements to N elements, |
10548 | and if one vector holds a whole number of N, we can load |
10549 | the inputs to the permutation in the same way as an |
10550 | unpermuted sequence. In other cases we need to load the |
10551 | whole group, not only the number of vector stmts the |
10552 | permutation result fits in. */ |
10553 | unsigned scalar_lanes = SLP_TREE_LANES (slp_node); |
10554 | if (slp_perm |
10555 | && (group_size != scalar_lanes |
	      || !multiple_p (nunits, group_size)))
10557 | { |
10558 | /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for |
10559 | variable VF; see vect_transform_slp_perm_load. */ |
10560 | unsigned int const_vf = vf.to_constant (); |
10561 | unsigned int const_nunits = nunits.to_constant (); |
10562 | vec_num = CEIL (group_size * const_vf, const_nunits); |
10563 | group_gap_adj = vf * group_size - nunits * vec_num; |
10564 | } |
10565 | else |
10566 | { |
10567 | vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); |
10568 | group_gap_adj |
10569 | = group_size - scalar_lanes; |
10570 | } |
10571 | } |
10572 | else |
10573 | vec_num = group_size; |
10574 | |
10575 | ref_type = get_group_alias_ptr_type (first_stmt_info); |
10576 | } |
10577 | else |
10578 | { |
10579 | first_stmt_info = stmt_info; |
10580 | first_dr_info = dr_info; |
10581 | group_size = vec_num = 1; |
10582 | group_gap_adj = 0; |
10583 | ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr)); |
10584 | if (slp) |
10585 | vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); |
10586 | } |
10587 | |
10588 | gcc_assert (alignment_support_scheme); |
10589 | vec_loop_masks *loop_masks |
10590 | = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) |
10591 | ? &LOOP_VINFO_MASKS (loop_vinfo) |
10592 | : NULL); |
10593 | vec_loop_lens *loop_lens |
10594 | = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo) |
10595 | ? &LOOP_VINFO_LENS (loop_vinfo) |
10596 | : NULL); |
10597 | |
  /* Both vect_analyze_stmt and vect_transform_stmt can reach this point,
     with one difference: during analysis both lens and masks may be
     enabled, while during transform only one of the two can be active,
     since we should not use a length-based approach when fully masked.  */
  if (cost_vec == NULL)
    /* The cost_vec is NULL during transform.  */
    gcc_assert ((!loop_lens || !loop_masks));
10605 | |
10606 | /* Targets with store-lane instructions must not require explicit |
10607 | realignment. vect_supportable_dr_alignment always returns either |
10608 | dr_aligned or dr_unaligned_supported for masked operations. */ |
10609 | gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES |
10610 | && !mask |
10611 | && !loop_masks) |
10612 | || alignment_support_scheme == dr_aligned |
10613 | || alignment_support_scheme == dr_unaligned_supported); |
10614 | |
10615 | /* In case the vectorization factor (VF) is bigger than the number |
10616 | of elements that we can fit in a vectype (nunits), we have to generate |
   more than one vector stmt - i.e. we need to "unroll" the
10618 | vector stmt by a factor VF/nunits. In doing so, we record a pointer |
10619 | from one copy of the vector stmt to the next, in the field |
10620 | STMT_VINFO_RELATED_STMT. This is necessary in order to allow following |
10621 | stages to find the correct vector defs to be used when vectorizing |
10622 | stmts that use the defs of the current stmt. The example below |
10623 | illustrates the vectorization process when VF=16 and nunits=4 (i.e., we |
10624 | need to create 4 vectorized stmts): |
10625 | |
10626 | before vectorization: |
10627 | RELATED_STMT VEC_STMT |
10628 | S1: x = memref - - |
10629 | S2: z = x + 1 - - |
10630 | |
10631 | step 1: vectorize stmt S1: |
10632 | We first create the vector stmt VS1_0, and, as usual, record a |
10633 | pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1. |
10634 | Next, we create the vector stmt VS1_1, and record a pointer to |
10635 | it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0. |
10636 | Similarly, for VS1_2 and VS1_3. This is the resulting chain of |
10637 | stmts and pointers: |
10638 | RELATED_STMT VEC_STMT |
10639 | VS1_0: vx0 = memref0 VS1_1 - |
10640 | VS1_1: vx1 = memref1 VS1_2 - |
10641 | VS1_2: vx2 = memref2 VS1_3 - |
10642 | VS1_3: vx3 = memref3 - - |
10643 | S1: x = load - VS1_0 |
10644 | S2: z = x + 1 - - |
10645 | */ |
10646 | |
10647 | /* In case of interleaving (non-unit grouped access): |
10648 | |
10649 | S1: x2 = &base + 2 |
10650 | S2: x0 = &base |
10651 | S3: x1 = &base + 1 |
10652 | S4: x3 = &base + 3 |
10653 | |
10654 | Vectorized loads are created in the order of memory accesses |
10655 | starting from the access of the first stmt of the chain: |
10656 | |
10657 | VS1: vx0 = &base |
10658 | VS2: vx1 = &base + vec_size*1 |
   VS3: vx2 = &base + vec_size*2
   VS4: vx3 = &base + vec_size*3
10661 | |
10662 | Then permutation statements are generated: |
10663 | |
   VS5: vx4 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
   VS6: vx5 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
10666 | ... |
10667 | |
10668 | And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts |
10669 | (the order of the data-refs in the output of vect_permute_load_chain |
10670 | corresponds to the order of scalar stmts in the interleaving chain - see |
10671 | the documentation of vect_permute_load_chain()). |
10672 | The generation of permutation stmts and recording them in |
10673 | STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load(). |
10674 | |
10675 | In case of both multiple types and interleaving, the vector loads and |
10676 | permutation stmts above are created for every copy. The result vector |
10677 | stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the |
10678 | corresponding STMT_VINFO_RELATED_STMT for the next copies. */ |
10679 | |
10680 | /* If the data reference is aligned (dr_aligned) or potentially unaligned |
10681 | on a target that supports unaligned accesses (dr_unaligned_supported) |
10682 | we generate the following code: |
10683 | p = initial_addr; |
10684 | indx = 0; |
10685 | loop { |
10686 | p = p + indx * vectype_size; |
10687 | vec_dest = *(p); |
10688 | indx = indx + 1; |
10689 | } |
10690 | |
10691 | Otherwise, the data reference is potentially unaligned on a target that |
10692 | does not support unaligned accesses (dr_explicit_realign_optimized) - |
10693 | then generate the following code, in which the data in each iteration is |
10694 | obtained by two vector loads, one from the previous iteration, and one |
10695 | from the current iteration: |
10696 | p1 = initial_addr; |
10697 | msq_init = *(floor(p1)) |
10698 | p2 = initial_addr + VS - 1; |
10699 | realignment_token = call target_builtin; |
10700 | indx = 0; |
10701 | loop { |
10702 | p2 = p2 + indx * vectype_size |
10703 | lsq = *(floor(p2)) |
10704 | vec_dest = realign_load (msq, lsq, realignment_token) |
10705 | indx = indx + 1; |
10706 | msq = lsq; |
10707 | } */ |
10708 | |
10709 | /* If the misalignment remains the same throughout the execution of the |
10710 | loop, we can create the init_addr and permutation mask at the loop |
10711 | preheader. Otherwise, it needs to be created inside the loop. |
10712 | This can only occur when vectorizing memory accesses in the inner-loop |
10713 | nested within an outer-loop that is being vectorized. */ |
10714 | |
10715 | if (nested_in_vect_loop |
10716 | && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr), |
10717 | b: GET_MODE_SIZE (TYPE_MODE (vectype)))) |
10718 | { |
10719 | gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized); |
10720 | compute_in_loop = true; |
10721 | } |
10722 | |
10723 | bool diff_first_stmt_info |
10724 | = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr; |
10725 | |
10726 | tree offset = NULL_TREE; |
10727 | if ((alignment_support_scheme == dr_explicit_realign_optimized |
10728 | || alignment_support_scheme == dr_explicit_realign) |
10729 | && !compute_in_loop) |
10730 | { |
10731 | /* If we have different first_stmt_info, we can't set up realignment |
10732 | here, since we can't guarantee first_stmt_info DR has been |
10733 | initialized yet, use first_stmt_info_for_drptr DR by bumping the |
10734 | distance from first_stmt_info DR instead as below. */ |
10735 | if (!costing_p) |
10736 | { |
10737 | if (!diff_first_stmt_info) |
10738 | msq = vect_setup_realignment (vinfo, first_stmt_info, gsi, |
10739 | &realignment_token, |
10740 | alignment_support_scheme, NULL_TREE, |
10741 | &at_loop); |
10742 | if (alignment_support_scheme == dr_explicit_realign_optimized) |
10743 | { |
10744 | phi = as_a<gphi *> (SSA_NAME_DEF_STMT (msq)); |
10745 | offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype), |
10746 | size_one_node); |
10747 | gcc_assert (!first_stmt_info_for_drptr); |
10748 | } |
10749 | } |
10750 | } |
10751 | else |
10752 | at_loop = loop; |
10753 | |
10754 | if (!known_eq (poffset, 0)) |
10755 | offset = (offset |
10756 | ? size_binop (PLUS_EXPR, offset, size_int (poffset)) |
10757 | : size_int (poffset)); |
10758 | |
10759 | tree bump; |
10760 | tree vec_offset = NULL_TREE; |
10761 | if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) |
10762 | { |
10763 | aggr_type = NULL_TREE; |
10764 | bump = NULL_TREE; |
10765 | } |
10766 | else if (memory_access_type == VMAT_GATHER_SCATTER) |
10767 | { |
10768 | aggr_type = elem_type; |
10769 | if (!costing_p) |
	vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
					 &bump, &vec_offset, loop_lens);
10772 | } |
10773 | else |
10774 | { |
10775 | if (memory_access_type == VMAT_LOAD_STORE_LANES) |
10776 | aggr_type = build_array_type_nelts (elem_type, vec_num * nunits); |
10777 | else |
10778 | aggr_type = vectype; |
10779 | bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type, |
10780 | memory_access_type, loop_lens); |
10781 | } |
10782 | |
10783 | auto_vec<tree> vec_offsets; |
10784 | auto_vec<tree> vec_masks; |
10785 | if (mask && !costing_p) |
10786 | { |
10787 | if (slp_node) |
10788 | vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index], |
10789 | &vec_masks); |
10790 | else |
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
				       &vec_masks, mask_vectype);
10793 | } |
10794 | |
10795 | tree vec_mask = NULL_TREE; |
10796 | if (memory_access_type == VMAT_LOAD_STORE_LANES) |
10797 | { |
10798 | gcc_assert (alignment_support_scheme == dr_aligned |
10799 | || alignment_support_scheme == dr_unaligned_supported); |
10800 | gcc_assert (grouped_load && !slp); |
10801 | |
10802 | unsigned int inside_cost = 0, prologue_cost = 0; |
    /* When costing some adjacent vector loads, we'd like to cost them
       once using their total number instead of one by one.  */
10805 | unsigned int n_adjacent_loads = 0; |
10806 | for (j = 0; j < ncopies; j++) |
10807 | { |
10808 | if (costing_p) |
10809 | { |
10810 | /* An IFN_LOAD_LANES will load all its vector results, |
10811 | regardless of which ones we actually need. Account |
10812 | for the cost of unused results. */ |
10813 | if (first_stmt_info == stmt_info) |
10814 | { |
10815 | unsigned int gaps = DR_GROUP_SIZE (first_stmt_info); |
10816 | stmt_vec_info next_stmt_info = first_stmt_info; |
10817 | do |
10818 | { |
10819 | gaps -= 1; |
10820 | next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); |
10821 | } |
10822 | while (next_stmt_info); |
10823 | if (gaps) |
10824 | { |
10825 | if (dump_enabled_p ()) |
		    dump_printf_loc (MSG_NOTE, vect_location,
				     "vect_model_load_cost: %d "
				     "unused vectors.\n",
				     gaps);
		  vect_get_load_cost (vinfo, stmt_info, gaps,
				      alignment_support_scheme,
				      misalignment, false, &inside_cost,
				      &prologue_cost, cost_vec, cost_vec,
				      true);
10835 | } |
10836 | } |
10837 | n_adjacent_loads++; |
10838 | continue; |
10839 | } |
10840 | |
10841 | /* 1. Create the vector or array pointer update chain. */ |
10842 | if (j == 0) |
10843 | dataref_ptr |
10844 | = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type, |
10845 | at_loop, offset, &dummy, gsi, |
10846 | &ptr_incr, false, bump); |
10847 | else |
10848 | { |
10849 | gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)); |
10850 | dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi, |
10851 | stmt_info, bump); |
10852 | } |
10853 | if (mask) |
10854 | vec_mask = vec_masks[j]; |
10855 | |
	  tree vec_array = create_vector_array (vectype, vec_num);
10857 | |
10858 | tree final_mask = NULL_TREE; |
10859 | tree final_len = NULL_TREE; |
10860 | tree bias = NULL_TREE; |
10861 | if (loop_masks) |
10862 | final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks, |
10863 | ncopies, vectype, j); |
10864 | if (vec_mask) |
	    final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
					   final_mask, vec_mask, gsi);
10867 | |
10868 | if (lanes_ifn == IFN_MASK_LEN_LOAD_LANES) |
10869 | { |
10870 | if (loop_lens) |
10871 | final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens, |
10872 | ncopies, vectype, j, 1); |
10873 | else |
10874 | final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype)); |
10875 | signed char biasval |
10876 | = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); |
10877 | bias = build_int_cst (intQI_type_node, biasval); |
10878 | if (!final_mask) |
10879 | { |
10880 | mask_vectype = truth_type_for (vectype); |
10881 | final_mask = build_minus_one_cst (mask_vectype); |
10882 | } |
10883 | } |
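	  /* The partial load/store bias is 0 or -1; with -1 the length
	     the target expects is biased by one, e.g. for IBM Z's
	     load-with-length, which takes the highest byte index rather
	     than a count.  */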
10884 | |
10885 | gcall *call; |
10886 | if (final_len && final_mask) |
10887 | { |
10888 | /* Emit: |
10889 | VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR, |
10890 | VEC_MASK, LEN, BIAS). */ |
10891 | unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype)); |
10892 | tree alias_ptr = build_int_cst (ref_type, align); |
10893 | call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5, |
10894 | dataref_ptr, alias_ptr, |
10895 | final_mask, final_len, bias); |
10896 | } |
10897 | else if (final_mask) |
10898 | { |
10899 | /* Emit: |
10900 | VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR, |
10901 | VEC_MASK). */ |
10902 | unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype)); |
10903 | tree alias_ptr = build_int_cst (ref_type, align); |
10904 | call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3, |
10905 | dataref_ptr, alias_ptr, |
10906 | final_mask); |
10907 | } |
10908 | else |
10909 | { |
10910 | /* Emit: |
10911 | VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */ |
	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
10913 | call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref); |
10914 | } |
	  gimple_call_set_lhs (call, vec_array);
	  gimple_call_set_nothrow (call, true);
	  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
10918 | |
	  dr_chain.create (vec_num);
10920 | /* Extract each vector into an SSA_NAME. */ |
10921 | for (i = 0; i < vec_num; i++) |
10922 | { |
	      new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
					    vec_array, i);
	      dr_chain.quick_push (new_temp);
10926 | } |
10927 | |
10928 | /* Record the mapping between SSA_NAMEs and statements. */ |
10929 | vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain); |
10930 | |
10931 | /* Record that VEC_ARRAY is now dead. */ |
	  vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
10933 | |
10934 | dr_chain.release (); |
10935 | |
10936 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
10937 | } |
10938 | |
10939 | if (costing_p) |
10940 | { |
10941 | if (n_adjacent_loads > 0) |
	vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
			    alignment_support_scheme, misalignment, false,
			    &inside_cost, &prologue_cost, cost_vec,
			    cost_vec, true);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: inside_cost = %u, "
			 "prologue_cost = %u .\n",
			 inside_cost, prologue_cost);
10951 | } |
10952 | |
10953 | return true; |
10954 | } |
10955 | |
10956 | if (memory_access_type == VMAT_GATHER_SCATTER) |
10957 | { |
10958 | gcc_assert (alignment_support_scheme == dr_aligned |
10959 | || alignment_support_scheme == dr_unaligned_supported); |
10960 | gcc_assert (!grouped_load && !slp_perm); |
10961 | |
10962 | unsigned int inside_cost = 0, prologue_cost = 0; |
10963 | for (j = 0; j < ncopies; j++) |
10964 | { |
10965 | /* 1. Create the vector or array pointer update chain. */ |
10966 | if (j == 0 && !costing_p) |
10967 | { |
10968 | if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) |
	    vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
					 slp_node, &gs_info, &dataref_ptr,
					 &vec_offsets);
10972 | else |
10973 | dataref_ptr |
10974 | = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type, |
10975 | at_loop, offset, &dummy, gsi, |
10976 | &ptr_incr, false, bump); |
10977 | } |
10978 | else if (!costing_p) |
10979 | { |
10980 | gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)); |
10981 | if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info)) |
10982 | dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, |
10983 | gsi, stmt_info, bump); |
10984 | } |
10985 | |
10986 | gimple *new_stmt = NULL; |
10987 | for (i = 0; i < vec_num; i++) |
10988 | { |
10989 | tree final_mask = NULL_TREE; |
10990 | tree final_len = NULL_TREE; |
10991 | tree bias = NULL_TREE; |
10992 | if (!costing_p) |
10993 | { |
10994 | if (mask) |
10995 | vec_mask = vec_masks[vec_num * j + i]; |
10996 | if (loop_masks) |
10997 | final_mask |
10998 | = vect_get_loop_mask (loop_vinfo, gsi, loop_masks, |
10999 | vec_num * ncopies, vectype, |
11000 | vec_num * j + i); |
11001 | if (vec_mask) |
		final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
					       final_mask, vec_mask, gsi);
11004 | |
11005 | if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info)) |
11006 | dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, |
11007 | gsi, stmt_info, bump); |
11008 | } |
11009 | |
11010 | /* 2. Create the vector-load in the loop. */ |
11011 | unsigned HOST_WIDE_INT align; |
11012 | if (gs_info.ifn != IFN_LAST) |
11013 | { |
11014 | if (costing_p) |
11015 | { |
		  unsigned int cnunits = vect_nunits_for_cost (vectype);
		  inside_cost
		    = record_stmt_cost (cost_vec, cnunits, scalar_load,
					stmt_info, 0, vect_body);
11020 | continue; |
11021 | } |
11022 | if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) |
11023 | vec_offset = vec_offsets[vec_num * j + i]; |
11024 | tree zero = build_zero_cst (vectype); |
11025 | tree scale = size_int (gs_info.scale); |
11026 | |
11027 | if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD) |
11028 | { |
11029 | if (loop_lens) |
11030 | final_len |
11031 | = vect_get_loop_len (loop_vinfo, gsi, loop_lens, |
11032 | vec_num * ncopies, vectype, |
11033 | vec_num * j + i, 1); |
11034 | else |
11035 | final_len |
11036 | = build_int_cst (sizetype, |
				     TYPE_VECTOR_SUBPARTS (vectype));
11038 | signed char biasval |
11039 | = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); |
11040 | bias = build_int_cst (intQI_type_node, biasval); |
11041 | if (!final_mask) |
11042 | { |
11043 | mask_vectype = truth_type_for (vectype); |
11044 | final_mask = build_minus_one_cst (mask_vectype); |
11045 | } |
11046 | } |
11047 | |
11048 | gcall *call; |
11049 | if (final_len && final_mask) |
11050 | call |
11051 | = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, 7, |
11052 | dataref_ptr, vec_offset, |
11053 | scale, zero, final_mask, |
11054 | final_len, bias); |
11055 | else if (final_mask) |
11056 | call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5, |
11057 | dataref_ptr, vec_offset, |
11058 | scale, zero, final_mask); |
11059 | else |
11060 | call = gimple_build_call_internal (IFN_GATHER_LOAD, 4, |
11061 | dataref_ptr, vec_offset, |
11062 | scale, zero); |
	      gimple_call_set_nothrow (call, true);
11064 | new_stmt = call; |
11065 | data_ref = NULL_TREE; |
11066 | } |
11067 | else if (gs_info.decl) |
11068 | { |
11069 | /* The builtin decls path for gather is legacy, x86 only. */ |
11070 | gcc_assert (!final_len && nunits.is_constant ()); |
11071 | if (costing_p) |
11072 | { |
		  unsigned int cnunits = vect_nunits_for_cost (vectype);
		  inside_cost
		    = record_stmt_cost (cost_vec, cnunits, scalar_load,
					stmt_info, 0, vect_body);
11077 | continue; |
11078 | } |
11079 | poly_uint64 offset_nunits |
		= TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
11081 | if (known_eq (nunits, offset_nunits)) |
11082 | { |
		  new_stmt = vect_build_one_gather_load_call
		    (vinfo, stmt_info, gsi, &gs_info,
		     dataref_ptr, vec_offsets[vec_num * j + i],
		     final_mask);
11087 | data_ref = NULL_TREE; |
11088 | } |
11089 | else if (known_eq (nunits, offset_nunits * 2)) |
11090 | { |
		  /* We have an offset vector with half the number of
11092 | lanes but the builtins will produce full vectype |
11093 | data with just the lower lanes filled. */ |
		  new_stmt = vect_build_one_gather_load_call
		    (vinfo, stmt_info, gsi, &gs_info,
		     dataref_ptr, vec_offsets[2 * vec_num * j + 2 * i],
		     final_mask);
		  tree low = make_ssa_name (vectype);
11099 | gimple_set_lhs (new_stmt, low); |
		  vect_finish_stmt_generation (vinfo, stmt_info,
					       new_stmt, gsi);
11102 | |
		  /* Now put the upper half of FINAL_MASK into its lower
		     half.  */
11104 | if (final_mask |
11105 | && !SCALAR_INT_MODE_P |
11106 | (TYPE_MODE (TREE_TYPE (final_mask)))) |
11107 | { |
11108 | int count = nunits.to_constant (); |
11109 | vec_perm_builder sel (count, count, 1); |
		      sel.quick_grow (count);
11111 | for (int i = 0; i < count; ++i) |
11112 | sel[i] = i | (count / 2); |
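		      /* E.g. for count == 4 the selector is { 2, 3, 2, 3 },
			 moving the upper half of FINAL_MASK into the lower
			 lanes.  */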
11113 | vec_perm_indices indices (sel, 2, count); |
		      tree perm_mask = vect_gen_perm_mask_checked
			(TREE_TYPE (final_mask), indices);
11116 | new_stmt = gimple_build_assign (NULL_TREE, |
11117 | VEC_PERM_EXPR, |
11118 | final_mask, |
11119 | final_mask, |
11120 | perm_mask); |
11121 | final_mask = make_ssa_name (TREE_TYPE (final_mask)); |
11122 | gimple_set_lhs (new_stmt, final_mask); |
11123 | vect_finish_stmt_generation (vinfo, stmt_info, |
						   new_stmt, gsi);
11125 | } |
11126 | else if (final_mask) |
11127 | { |
11128 | new_stmt = gimple_build_assign (NULL_TREE, |
11129 | VEC_UNPACK_HI_EXPR, |
11130 | final_mask); |
		      final_mask = make_ssa_name
			(truth_type_for (gs_info.offset_vectype));
11133 | gimple_set_lhs (new_stmt, final_mask); |
11134 | vect_finish_stmt_generation (vinfo, stmt_info, |
						   new_stmt, gsi);
11136 | } |
11137 | |
		  new_stmt = vect_build_one_gather_load_call
		    (vinfo, stmt_info, gsi, &gs_info,
		     dataref_ptr,
		     vec_offsets[2 * vec_num * j + 2 * i + 1],
		     final_mask);
		  tree high = make_ssa_name (vectype);
11144 | gimple_set_lhs (new_stmt, high); |
11145 | vect_finish_stmt_generation (vinfo, stmt_info, |
					       new_stmt, gsi);
11147 | |
		  /* Compose LOW and HIGH into the full vector.  */
11149 | int count = nunits.to_constant (); |
11150 | vec_perm_builder sel (count, count, 1); |
		  sel.quick_grow (count);
11152 | for (int i = 0; i < count; ++i) |
11153 | sel[i] = i < count / 2 ? i : i + count / 2; |
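		  /* E.g. for count == 4 the selector is { 0, 1, 4, 5 },
		     taking the filled lower lanes of LOW and HIGH.  */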
11154 | vec_perm_indices indices (sel, 2, count); |
11155 | tree perm_mask |
		    = vect_gen_perm_mask_checked (vectype, indices);
11157 | new_stmt = gimple_build_assign (NULL_TREE, |
11158 | VEC_PERM_EXPR, |
11159 | low, high, perm_mask); |
11160 | data_ref = NULL_TREE; |
11161 | } |
11162 | else if (known_eq (nunits * 2, offset_nunits)) |
11163 | { |
		  /* We have an offset vector with double the number of
11165 | lanes. Select the low/high part accordingly. */ |
11166 | vec_offset = vec_offsets[(vec_num * j + i) / 2]; |
11167 | if ((vec_num * j + i) & 1) |
11168 | { |
11169 | int count = offset_nunits.to_constant (); |
11170 | vec_perm_builder sel (count, count, 1); |
		      sel.quick_grow (count);
11172 | for (int i = 0; i < count; ++i) |
11173 | sel[i] = i | (count / 2); |
11174 | vec_perm_indices indices (sel, 2, count); |
		      tree perm_mask = vect_gen_perm_mask_checked
			(TREE_TYPE (vec_offset), indices);
11177 | new_stmt = gimple_build_assign (NULL_TREE, |
11178 | VEC_PERM_EXPR, |
11179 | vec_offset, |
11180 | vec_offset, |
11181 | perm_mask); |
11182 | vec_offset = make_ssa_name (TREE_TYPE (vec_offset)); |
11183 | gimple_set_lhs (new_stmt, vec_offset); |
11184 | vect_finish_stmt_generation (vinfo, stmt_info, |
						   new_stmt, gsi);
11186 | } |
		  new_stmt = vect_build_one_gather_load_call
		    (vinfo, stmt_info, gsi, &gs_info,
		     dataref_ptr, vec_offset, final_mask);
11190 | data_ref = NULL_TREE; |
11191 | } |
11192 | else |
11193 | gcc_unreachable (); |
11194 | } |
11195 | else |
11196 | { |
11197 | /* Emulated gather-scatter. */ |
11198 | gcc_assert (!final_mask); |
11199 | unsigned HOST_WIDE_INT const_nunits = nunits.to_constant (); |
11200 | if (costing_p) |
11201 | { |
		      /* For emulated gathers, N offset vector element
			 extracts (the offset add is consumed by the
			 load).  */
		      inside_cost = record_stmt_cost (cost_vec, const_nunits,
						      vec_to_scalar, stmt_info,
						      0, vect_body);
11207 | /* N scalar loads plus gathering them into a |
11208 | vector. */ |
		      inside_cost
			= record_stmt_cost (cost_vec, const_nunits,
					    scalar_load, stmt_info, 0,
					    vect_body);
		      inside_cost
			= record_stmt_cost (cost_vec, 1, vec_construct,
					    stmt_info, 0, vect_body);
11215 | continue; |
11216 | } |
11217 | unsigned HOST_WIDE_INT const_offset_nunits |
		    = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
11219 | .to_constant (); |
11220 | vec<constructor_elt, va_gc> *ctor_elts; |
		  vec_alloc (ctor_elts, const_nunits);
11222 | gimple_seq stmts = NULL; |
11223 | /* We support offset vectors with more elements |
11224 | than the data vector for now. */ |
11225 | unsigned HOST_WIDE_INT factor |
11226 | = const_offset_nunits / const_nunits; |
11227 | vec_offset = vec_offsets[(vec_num * j + i) / factor]; |
11228 | unsigned elt_offset |
11229 | = ((vec_num * j + i) % factor) * const_nunits; |
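		  /* E.g. with FACTOR 2 and CONST_NUNITS 4, copies 0 and 1
		     use lanes 0-3 and 4-7 of the same offset vector.  */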
11230 | tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset)); |
11231 | tree scale = size_int (gs_info.scale); |
11232 | align = get_object_alignment (DR_REF (first_dr_info->dr)); |
11233 | tree ltype = build_aligned_type (TREE_TYPE (vectype), align); |
11234 | for (unsigned k = 0; k < const_nunits; ++k) |
11235 | { |
11236 | tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type), |
11237 | bitsize_int (k + elt_offset)); |
		      tree idx
			= gimple_build (&stmts, BIT_FIELD_REF, idx_type,
					vec_offset, TYPE_SIZE (idx_type),
					boff);
		      idx = gimple_convert (&stmts, sizetype, idx);
		      idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx,
					  scale);
		      tree ptr = gimple_build (&stmts, PLUS_EXPR,
					       TREE_TYPE (dataref_ptr),
					       dataref_ptr, idx);
		      ptr = gimple_convert (&stmts, ptr_type_node, ptr);
11248 | tree elt = make_ssa_name (TREE_TYPE (vectype)); |
11249 | tree ref = build2 (MEM_REF, ltype, ptr, |
11250 | build_int_cst (ref_type, 0)); |
11251 | new_stmt = gimple_build_assign (elt, ref); |
		      gimple_set_vuse (new_stmt, gimple_vuse (gsi_stmt (*gsi)));
11253 | gimple_seq_add_stmt (&stmts, new_stmt); |
11254 | CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt); |
11255 | } |
11256 | gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); |
11257 | new_stmt = gimple_build_assign ( |
11258 | NULL_TREE, build_constructor (vectype, ctor_elts)); |
11259 | data_ref = NULL_TREE; |
11260 | } |
11261 | |
11262 | vec_dest = vect_create_destination_var (scalar_dest, vectype); |
11263 | /* DATA_REF is null if we've already built the statement. */ |
11264 | if (data_ref) |
11265 | { |
11266 | vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr)); |
11267 | new_stmt = gimple_build_assign (vec_dest, data_ref); |
11268 | } |
	      new_temp = make_ssa_name (vec_dest, new_stmt);
11270 | gimple_set_lhs (new_stmt, new_temp); |
11271 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
11272 | |
11273 | /* Store vector loads in the corresponding SLP_NODE. */ |
11274 | if (slp) |
		slp_node->push_vec_def (new_stmt);
11276 | } |
11277 | |
11278 | if (!slp && !costing_p) |
	    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11280 | } |
11281 | |
11282 | if (!slp && !costing_p) |
11283 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
11284 | |
  if (costing_p && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %u, "
		     "prologue_cost = %u .\n",
		     inside_cost, prologue_cost);
11290 | return true; |
11291 | } |
11292 | |
11293 | poly_uint64 group_elt = 0; |
11294 | unsigned int inside_cost = 0, prologue_cost = 0; |
  /* When costing some adjacent vector loads, we'd like to cost them
     once using their total number instead of one by one.  */
11297 | unsigned int n_adjacent_loads = 0; |
11298 | for (j = 0; j < ncopies; j++) |
11299 | { |
11300 | /* 1. Create the vector or array pointer update chain. */ |
11301 | if (j == 0 && !costing_p) |
11302 | { |
11303 | bool simd_lane_access_p |
11304 | = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0; |
11305 | if (simd_lane_access_p |
11306 | && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR |
11307 | && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0)) |
	      && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
11309 | && integer_zerop (DR_INIT (first_dr_info->dr)) |
11310 | && alias_sets_conflict_p (get_alias_set (aggr_type), |
11311 | get_alias_set (TREE_TYPE (ref_type))) |
11312 | && (alignment_support_scheme == dr_aligned |
11313 | || alignment_support_scheme == dr_unaligned_supported)) |
11314 | { |
11315 | dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr)); |
11316 | dataref_offset = build_int_cst (ref_type, 0); |
11317 | } |
11318 | else if (diff_first_stmt_info) |
11319 | { |
11320 | dataref_ptr |
11321 | = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr, |
11322 | aggr_type, at_loop, offset, &dummy, |
11323 | gsi, &ptr_incr, simd_lane_access_p, |
11324 | bump); |
11325 | /* Adjust the pointer by the difference to first_stmt. */ |
11326 | data_reference_p ptrdr |
11327 | = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr); |
11328 | tree diff |
11329 | = fold_convert (sizetype, |
11330 | size_binop (MINUS_EXPR, |
11331 | DR_INIT (first_dr_info->dr), |
11332 | DR_INIT (ptrdr))); |
11333 | dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi, |
11334 | stmt_info, diff); |
11335 | if (alignment_support_scheme == dr_explicit_realign) |
11336 | { |
11337 | msq = vect_setup_realignment (vinfo, |
11338 | first_stmt_info_for_drptr, gsi, |
11339 | &realignment_token, |
11340 | alignment_support_scheme, |
11341 | dataref_ptr, &at_loop); |
11342 | gcc_assert (!compute_in_loop); |
11343 | } |
11344 | } |
11345 | else |
11346 | dataref_ptr |
11347 | = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type, |
11348 | at_loop, |
11349 | offset, &dummy, gsi, &ptr_incr, |
11350 | simd_lane_access_p, bump); |
11351 | } |
11352 | else if (!costing_p) |
11353 | { |
11354 | gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)); |
11355 | if (dataref_offset) |
11356 | dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, |
11357 | bump); |
11358 | else |
11359 | dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi, |
11360 | stmt_info, bump); |
11361 | } |
11362 | |
11363 | if (grouped_load || slp_perm) |
	dr_chain.create (vec_num);
11365 | |
11366 | gimple *new_stmt = NULL; |
11367 | for (i = 0; i < vec_num; i++) |
11368 | { |
11369 | tree final_mask = NULL_TREE; |
11370 | tree final_len = NULL_TREE; |
11371 | tree bias = NULL_TREE; |
11372 | if (!costing_p) |
11373 | { |
11374 | if (mask) |
11375 | vec_mask = vec_masks[vec_num * j + i]; |
11376 | if (loop_masks) |
11377 | final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks, |
11378 | vec_num * ncopies, vectype, |
11379 | vec_num * j + i); |
11380 | if (vec_mask) |
		final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
					       final_mask, vec_mask, gsi);
11383 | |
11384 | if (i > 0) |
11385 | dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, |
11386 | gsi, stmt_info, bump); |
11387 | } |
11388 | |
11389 | /* 2. Create the vector-load in the loop. */ |
11390 | switch (alignment_support_scheme) |
11391 | { |
11392 | case dr_aligned: |
11393 | case dr_unaligned_supported: |
11394 | { |
11395 | if (costing_p) |
11396 | break; |
11397 | |
11398 | unsigned int misalign; |
11399 | unsigned HOST_WIDE_INT align; |
11400 | align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info)); |
11401 | if (alignment_support_scheme == dr_aligned) |
11402 | misalign = 0; |
11403 | else if (misalignment == DR_MISALIGNMENT_UNKNOWN) |
11404 | { |
11405 | align |
		    = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
11407 | misalign = 0; |
11408 | } |
11409 | else |
11410 | misalign = misalignment; |
11411 | if (dataref_offset == NULL_TREE |
11412 | && TREE_CODE (dataref_ptr) == SSA_NAME) |
11413 | set_ptr_info_alignment (get_ptr_info (dataref_ptr), align, |
11414 | misalign); |
	      align = least_bit_hwi (misalign | align);
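	      /* E.g. a target alignment of 16 with misalignment 4 yields
		 a known alignment of 4 bytes.  */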
11416 | |
11417 | /* Compute IFN when LOOP_LENS or final_mask valid. */ |
11418 | machine_mode vmode = TYPE_MODE (vectype); |
11419 | machine_mode new_vmode = vmode; |
11420 | internal_fn partial_ifn = IFN_LAST; |
11421 | if (loop_lens) |
11422 | { |
11423 | opt_machine_mode new_ovmode |
11424 | = get_len_load_store_mode (vmode, true, &partial_ifn); |
11425 | new_vmode = new_ovmode.require (); |
11426 | unsigned factor |
11427 | = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode); |
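		      /* When the target measures the length in bytes (the
			 VnQI wrapping handled below), FACTOR rescales the
			 element count by the element size.  */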
11428 | final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens, |
11429 | vec_num * ncopies, vectype, |
11430 | vec_num * j + i, factor); |
11431 | } |
11432 | else if (final_mask) |
11433 | { |
11434 | if (!can_vec_mask_load_store_p ( |
11435 | vmode, TYPE_MODE (TREE_TYPE (final_mask)), true, |
11436 | &partial_ifn)) |
11437 | gcc_unreachable (); |
11438 | } |
11439 | |
11440 | if (partial_ifn == IFN_MASK_LEN_LOAD) |
11441 | { |
11442 | if (!final_len) |
11443 | { |
11444 | /* Pass VF value to 'len' argument of |
11445 | MASK_LEN_LOAD if LOOP_LENS is invalid. */ |
11446 | final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype)); |
11447 | } |
11448 | if (!final_mask) |
11449 | { |
11450 | /* Pass all ones value to 'mask' argument of |
11451 | MASK_LEN_LOAD if final_mask is invalid. */ |
11452 | mask_vectype = truth_type_for (vectype); |
11453 | final_mask = build_minus_one_cst (mask_vectype); |
11454 | } |
11455 | } |
11456 | if (final_len) |
11457 | { |
11458 | signed char biasval |
11459 | = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); |
11460 | |
11461 | bias = build_int_cst (intQI_type_node, biasval); |
11462 | } |
11463 | |
11464 | if (final_len) |
11465 | { |
11466 | tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT); |
11467 | gcall *call; |
11468 | if (partial_ifn == IFN_MASK_LEN_LOAD) |
11469 | call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, 5, |
11470 | dataref_ptr, ptr, |
11471 | final_mask, final_len, |
11472 | bias); |
11473 | else |
11474 | call = gimple_build_call_internal (IFN_LEN_LOAD, 4, |
11475 | dataref_ptr, ptr, |
11476 | final_len, bias); |
		  gimple_call_set_nothrow (call, true);
11478 | new_stmt = call; |
11479 | data_ref = NULL_TREE; |
11480 | |
11481 | /* Need conversion if it's wrapped with VnQI. */ |
11482 | if (vmode != new_vmode) |
11483 | { |
11484 | tree new_vtype = build_vector_type_for_mode ( |
11485 | unsigned_intQI_type_node, new_vmode); |
11486 | tree var |
11487 | = vect_get_new_ssa_name (new_vtype, vect_simple_var); |
11488 | gimple_set_lhs (call, var); |
		      vect_finish_stmt_generation (vinfo, stmt_info, call,
11490 | gsi); |
11491 | tree op = build1 (VIEW_CONVERT_EXPR, vectype, var); |
11492 | new_stmt = gimple_build_assign (vec_dest, |
11493 | VIEW_CONVERT_EXPR, op); |
11494 | } |
11495 | } |
11496 | else if (final_mask) |
11497 | { |
11498 | tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT); |
11499 | gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 3, |
11500 | dataref_ptr, ptr, |
11501 | final_mask); |
		  gimple_call_set_nothrow (call, true);
11503 | new_stmt = call; |
11504 | data_ref = NULL_TREE; |
11505 | } |
11506 | else |
11507 | { |
11508 | tree ltype = vectype; |
11509 | tree new_vtype = NULL_TREE; |
11510 | unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info); |
		  unsigned int vect_align
		    = vect_known_alignment_in_bytes (first_dr_info, vectype);
		  unsigned int scalar_dr_size
		    = vect_get_scalar_dr_size (first_dr_info);
11515 | /* If there's no peeling for gaps but we have a gap |
11516 | with slp loads then load the lower half of the |
11517 | vector only. See get_group_load_store_type for |
11518 | when we apply this optimization. */ |
11519 | if (slp |
11520 | && loop_vinfo |
11521 | && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && gap != 0 |
11522 | && known_eq (nunits, (group_size - gap) * 2) |
11523 | && known_eq (nunits, group_size) |
11524 | && gap >= (vect_align / scalar_dr_size)) |
11525 | { |
11526 | tree half_vtype; |
		      new_vtype
			= vector_vector_composition_type (vectype, 2,
							  &half_vtype);
11530 | if (new_vtype != NULL_TREE) |
11531 | ltype = half_vtype; |
11532 | } |
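		  /* E.g. nunits == group_size == 4 with a gap of 2: only
		     a low half-vector is loaded and the constructor built
		     below pads the rest with zeros.  */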
11533 | /* Try to use a single smaller load when we are about |
11534 | to load excess elements compared to the unrolled |
11535 | scalar loop. |
11536 | ??? This should cover the above case as well. */ |
11537 | else if (known_gt ((vec_num * j + i + 1) * nunits, |
11538 | (group_size * vf - gap))) |
11539 | { |
11540 | if (known_ge ((vec_num * j + i + 1) * nunits |
11541 | - (group_size * vf - gap), nunits)) |
11542 | /* DR will be unused. */ |
11543 | ltype = NULL_TREE; |
11544 | else if (known_ge (vect_align, |
11545 | tree_to_poly_uint64 |
11546 | (TYPE_SIZE_UNIT (vectype)))) |
11547 | /* Aligned access to excess elements is OK if |
11548 | at least one element is accessed in the |
11549 | scalar loop. */ |
11550 | ; |
11551 | else |
11552 | { |
11553 | auto remain |
11554 | = ((group_size * vf - gap) |
11555 | - (vec_num * j + i) * nunits); |
11556 | /* remain should now be > 0 and < nunits. */ |
11557 | unsigned num; |
			  if (constant_multiple_p (nunits, remain, &num))
11559 | { |
11560 | tree ptype; |
			      new_vtype
				= vector_vector_composition_type (vectype,
								  num,
								  &ptype);
11565 | if (new_vtype) |
11566 | ltype = ptype; |
11567 | } |
11568 | /* Else use multiple loads or a masked load? */ |
11569 | } |
11570 | } |
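		  /* E.g. with nunits 4 and two excess elements, REMAIN is
		     2, NUM is 2, and a half-width vector is loaded and
		     composed back to the full vectype below.  */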
11571 | tree offset |
11572 | = (dataref_offset ? dataref_offset |
11573 | : build_int_cst (ref_type, 0)); |
11574 | if (!ltype) |
11575 | ; |
11576 | else if (ltype != vectype |
11577 | && memory_access_type == VMAT_CONTIGUOUS_REVERSE) |
11578 | { |
11579 | poly_uint64 gap_offset |
11580 | = (tree_to_poly_uint64 (TYPE_SIZE_UNIT (vectype)) |
11581 | - tree_to_poly_uint64 (TYPE_SIZE_UNIT (ltype))); |
		      tree gapcst = build_int_cstu (ref_type, gap_offset);
11583 | offset = size_binop (PLUS_EXPR, offset, gapcst); |
11584 | } |
11585 | if (ltype) |
11586 | { |
11587 | data_ref |
11588 | = fold_build2 (MEM_REF, ltype, dataref_ptr, offset); |
11589 | if (alignment_support_scheme == dr_aligned) |
11590 | ; |
11591 | else |
11592 | TREE_TYPE (data_ref) |
11593 | = build_aligned_type (TREE_TYPE (data_ref), |
11594 | align * BITS_PER_UNIT); |
11595 | } |
11596 | if (!ltype) |
11597 | data_ref = build_constructor (vectype, NULL); |
11598 | else if (ltype != vectype) |
11599 | { |
11600 | vect_copy_ref_info (data_ref, |
11601 | DR_REF (first_dr_info->dr)); |
		      tree tem = make_ssa_name (ltype);
		      new_stmt = gimple_build_assign (tem, data_ref);
		      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
						   gsi);
11606 | data_ref = NULL; |
11607 | vec<constructor_elt, va_gc> *v; |
		      /* We've computed NUM above - either statically two,
			 or via constant_multiple_p.  */
11610 | unsigned num |
			= (exact_div (tree_to_poly_uint64
					(TYPE_SIZE_UNIT (vectype)),
				      tree_to_poly_uint64
					(TYPE_SIZE_UNIT (ltype)))
			   .to_constant ());
		      vec_alloc (v, num);
11617 | if (memory_access_type == VMAT_CONTIGUOUS_REVERSE) |
11618 | { |
11619 | while (--num) |
11620 | CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, |
11621 | build_zero_cst (ltype)); |
11622 | CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem); |
11623 | } |
11624 | else |
11625 | { |
11626 | CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem); |
11627 | while (--num) |
11628 | CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, |
11629 | build_zero_cst (ltype)); |
11630 | } |
11631 | gcc_assert (new_vtype != NULL_TREE); |
11632 | if (new_vtype == vectype) |
11633 | new_stmt = gimple_build_assign ( |
11634 | vec_dest, build_constructor (vectype, v)); |
11635 | else |
11636 | { |
			  tree new_vname = make_ssa_name (new_vtype);
11638 | new_stmt = gimple_build_assign ( |
11639 | new_vname, build_constructor (new_vtype, v)); |
11640 | vect_finish_stmt_generation (vinfo, stmt_info, |
						       new_stmt, gsi);
11642 | new_stmt = gimple_build_assign ( |
11643 | vec_dest, |
11644 | build1 (VIEW_CONVERT_EXPR, vectype, new_vname)); |
11645 | } |
11646 | } |
11647 | } |
11648 | break; |
11649 | } |
11650 | case dr_explicit_realign: |
11651 | { |
11652 | if (costing_p) |
11653 | break; |
11654 | tree ptr, bump; |
11655 | |
11656 | tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype)); |
11657 | |
11658 | if (compute_in_loop) |
11659 | msq = vect_setup_realignment (vinfo, first_stmt_info, gsi, |
11660 | &realignment_token, |
11661 | dr_explicit_realign, |
11662 | dataref_ptr, NULL); |
11663 | |
11664 | if (TREE_CODE (dataref_ptr) == SSA_NAME) |
	      ptr = copy_ssa_name (dataref_ptr);
11666 | else |
11667 | ptr = make_ssa_name (TREE_TYPE (dataref_ptr)); |
11668 | // For explicit realign the target alignment should be |
11669 | // known at compile time. |
11670 | unsigned HOST_WIDE_INT align |
11671 | = DR_TARGET_ALIGNMENT (first_dr_info).to_constant (); |
11672 | new_stmt = gimple_build_assign ( |
11673 | ptr, BIT_AND_EXPR, dataref_ptr, |
11674 | build_int_cst (TREE_TYPE (dataref_ptr), |
11675 | -(HOST_WIDE_INT) align)); |
	    vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11677 | data_ref |
11678 | = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0)); |
11679 | vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr)); |
11680 | vec_dest = vect_create_destination_var (scalar_dest, vectype); |
11681 | new_stmt = gimple_build_assign (vec_dest, data_ref); |
	    new_temp = make_ssa_name (vec_dest, new_stmt);
	    gimple_assign_set_lhs (new_stmt, new_temp);
	    gimple_move_vops (new_stmt, stmt_info->stmt);
	    vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11686 | msq = new_temp; |
11687 | |
11688 | bump = size_binop (MULT_EXPR, vs, TYPE_SIZE_UNIT (elem_type)); |
11689 | bump = size_binop (MINUS_EXPR, bump, size_one_node); |
11690 | ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi, stmt_info, |
11691 | bump); |
11692 | new_stmt = gimple_build_assign ( |
11693 | NULL_TREE, BIT_AND_EXPR, ptr, |
11694 | build_int_cst (TREE_TYPE (ptr), -(HOST_WIDE_INT) align)); |
11695 | if (TREE_CODE (ptr) == SSA_NAME) |
	      ptr = copy_ssa_name (ptr, new_stmt);
	    else
	      ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
	    gimple_assign_set_lhs (new_stmt, ptr);
	    vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11701 | data_ref |
11702 | = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0)); |
11703 | break; |
11704 | } |
11705 | case dr_explicit_realign_optimized: |
11706 | { |
11707 | if (costing_p) |
11708 | break; |
11709 | if (TREE_CODE (dataref_ptr) == SSA_NAME) |
	      new_temp = copy_ssa_name (dataref_ptr);
11711 | else |
11712 | new_temp = make_ssa_name (TREE_TYPE (dataref_ptr)); |
11713 | // We should only be doing this if we know the target |
11714 | // alignment at compile time. |
11715 | unsigned HOST_WIDE_INT align |
11716 | = DR_TARGET_ALIGNMENT (first_dr_info).to_constant (); |
11717 | new_stmt = gimple_build_assign ( |
11718 | new_temp, BIT_AND_EXPR, dataref_ptr, |
11719 | build_int_cst (TREE_TYPE (dataref_ptr), |
11720 | -(HOST_WIDE_INT) align)); |
	    vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11722 | data_ref = build2 (MEM_REF, vectype, new_temp, |
11723 | build_int_cst (ref_type, 0)); |
11724 | break; |
11725 | } |
11726 | default: |
11727 | gcc_unreachable (); |
11728 | } |
11729 | |
	  /* Cost the vector load created above, in one common place for
	     all alignment support schemes.  */
11732 | if (costing_p) |
11733 | { |
	      /* For a grouped load with VMAT_CONTIGUOUS_PERMUTE we only
		 need to take care of the first stmt, whose stmt_info is
		 first_stmt_info: iterating vec_num times on it covers
		 the cost of the remaining ones, consistent with the
		 transform.  The prologue cost for realign only needs to
		 be counted once for the whole group.  */
11740 | bool first_stmt_info_p = first_stmt_info == stmt_info; |
11741 | bool add_realign_cost = first_stmt_info_p && i == 0; |
11742 | if (memory_access_type == VMAT_CONTIGUOUS |
11743 | || memory_access_type == VMAT_CONTIGUOUS_REVERSE |
11744 | || (memory_access_type == VMAT_CONTIGUOUS_PERMUTE |
11745 | && (!grouped_load || first_stmt_info_p))) |
11746 | { |
11747 | /* Leave realign cases alone to keep them simple. */ |
11748 | if (alignment_support_scheme == dr_explicit_realign_optimized |
11749 | || alignment_support_scheme == dr_explicit_realign) |
11750 | vect_get_load_cost (vinfo, stmt_info, ncopies: 1, |
11751 | alignment_support_scheme, misalignment, |
11752 | add_realign_cost, inside_cost: &inside_cost, |
11753 | prologue_cost: &prologue_cost, prologue_cost_vec: cost_vec, body_cost_vec: cost_vec, |
11754 | record_prologue_costs: true); |
11755 | else |
11756 | n_adjacent_loads++; |
11757 | } |
11758 | } |
11759 | else |
11760 | { |
11761 | vec_dest = vect_create_destination_var (scalar_dest, vectype); |
11762 | /* DATA_REF is null if we've already built the statement. */ |
11763 | if (data_ref) |
11764 | { |
11765 | vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr)); |
11766 | new_stmt = gimple_build_assign (vec_dest, data_ref); |
11767 | } |
11768 | new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt); |
11769 | gimple_set_lhs (new_stmt, new_temp); |
11770 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
11771 | } |
11772 | |
11773 | /* 3. Handle explicit realignment if necessary/supported. |
11774 | Create in loop: |
11775 | vec_dest = realign_load (msq, lsq, realignment_token) */ |
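A concrete sketch, assuming 16-byte vectors and an access from an
address A with A % 16 == 4: MSQ is the aligned vector loaded from
A & -16, LSQ the one loaded from (A + 15) & -16, and REALIGN_LOAD
concatenates the two and extracts the 16 misaligned bytes starting
at offset 4, steered by REALIGNMENT_TOKEN (e.g. a permute control
the target computes from A).  */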
11776 | if (!costing_p |
11777 | && (alignment_support_scheme == dr_explicit_realign_optimized |
11778 | || alignment_support_scheme == dr_explicit_realign)) |
11779 | { |
11780 | lsq = gimple_assign_lhs (gs: new_stmt); |
11781 | if (!realignment_token) |
11782 | realignment_token = dataref_ptr; |
11783 | vec_dest = vect_create_destination_var (scalar_dest, vectype); |
11784 | new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, msq, |
11785 | lsq, realignment_token); |
11786 | new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt); |
11787 | gimple_assign_set_lhs (gs: new_stmt, lhs: new_temp); |
11788 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
11789 | |
11790 | if (alignment_support_scheme == dr_explicit_realign_optimized) |
11791 | { |
11792 | gcc_assert (phi); |
11793 | if (i == vec_num - 1 && j == ncopies - 1) |
11794 | add_phi_arg (phi, lsq, loop_latch_edge (containing_loop), |
11795 | UNKNOWN_LOCATION); |
11796 | msq = lsq; |
11797 | } |
11798 | } |
11799 | |
11800 | if (memory_access_type == VMAT_CONTIGUOUS_REVERSE) |
11801 | { |
11802 | if (costing_p) |
11803 | inside_cost = record_stmt_cost (body_cost_vec: cost_vec, count: 1, kind: vec_perm, |
11804 | stmt_info, misalign: 0, where: vect_body); |
11805 | else |
11806 | { |
11807 | tree perm_mask = perm_mask_for_reverse (vectype); |
11808 | new_temp = permute_vec_elements (vinfo, x: new_temp, y: new_temp, |
11809 | mask_vec: perm_mask, stmt_info, gsi); |
11810 | new_stmt = SSA_NAME_DEF_STMT (new_temp); |
11811 | } |
11812 | } |
11813 | |
11814 | /* Collect vector loads and later create their permutation in |
11815 | vect_transform_grouped_load (). */ |
11816 | if (!costing_p && (grouped_load || slp_perm)) |
11817 | dr_chain.quick_push (obj: new_temp); |
11818 | |
11819 | /* Store vector loads in the corresponding SLP_NODE. */ |
11820 | if (!costing_p && slp && !slp_perm) |
11821 | slp_node->push_vec_def (def: new_stmt); |
11822 | |
/* With an SLP permutation we load the gaps as well; without
one we need to skip the gaps once we have fully loaded
all elements.  group_gap_adj is DR_GROUP_SIZE here.  */
11826 | group_elt += nunits; |
11827 | if (!costing_p |
11828 | && maybe_ne (a: group_gap_adj, b: 0U) |
11829 | && !slp_perm |
11830 | && known_eq (group_elt, group_size - group_gap_adj)) |
11831 | { |
11832 | poly_wide_int bump_val |
11833 | = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj); |
11834 | if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) |
11835 | == -1) |
11836 | bump_val = -bump_val; |
11837 | tree bump = wide_int_to_tree (sizetype, cst: bump_val); |
11838 | dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi, |
11839 | stmt_info, bump); |
11840 | group_elt = 0; |
11841 | } |
11842 | } |
11843 | /* Bump the vector pointer to account for a gap or for excess |
11844 | elements loaded for a permuted SLP load. */ |
11845 | if (!costing_p |
11846 | && maybe_ne (a: group_gap_adj, b: 0U) |
11847 | && slp_perm) |
11848 | { |
11849 | poly_wide_int bump_val |
11850 | = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj); |
11851 | if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) == -1) |
11852 | bump_val = -bump_val; |
11853 | tree bump = wide_int_to_tree (sizetype, cst: bump_val); |
11854 | dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi, |
11855 | stmt_info, bump); |
11856 | } |
11857 | |
11858 | if (slp && !slp_perm) |
11859 | continue; |
11860 | |
11861 | if (slp_perm) |
11862 | { |
11863 | unsigned n_perms; |
11864 | /* For SLP we know we've seen all possible uses of dr_chain so |
11865 | direct vect_transform_slp_perm_load to DCE the unused parts. |
11866 | ??? This is a hack to prevent compile-time issues as seen |
11867 | in PR101120 and friends. */ |
11868 | if (costing_p) |
11869 | { |
11870 | vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf, |
11871 | true, &n_perms, nullptr); |
11872 | inside_cost = record_stmt_cost (body_cost_vec: cost_vec, count: n_perms, kind: vec_perm, |
11873 | stmt_info, misalign: 0, where: vect_body); |
11874 | } |
11875 | else |
11876 | { |
11877 | bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, |
11878 | gsi, vf, false, &n_perms, |
11879 | nullptr, true); |
11880 | gcc_assert (ok); |
11881 | } |
11882 | } |
11883 | else |
11884 | { |
11885 | if (grouped_load) |
11886 | { |
11887 | gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE); |
11888 | /* We assume that the cost of a single load-lanes instruction |
11889 | is equivalent to the cost of DR_GROUP_SIZE separate loads. |
11890 | If a grouped access is instead being provided by a |
11891 | load-and-permute operation, include the cost of the |
11892 | permutes. */ |
11893 | if (costing_p && first_stmt_info == stmt_info) |
11894 | { |
/* Uses even and odd extract operations or shuffle
operations for each needed permute.  */
11897 | int group_size = DR_GROUP_SIZE (first_stmt_info); |
11898 | int nstmts = ceil_log2 (x: group_size) * group_size; |
11899 | inside_cost += record_stmt_cost (body_cost_vec: cost_vec, count: nstmts, kind: vec_perm, |
11900 | stmt_info, misalign: 0, where: vect_body); |
11901 | |
11902 | if (dump_enabled_p ()) |
11903 | dump_printf_loc (MSG_NOTE, vect_location, |
11904 | "vect_model_load_cost:" |
11905 | "strided group_size = %d .\n" , |
11906 | group_size); |
11907 | } |
11908 | else if (!costing_p) |
11909 | { |
11910 | vect_transform_grouped_load (vinfo, stmt_info, dr_chain, |
11911 | group_size, gsi); |
11912 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
11913 | } |
11914 | } |
11915 | else if (!costing_p) |
11916 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt); |
11917 | } |
11918 | dr_chain.release (); |
11919 | } |
11920 | if (!slp && !costing_p) |
11921 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
11922 | |
11923 | if (costing_p) |
11924 | { |
11925 | gcc_assert (memory_access_type == VMAT_CONTIGUOUS |
11926 | || memory_access_type == VMAT_CONTIGUOUS_REVERSE |
11927 | || memory_access_type == VMAT_CONTIGUOUS_PERMUTE); |
11928 | if (n_adjacent_loads > 0) |
11929 | vect_get_load_cost (vinfo, stmt_info, ncopies: n_adjacent_loads, |
11930 | alignment_support_scheme, misalignment, add_realign_cost: false, |
11931 | inside_cost: &inside_cost, prologue_cost: &prologue_cost, prologue_cost_vec: cost_vec, body_cost_vec: cost_vec, |
11932 | record_prologue_costs: true); |
11933 | if (dump_enabled_p ()) |
11934 | dump_printf_loc (MSG_NOTE, vect_location, |
11935 | "vect_model_load_cost: inside_cost = %u, " |
11936 | "prologue_cost = %u .\n" , |
11937 | inside_cost, prologue_cost); |
11938 | } |
11939 | |
11940 | return true; |
11941 | } |
11942 | |
11943 | /* Function vect_is_simple_cond. |
11944 | |
11945 | Input: |
11946 | LOOP - the loop that is being vectorized. |
11947 | COND - Condition that is checked for simple use. |
11948 | |
11949 | Output: |
11950 | *COMP_VECTYPE - the vector type for the comparison. |
11951 | *DTS - The def types for the arguments of the comparison |
11952 | |
Returns whether a COND can be vectorized.  Checks whether
condition operands are supportable using vect_is_simple_use.  */
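
/* For example (a hypothetical sketch), COND may be either a scalar
boolean SSA name such as _5 (the mask case) or an embedded
comparison such as a_1 < b_2, where each comparison operand may
also be an integer, real or fixed-point constant.  */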
11955 | |
11956 | static bool |
11957 | vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info, |
11958 | slp_tree slp_node, tree *comp_vectype, |
11959 | enum vect_def_type *dts, tree vectype) |
11960 | { |
11961 | tree lhs, rhs; |
11962 | tree vectype1 = NULL_TREE, vectype2 = NULL_TREE; |
11963 | slp_tree slp_op; |
11964 | |
11965 | /* Mask case. */ |
11966 | if (TREE_CODE (cond) == SSA_NAME |
11967 | && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond))) |
11968 | { |
11969 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond, |
11970 | &slp_op, &dts[0], comp_vectype) |
11971 | || !*comp_vectype |
11972 | || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype)) |
11973 | return false; |
11974 | return true; |
11975 | } |
11976 | |
11977 | if (!COMPARISON_CLASS_P (cond)) |
11978 | return false; |
11979 | |
11980 | lhs = TREE_OPERAND (cond, 0); |
11981 | rhs = TREE_OPERAND (cond, 1); |
11982 | |
11983 | if (TREE_CODE (lhs) == SSA_NAME) |
11984 | { |
11985 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, |
11986 | &lhs, &slp_op, &dts[0], &vectype1)) |
11987 | return false; |
11988 | } |
11989 | else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST |
11990 | || TREE_CODE (lhs) == FIXED_CST) |
11991 | dts[0] = vect_constant_def; |
11992 | else |
11993 | return false; |
11994 | |
11995 | if (TREE_CODE (rhs) == SSA_NAME) |
11996 | { |
11997 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, |
11998 | &rhs, &slp_op, &dts[1], &vectype2)) |
11999 | return false; |
12000 | } |
12001 | else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST |
12002 | || TREE_CODE (rhs) == FIXED_CST) |
12003 | dts[1] = vect_constant_def; |
12004 | else |
12005 | return false; |
12006 | |
12007 | if (vectype1 && vectype2 |
12008 | && maybe_ne (a: TYPE_VECTOR_SUBPARTS (node: vectype1), |
12009 | b: TYPE_VECTOR_SUBPARTS (node: vectype2))) |
12010 | return false; |
12011 | |
12012 | *comp_vectype = vectype1 ? vectype1 : vectype2; |
12013 | /* Invariant comparison. */ |
12014 | if (! *comp_vectype) |
12015 | { |
12016 | tree scalar_type = TREE_TYPE (lhs); |
12017 | if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)) |
12018 | *comp_vectype = truth_type_for (vectype); |
12019 | else |
12020 | { |
12021 | /* If we can widen the comparison to match vectype do so. */ |
12022 | if (INTEGRAL_TYPE_P (scalar_type) |
12023 | && !slp_node |
12024 | && tree_int_cst_lt (TYPE_SIZE (scalar_type), |
12025 | TYPE_SIZE (TREE_TYPE (vectype)))) |
12026 | scalar_type = build_nonstandard_integer_type |
12027 | (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type)); |
12028 | *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type, |
12029 | slp_node); |
12030 | } |
12031 | } |
12032 | |
12033 | return true; |
12034 | } |
12035 | |
12036 | /* vectorizable_condition. |
12037 | |
Check if STMT_INFO is a conditional modify expression that can be vectorized.
12039 | If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized |
12040 | stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it |
12041 | at GSI. |
12042 | |
12043 | When STMT_INFO is vectorized as a nested cycle, for_reduction is true. |
12044 | |
12045 | Return true if STMT_INFO is vectorizable in this way. */ |
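
For example (a sketch with hypothetical SSA names), the scalar

iftmp_3 = a_1 < b_2 ? x_4 : y_5;

is vectorized as

vec_cmp_6 = vect_a_1 < vect_b_2;
vect_iftmp_7 = VEC_COND_EXPR <vec_cmp_6, vect_x_4, vect_y_5>;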
12046 | |
12047 | static bool |
12048 | vectorizable_condition (vec_info *vinfo, |
12049 | stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, |
12050 | gimple **vec_stmt, |
12051 | slp_tree slp_node, stmt_vector_for_cost *cost_vec) |
12052 | { |
12053 | tree scalar_dest = NULL_TREE; |
12054 | tree vec_dest = NULL_TREE; |
12055 | tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE; |
12056 | tree then_clause, else_clause; |
12057 | tree comp_vectype = NULL_TREE; |
12058 | tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE; |
12059 | tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE; |
12060 | tree vec_compare; |
12061 | tree new_temp; |
12062 | loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo); |
12063 | enum vect_def_type dts[4] |
12064 | = {vect_unknown_def_type, vect_unknown_def_type, |
12065 | vect_unknown_def_type, vect_unknown_def_type}; |
12066 | int ndts = 4; |
12067 | int ncopies; |
12068 | int vec_num; |
12069 | enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR; |
12070 | int i; |
12071 | bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo); |
12072 | vec<tree> vec_oprnds0 = vNULL; |
12073 | vec<tree> vec_oprnds1 = vNULL; |
12074 | vec<tree> vec_oprnds2 = vNULL; |
12075 | vec<tree> vec_oprnds3 = vNULL; |
12076 | tree vec_cmp_type; |
12077 | bool masked = false; |
12078 | |
12079 | if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) |
12080 | return false; |
12081 | |
/* Is this a vectorizable conditional operation?  */
12083 | gassign *stmt = dyn_cast <gassign *> (p: stmt_info->stmt); |
12084 | if (!stmt) |
12085 | return false; |
12086 | |
12087 | code = gimple_assign_rhs_code (gs: stmt); |
12088 | if (code != COND_EXPR) |
12089 | return false; |
12090 | |
12091 | stmt_vec_info reduc_info = NULL; |
12092 | int reduc_index = -1; |
12093 | vect_reduction_type reduction_type = TREE_CODE_REDUCTION; |
12094 | bool for_reduction |
12095 | = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL; |
12096 | if (for_reduction) |
12097 | { |
12098 | if (slp_node) |
12099 | return false; |
12100 | reduc_info = info_for_reduction (vinfo, stmt_info); |
12101 | reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info); |
12102 | reduc_index = STMT_VINFO_REDUC_IDX (stmt_info); |
12103 | gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION |
12104 | || reduc_index != -1); |
12105 | } |
12106 | else |
12107 | { |
12108 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) |
12109 | return false; |
12110 | } |
12111 | |
12112 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
12113 | tree vectype1 = NULL_TREE, vectype2 = NULL_TREE; |
12114 | |
12115 | if (slp_node) |
12116 | { |
12117 | ncopies = 1; |
12118 | vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); |
12119 | } |
12120 | else |
12121 | { |
12122 | ncopies = vect_get_num_copies (loop_vinfo, vectype); |
12123 | vec_num = 1; |
12124 | } |
12125 | |
12126 | gcc_assert (ncopies >= 1); |
12127 | if (for_reduction && ncopies > 1) |
12128 | return false; /* FORNOW */ |
12129 | |
12130 | cond_expr = gimple_assign_rhs1 (gs: stmt); |
12131 | |
12132 | if (!vect_is_simple_cond (cond: cond_expr, vinfo, stmt_info, slp_node, |
12133 | comp_vectype: &comp_vectype, dts: &dts[0], vectype) |
12134 | || !comp_vectype) |
12135 | return false; |
12136 | |
12137 | unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0; |
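/* With an embedded comparison the condition uses scalar operands 0
and 1, so the then and else clauses sit at operands 1 + OP_ADJUST
and 2 + OP_ADJUST respectively.  */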
12138 | slp_tree then_slp_node, else_slp_node; |
12139 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust, |
12140 | &then_clause, &then_slp_node, &dts[2], &vectype1)) |
12141 | return false; |
12142 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust, |
12143 | &else_clause, &else_slp_node, &dts[3], &vectype2)) |
12144 | return false; |
12145 | |
12146 | if (vectype1 && !useless_type_conversion_p (vectype, vectype1)) |
12147 | return false; |
12148 | |
12149 | if (vectype2 && !useless_type_conversion_p (vectype, vectype2)) |
12150 | return false; |
12151 | |
12152 | masked = !COMPARISON_CLASS_P (cond_expr); |
12153 | vec_cmp_type = truth_type_for (comp_vectype); |
12154 | |
12155 | if (vec_cmp_type == NULL_TREE) |
12156 | return false; |
12157 | |
12158 | cond_code = TREE_CODE (cond_expr); |
12159 | if (!masked) |
12160 | { |
12161 | cond_expr0 = TREE_OPERAND (cond_expr, 0); |
12162 | cond_expr1 = TREE_OPERAND (cond_expr, 1); |
12163 | } |
12164 | |
12165 | /* For conditional reductions, the "then" value needs to be the candidate |
12166 | value calculated by this iteration while the "else" value needs to be |
12167 | the result carried over from previous iterations. If the COND_EXPR |
12168 | is the other way around, we need to swap it. */ |
12169 | bool must_invert_cmp_result = false; |
12170 | if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1) |
12171 | { |
12172 | if (masked) |
12173 | must_invert_cmp_result = true; |
12174 | else |
12175 | { |
12176 | bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0)); |
12177 | tree_code new_code = invert_tree_comparison (cond_code, honor_nans); |
12178 | if (new_code == ERROR_MARK) |
12179 | must_invert_cmp_result = true; |
12180 | else |
12181 | { |
12182 | cond_code = new_code; |
12183 | /* Make sure we don't accidentally use the old condition. */ |
12184 | cond_expr = NULL_TREE; |
12185 | } |
12186 | } |
12187 | std::swap (a&: then_clause, b&: else_clause); |
12188 | } |
12189 | |
12190 | if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype)) |
12191 | { |
12192 | /* Boolean values may have another representation in vectors |
12193 | and therefore we prefer bit operations over comparison for |
12194 | them (which also works for scalar masks). We store opcodes |
12195 | to use in bitop1 and bitop2. Statement is vectorized as |
12196 | BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2) |
12197 | depending on bitop1 and bitop2 arity. */ |
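/* On single-bit masks the comparisons reduce to bit operations; a
sketch of the mapping implemented by the switch below:
a >  b  ->  a & ~b         a <  b  ->  b & ~a
a >= b  ->  a | ~b         a <= b  ->  b | ~a
a != b  ->  a ^ b          a == b  ->  ~(a ^ b)  */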
12198 | switch (cond_code) |
12199 | { |
12200 | case GT_EXPR: |
12201 | bitop1 = BIT_NOT_EXPR; |
12202 | bitop2 = BIT_AND_EXPR; |
12203 | break; |
12204 | case GE_EXPR: |
12205 | bitop1 = BIT_NOT_EXPR; |
12206 | bitop2 = BIT_IOR_EXPR; |
12207 | break; |
12208 | case LT_EXPR: |
12209 | bitop1 = BIT_NOT_EXPR; |
12210 | bitop2 = BIT_AND_EXPR; |
12211 | std::swap (a&: cond_expr0, b&: cond_expr1); |
12212 | break; |
12213 | case LE_EXPR: |
12214 | bitop1 = BIT_NOT_EXPR; |
12215 | bitop2 = BIT_IOR_EXPR; |
12216 | std::swap (a&: cond_expr0, b&: cond_expr1); |
12217 | break; |
12218 | case NE_EXPR: |
12219 | bitop1 = BIT_XOR_EXPR; |
12220 | break; |
12221 | case EQ_EXPR: |
12222 | bitop1 = BIT_XOR_EXPR; |
12223 | bitop2 = BIT_NOT_EXPR; |
12224 | break; |
12225 | default: |
12226 | return false; |
12227 | } |
12228 | cond_code = SSA_NAME; |
12229 | } |
12230 | |
12231 | if (TREE_CODE_CLASS (cond_code) == tcc_comparison |
12232 | && reduction_type == EXTRACT_LAST_REDUCTION |
12233 | && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code)) |
12234 | { |
12235 | if (dump_enabled_p ()) |
12236 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
12237 | "reduction comparison operation not supported.\n" ); |
12238 | return false; |
12239 | } |
12240 | |
12241 | if (!vec_stmt) |
12242 | { |
12243 | if (bitop1 != NOP_EXPR) |
12244 | { |
12245 | machine_mode mode = TYPE_MODE (comp_vectype); |
12246 | optab optab; |
12247 | |
12248 | optab = optab_for_tree_code (bitop1, comp_vectype, optab_default); |
12249 | if (!optab || optab_handler (op: optab, mode) == CODE_FOR_nothing) |
12250 | return false; |
12251 | |
12252 | if (bitop2 != NOP_EXPR) |
12253 | { |
12254 | optab = optab_for_tree_code (bitop2, comp_vectype, |
12255 | optab_default); |
12256 | if (!optab || optab_handler (op: optab, mode) == CODE_FOR_nothing) |
12257 | return false; |
12258 | } |
12259 | } |
12260 | |
12261 | vect_cost_for_stmt kind = vector_stmt; |
12262 | if (reduction_type == EXTRACT_LAST_REDUCTION) |
12263 | /* Count one reduction-like operation per vector. */ |
12264 | kind = vec_to_scalar; |
12265 | else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code) |
12266 | && (masked |
12267 | || (!expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, |
12268 | cond_code) |
12269 | || !expand_vec_cond_expr_p (vectype, vec_cmp_type, |
12270 | ERROR_MARK)))) |
12271 | return false; |
12272 | |
12273 | if (slp_node |
12274 | && (!vect_maybe_update_slp_op_vectype |
12275 | (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype) |
12276 | || (op_adjust == 1 |
12277 | && !vect_maybe_update_slp_op_vectype |
12278 | (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype)) |
12279 | || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype) |
12280 | || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype))) |
12281 | { |
12282 | if (dump_enabled_p ()) |
12283 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
12284 | "incompatible vector types for invariants\n" ); |
12285 | return false; |
12286 | } |
12287 | |
12288 | if (loop_vinfo && for_reduction |
12289 | && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) |
12290 | { |
12291 | if (reduction_type == EXTRACT_LAST_REDUCTION) |
12292 | { |
12293 | if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST, |
12294 | vectype, OPTIMIZE_FOR_SPEED)) |
12295 | vect_record_loop_len (loop_vinfo, |
12296 | &LOOP_VINFO_LENS (loop_vinfo), |
12297 | ncopies * vec_num, vectype, 1); |
12298 | else |
12299 | vect_record_loop_mask (loop_vinfo, |
12300 | &LOOP_VINFO_MASKS (loop_vinfo), |
12301 | ncopies * vec_num, vectype, NULL); |
12302 | } |
12303 | /* Extra inactive lanes should be safe for vect_nested_cycle. */ |
12304 | else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle) |
12305 | { |
12306 | if (dump_enabled_p ()) |
12307 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
12308 | "conditional reduction prevents the use" |
12309 | " of partial vectors.\n" ); |
12310 | LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; |
12311 | } |
12312 | } |
12313 | |
12314 | STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type; |
12315 | vect_model_simple_cost (vinfo, stmt_info, ncopies, dt: dts, ndts, node: slp_node, |
12316 | cost_vec, kind); |
12317 | return true; |
12318 | } |
12319 | |
12320 | /* Transform. */ |
12321 | |
12322 | /* Handle def. */ |
12323 | scalar_dest = gimple_assign_lhs (gs: stmt); |
12324 | if (reduction_type != EXTRACT_LAST_REDUCTION) |
12325 | vec_dest = vect_create_destination_var (scalar_dest, vectype); |
12326 | |
12327 | bool swap_cond_operands = false; |
12328 | |
12329 | /* See whether another part of the vectorized code applies a loop |
12330 | mask to the condition, or to its inverse. */ |
12331 | |
12332 | vec_loop_masks *masks = NULL; |
12333 | vec_loop_lens *lens = NULL; |
12334 | if (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)) |
12335 | { |
12336 | if (reduction_type == EXTRACT_LAST_REDUCTION) |
12337 | lens = &LOOP_VINFO_LENS (loop_vinfo); |
12338 | } |
12339 | else if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) |
12340 | { |
12341 | if (reduction_type == EXTRACT_LAST_REDUCTION) |
12342 | masks = &LOOP_VINFO_MASKS (loop_vinfo); |
12343 | else |
12344 | { |
12345 | scalar_cond_masked_key cond (cond_expr, ncopies); |
12346 | if (loop_vinfo->scalar_cond_masked_set.contains (k: cond)) |
12347 | masks = &LOOP_VINFO_MASKS (loop_vinfo); |
12348 | else |
12349 | { |
12350 | bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0)); |
12351 | tree_code orig_code = cond.code; |
12352 | cond.code = invert_tree_comparison (cond.code, honor_nans); |
12353 | if (!masked && loop_vinfo->scalar_cond_masked_set.contains (k: cond)) |
12354 | { |
12355 | masks = &LOOP_VINFO_MASKS (loop_vinfo); |
12356 | cond_code = cond.code; |
12357 | swap_cond_operands = true; |
12358 | } |
12359 | else |
12360 | { |
12361 | /* Try the inverse of the current mask. We check if the |
12362 | inverse mask is live and if so we generate a negate of |
12363 | the current mask such that we still honor NaNs. */ |
12364 | cond.inverted_p = true; |
12365 | cond.code = orig_code; |
12366 | if (loop_vinfo->scalar_cond_masked_set.contains (k: cond)) |
12367 | { |
12368 | masks = &LOOP_VINFO_MASKS (loop_vinfo); |
12369 | cond_code = cond.code; |
12370 | swap_cond_operands = true; |
12371 | must_invert_cmp_result = true; |
12372 | } |
12373 | } |
12374 | } |
12375 | } |
12376 | } |
12377 | |
12378 | /* Handle cond expr. */ |
12379 | if (masked) |
12380 | vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, |
12381 | op0: cond_expr, vectype0: comp_vectype, vec_oprnds0: &vec_oprnds0, |
12382 | op1: then_clause, vectype1: vectype, vec_oprnds1: &vec_oprnds2, |
12383 | op2: reduction_type != EXTRACT_LAST_REDUCTION |
12384 | ? else_clause : NULL, vectype2: vectype, vec_oprnds2: &vec_oprnds3); |
12385 | else |
12386 | vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, |
12387 | op0: cond_expr0, vectype0: comp_vectype, vec_oprnds0: &vec_oprnds0, |
12388 | op1: cond_expr1, vectype1: comp_vectype, vec_oprnds1: &vec_oprnds1, |
12389 | op2: then_clause, vectype2: vectype, vec_oprnds2: &vec_oprnds2, |
12390 | op3: reduction_type != EXTRACT_LAST_REDUCTION |
12391 | ? else_clause : NULL, vectype3: vectype, vec_oprnds3: &vec_oprnds3); |
12392 | |
12393 | /* Arguments are ready. Create the new vector stmt. */ |
12394 | FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs) |
12395 | { |
12396 | vec_then_clause = vec_oprnds2[i]; |
12397 | if (reduction_type != EXTRACT_LAST_REDUCTION) |
12398 | vec_else_clause = vec_oprnds3[i]; |
12399 | |
12400 | if (swap_cond_operands) |
12401 | std::swap (a&: vec_then_clause, b&: vec_else_clause); |
12402 | |
12403 | if (masked) |
12404 | vec_compare = vec_cond_lhs; |
12405 | else |
12406 | { |
12407 | vec_cond_rhs = vec_oprnds1[i]; |
12408 | if (bitop1 == NOP_EXPR) |
12409 | { |
12410 | gimple_seq stmts = NULL; |
12411 | vec_compare = gimple_build (seq: &stmts, code: cond_code, type: vec_cmp_type, |
12412 | ops: vec_cond_lhs, ops: vec_cond_rhs); |
12413 | gsi_insert_before (gsi, stmts, GSI_SAME_STMT); |
12414 | } |
12415 | else |
12416 | { |
12417 | new_temp = make_ssa_name (var: vec_cmp_type); |
12418 | gassign *new_stmt; |
12419 | if (bitop1 == BIT_NOT_EXPR) |
12420 | new_stmt = gimple_build_assign (new_temp, bitop1, |
12421 | vec_cond_rhs); |
12422 | else |
12423 | new_stmt |
12424 | = gimple_build_assign (new_temp, bitop1, vec_cond_lhs, |
12425 | vec_cond_rhs); |
12426 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
12427 | if (bitop2 == NOP_EXPR) |
12428 | vec_compare = new_temp; |
12429 | else if (bitop2 == BIT_NOT_EXPR |
12430 | && reduction_type != EXTRACT_LAST_REDUCTION) |
12431 | { |
12432 | /* Instead of doing ~x ? y : z do x ? z : y. */ |
12433 | vec_compare = new_temp; |
12434 | std::swap (a&: vec_then_clause, b&: vec_else_clause); |
12435 | } |
12436 | else |
12437 | { |
12438 | vec_compare = make_ssa_name (var: vec_cmp_type); |
12439 | if (bitop2 == BIT_NOT_EXPR) |
12440 | new_stmt |
12441 | = gimple_build_assign (vec_compare, bitop2, new_temp); |
12442 | else |
12443 | new_stmt |
12444 | = gimple_build_assign (vec_compare, bitop2, |
12445 | vec_cond_lhs, new_temp); |
12446 | vect_finish_stmt_generation (vinfo, stmt_info, |
12447 | vec_stmt: new_stmt, gsi); |
12448 | } |
12449 | } |
12450 | } |
12451 | |
12452 | /* If we decided to apply a loop mask to the result of the vector |
12453 | comparison, AND the comparison with the mask now. Later passes |
should then be able to reuse the AND results between multiple
12455 | vector statements. |
12456 | |
12457 | For example: |
12458 | for (int i = 0; i < 100; ++i) |
12459 | x[i] = y[i] ? z[i] : 10; |
12460 | |
12461 | results in following optimized GIMPLE: |
12462 | |
12463 | mask__35.8_43 = vect__4.7_41 != { 0, ... }; |
12464 | vec_mask_and_46 = loop_mask_40 & mask__35.8_43; |
12465 | _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B]; |
12466 | vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46); |
12467 | vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46, |
12468 | vect_iftmp.11_47, { 10, ... }>; |
12469 | |
instead of using masked and unmasked forms of
12471 | vec != { 0, ... } (masked in the MASK_LOAD, |
12472 | unmasked in the VEC_COND_EXPR). */ |
12473 | |
12474 | /* Force vec_compare to be an SSA_NAME rather than a comparison, |
12475 | in cases where that's necessary. */ |
12476 | |
12477 | tree len = NULL_TREE, bias = NULL_TREE; |
12478 | if (masks || lens || reduction_type == EXTRACT_LAST_REDUCTION) |
12479 | { |
12480 | if (!is_gimple_val (vec_compare)) |
12481 | { |
12482 | tree vec_compare_name = make_ssa_name (var: vec_cmp_type); |
12483 | gassign *new_stmt = gimple_build_assign (vec_compare_name, |
12484 | vec_compare); |
12485 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
12486 | vec_compare = vec_compare_name; |
12487 | } |
12488 | |
12489 | if (must_invert_cmp_result) |
12490 | { |
12491 | tree vec_compare_name = make_ssa_name (var: vec_cmp_type); |
12492 | gassign *new_stmt = gimple_build_assign (vec_compare_name, |
12493 | BIT_NOT_EXPR, |
12494 | vec_compare); |
12495 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
12496 | vec_compare = vec_compare_name; |
12497 | } |
12498 | |
12499 | if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST, |
12500 | vectype, OPTIMIZE_FOR_SPEED)) |
12501 | { |
12502 | if (lens) |
12503 | { |
12504 | len = vect_get_loop_len (loop_vinfo, gsi, lens, |
12505 | vec_num * ncopies, vectype, i, 1); |
12506 | signed char biasval |
12507 | = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); |
12508 | bias = build_int_cst (intQI_type_node, biasval); |
12509 | } |
12510 | else |
12511 | { |
12512 | len = size_int (TYPE_VECTOR_SUBPARTS (vectype)); |
12513 | bias = build_int_cst (intQI_type_node, 0); |
12514 | } |
12515 | } |
12516 | if (masks) |
12517 | { |
12518 | tree loop_mask |
12519 | = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num * ncopies, |
12520 | vectype, i); |
12521 | tree tmp2 = make_ssa_name (var: vec_cmp_type); |
12522 | gassign *g |
12523 | = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare, |
12524 | loop_mask); |
12525 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: g, gsi); |
12526 | vec_compare = tmp2; |
12527 | } |
12528 | } |
12529 | |
12530 | gimple *new_stmt; |
12531 | if (reduction_type == EXTRACT_LAST_REDUCTION) |
12532 | { |
12533 | gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt; |
12534 | tree lhs = gimple_get_lhs (old_stmt); |
12535 | if (len) |
12536 | new_stmt = gimple_build_call_internal |
12537 | (IFN_LEN_FOLD_EXTRACT_LAST, 5, else_clause, vec_compare, |
12538 | vec_then_clause, len, bias); |
12539 | else |
12540 | new_stmt = gimple_build_call_internal |
12541 | (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare, |
12542 | vec_then_clause); |
12543 | gimple_call_set_lhs (gs: new_stmt, lhs); |
12544 | SSA_NAME_DEF_STMT (lhs) = new_stmt; |
12545 | if (old_stmt == gsi_stmt (i: *gsi)) |
12546 | vect_finish_replace_stmt (vinfo, stmt_info, vec_stmt: new_stmt); |
12547 | else |
12548 | { |
12549 | /* In this case we're moving the definition to later in the |
12550 | block. That doesn't matter because the only uses of the |
12551 | lhs are in phi statements. */ |
12552 | gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt); |
12553 | gsi_remove (&old_gsi, true); |
12554 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
12555 | } |
12556 | } |
12557 | else |
12558 | { |
12559 | new_temp = make_ssa_name (var: vec_dest); |
12560 | new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare, |
12561 | vec_then_clause, vec_else_clause); |
12562 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
12563 | } |
12564 | if (slp_node) |
12565 | slp_node->push_vec_def (def: new_stmt); |
12566 | else |
12567 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt); |
12568 | } |
12569 | |
12570 | if (!slp_node) |
12571 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
12572 | |
12573 | vec_oprnds0.release (); |
12574 | vec_oprnds1.release (); |
12575 | vec_oprnds2.release (); |
12576 | vec_oprnds3.release (); |
12577 | |
12578 | return true; |
12579 | } |
12580 | |
12581 | /* Helper of vectorizable_comparison. |
12582 | |
Check if STMT_INFO is a comparison expression CODE that can be vectorized.
12584 | If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized |
12585 | comparison, put it in VEC_STMT, and insert it at GSI. |
12586 | |
12587 | Return true if STMT_INFO is vectorizable in this way. */ |
12588 | |
12589 | static bool |
12590 | vectorizable_comparison_1 (vec_info *vinfo, tree vectype, |
12591 | stmt_vec_info stmt_info, tree_code code, |
12592 | gimple_stmt_iterator *gsi, gimple **vec_stmt, |
12593 | slp_tree slp_node, stmt_vector_for_cost *cost_vec) |
12594 | { |
12595 | tree lhs, rhs1, rhs2; |
12596 | tree vectype1 = NULL_TREE, vectype2 = NULL_TREE; |
12597 | tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE; |
12598 | tree new_temp; |
12599 | loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo); |
12600 | enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type}; |
12601 | int ndts = 2; |
12602 | poly_uint64 nunits; |
12603 | int ncopies; |
12604 | enum tree_code bitop1 = NOP_EXPR, bitop2 = NOP_EXPR; |
12605 | int i; |
12606 | bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo); |
12607 | vec<tree> vec_oprnds0 = vNULL; |
12608 | vec<tree> vec_oprnds1 = vNULL; |
12609 | tree mask_type; |
12610 | tree mask = NULL_TREE; |
12611 | |
12612 | if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) |
12613 | return false; |
12614 | |
12615 | if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)) |
12616 | return false; |
12617 | |
12618 | mask_type = vectype; |
12619 | nunits = TYPE_VECTOR_SUBPARTS (node: vectype); |
12620 | |
12621 | if (slp_node) |
12622 | ncopies = 1; |
12623 | else |
12624 | ncopies = vect_get_num_copies (loop_vinfo, vectype); |
12625 | |
12626 | gcc_assert (ncopies >= 1); |
12627 | |
12628 | if (TREE_CODE_CLASS (code) != tcc_comparison) |
12629 | return false; |
12630 | |
12631 | slp_tree slp_rhs1, slp_rhs2; |
12632 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, |
12633 | 0, &rhs1, &slp_rhs1, &dts[0], &vectype1)) |
12634 | return false; |
12635 | |
12636 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, |
12637 | 1, &rhs2, &slp_rhs2, &dts[1], &vectype2)) |
12638 | return false; |
12639 | |
12640 | if (vectype1 && vectype2 |
12641 | && maybe_ne (a: TYPE_VECTOR_SUBPARTS (node: vectype1), |
12642 | b: TYPE_VECTOR_SUBPARTS (node: vectype2))) |
12643 | return false; |
12644 | |
12645 | vectype = vectype1 ? vectype1 : vectype2; |
12646 | |
12647 | /* Invariant comparison. */ |
12648 | if (!vectype) |
12649 | { |
12650 | if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))) |
12651 | vectype = mask_type; |
12652 | else |
12653 | vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1), |
12654 | slp_node); |
12655 | if (!vectype || maybe_ne (a: TYPE_VECTOR_SUBPARTS (node: vectype), b: nunits)) |
12656 | return false; |
12657 | } |
12658 | else if (maybe_ne (a: nunits, b: TYPE_VECTOR_SUBPARTS (node: vectype))) |
12659 | return false; |
12660 | |
12661 | /* Can't compare mask and non-mask types. */ |
12662 | if (vectype1 && vectype2 |
12663 | && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2))) |
12664 | return false; |
12665 | |
12666 | /* Boolean values may have another representation in vectors |
12667 | and therefore we prefer bit operations over comparison for |
12668 | them (which also works for scalar masks). We store opcodes |
12669 | to use in bitop1 and bitop2. Statement is vectorized as |
12670 | BITOP2 (rhs1 BITOP1 rhs2) or |
12671 | rhs1 BITOP2 (BITOP1 rhs2) |
12672 | depending on bitop1 and bitop2 arity. */ |
12673 | bool swap_p = false; |
12674 | if (VECTOR_BOOLEAN_TYPE_P (vectype)) |
12675 | { |
12676 | if (code == GT_EXPR) |
12677 | { |
12678 | bitop1 = BIT_NOT_EXPR; |
12679 | bitop2 = BIT_AND_EXPR; |
12680 | } |
12681 | else if (code == GE_EXPR) |
12682 | { |
12683 | bitop1 = BIT_NOT_EXPR; |
12684 | bitop2 = BIT_IOR_EXPR; |
12685 | } |
12686 | else if (code == LT_EXPR) |
12687 | { |
12688 | bitop1 = BIT_NOT_EXPR; |
12689 | bitop2 = BIT_AND_EXPR; |
12690 | swap_p = true; |
12691 | } |
12692 | else if (code == LE_EXPR) |
12693 | { |
12694 | bitop1 = BIT_NOT_EXPR; |
12695 | bitop2 = BIT_IOR_EXPR; |
12696 | swap_p = true; |
12697 | } |
12698 | else |
12699 | { |
12700 | bitop1 = BIT_XOR_EXPR; |
12701 | if (code == EQ_EXPR) |
12702 | bitop2 = BIT_NOT_EXPR; |
12703 | } |
12704 | } |
12705 | |
12706 | if (!vec_stmt) |
12707 | { |
12708 | if (bitop1 == NOP_EXPR) |
12709 | { |
12710 | if (!expand_vec_cmp_expr_p (vectype, mask_type, code)) |
12711 | return false; |
12712 | } |
12713 | else |
12714 | { |
12715 | machine_mode mode = TYPE_MODE (vectype); |
12716 | optab optab; |
12717 | |
12718 | optab = optab_for_tree_code (bitop1, vectype, optab_default); |
12719 | if (!optab || optab_handler (op: optab, mode) == CODE_FOR_nothing) |
12720 | return false; |
12721 | |
12722 | if (bitop2 != NOP_EXPR) |
12723 | { |
12724 | optab = optab_for_tree_code (bitop2, vectype, optab_default); |
12725 | if (!optab || optab_handler (op: optab, mode) == CODE_FOR_nothing) |
12726 | return false; |
12727 | } |
12728 | } |
12729 | |
12730 | /* Put types on constant and invariant SLP children. */ |
12731 | if (slp_node |
12732 | && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype) |
12733 | || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype))) |
12734 | { |
12735 | if (dump_enabled_p ()) |
12736 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
12737 | "incompatible vector types for invariants\n" ); |
12738 | return false; |
12739 | } |
12740 | |
12741 | vect_model_simple_cost (vinfo, stmt_info, |
12742 | ncopies: ncopies * (1 + (bitop2 != NOP_EXPR)), |
12743 | dt: dts, ndts, node: slp_node, cost_vec); |
12744 | return true; |
12745 | } |
12746 | |
12747 | /* Transform. */ |
12748 | |
12749 | /* Handle def. */ |
12750 | lhs = gimple_get_lhs (STMT_VINFO_STMT (stmt_info)); |
12751 | if (lhs) |
12752 | mask = vect_create_destination_var (lhs, mask_type); |
12753 | |
12754 | vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, |
12755 | op0: rhs1, vectype0: vectype, vec_oprnds0: &vec_oprnds0, |
12756 | op1: rhs2, vectype1: vectype, vec_oprnds1: &vec_oprnds1); |
12757 | if (swap_p) |
12758 | std::swap (a&: vec_oprnds0, b&: vec_oprnds1); |
12759 | |
12760 | /* Arguments are ready. Create the new vector stmt. */ |
12761 | FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1) |
12762 | { |
12763 | gimple *new_stmt; |
12764 | vec_rhs2 = vec_oprnds1[i]; |
12765 | |
12766 | if (lhs) |
12767 | new_temp = make_ssa_name (var: mask); |
12768 | else |
12769 | new_temp = make_temp_ssa_name (type: mask_type, NULL, name: "cmp" ); |
12770 | if (bitop1 == NOP_EXPR) |
12771 | { |
12772 | new_stmt = gimple_build_assign (new_temp, code, |
12773 | vec_rhs1, vec_rhs2); |
12774 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
12775 | } |
12776 | else |
12777 | { |
12778 | if (bitop1 == BIT_NOT_EXPR) |
12779 | new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2); |
12780 | else |
12781 | new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1, |
12782 | vec_rhs2); |
12783 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
12784 | if (bitop2 != NOP_EXPR) |
12785 | { |
12786 | tree res = make_ssa_name (var: mask); |
12787 | if (bitop2 == BIT_NOT_EXPR) |
12788 | new_stmt = gimple_build_assign (res, bitop2, new_temp); |
12789 | else |
12790 | new_stmt = gimple_build_assign (res, bitop2, vec_rhs1, |
12791 | new_temp); |
12792 | vect_finish_stmt_generation (vinfo, stmt_info, vec_stmt: new_stmt, gsi); |
12793 | } |
12794 | } |
12795 | if (slp_node) |
12796 | slp_node->push_vec_def (def: new_stmt); |
12797 | else |
12798 | STMT_VINFO_VEC_STMTS (stmt_info).safe_push (obj: new_stmt); |
12799 | } |
12800 | |
12801 | if (!slp_node) |
12802 | *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; |
12803 | |
12804 | vec_oprnds0.release (); |
12805 | vec_oprnds1.release (); |
12806 | |
12807 | return true; |
12808 | } |
12809 | |
12810 | /* vectorizable_comparison. |
12811 | |
Check if STMT_INFO is a comparison expression that can be vectorized.
12813 | If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized |
12814 | comparison, put it in VEC_STMT, and insert it at GSI. |
12815 | |
12816 | Return true if STMT_INFO is vectorizable in this way. */ |
12817 | |
12818 | static bool |
12819 | vectorizable_comparison (vec_info *vinfo, |
12820 | stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, |
12821 | gimple **vec_stmt, |
12822 | slp_tree slp_node, stmt_vector_for_cost *cost_vec) |
12823 | { |
12824 | bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo); |
12825 | |
12826 | if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) |
12827 | return false; |
12828 | |
12829 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) |
12830 | return false; |
12831 | |
12832 | gassign *stmt = dyn_cast <gassign *> (p: stmt_info->stmt); |
12833 | if (!stmt) |
12834 | return false; |
12835 | |
12836 | enum tree_code code = gimple_assign_rhs_code (gs: stmt); |
12837 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
12838 | if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi, |
12839 | vec_stmt, slp_node, cost_vec)) |
12840 | return false; |
12841 | |
12842 | if (!vec_stmt) |
12843 | STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type; |
12844 | |
12845 | return true; |
12846 | } |
12847 | |
12848 | /* Check to see if the current early break given in STMT_INFO is valid for |
12849 | vectorization. */ |
12850 | |
12851 | static bool |
12852 | vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info, |
12853 | gimple_stmt_iterator *gsi, gimple **vec_stmt, |
12854 | slp_tree slp_node, stmt_vector_for_cost *cost_vec) |
12855 | { |
12856 | loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo); |
12857 | if (!loop_vinfo |
12858 | || !is_a <gcond *> (STMT_VINFO_STMT (stmt_info))) |
12859 | return false; |
12860 | |
12861 | if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_condition_def) |
12862 | return false; |
12863 | |
12864 | if (!STMT_VINFO_RELEVANT_P (stmt_info)) |
12865 | return false; |
12866 | |
12867 | DUMP_VECT_SCOPE ("vectorizable_early_exit" ); |
12868 | |
12869 | auto code = gimple_cond_code (STMT_VINFO_STMT (stmt_info)); |
12870 | |
12871 | tree vectype = NULL_TREE; |
12872 | slp_tree slp_op0; |
12873 | tree op0; |
12874 | enum vect_def_type dt0; |
12875 | if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op0, &slp_op0, &dt0, |
12876 | &vectype)) |
12877 | { |
12878 | if (dump_enabled_p ()) |
12879 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
12880 | "use not simple.\n" ); |
12881 | return false; |
12882 | } |
12883 | |
12884 | if (!vectype) |
12885 | return false; |
12886 | |
12887 | machine_mode mode = TYPE_MODE (vectype); |
12888 | int ncopies; |
12889 | |
12890 | if (slp_node) |
12891 | ncopies = 1; |
12892 | else |
12893 | ncopies = vect_get_num_copies (loop_vinfo, vectype); |
12894 | |
12895 | vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); |
12896 | bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); |
12897 | |
/* Now build the new conditional.  Pattern gimple_conds get dropped during
codegen, so we must replace the original stmt.  */
12900 | gimple *orig_stmt = STMT_VINFO_STMT (vect_orig_stmt (stmt_info)); |
12901 | gcond *cond_stmt = as_a <gcond *>(p: orig_stmt); |
/* When vectorizing we assume that taking the branch edge means we're
exiting the loop.  That is not always the case, however, because the
compiler rewrites conditions to always be a comparison against 0, and
in doing so sometimes flips the edges.  This is fine for scalar code,
but for vector code we then have to flip the test, since we still
assume that taking the branch edge means we found the exit condition,
i.e. we need to know whether we are generating a `forall` or an
`exist` condition.  */
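/* For example (a sketch), for the early break

if (a[i] == 42) break;

the natural vector test is the `exist` form

if (vect_cmp != { 0, ... }) <exit>;

but if the edges were flipped so that the taken edge stays in the
loop, we need the `forall` form

if (vect_cmp == { -1, ... }) <stay>;

which is selected below together with the matching IOR or AND mask
reduction across copies.  */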
12909 | auto new_code = NE_EXPR; |
12910 | auto reduc_optab = ior_optab; |
12911 | auto reduc_op = BIT_IOR_EXPR; |
12912 | tree cst = build_zero_cst (vectype); |
12913 | edge exit_true_edge = EDGE_SUCC (gimple_bb (cond_stmt), 0); |
12914 | if (exit_true_edge->flags & EDGE_FALSE_VALUE) |
12915 | exit_true_edge = EDGE_SUCC (gimple_bb (cond_stmt), 1); |
12916 | gcc_assert (exit_true_edge->flags & EDGE_TRUE_VALUE); |
12917 | if (flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo), |
12918 | exit_true_edge->dest)) |
12919 | { |
12920 | new_code = EQ_EXPR; |
12921 | reduc_optab = and_optab; |
12922 | reduc_op = BIT_AND_EXPR; |
12923 | cst = build_minus_one_cst (vectype); |
12924 | } |
12925 | |
12926 | /* Analyze only. */ |
12927 | if (!vec_stmt) |
12928 | { |
12929 | if (direct_optab_handler (op: cbranch_optab, mode) == CODE_FOR_nothing) |
12930 | { |
12931 | if (dump_enabled_p ()) |
12932 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
12933 | "can't vectorize early exit because the " |
12934 | "target doesn't support flag setting vector " |
12935 | "comparisons.\n" ); |
12936 | return false; |
12937 | } |
12938 | |
12939 | if (ncopies > 1 |
12940 | && direct_optab_handler (op: reduc_optab, mode) == CODE_FOR_nothing) |
12941 | { |
12942 | if (dump_enabled_p ()) |
12943 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
12944 | "can't vectorize early exit because the " |
12945 | "target does not support boolean vector %s " |
12946 | "for type %T.\n" , |
12947 | reduc_optab == ior_optab ? "OR" : "AND" , |
12948 | vectype); |
12949 | return false; |
12950 | } |
12951 | |
12952 | if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi, |
12953 | vec_stmt, slp_node, cost_vec)) |
12954 | return false; |
12955 | |
12956 | if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) |
12957 | { |
12958 | if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype, |
12959 | OPTIMIZE_FOR_SPEED)) |
12960 | return false; |
12961 | else |
12962 | vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, NULL); |
12963 | } |
12964 | |
12966 | return true; |
12967 | } |
12968 | |
/* Transform.  */
12970 | |
12971 | tree new_temp = NULL_TREE; |
12972 | gimple *new_stmt = NULL; |
12973 | |
12974 | if (dump_enabled_p ()) |
12975 | dump_printf_loc (MSG_NOTE, vect_location, "transform early-exit.\n" ); |
12976 | |
12977 | if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi, |
12978 | vec_stmt, slp_node, cost_vec)) |
12979 | gcc_unreachable (); |
12980 | |
12981 | gimple *stmt = STMT_VINFO_STMT (stmt_info); |
12982 | basic_block cond_bb = gimple_bb (g: stmt); |
12983 | gimple_stmt_iterator cond_gsi = gsi_last_bb (bb: cond_bb); |
12984 | |
12985 | auto_vec<tree> stmts; |
12986 | |
12987 | if (slp_node) |
12988 | stmts.safe_splice (SLP_TREE_VEC_DEFS (slp_node)); |
12989 | else |
12990 | { |
12991 | auto vec_stmts = STMT_VINFO_VEC_STMTS (stmt_info); |
12992 | stmts.reserve_exact (nelems: vec_stmts.length ()); |
12993 | for (auto stmt : vec_stmts) |
12994 | stmts.quick_push (obj: gimple_assign_lhs (gs: stmt)); |
12995 | } |
12996 | |
12997 | /* Determine if we need to reduce the final value. */ |
12998 | if (stmts.length () > 1) |
12999 | { |
13000 | /* We build the reductions in a way to maintain as much parallelism as |
13001 | possible. */ |
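/* For example (a sketch), with four mask vectors m0..m3 and
reduction operation OP, the loop below computes

t0 = m3 OP m2;  t1 = m1 OP m0;  t2 = t0 OP t1;

i.e. a roughly balanced tree of depth ceil (log2 (n)) rather than
a linear chain, since each step pops two entries from the back and
queues the result at the front.  */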
13002 | auto_vec<tree> workset (stmts.length ()); |
13003 | |
13004 | /* Mask the statements as we queue them up. Normally we loop over |
13005 | vec_num, but since we inspect the exact results of vectorization |
13006 | we don't need to and instead can just use the stmts themselves. */ |
13007 | if (masked_loop_p) |
13008 | for (unsigned i = 0; i < stmts.length (); i++) |
13009 | { |
13010 | tree stmt_mask |
13011 | = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies, vectype, |
13012 | i); |
13013 | stmt_mask |
13014 | = prepare_vec_mask (loop_vinfo, TREE_TYPE (stmt_mask), loop_mask: stmt_mask, |
13015 | vec_mask: stmts[i], gsi: &cond_gsi); |
13016 | workset.quick_push (obj: stmt_mask); |
13017 | } |
13018 | else |
13019 | workset.splice (src: stmts); |
13020 | |
13021 | while (workset.length () > 1) |
13022 | { |
13023 | new_temp = make_temp_ssa_name (type: vectype, NULL, name: "vexit_reduc" ); |
13024 | tree arg0 = workset.pop (); |
13025 | tree arg1 = workset.pop (); |
13026 | new_stmt = gimple_build_assign (new_temp, reduc_op, arg0, arg1); |
13027 | vect_finish_stmt_generation (vinfo: loop_vinfo, stmt_info, vec_stmt: new_stmt, |
13028 | gsi: &cond_gsi); |
13029 | workset.quick_insert (ix: 0, obj: new_temp); |
13030 | } |
13031 | } |
13032 | else |
13033 | { |
13034 | new_temp = stmts[0]; |
13035 | if (masked_loop_p) |
13036 | { |
13037 | tree mask |
13038 | = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies, vectype, 0); |
13039 | new_temp = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), loop_mask: mask, |
13040 | vec_mask: new_temp, gsi: &cond_gsi); |
13041 | } |
13042 | } |
13043 | |
13044 | gcc_assert (new_temp); |
13045 | |
13046 | gimple_cond_set_condition (stmt: cond_stmt, code: new_code, lhs: new_temp, rhs: cst); |
13047 | update_stmt (s: orig_stmt); |
13048 | |
13049 | if (slp_node) |
13050 | SLP_TREE_VEC_DEFS (slp_node).truncate (size: 0); |
13051 | else |
13052 | STMT_VINFO_VEC_STMTS (stmt_info).truncate (size: 0); |
13053 | |
13054 | if (!slp_node) |
13055 | *vec_stmt = orig_stmt; |
13056 | |
13057 | return true; |
13058 | } |
13059 | |
13060 | /* If SLP_NODE is nonnull, return true if vectorizable_live_operation |
13061 | can handle all live statements in the node. Otherwise return true |
13062 | if STMT_INFO is not live or if vectorizable_live_operation can handle it. |
13063 | VEC_STMT_P is as for vectorizable_live_operation. */ |
13064 | |
13065 | static bool |
13066 | can_vectorize_live_stmts (vec_info *vinfo, stmt_vec_info stmt_info, |
13067 | slp_tree slp_node, slp_instance slp_node_instance, |
13068 | bool vec_stmt_p, |
13069 | stmt_vector_for_cost *cost_vec) |
13070 | { |
13071 | loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo); |
13072 | if (slp_node) |
13073 | { |
13074 | stmt_vec_info slp_stmt_info; |
13075 | unsigned int i; |
13076 | FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info) |
13077 | { |
13078 | if ((STMT_VINFO_LIVE_P (slp_stmt_info) |
13079 | || (loop_vinfo |
13080 | && LOOP_VINFO_EARLY_BREAKS (loop_vinfo) |
13081 | && STMT_VINFO_DEF_TYPE (slp_stmt_info) |
13082 | == vect_induction_def)) |
13083 | && !vectorizable_live_operation (vinfo, slp_stmt_info, slp_node, |
13084 | slp_node_instance, i, |
13085 | vec_stmt_p, cost_vec)) |
13086 | return false; |
13087 | } |
13088 | } |
13089 | else if ((STMT_VINFO_LIVE_P (stmt_info) |
13090 | || (LOOP_VINFO_EARLY_BREAKS (loop_vinfo) |
13091 | && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)) |
13092 | && !vectorizable_live_operation (vinfo, stmt_info, |
13093 | slp_node, slp_node_instance, -1, |
13094 | vec_stmt_p, cost_vec)) |
13095 | return false; |
13096 | |
13097 | return true; |
13098 | } |
13099 | |
13100 | /* Make sure the statement is vectorizable. */ |
13101 | |
13102 | opt_result |
13103 | vect_analyze_stmt (vec_info *vinfo, |
13104 | stmt_vec_info stmt_info, bool *need_to_vectorize, |
13105 | slp_tree node, slp_instance node_instance, |
13106 | stmt_vector_for_cost *cost_vec) |
13107 | { |
13108 | bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo); |
13109 | enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info); |
13110 | bool ok; |
13111 | gimple_seq pattern_def_seq; |
13112 | |
13113 | if (dump_enabled_p ()) |
13114 | dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G" , |
13115 | stmt_info->stmt); |
13116 | |
13117 | if (gimple_has_volatile_ops (stmt: stmt_info->stmt)) |
13118 | return opt_result::failure_at (loc: stmt_info->stmt, |
13119 | fmt: "not vectorized:" |
13120 | " stmt has volatile operands: %G\n" , |
13121 | stmt_info->stmt); |
13122 | |
13123 | if (STMT_VINFO_IN_PATTERN_P (stmt_info) |
13124 | && node == NULL |
13125 | && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))) |
13126 | { |
13127 | gimple_stmt_iterator si; |
13128 | |
13129 | for (si = gsi_start (seq&: pattern_def_seq); !gsi_end_p (i: si); gsi_next (i: &si)) |
13130 | { |
13131 | stmt_vec_info pattern_def_stmt_info |
13132 | = vinfo->lookup_stmt (gsi_stmt (i: si)); |
13133 | if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info) |
13134 | || STMT_VINFO_LIVE_P (pattern_def_stmt_info)) |
13135 | { |
13136 | /* Analyze def stmt of STMT if it's a pattern stmt. */ |
13137 | if (dump_enabled_p ()) |
13138 | dump_printf_loc (MSG_NOTE, vect_location, |
13139 | "==> examining pattern def statement: %G" , |
13140 | pattern_def_stmt_info->stmt); |
13141 | |
13142 | opt_result res |
13143 | = vect_analyze_stmt (vinfo, stmt_info: pattern_def_stmt_info, |
13144 | need_to_vectorize, node, node_instance, |
13145 | cost_vec); |
13146 | if (!res) |
13147 | return res; |
13148 | } |
13149 | } |
13150 | } |
13151 | |
13152 | /* Skip stmts that do not need to be vectorized. In loops this is expected |
13153 | to include: |
13154 | - the COND_EXPR which is the loop exit condition |
13155 | - any LABEL_EXPRs in the loop |
13156 | - computations that are used only for array indexing or loop control. |
In basic blocks we only analyze statements that are a part of some SLP
instance; therefore, all the statements are relevant.

A pattern statement needs to be analyzed instead of the original
statement if the original statement is not relevant.  Otherwise, we
analyze both statements.  In basic blocks we are called from some SLP
instance traversal; there we do not analyze pattern stmts separately,
as the pattern stmts will already be part of the SLP instance.  */
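
/* For example, an IV update such as i_7 = i_6 + 1 that is used only
by the loop exit test is marked irrelevant and skipped here.  */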
13165 | |
13166 | stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info); |
13167 | if (!STMT_VINFO_RELEVANT_P (stmt_info) |
13168 | && !STMT_VINFO_LIVE_P (stmt_info)) |
13169 | { |
13170 | if (STMT_VINFO_IN_PATTERN_P (stmt_info) |
13171 | && pattern_stmt_info |
13172 | && (STMT_VINFO_RELEVANT_P (pattern_stmt_info) |
13173 | || STMT_VINFO_LIVE_P (pattern_stmt_info))) |
13174 | { |
13175 | /* Analyze PATTERN_STMT instead of the original stmt. */ |
13176 | stmt_info = pattern_stmt_info; |
13177 | if (dump_enabled_p ()) |
13178 | dump_printf_loc (MSG_NOTE, vect_location, |
13179 | "==> examining pattern statement: %G" , |
13180 | stmt_info->stmt); |
13181 | } |
13182 | else |
13183 | { |
13184 | if (dump_enabled_p ()) |
13185 | dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n" ); |
13186 | |
13187 | return opt_result::success (); |
13188 | } |
13189 | } |
13190 | else if (STMT_VINFO_IN_PATTERN_P (stmt_info) |
13191 | && node == NULL |
13192 | && pattern_stmt_info |
13193 | && (STMT_VINFO_RELEVANT_P (pattern_stmt_info) |
13194 | || STMT_VINFO_LIVE_P (pattern_stmt_info))) |
13195 | { |
13196 | /* Analyze PATTERN_STMT too. */ |
13197 | if (dump_enabled_p ()) |
13198 | dump_printf_loc (MSG_NOTE, vect_location, |
13199 | "==> examining pattern statement: %G" , |
13200 | pattern_stmt_info->stmt); |
13201 | |
13202 | opt_result res |
13203 | = vect_analyze_stmt (vinfo, stmt_info: pattern_stmt_info, need_to_vectorize, node, |
13204 | node_instance, cost_vec); |
13205 | if (!res) |
13206 | return res; |
13207 | } |
13208 | |
13209 | switch (STMT_VINFO_DEF_TYPE (stmt_info)) |
13210 | { |
13211 | case vect_internal_def: |
13212 | case vect_condition_def: |
13213 | break; |
13214 | |
13215 | case vect_reduction_def: |
13216 | case vect_nested_cycle: |
13217 | gcc_assert (!bb_vinfo |
13218 | && (relevance == vect_used_in_outer |
13219 | || relevance == vect_used_in_outer_by_reduction |
13220 | || relevance == vect_used_by_reduction |
13221 | || relevance == vect_unused_in_scope |
13222 | || relevance == vect_used_only_live)); |
13223 | break; |
13224 | |
13225 | case vect_induction_def: |
13226 | case vect_first_order_recurrence: |
13227 | gcc_assert (!bb_vinfo); |
13228 | break; |
13229 | |
13230 | case vect_constant_def: |
13231 | case vect_external_def: |
13232 | case vect_unknown_def_type: |
13233 | default: |
13234 | gcc_unreachable (); |
13235 | } |
13236 | |
13237 | tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info); |
13238 | if (node) |
13239 | STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node); |
13240 | |
13241 | if (STMT_VINFO_RELEVANT_P (stmt_info)) |
13242 | { |
13243 | gcall *call = dyn_cast <gcall *> (p: stmt_info->stmt); |
13244 | gcc_assert (STMT_VINFO_VECTYPE (stmt_info) |
13245 | || gimple_code (stmt_info->stmt) == GIMPLE_COND |
13246 | || (call && gimple_call_lhs (call) == NULL_TREE)); |
13247 | *need_to_vectorize = true; |
13248 | } |
13249 | |
13250 | if (PURE_SLP_STMT (stmt_info) && !node) |
13251 | { |
13252 | if (dump_enabled_p ()) |
13253 | dump_printf_loc (MSG_NOTE, vect_location, |
13254 | "handled only by SLP analysis\n" ); |
13255 | return opt_result::success (); |
13256 | } |
13257 | |
13258 | ok = true; |
13259 | if (!bb_vinfo |
13260 | && (STMT_VINFO_RELEVANT_P (stmt_info) |
13261 | || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)) |
13262 | /* Prefer vectorizable_call over vectorizable_simd_clone_call so |
13263 | -mveclibabi= takes preference over library functions with |
13264 | the simd attribute. */ |
13265 | ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, slp_node: node, cost_vec) |
13266 | || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, slp_node: node, |
13267 | cost_vec) |
13268 | || vectorizable_conversion (vinfo, stmt_info, |
13269 | NULL, NULL, slp_node: node, cost_vec) |
13270 | || vectorizable_operation (vinfo, stmt_info, |
13271 | NULL, NULL, slp_node: node, cost_vec) |
13272 | || vectorizable_assignment (vinfo, stmt_info, |
13273 | NULL, NULL, slp_node: node, cost_vec) |
13274 | || vectorizable_load (vinfo, stmt_info, NULL, NULL, slp_node: node, cost_vec) |
13275 | || vectorizable_store (vinfo, stmt_info, NULL, NULL, slp_node: node, cost_vec) |
13276 | || vectorizable_reduction (as_a <loop_vec_info> (p: vinfo), stmt_info, |
13277 | node, node_instance, cost_vec) |
13278 | || vectorizable_induction (as_a <loop_vec_info> (p: vinfo), stmt_info, |
13279 | NULL, node, cost_vec) |
13280 | || vectorizable_shift (vinfo, stmt_info, NULL, NULL, slp_node: node, cost_vec) |
13281 | || vectorizable_condition (vinfo, stmt_info, |
13282 | NULL, NULL, slp_node: node, cost_vec) |
13283 | || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, slp_node: node, |
13284 | cost_vec) |
13285 | || vectorizable_lc_phi (as_a <loop_vec_info> (p: vinfo), |
13286 | stmt_info, NULL, node) |
13287 | || vectorizable_recurr (as_a <loop_vec_info> (p: vinfo), |
13288 | stmt_info, NULL, node, cost_vec) |
13289 | || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, slp_node: node, |
13290 | cost_vec)); |
13291 | else |
13292 | { |
13293 | if (bb_vinfo) |
13294 | ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, slp_node: node, cost_vec) |
13295 | || vectorizable_simd_clone_call (vinfo, stmt_info, |
13296 | NULL, NULL, slp_node: node, cost_vec) |
13297 | || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, slp_node: node, |
13298 | cost_vec) |
13299 | || vectorizable_shift (vinfo, stmt_info, |
13300 | NULL, NULL, slp_node: node, cost_vec) |
13301 | || vectorizable_operation (vinfo, stmt_info, |
13302 | NULL, NULL, slp_node: node, cost_vec) |
13303 | || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, slp_node: node, |
13304 | cost_vec) |
13305 | || vectorizable_load (vinfo, stmt_info, |
13306 | NULL, NULL, slp_node: node, cost_vec) |
13307 | || vectorizable_store (vinfo, stmt_info, |
13308 | NULL, NULL, slp_node: node, cost_vec) |
13309 | || vectorizable_condition (vinfo, stmt_info, |
13310 | NULL, NULL, slp_node: node, cost_vec) |
13311 | || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, slp_node: node, |
13312 | cost_vec) |
13313 | || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec) |
13314 | || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, slp_node: node, |
13315 | cost_vec)); |
13316 | |
13317 | } |
13318 | |
13319 | if (node) |
13320 | STMT_VINFO_VECTYPE (stmt_info) = saved_vectype; |
13321 | |
13322 | if (!ok) |
13323 | return opt_result::failure_at (loc: stmt_info->stmt, |
13324 | fmt: "not vectorized:" |
13325 | " relevant stmt not supported: %G" , |
13326 | stmt_info->stmt); |
13327 | |
  /* Stmts that are (also) "live" (i.e., used outside the loop)
     need extra handling, except for vectorizable reductions.  */
13330 | if (!bb_vinfo |
13331 | && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type |
13332 | && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type |
13333 | && !can_vectorize_live_stmts (vinfo: as_a <loop_vec_info> (p: vinfo), |
13334 | stmt_info, slp_node: node, slp_node_instance: node_instance, |
13335 | vec_stmt_p: false, cost_vec)) |
13336 | return opt_result::failure_at (loc: stmt_info->stmt, |
13337 | fmt: "not vectorized:" |
13338 | " live stmt not supported: %G" , |
13339 | stmt_info->stmt); |
13340 | |
13341 | return opt_result::success (); |
13342 | } |
13343 | |
13344 | |
13345 | /* Function vect_transform_stmt. |
13346 | |
13347 | Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */ |
13348 | |
13349 | bool |
13350 | vect_transform_stmt (vec_info *vinfo, |
13351 | stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, |
13352 | slp_tree slp_node, slp_instance slp_node_instance) |
13353 | { |
13354 | bool is_store = false; |
13355 | gimple *vec_stmt = NULL; |
13356 | bool done; |
13357 | |
13358 | gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info)); |
13359 | |
13360 | tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info); |
13361 | if (slp_node) |
13362 | STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node); |
13363 | |
13364 | switch (STMT_VINFO_TYPE (stmt_info)) |
13365 | { |
13366 | case type_demotion_vec_info_type: |
13367 | case type_promotion_vec_info_type: |
13368 | case type_conversion_vec_info_type: |
13369 | done = vectorizable_conversion (vinfo, stmt_info, |
13370 | gsi, vec_stmt: &vec_stmt, slp_node, NULL); |
13371 | gcc_assert (done); |
13372 | break; |
13373 | |
13374 | case induc_vec_info_type: |
13375 | done = vectorizable_induction (as_a <loop_vec_info> (p: vinfo), |
13376 | stmt_info, &vec_stmt, slp_node, |
13377 | NULL); |
13378 | gcc_assert (done); |
13379 | break; |
13380 | |
13381 | case shift_vec_info_type: |
13382 | done = vectorizable_shift (vinfo, stmt_info, |
13383 | gsi, vec_stmt: &vec_stmt, slp_node, NULL); |
13384 | gcc_assert (done); |
13385 | break; |
13386 | |
13387 | case op_vec_info_type: |
13388 | done = vectorizable_operation (vinfo, stmt_info, gsi, vec_stmt: &vec_stmt, slp_node, |
13389 | NULL); |
13390 | gcc_assert (done); |
13391 | break; |
13392 | |
13393 | case assignment_vec_info_type: |
13394 | done = vectorizable_assignment (vinfo, stmt_info, |
13395 | gsi, vec_stmt: &vec_stmt, slp_node, NULL); |
13396 | gcc_assert (done); |
13397 | break; |
13398 | |
13399 | case load_vec_info_type: |
13400 | done = vectorizable_load (vinfo, stmt_info, gsi, vec_stmt: &vec_stmt, slp_node, |
13401 | NULL); |
13402 | gcc_assert (done); |
13403 | break; |
13404 | |
13405 | case store_vec_info_type: |
13406 | if (STMT_VINFO_GROUPED_ACCESS (stmt_info) |
13407 | && !slp_node |
13408 | && (++DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info)) |
13409 | < DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info)))) |
	/* In case of interleaving, the whole chain is vectorized when the
	   last store in the chain is reached.  Store stmts before the last
	   one are skipped, and their vec_stmt_info shouldn't be freed
	   meanwhile.  */
13414 | ; |
13415 | else |
13416 | { |
13417 | done = vectorizable_store (vinfo, stmt_info, |
13418 | gsi, vec_stmt: &vec_stmt, slp_node, NULL); |
13419 | gcc_assert (done); |
13420 | is_store = true; |
13421 | } |
13422 | break; |
13423 | |
13424 | case condition_vec_info_type: |
13425 | done = vectorizable_condition (vinfo, stmt_info, |
13426 | gsi, vec_stmt: &vec_stmt, slp_node, NULL); |
13427 | gcc_assert (done); |
13428 | break; |
13429 | |
13430 | case comparison_vec_info_type: |
13431 | done = vectorizable_comparison (vinfo, stmt_info, gsi, vec_stmt: &vec_stmt, |
13432 | slp_node, NULL); |
13433 | gcc_assert (done); |
13434 | break; |
13435 | |
13436 | case call_vec_info_type: |
13437 | done = vectorizable_call (vinfo, stmt_info, |
13438 | gsi, vec_stmt: &vec_stmt, slp_node, NULL); |
13439 | break; |
13440 | |
13441 | case call_simd_clone_vec_info_type: |
13442 | done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, vec_stmt: &vec_stmt, |
13443 | slp_node, NULL); |
13444 | break; |
13445 | |
13446 | case reduc_vec_info_type: |
13447 | done = vect_transform_reduction (as_a <loop_vec_info> (p: vinfo), stmt_info, |
13448 | gsi, &vec_stmt, slp_node); |
13449 | gcc_assert (done); |
13450 | break; |
13451 | |
13452 | case cycle_phi_info_type: |
13453 | done = vect_transform_cycle_phi (as_a <loop_vec_info> (p: vinfo), stmt_info, |
13454 | &vec_stmt, slp_node, slp_node_instance); |
13455 | gcc_assert (done); |
13456 | break; |
13457 | |
13458 | case lc_phi_info_type: |
13459 | done = vectorizable_lc_phi (as_a <loop_vec_info> (p: vinfo), |
13460 | stmt_info, &vec_stmt, slp_node); |
13461 | gcc_assert (done); |
13462 | break; |
13463 | |
13464 | case recurr_info_type: |
13465 | done = vectorizable_recurr (as_a <loop_vec_info> (p: vinfo), |
13466 | stmt_info, &vec_stmt, slp_node, NULL); |
13467 | gcc_assert (done); |
13468 | break; |
13469 | |
13470 | case phi_info_type: |
13471 | done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL); |
13472 | gcc_assert (done); |
13473 | break; |
13474 | |
13475 | case loop_exit_ctrl_vec_info_type: |
13476 | done = vectorizable_early_exit (vinfo, stmt_info, gsi, vec_stmt: &vec_stmt, |
13477 | slp_node, NULL); |
13478 | gcc_assert (done); |
13479 | break; |
13480 | |
13481 | default: |
13482 | if (!STMT_VINFO_LIVE_P (stmt_info)) |
13483 | { |
13484 | if (dump_enabled_p ()) |
13485 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
13486 | "stmt not supported.\n" ); |
13487 | gcc_unreachable (); |
13488 | } |
13489 | done = true; |
13490 | } |
13491 | |
13492 | if (!slp_node && vec_stmt) |
13493 | gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ()); |
13494 | |
13495 | if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type) |
13496 | { |
13497 | /* Handle stmts whose DEF is used outside the loop-nest that is |
13498 | being vectorized. */ |
13499 | done = can_vectorize_live_stmts (vinfo, stmt_info, slp_node, |
13500 | slp_node_instance, vec_stmt_p: true, NULL); |
13501 | gcc_assert (done); |
13502 | } |
13503 | |
13504 | if (slp_node) |
13505 | STMT_VINFO_VECTYPE (stmt_info) = saved_vectype; |
13506 | |
13507 | return is_store; |
13508 | } |
13509 | |
13510 | |
13511 | /* Remove a group of stores (for SLP or interleaving), free their |
13512 | stmt_vec_info. */ |
13513 | |
13514 | void |
13515 | vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info) |
13516 | { |
13517 | stmt_vec_info next_stmt_info = first_stmt_info; |
13518 | |
13519 | while (next_stmt_info) |
13520 | { |
13521 | stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info); |
13522 | next_stmt_info = vect_orig_stmt (stmt_info: next_stmt_info); |
13523 | /* Free the attached stmt_vec_info and remove the stmt. */ |
13524 | vinfo->remove_stmt (next_stmt_info); |
13525 | next_stmt_info = tmp; |
13526 | } |
13527 | } |
13528 | |
13529 | /* If NUNITS is nonzero, return a vector type that contains NUNITS |
13530 | elements of type SCALAR_TYPE, or null if the target doesn't support |
13531 | such a type. |
13532 | |
13533 | If NUNITS is zero, return a vector type that contains elements of |
13534 | type SCALAR_TYPE, choosing whichever vector size the target prefers. |
13535 | |
13536 | If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode |
13537 | for this vectorization region and want to "autodetect" the best choice. |
13538 | Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE |
13539 | and we want the new type to be interoperable with it. PREVAILING_MODE |
13540 | in this case can be a scalar integer mode or a vector mode; when it |
13541 | is a vector mode, the function acts like a tree-level version of |
13542 | related_vector_mode. */ |
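
/* For example (a sketch; the resulting mode depends on the target):

     tree vauto
       = get_related_vectype_for_scalar_type (VOIDmode, intSI_type_node, 0);

   autodetects the preferred vector size, e.g. yielding a V4SImode type
   on a target with 128-bit vectors, while passing a nonzero NUNITS
   together with a previously-chosen PREVAILING_MODE requests a type
   with exactly that many elements that is interoperable with the
   prevailing mode.  */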
13543 | |
13544 | tree |
13545 | get_related_vectype_for_scalar_type (machine_mode prevailing_mode, |
13546 | tree scalar_type, poly_uint64 nunits) |
13547 | { |
13548 | tree orig_scalar_type = scalar_type; |
13549 | scalar_mode inner_mode; |
13550 | machine_mode simd_mode; |
13551 | tree vectype; |
13552 | |
13553 | if ((!INTEGRAL_TYPE_P (scalar_type) |
13554 | && !POINTER_TYPE_P (scalar_type) |
13555 | && !SCALAR_FLOAT_TYPE_P (scalar_type)) |
13556 | || (!is_int_mode (TYPE_MODE (scalar_type), int_mode: &inner_mode) |
13557 | && !is_float_mode (TYPE_MODE (scalar_type), float_mode: &inner_mode))) |
13558 | return NULL_TREE; |
13559 | |
13560 | unsigned int nbytes = GET_MODE_SIZE (mode: inner_mode); |
13561 | |
13562 | /* Interoperability between modes requires one to be a constant multiple |
13563 | of the other, so that the number of vectors required for each operation |
13564 | is a compile-time constant. */ |
13565 | if (prevailing_mode != VOIDmode |
13566 | && !constant_multiple_p (a: nunits * nbytes, |
13567 | b: GET_MODE_SIZE (mode: prevailing_mode)) |
13568 | && !constant_multiple_p (a: GET_MODE_SIZE (mode: prevailing_mode), |
13569 | b: nunits * nbytes)) |
13570 | return NULL_TREE; |
13571 | |
  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
13578 | if (INTEGRAL_TYPE_P (scalar_type) |
13579 | && (GET_MODE_BITSIZE (mode: inner_mode) != TYPE_PRECISION (scalar_type) |
13580 | || TREE_CODE (scalar_type) != INTEGER_TYPE)) |
13581 | scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode: inner_mode), |
13582 | TYPE_UNSIGNED (scalar_type)); |
13583 | |
13584 | /* We shouldn't end up building VECTOR_TYPEs of non-scalar components. |
13585 | When the component mode passes the above test simply use a type |
13586 | corresponding to that mode. The theory is that any use that |
13587 | would cause problems with this will disable vectorization anyway. */ |
13588 | else if (!SCALAR_FLOAT_TYPE_P (scalar_type) |
13589 | && !INTEGRAL_TYPE_P (scalar_type)) |
13590 | scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1); |
13591 | |
13592 | /* We can't build a vector type of elements with alignment bigger than |
13593 | their size. */ |
13594 | else if (nbytes < TYPE_ALIGN_UNIT (scalar_type)) |
13595 | scalar_type = lang_hooks.types.type_for_mode (inner_mode, |
13596 | TYPE_UNSIGNED (scalar_type)); |
13597 | |
  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
13600 | if (scalar_type == NULL_TREE) |
13601 | return NULL_TREE; |
13602 | |
13603 | /* If no prevailing mode was supplied, use the mode the target prefers. |
13604 | Otherwise lookup a vector mode based on the prevailing mode. */ |
13605 | if (prevailing_mode == VOIDmode) |
13606 | { |
13607 | gcc_assert (known_eq (nunits, 0U)); |
13608 | simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode); |
13609 | if (SCALAR_INT_MODE_P (simd_mode)) |
13610 | { |
13611 | /* Traditional behavior is not to take the integer mode |
13612 | literally, but simply to use it as a way of determining |
13613 | the vector size. It is up to mode_for_vector to decide |
13614 | what the TYPE_MODE should be. |
13615 | |
13616 | Note that nunits == 1 is allowed in order to support single |
13617 | element vector types. */ |
13618 | if (!multiple_p (a: GET_MODE_SIZE (mode: simd_mode), b: nbytes, multiple: &nunits) |
13619 | || !mode_for_vector (inner_mode, nunits).exists (mode: &simd_mode)) |
13620 | return NULL_TREE; |
13621 | } |
13622 | } |
13623 | else if (SCALAR_INT_MODE_P (prevailing_mode) |
13624 | || !related_vector_mode (prevailing_mode, |
13625 | inner_mode, nunits).exists (mode: &simd_mode)) |
13626 | { |
13627 | /* Fall back to using mode_for_vector, mostly in the hope of being |
13628 | able to use an integer mode. */ |
13629 | if (known_eq (nunits, 0U) |
13630 | && !multiple_p (a: GET_MODE_SIZE (mode: prevailing_mode), b: nbytes, multiple: &nunits)) |
13631 | return NULL_TREE; |
13632 | |
13633 | if (!mode_for_vector (inner_mode, nunits).exists (mode: &simd_mode)) |
13634 | return NULL_TREE; |
13635 | } |
13636 | |
13637 | vectype = build_vector_type_for_mode (scalar_type, simd_mode); |
13638 | |
13639 | /* In cases where the mode was chosen by mode_for_vector, check that |
13640 | the target actually supports the chosen mode, or that it at least |
13641 | allows the vector mode to be replaced by a like-sized integer. */ |
13642 | if (!VECTOR_MODE_P (TYPE_MODE (vectype)) |
13643 | && !INTEGRAL_MODE_P (TYPE_MODE (vectype))) |
13644 | return NULL_TREE; |
13645 | |
13646 | /* Re-attach the address-space qualifier if we canonicalized the scalar |
13647 | type. */ |
13648 | if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype)) |
13649 | return build_qualified_type |
13650 | (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type))); |
13651 | |
13652 | return vectype; |
13653 | } |
13654 | |
13655 | /* Function get_vectype_for_scalar_type. |
13656 | |
13657 | Returns the vector type corresponding to SCALAR_TYPE as supported |
13658 | by the target. If GROUP_SIZE is nonzero and we're performing BB |
13659 | vectorization, make sure that the number of elements in the vector |
13660 | is no bigger than GROUP_SIZE. */ |
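
/* For example (a sketch): during BB vectorization of a group of three
   chars,

     tree vt = get_vectype_for_scalar_type (vinfo, char_type_node, 3);

   may return a two-element vector type, since the number of elements
   is capped by GROUP_SIZE and rounded down to a supported size.  */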
13661 | |
13662 | tree |
13663 | get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, |
13664 | unsigned int group_size) |
13665 | { |
13666 | /* For BB vectorization, we should always have a group size once we've |
13667 | constructed the SLP tree; the only valid uses of zero GROUP_SIZEs |
13668 | are tentative requests during things like early data reference |
13669 | analysis and pattern recognition. */ |
13670 | if (is_a <bb_vec_info> (p: vinfo)) |
13671 | gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0); |
13672 | else |
13673 | group_size = 0; |
13674 | |
13675 | tree vectype = get_related_vectype_for_scalar_type (prevailing_mode: vinfo->vector_mode, |
13676 | scalar_type); |
13677 | if (vectype && vinfo->vector_mode == VOIDmode) |
13678 | vinfo->vector_mode = TYPE_MODE (vectype); |
13679 | |
13680 | /* Register the natural choice of vector type, before the group size |
13681 | has been applied. */ |
13682 | if (vectype) |
13683 | vinfo->used_vector_modes.add (TYPE_MODE (vectype)); |
13684 | |
13685 | /* If the natural choice of vector type doesn't satisfy GROUP_SIZE, |
13686 | try again with an explicit number of elements. */ |
13687 | if (vectype |
13688 | && group_size |
13689 | && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size)) |
13690 | { |
13691 | /* Start with the biggest number of units that fits within |
13692 | GROUP_SIZE and halve it until we find a valid vector type. |
13693 | Usually either the first attempt will succeed or all will |
13694 | fail (in the latter case because GROUP_SIZE is too small |
13695 | for the target), but it's possible that a target could have |
13696 | a hole between supported vector types. |
13697 | |
13698 | If GROUP_SIZE is not a power of 2, this has the effect of |
13699 | trying the largest power of 2 that fits within the group, |
13700 | even though the group is not a multiple of that vector size. |
13701 | The BB vectorizer will then try to carve up the group into |
13702 | smaller pieces. */ |
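      /* For example, with GROUP_SIZE == 12 we try 8 elements first,
	 then 4, then 2, stopping at the first vector type the target
	 supports.  */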
13703 | unsigned int nunits = 1 << floor_log2 (x: group_size); |
13704 | do |
13705 | { |
13706 | vectype = get_related_vectype_for_scalar_type (prevailing_mode: vinfo->vector_mode, |
13707 | scalar_type, nunits); |
13708 | nunits /= 2; |
13709 | } |
13710 | while (nunits > 1 && !vectype); |
13711 | } |
13712 | |
13713 | return vectype; |
13714 | } |
13715 | |
13716 | /* Return the vector type corresponding to SCALAR_TYPE as supported |
13717 | by the target. NODE, if nonnull, is the SLP tree node that will |
13718 | use the returned vector type. */ |
13719 | |
13720 | tree |
13721 | get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node) |
13722 | { |
13723 | unsigned int group_size = 0; |
13724 | if (node) |
13725 | group_size = SLP_TREE_LANES (node); |
13726 | return get_vectype_for_scalar_type (vinfo, scalar_type, group_size); |
13727 | } |
13728 | |
13729 | /* Function get_mask_type_for_scalar_type. |
13730 | |
13731 | Returns the mask type corresponding to a result of comparison |
13732 | of vectors of specified SCALAR_TYPE as supported by target. |
13733 | If GROUP_SIZE is nonzero and we're performing BB vectorization, |
13734 | make sure that the number of elements in the vector is no bigger |
13735 | than GROUP_SIZE. */ |
13736 | |
13737 | tree |
13738 | get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type, |
13739 | unsigned int group_size) |
13740 | { |
13741 | tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size); |
13742 | |
13743 | if (!vectype) |
13744 | return NULL; |
13745 | |
13746 | return truth_type_for (vectype); |
13747 | } |
13748 | |
13749 | /* Function get_mask_type_for_scalar_type. |
13750 | |
13751 | Returns the mask type corresponding to a result of comparison |
13752 | of vectors of specified SCALAR_TYPE as supported by target. |
13753 | NODE, if nonnull, is the SLP tree node that will use the returned |
13754 | vector type. */ |
13755 | |
13756 | tree |
13757 | get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type, |
13758 | slp_tree node) |
13759 | { |
13760 | tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, node); |
13761 | |
13762 | if (!vectype) |
13763 | return NULL; |
13764 | |
13765 | return truth_type_for (vectype); |
13766 | } |
13767 | |
13768 | /* Function get_same_sized_vectype |
13769 | |
13770 | Returns a vector type corresponding to SCALAR_TYPE of size |
13771 | VECTOR_TYPE if supported by the target. */ |
13772 | |
13773 | tree |
13774 | get_same_sized_vectype (tree scalar_type, tree vector_type) |
13775 | { |
13776 | if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)) |
13777 | return truth_type_for (vector_type); |
13778 | |
13779 | poly_uint64 nunits; |
13780 | if (!multiple_p (a: GET_MODE_SIZE (TYPE_MODE (vector_type)), |
13781 | b: GET_MODE_SIZE (TYPE_MODE (scalar_type)), multiple: &nunits)) |
13782 | return NULL_TREE; |
13783 | |
13784 | return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type), |
13785 | scalar_type, nunits); |
13786 | } |
13787 | |
13788 | /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE |
13789 | would not change the chosen vector modes. */ |
13790 | |
13791 | bool |
13792 | vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode) |
13793 | { |
13794 | for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin (); |
13795 | i != vinfo->used_vector_modes.end (); ++i) |
13796 | if (!VECTOR_MODE_P (*i) |
13797 | || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i) |
13798 | return false; |
13799 | return true; |
13800 | } |
13801 | |
13802 | /* Function vect_is_simple_use. |
13803 | |
13804 | Input: |
13805 | VINFO - the vect info of the loop or basic block that is being vectorized. |
13806 | OPERAND - operand in the loop or bb. |
13807 | Output: |
13808 | DEF_STMT_INFO_OUT (optional) - information about the defining stmt in |
13809 | case OPERAND is an SSA_NAME that is defined in the vectorizable region |
13810 | DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME; |
13811 | the definition could be anywhere in the function |
13812 | DT - the type of definition |
13813 | |
13814 | Returns whether a stmt with OPERAND can be vectorized. |
13815 | For loops, supportable operands are constants, loop invariants, and operands |
13816 | that are defined by the current iteration of the loop. Unsupportable |
13817 | operands are those that are defined by a previous iteration of the loop (as |
13818 | is the case in reduction/induction computations). |
13819 | For basic blocks, supportable operands are constants and bb invariants. |
13820 | For now, operands defined outside the basic block are not supported. */ |
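
/* A typical caller pattern (a sketch mirroring uses in this file):

     enum vect_def_type dt;
     if (!vect_is_simple_use (op, vinfo, &dt))
       return false;

   after which DT distinguishes constants and external defs (invariant
   operands) from defs computed inside the vectorized region.  */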
13821 | |
13822 | bool |
13823 | vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt, |
13824 | stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out) |
13825 | { |
13826 | if (def_stmt_info_out) |
13827 | *def_stmt_info_out = NULL; |
13828 | if (def_stmt_out) |
13829 | *def_stmt_out = NULL; |
13830 | *dt = vect_unknown_def_type; |
13831 | |
13832 | if (dump_enabled_p ()) |
13833 | { |
13834 | dump_printf_loc (MSG_NOTE, vect_location, |
13835 | "vect_is_simple_use: operand " ); |
13836 | if (TREE_CODE (operand) == SSA_NAME |
13837 | && !SSA_NAME_IS_DEFAULT_DEF (operand)) |
13838 | dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0); |
13839 | else |
13840 | dump_generic_expr (MSG_NOTE, TDF_SLIM, operand); |
13841 | } |
13842 | |
13843 | if (CONSTANT_CLASS_P (operand)) |
13844 | *dt = vect_constant_def; |
13845 | else if (is_gimple_min_invariant (operand)) |
13846 | *dt = vect_external_def; |
13847 | else if (TREE_CODE (operand) != SSA_NAME) |
13848 | *dt = vect_unknown_def_type; |
13849 | else if (SSA_NAME_IS_DEFAULT_DEF (operand)) |
13850 | *dt = vect_external_def; |
13851 | else |
13852 | { |
13853 | gimple *def_stmt = SSA_NAME_DEF_STMT (operand); |
13854 | stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand); |
13855 | if (!stmt_vinfo) |
13856 | *dt = vect_external_def; |
13857 | else |
13858 | { |
13859 | stmt_vinfo = vect_stmt_to_vectorize (stmt_info: stmt_vinfo); |
13860 | def_stmt = stmt_vinfo->stmt; |
13861 | *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo); |
13862 | if (def_stmt_info_out) |
13863 | *def_stmt_info_out = stmt_vinfo; |
13864 | } |
13865 | if (def_stmt_out) |
13866 | *def_stmt_out = def_stmt; |
13867 | } |
13868 | |
13869 | if (dump_enabled_p ()) |
13870 | { |
13871 | dump_printf (MSG_NOTE, ", type of def: " ); |
13872 | switch (*dt) |
13873 | { |
13874 | case vect_uninitialized_def: |
13875 | dump_printf (MSG_NOTE, "uninitialized\n" ); |
13876 | break; |
13877 | case vect_constant_def: |
13878 | dump_printf (MSG_NOTE, "constant\n" ); |
13879 | break; |
13880 | case vect_external_def: |
13881 | dump_printf (MSG_NOTE, "external\n" ); |
13882 | break; |
13883 | case vect_internal_def: |
13884 | dump_printf (MSG_NOTE, "internal\n" ); |
13885 | break; |
13886 | case vect_induction_def: |
13887 | dump_printf (MSG_NOTE, "induction\n" ); |
13888 | break; |
13889 | case vect_reduction_def: |
13890 | dump_printf (MSG_NOTE, "reduction\n" ); |
13891 | break; |
13892 | case vect_double_reduction_def: |
13893 | dump_printf (MSG_NOTE, "double reduction\n" ); |
13894 | break; |
13895 | case vect_nested_cycle: |
13896 | dump_printf (MSG_NOTE, "nested cycle\n" ); |
13897 | break; |
13898 | case vect_first_order_recurrence: |
13899 | dump_printf (MSG_NOTE, "first order recurrence\n" ); |
13900 | break; |
13901 | case vect_condition_def: |
13902 | dump_printf (MSG_NOTE, "control flow\n" ); |
13903 | break; |
13904 | case vect_unknown_def_type: |
13905 | dump_printf (MSG_NOTE, "unknown\n" ); |
13906 | break; |
13907 | } |
13908 | } |
13909 | |
13910 | if (*dt == vect_unknown_def_type) |
13911 | { |
13912 | if (dump_enabled_p ()) |
13913 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
13914 | "Unsupported pattern.\n" ); |
13915 | return false; |
13916 | } |
13917 | |
13918 | return true; |
13919 | } |
13920 | |
13921 | /* Function vect_is_simple_use. |
13922 | |
13923 | Same as vect_is_simple_use but also determines the vector operand |
13924 | type of OPERAND and stores it to *VECTYPE. If the definition of |
13925 | OPERAND is vect_uninitialized_def, vect_constant_def or |
13926 | vect_external_def *VECTYPE will be set to NULL_TREE and the caller |
   is responsible for computing the best suited vector type for the
13928 | scalar operand. */ |
13929 | |
13930 | bool |
13931 | vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt, |
13932 | tree *vectype, stmt_vec_info *def_stmt_info_out, |
13933 | gimple **def_stmt_out) |
13934 | { |
13935 | stmt_vec_info def_stmt_info; |
13936 | gimple *def_stmt; |
13937 | if (!vect_is_simple_use (operand, vinfo, dt, def_stmt_info_out: &def_stmt_info, def_stmt_out: &def_stmt)) |
13938 | return false; |
13939 | |
13940 | if (def_stmt_out) |
13941 | *def_stmt_out = def_stmt; |
13942 | if (def_stmt_info_out) |
13943 | *def_stmt_info_out = def_stmt_info; |
13944 | |
13945 | /* Now get a vector type if the def is internal, otherwise supply |
13946 | NULL_TREE and leave it up to the caller to figure out a proper |
13947 | type for the use stmt. */ |
13948 | if (*dt == vect_internal_def |
13949 | || *dt == vect_induction_def |
13950 | || *dt == vect_reduction_def |
13951 | || *dt == vect_double_reduction_def |
13952 | || *dt == vect_nested_cycle |
13953 | || *dt == vect_first_order_recurrence) |
13954 | { |
13955 | *vectype = STMT_VINFO_VECTYPE (def_stmt_info); |
13956 | gcc_assert (*vectype != NULL_TREE); |
13957 | if (dump_enabled_p ()) |
13958 | dump_printf_loc (MSG_NOTE, vect_location, |
13959 | "vect_is_simple_use: vectype %T\n" , *vectype); |
13960 | } |
13961 | else if (*dt == vect_uninitialized_def |
13962 | || *dt == vect_constant_def |
13963 | || *dt == vect_external_def) |
13964 | *vectype = NULL_TREE; |
13965 | else |
13966 | gcc_unreachable (); |
13967 | |
13968 | return true; |
13969 | } |
13970 | |
13971 | /* Function vect_is_simple_use. |
13972 | |
13973 | Same as vect_is_simple_use but determines the operand by operand |
13974 | position OPERAND from either STMT or SLP_NODE, filling in *OP |
13975 | and *SLP_DEF (when SLP_NODE is not NULL). */ |
13976 | |
13977 | bool |
13978 | vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node, |
13979 | unsigned operand, tree *op, slp_tree *slp_def, |
13980 | enum vect_def_type *dt, |
13981 | tree *vectype, stmt_vec_info *def_stmt_info_out) |
13982 | { |
13983 | if (slp_node) |
13984 | { |
13985 | slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand]; |
13986 | *slp_def = child; |
13987 | *vectype = SLP_TREE_VECTYPE (child); |
13988 | if (SLP_TREE_DEF_TYPE (child) == vect_internal_def) |
13989 | { |
13990 | *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt); |
13991 | return vect_is_simple_use (operand: *op, vinfo, dt, def_stmt_info_out); |
13992 | } |
13993 | else |
13994 | { |
13995 | if (def_stmt_info_out) |
13996 | *def_stmt_info_out = NULL; |
13997 | *op = SLP_TREE_SCALAR_OPS (child)[0]; |
13998 | *dt = SLP_TREE_DEF_TYPE (child); |
13999 | return true; |
14000 | } |
14001 | } |
14002 | else |
14003 | { |
14004 | *slp_def = NULL; |
14005 | if (gassign *ass = dyn_cast <gassign *> (p: stmt->stmt)) |
14006 | { |
14007 | if (gimple_assign_rhs_code (gs: ass) == COND_EXPR |
14008 | && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass))) |
14009 | { |
14010 | if (operand < 2) |
14011 | *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand); |
14012 | else |
14013 | *op = gimple_op (gs: ass, i: operand); |
14014 | } |
14015 | else if (gimple_assign_rhs_code (gs: ass) == VIEW_CONVERT_EXPR) |
14016 | *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0); |
14017 | else |
14018 | *op = gimple_op (gs: ass, i: operand + 1); |
14019 | } |
14020 | else if (gcond *cond = dyn_cast <gcond *> (p: stmt->stmt)) |
14021 | *op = gimple_op (gs: cond, i: operand); |
14022 | else if (gcall *call = dyn_cast <gcall *> (p: stmt->stmt)) |
14023 | *op = gimple_call_arg (gs: call, index: operand); |
14024 | else |
14025 | gcc_unreachable (); |
14026 | return vect_is_simple_use (operand: *op, vinfo, dt, vectype, def_stmt_info_out); |
14027 | } |
14028 | } |
14029 | |
14030 | /* If OP is not NULL and is external or constant update its vector |
14031 | type with VECTYPE. Returns true if successful or false if not, |
14032 | for example when conflicting vector types are present. */ |
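
/* A typical caller pattern (a sketch; compare vectorizable_operation):

     if (!vect_maybe_update_slp_op_vectype (slp_op, vectype))
       {
	 if (dump_enabled_p ())
	   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			    "incompatible vector types for invariants\n");
	 return false;
       }  */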
14033 | |
14034 | bool |
14035 | vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype) |
14036 | { |
14037 | if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def) |
14038 | return true; |
14039 | if (SLP_TREE_VECTYPE (op)) |
14040 | return types_compatible_p (SLP_TREE_VECTYPE (op), type2: vectype); |
  /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
     should be handled by patterns.  Allow vect_constant_def for now.  */
14043 | if (VECTOR_BOOLEAN_TYPE_P (vectype) |
14044 | && SLP_TREE_DEF_TYPE (op) == vect_external_def) |
14045 | return false; |
14046 | SLP_TREE_VECTYPE (op) = vectype; |
14047 | return true; |
14048 | } |
14049 | |
14050 | /* Function supportable_widening_operation |
14051 | |
14052 | Check whether an operation represented by the code CODE is a |
14053 | widening operation that is supported by the target platform in |
14054 | vector form (i.e., when operating on arguments of type VECTYPE_IN |
14055 | producing a result of type VECTYPE_OUT). |
14056 | |
14057 | Widening operations we currently support are NOP (CONVERT), FLOAT, |
14058 | FIX_TRUNC and WIDEN_MULT. This function checks if these operations |
14059 | are supported by the target platform either directly (via vector |
14060 | tree-codes), or via target builtins. |
14061 | |
14062 | Output: |
14063 | - CODE1 and CODE2 are codes of vector operations to be used when |
14064 | vectorizing the operation, if available. |
14065 | - MULTI_STEP_CVT determines the number of required intermediate steps in |
14066 | case of multi-step conversion (like char->short->int - in that case |
14067 | MULTI_STEP_CVT will be 1). |
14068 | - INTERM_TYPES contains the intermediate type required to perform the |
14069 | widening operation (short in the above example). */ |
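
/* For example, a single-step widening conversion such as

     short a[N];
     int b[N];
     for (int i = 0; i < N; i++)
       b[i] = a[i];

   uses CODE1 = VEC_UNPACK_LO_EXPR and CODE2 = VEC_UNPACK_HI_EXPR,
   producing two int vectors from each short vector.  Widening char
   directly to int instead needs an intermediate step through short,
   so *MULTI_STEP_CVT is 1 and INTERM_TYPES holds the short vector
   type.  */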
14070 | |
14071 | bool |
14072 | supportable_widening_operation (vec_info *vinfo, |
14073 | code_helper code, |
14074 | stmt_vec_info stmt_info, |
14075 | tree vectype_out, tree vectype_in, |
14076 | code_helper *code1, |
14077 | code_helper *code2, |
14078 | int *multi_step_cvt, |
14079 | vec<tree> *interm_types) |
14080 | { |
14081 | loop_vec_info loop_info = dyn_cast <loop_vec_info> (p: vinfo); |
14082 | class loop *vect_loop = NULL; |
14083 | machine_mode vec_mode; |
14084 | enum insn_code icode1, icode2; |
14085 | optab optab1 = unknown_optab, optab2 = unknown_optab; |
14086 | tree vectype = vectype_in; |
14087 | tree wide_vectype = vectype_out; |
14088 | tree_code c1 = MAX_TREE_CODES, c2 = MAX_TREE_CODES; |
14089 | int i; |
14090 | tree prev_type, intermediate_type; |
14091 | machine_mode intermediate_mode, prev_mode; |
14092 | optab optab3, optab4; |
14093 | |
14094 | *multi_step_cvt = 0; |
14095 | if (loop_info) |
14096 | vect_loop = LOOP_VINFO_LOOP (loop_info); |
14097 | |
14098 | switch (code.safe_as_tree_code ()) |
14099 | { |
14100 | case MAX_TREE_CODES: |
14101 | /* Don't set c1 and c2 if code is not a tree_code. */ |
14102 | break; |
14103 | |
14104 | case WIDEN_MULT_EXPR: |
14105 | /* The result of a vectorized widening operation usually requires |
14106 | two vectors (because the widened results do not fit into one vector). |
14107 | The generated vector results would normally be expected to be |
14108 | generated in the same order as in the original scalar computation, |
14109 | i.e. if 8 results are generated in each vector iteration, they are |
14110 | to be organized as follows: |
14111 | vect1: [res1,res2,res3,res4], |
14112 | vect2: [res5,res6,res7,res8]. |
14113 | |
14114 | However, in the special case that the result of the widening |
14115 | operation is used in a reduction computation only, the order doesn't |
14116 | matter (because when vectorizing a reduction we change the order of |
14117 | the computation). Some targets can take advantage of this and |
14118 | generate more efficient code. For example, targets like Altivec, |
14119 | that support widen_mult using a sequence of {mult_even,mult_odd} |
14120 | generate the following vectors: |
14121 | vect1: [res1,res3,res5,res7], |
14122 | vect2: [res2,res4,res6,res8]. |
14123 | |
14124 | When vectorizing outer-loops, we execute the inner-loop sequentially |
14125 | (each vectorized inner-loop iteration contributes to VF outer-loop |
	 iterations in parallel).  We therefore don't allow changing the
	 order of the computation in the inner-loop during outer-loop
	 vectorization.  */
14129 | /* TODO: Another case in which order doesn't *really* matter is when we |
14130 | widen and then contract again, e.g. (short)((int)x * y >> 8). |
14131 | Normally, pack_trunc performs an even/odd permute, whereas the |
14132 | repack from an even/odd expansion would be an interleave, which |
14133 | would be significantly simpler for e.g. AVX2. */ |
14134 | /* In any case, in order to avoid duplicating the code below, recurse |
14135 | on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values |
14136 | are properly set up for the caller. If we fail, we'll continue with |
14137 | a VEC_WIDEN_MULT_LO/HI_EXPR check. */ |
14138 | if (vect_loop |
14139 | && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction |
14140 | && !nested_in_vect_loop_p (loop: vect_loop, stmt_info) |
14141 | && supportable_widening_operation (vinfo, code: VEC_WIDEN_MULT_EVEN_EXPR, |
14142 | stmt_info, vectype_out, |
14143 | vectype_in, code1, |
14144 | code2, multi_step_cvt, |
14145 | interm_types)) |
14146 | { |
	  /* Elements in a vector with the vect_used_by_reduction property
	     cannot be reordered if the use chain with this property does
	     not have the same operation.  One such example is s += a * b,
	     where elements in a and b cannot be reordered.  Here we check
	     if the vector defined by STMT is only directly used in the
	     reduction statement.  */
14152 | tree lhs = gimple_assign_lhs (gs: stmt_info->stmt); |
14153 | stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs); |
14154 | if (use_stmt_info |
14155 | && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def) |
14156 | return true; |
14157 | } |
14158 | c1 = VEC_WIDEN_MULT_LO_EXPR; |
14159 | c2 = VEC_WIDEN_MULT_HI_EXPR; |
14160 | break; |
14161 | |
14162 | case DOT_PROD_EXPR: |
14163 | c1 = DOT_PROD_EXPR; |
14164 | c2 = DOT_PROD_EXPR; |
14165 | break; |
14166 | |
14167 | case SAD_EXPR: |
14168 | c1 = SAD_EXPR; |
14169 | c2 = SAD_EXPR; |
14170 | break; |
14171 | |
14172 | case VEC_WIDEN_MULT_EVEN_EXPR: |
14173 | /* Support the recursion induced just above. */ |
14174 | c1 = VEC_WIDEN_MULT_EVEN_EXPR; |
14175 | c2 = VEC_WIDEN_MULT_ODD_EXPR; |
14176 | break; |
14177 | |
14178 | case WIDEN_LSHIFT_EXPR: |
14179 | c1 = VEC_WIDEN_LSHIFT_LO_EXPR; |
14180 | c2 = VEC_WIDEN_LSHIFT_HI_EXPR; |
14181 | break; |
14182 | |
14183 | CASE_CONVERT: |
14184 | c1 = VEC_UNPACK_LO_EXPR; |
14185 | c2 = VEC_UNPACK_HI_EXPR; |
14186 | break; |
14187 | |
14188 | case FLOAT_EXPR: |
14189 | c1 = VEC_UNPACK_FLOAT_LO_EXPR; |
14190 | c2 = VEC_UNPACK_FLOAT_HI_EXPR; |
14191 | break; |
14192 | |
14193 | case FIX_TRUNC_EXPR: |
14194 | c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR; |
14195 | c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR; |
14196 | break; |
14197 | |
14198 | default: |
14199 | gcc_unreachable (); |
14200 | } |
14201 | |
14202 | if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR) |
14203 | std::swap (a&: c1, b&: c2); |
14204 | |
14205 | if (code == FIX_TRUNC_EXPR) |
14206 | { |
      /* The signedness is determined from the output operand.  */
14208 | optab1 = optab_for_tree_code (c1, vectype_out, optab_default); |
14209 | optab2 = optab_for_tree_code (c2, vectype_out, optab_default); |
14210 | } |
14211 | else if (CONVERT_EXPR_CODE_P (code.safe_as_tree_code ()) |
14212 | && VECTOR_BOOLEAN_TYPE_P (wide_vectype) |
14213 | && VECTOR_BOOLEAN_TYPE_P (vectype) |
14214 | && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype) |
14215 | && SCALAR_INT_MODE_P (TYPE_MODE (vectype))) |
14216 | { |
14217 | /* If the input and result modes are the same, a different optab |
14218 | is needed where we pass in the number of units in vectype. */ |
14219 | optab1 = vec_unpacks_sbool_lo_optab; |
14220 | optab2 = vec_unpacks_sbool_hi_optab; |
14221 | } |
14222 | |
14223 | vec_mode = TYPE_MODE (vectype); |
14224 | if (widening_fn_p (code)) |
14225 | { |
14226 | /* If this is an internal fn then we must check whether the target |
14227 | supports either a low-high split or an even-odd split. */ |
14228 | internal_fn ifn = as_internal_fn (code: (combined_fn) code); |
14229 | |
14230 | internal_fn lo, hi, even, odd; |
14231 | lookup_hilo_internal_fn (ifn, &lo, &hi); |
14232 | *code1 = as_combined_fn (fn: lo); |
14233 | *code2 = as_combined_fn (fn: hi); |
14234 | optab1 = direct_internal_fn_optab (lo, {vectype, vectype}); |
14235 | optab2 = direct_internal_fn_optab (hi, {vectype, vectype}); |
14236 | |
14237 | /* If we don't support low-high, then check for even-odd. */ |
14238 | if (!optab1 |
14239 | || (icode1 = optab_handler (op: optab1, mode: vec_mode)) == CODE_FOR_nothing |
14240 | || !optab2 |
14241 | || (icode2 = optab_handler (op: optab2, mode: vec_mode)) == CODE_FOR_nothing) |
14242 | { |
14243 | lookup_evenodd_internal_fn (ifn, &even, &odd); |
14244 | *code1 = as_combined_fn (fn: even); |
14245 | *code2 = as_combined_fn (fn: odd); |
14246 | optab1 = direct_internal_fn_optab (even, {vectype, vectype}); |
14247 | optab2 = direct_internal_fn_optab (odd, {vectype, vectype}); |
14248 | } |
14249 | } |
14250 | else if (code.is_tree_code ()) |
14251 | { |
14252 | if (code == FIX_TRUNC_EXPR) |
14253 | { |
	  /* The signedness is determined from the output operand.  */
14255 | optab1 = optab_for_tree_code (c1, vectype_out, optab_default); |
14256 | optab2 = optab_for_tree_code (c2, vectype_out, optab_default); |
14257 | } |
14258 | else if (CONVERT_EXPR_CODE_P ((tree_code) code.safe_as_tree_code ()) |
14259 | && VECTOR_BOOLEAN_TYPE_P (wide_vectype) |
14260 | && VECTOR_BOOLEAN_TYPE_P (vectype) |
14261 | && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype) |
14262 | && SCALAR_INT_MODE_P (TYPE_MODE (vectype))) |
14263 | { |
14264 | /* If the input and result modes are the same, a different optab |
14265 | is needed where we pass in the number of units in vectype. */ |
14266 | optab1 = vec_unpacks_sbool_lo_optab; |
14267 | optab2 = vec_unpacks_sbool_hi_optab; |
14268 | } |
14269 | else |
14270 | { |
14271 | optab1 = optab_for_tree_code (c1, vectype, optab_default); |
14272 | optab2 = optab_for_tree_code (c2, vectype, optab_default); |
14273 | } |
14274 | *code1 = c1; |
14275 | *code2 = c2; |
14276 | } |
14277 | |
14278 | if (!optab1 || !optab2) |
14279 | return false; |
14280 | |
14281 | if ((icode1 = optab_handler (op: optab1, mode: vec_mode)) == CODE_FOR_nothing |
14282 | || (icode2 = optab_handler (op: optab2, mode: vec_mode)) == CODE_FOR_nothing) |
14283 | return false; |
14284 | |
14285 | |
14286 | if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype) |
14287 | && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype)) |
14288 | { |
14289 | if (!VECTOR_BOOLEAN_TYPE_P (vectype)) |
14290 | return true; |
      /* For scalar masks we may have different boolean
	 vector types having the same QImode.  Thus we
	 add an additional check for the number of elements.  */
14294 | if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), |
14295 | TYPE_VECTOR_SUBPARTS (wide_vectype) * 2)) |
14296 | return true; |
14297 | } |
14298 | |
14299 | /* Check if it's a multi-step conversion that can be done using intermediate |
14300 | types. */ |
14301 | |
14302 | prev_type = vectype; |
14303 | prev_mode = vec_mode; |
14304 | |
14305 | if (!CONVERT_EXPR_CODE_P (code.safe_as_tree_code ())) |
14306 | return false; |
14307 | |
  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
14312 | interm_types->create (MAX_INTERM_CVT_STEPS); |
14313 | for (i = 0; i < MAX_INTERM_CVT_STEPS; i++) |
14314 | { |
14315 | intermediate_mode = insn_data[icode1].operand[0].mode; |
14316 | if (VECTOR_BOOLEAN_TYPE_P (prev_type)) |
14317 | intermediate_type |
14318 | = vect_halve_mask_nunits (prev_type, intermediate_mode); |
14319 | else if (VECTOR_MODE_P (intermediate_mode)) |
14320 | { |
14321 | tree intermediate_element_type |
14322 | = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode), |
14323 | TYPE_UNSIGNED (prev_type)); |
14324 | intermediate_type |
14325 | = build_vector_type_for_mode (intermediate_element_type, |
14326 | intermediate_mode); |
14327 | } |
14328 | else |
14329 | intermediate_type |
14330 | = lang_hooks.types.type_for_mode (intermediate_mode, |
14331 | TYPE_UNSIGNED (prev_type)); |
14332 | |
14333 | if (VECTOR_BOOLEAN_TYPE_P (intermediate_type) |
14334 | && VECTOR_BOOLEAN_TYPE_P (prev_type) |
14335 | && intermediate_mode == prev_mode |
14336 | && SCALAR_INT_MODE_P (prev_mode)) |
14337 | { |
14338 | /* If the input and result modes are the same, a different optab |
14339 | is needed where we pass in the number of units in vectype. */ |
14340 | optab3 = vec_unpacks_sbool_lo_optab; |
14341 | optab4 = vec_unpacks_sbool_hi_optab; |
14342 | } |
14343 | else |
14344 | { |
14345 | optab3 = optab_for_tree_code (c1, intermediate_type, optab_default); |
14346 | optab4 = optab_for_tree_code (c2, intermediate_type, optab_default); |
14347 | } |
14348 | |
14349 | if (!optab3 || !optab4 |
14350 | || (icode1 = optab_handler (op: optab1, mode: prev_mode)) == CODE_FOR_nothing |
14351 | || insn_data[icode1].operand[0].mode != intermediate_mode |
14352 | || (icode2 = optab_handler (op: optab2, mode: prev_mode)) == CODE_FOR_nothing |
14353 | || insn_data[icode2].operand[0].mode != intermediate_mode |
14354 | || ((icode1 = optab_handler (op: optab3, mode: intermediate_mode)) |
14355 | == CODE_FOR_nothing) |
14356 | || ((icode2 = optab_handler (op: optab4, mode: intermediate_mode)) |
14357 | == CODE_FOR_nothing)) |
14358 | break; |
14359 | |
14360 | interm_types->quick_push (obj: intermediate_type); |
14361 | (*multi_step_cvt)++; |
14362 | |
14363 | if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype) |
14364 | && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype)) |
14365 | { |
14366 | if (!VECTOR_BOOLEAN_TYPE_P (vectype)) |
14367 | return true; |
14368 | if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type), |
14369 | TYPE_VECTOR_SUBPARTS (wide_vectype) * 2)) |
14370 | return true; |
14371 | } |
14372 | |
14373 | prev_type = intermediate_type; |
14374 | prev_mode = intermediate_mode; |
14375 | } |
14376 | |
14377 | interm_types->release (); |
14378 | return false; |
14379 | } |
14380 | |
14381 | |
14382 | /* Function supportable_narrowing_operation |
14383 | |
14384 | Check whether an operation represented by the code CODE is a |
14385 | narrowing operation that is supported by the target platform in |
14386 | vector form (i.e., when operating on arguments of type VECTYPE_IN |
14387 | and producing a result of type VECTYPE_OUT). |
14388 | |
14389 | Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC |
14390 | and FLOAT. This function checks if these operations are supported by |
14391 | the target platform directly via vector tree-codes. |
14392 | |
14393 | Output: |
14394 | - CODE1 is the code of a vector operation to be used when |
14395 | vectorizing the operation, if available. |
14396 | - MULTI_STEP_CVT determines the number of required intermediate steps in |
14397 | case of multi-step conversion (like int->short->char - in that case |
14398 | MULTI_STEP_CVT will be 1). |
14399 | - INTERM_TYPES contains the intermediate type required to perform the |
14400 | narrowing operation (short in the above example). */ |
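
/* For example, a narrowing conversion such as

     int a[N];
     char b[N];
     for (int i = 0; i < N; i++)
       b[i] = a[i];

   is done in two steps (int -> short -> char) using VEC_PACK_TRUNC_EXPR,
   so *MULTI_STEP_CVT is 1 and INTERM_TYPES holds the short vector type;
   each step packs two input vectors into one narrower result vector.  */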
14401 | |
14402 | bool |
14403 | supportable_narrowing_operation (code_helper code, |
14404 | tree vectype_out, tree vectype_in, |
14405 | code_helper *code1, int *multi_step_cvt, |
14406 | vec<tree> *interm_types) |
14407 | { |
14408 | machine_mode vec_mode; |
14409 | enum insn_code icode1; |
14410 | optab optab1, interm_optab; |
14411 | tree vectype = vectype_in; |
14412 | tree narrow_vectype = vectype_out; |
14413 | enum tree_code c1; |
14414 | tree intermediate_type, prev_type; |
14415 | machine_mode intermediate_mode, prev_mode; |
14416 | int i; |
14417 | unsigned HOST_WIDE_INT n_elts; |
14418 | bool uns; |
14419 | |
14420 | if (!code.is_tree_code ()) |
14421 | return false; |
14422 | |
14423 | *multi_step_cvt = 0; |
14424 | switch ((tree_code) code) |
14425 | { |
14426 | CASE_CONVERT: |
14427 | c1 = VEC_PACK_TRUNC_EXPR; |
14428 | if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype) |
14429 | && VECTOR_BOOLEAN_TYPE_P (vectype) |
14430 | && SCALAR_INT_MODE_P (TYPE_MODE (vectype)) |
14431 | && TYPE_VECTOR_SUBPARTS (node: vectype).is_constant (const_value: &n_elts) |
14432 | && n_elts < BITS_PER_UNIT) |
14433 | optab1 = vec_pack_sbool_trunc_optab; |
14434 | else |
14435 | optab1 = optab_for_tree_code (c1, vectype, optab_default); |
14436 | break; |
14437 | |
14438 | case FIX_TRUNC_EXPR: |
14439 | c1 = VEC_PACK_FIX_TRUNC_EXPR; |
      /* The signedness is determined from the output operand.  */
14441 | optab1 = optab_for_tree_code (c1, vectype_out, optab_default); |
14442 | break; |
14443 | |
14444 | case FLOAT_EXPR: |
14445 | c1 = VEC_PACK_FLOAT_EXPR; |
14446 | optab1 = optab_for_tree_code (c1, vectype, optab_default); |
14447 | break; |
14448 | |
14449 | default: |
14450 | gcc_unreachable (); |
14451 | } |
14452 | |
14453 | if (!optab1) |
14454 | return false; |
14455 | |
14456 | vec_mode = TYPE_MODE (vectype); |
14457 | if ((icode1 = optab_handler (op: optab1, mode: vec_mode)) == CODE_FOR_nothing) |
14458 | return false; |
14459 | |
14460 | *code1 = c1; |
14461 | |
14462 | if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype)) |
14463 | { |
14464 | if (!VECTOR_BOOLEAN_TYPE_P (vectype)) |
14465 | return true; |
      /* For scalar masks we may have different boolean
	 vector types having the same QImode.  Thus we
	 add an additional check for the number of elements.  */
14469 | if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2, |
14470 | TYPE_VECTOR_SUBPARTS (narrow_vectype))) |
14471 | return true; |
14472 | } |
14473 | |
14474 | if (code == FLOAT_EXPR) |
14475 | return false; |
14476 | |
14477 | /* Check if it's a multi-step conversion that can be done using intermediate |
14478 | types. */ |
14479 | prev_mode = vec_mode; |
14480 | prev_type = vectype; |
14481 | if (code == FIX_TRUNC_EXPR) |
14482 | uns = TYPE_UNSIGNED (vectype_out); |
14483 | else |
14484 | uns = TYPE_UNSIGNED (vectype); |
14485 | |
14486 | /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer |
14487 | conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more |
14488 | costly than signed. */ |
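  /* For example (a sketch of the rationale): double -> unsigned short
     can safely go through a signed conversion, because every value
     representable in unsigned short also converts exactly to a wider
     signed integer, and the subsequent truncating packs discard the
     upper bits either way.  */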
14489 | if (code == FIX_TRUNC_EXPR && uns) |
14490 | { |
14491 | enum insn_code icode2; |
14492 | |
14493 | intermediate_type |
14494 | = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0); |
14495 | interm_optab |
14496 | = optab_for_tree_code (c1, intermediate_type, optab_default); |
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (interm_optab, vec_mode)) != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
14501 | { |
14502 | uns = false; |
14503 | optab1 = interm_optab; |
14504 | icode1 = icode2; |
14505 | } |
14506 | } |
14507 | |
  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
14511 | interm_types->create (MAX_INTERM_CVT_STEPS); |
14512 | for (i = 0; i < MAX_INTERM_CVT_STEPS; i++) |
14513 | { |
14514 | intermediate_mode = insn_data[icode1].operand[0].mode; |
14515 | if (VECTOR_BOOLEAN_TYPE_P (prev_type)) |
14516 | intermediate_type |
14517 | = vect_double_mask_nunits (prev_type, intermediate_mode); |
14518 | else |
14519 | intermediate_type |
14520 | = lang_hooks.types.type_for_mode (intermediate_mode, uns); |
14521 | if (VECTOR_BOOLEAN_TYPE_P (intermediate_type) |
14522 | && VECTOR_BOOLEAN_TYPE_P (prev_type) |
14523 | && SCALAR_INT_MODE_P (prev_mode) |
14524 | && TYPE_VECTOR_SUBPARTS (node: intermediate_type).is_constant (const_value: &n_elts) |
14525 | && n_elts < BITS_PER_UNIT) |
14526 | interm_optab = vec_pack_sbool_trunc_optab; |
14527 | else |
14528 | interm_optab |
14529 | = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type, |
14530 | optab_default); |
14531 | if (!interm_optab |
14532 | || ((icode1 = optab_handler (op: optab1, mode: prev_mode)) == CODE_FOR_nothing) |
14533 | || insn_data[icode1].operand[0].mode != intermediate_mode |
14534 | || ((icode1 = optab_handler (op: interm_optab, mode: intermediate_mode)) |
14535 | == CODE_FOR_nothing)) |
14536 | break; |
14537 | |
14538 | interm_types->quick_push (obj: intermediate_type); |
14539 | (*multi_step_cvt)++; |
14540 | |
14541 | if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype)) |
14542 | { |
14543 | if (!VECTOR_BOOLEAN_TYPE_P (vectype)) |
14544 | return true; |
14545 | if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2, |
14546 | TYPE_VECTOR_SUBPARTS (narrow_vectype))) |
14547 | return true; |
14548 | } |
14549 | |
14550 | prev_mode = intermediate_mode; |
14551 | prev_type = intermediate_type; |
14552 | optab1 = interm_optab; |
14553 | } |
14554 | |
14555 | interm_types->release (); |
14556 | return false; |
14557 | } |
14558 | |
14559 | /* Generate and return a vector mask of MASK_TYPE such that |
14560 | mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I. |
14561 | Add the statements to SEQ. */ |
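
/* For example, with a four-element MASK_TYPE, START_INDEX I and
   END_INDEX N, the emitted IFN_WHILE_ULT call produces the mask

     { I + 0 < N, I + 1 < N, I + 2 < N, I + 3 < N }

   which is how fully-masked loops disable the lanes that lie beyond
   the end of the iteration space.  */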
14562 | |
14563 | tree |
14564 | vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index, |
14565 | tree end_index, const char *name) |
14566 | { |
14567 | tree cmp_type = TREE_TYPE (start_index); |
14568 | gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT, |
14569 | cmp_type, mask_type, |
14570 | OPTIMIZE_FOR_SPEED)); |
14571 | gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3, |
14572 | start_index, end_index, |
14573 | build_zero_cst (mask_type)); |
14574 | tree tmp; |
14575 | if (name) |
14576 | tmp = make_temp_ssa_name (type: mask_type, NULL, name); |
14577 | else |
14578 | tmp = make_ssa_name (var: mask_type); |
14579 | gimple_call_set_lhs (gs: call, lhs: tmp); |
14580 | gimple_seq_add_stmt (seq, call); |
14581 | return tmp; |
14582 | } |
14583 | |
/* Generate a vector mask of type MASK_TYPE such that mask[I] is false
   iff J + START_INDEX < END_INDEX for all J <= I.  Add the statements
   to SEQ.  */
14586 | |
14587 | tree |
14588 | vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index, |
14589 | tree end_index) |
14590 | { |
14591 | tree tmp = vect_gen_while (seq, mask_type, start_index, end_index); |
14592 | return gimple_build (seq, code: BIT_NOT_EXPR, type: mask_type, ops: tmp); |
14593 | } |
14594 | |
14595 | /* Try to compute the vector types required to vectorize STMT_INFO, |
14596 | returning true on success and false if vectorization isn't possible. |
14597 | If GROUP_SIZE is nonzero and we're performing BB vectorization, |
   make sure that the number of elements in the vectors is no bigger
14599 | than GROUP_SIZE. |
14600 | |
14601 | On success: |
14602 | |
14603 | - Set *STMT_VECTYPE_OUT to: |
14604 | - NULL_TREE if the statement doesn't need to be vectorized; |
14605 | - the equivalent of STMT_VINFO_VECTYPE otherwise. |
14606 | |
14607 | - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum |
14608 | number of units needed to vectorize STMT_INFO, or NULL_TREE if the |
14609 | statement does not help to determine the overall number of units. */ |
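
/* For example (a sketch), for a widening statement such as

     int_var = (int) short_var;

   *STMT_VECTYPE_OUT is the int vector type of the result, while
   *NUNITS_VECTYPE_OUT is based on the smallest scalar type involved
   (short here) and so has twice as many elements, ensuring the chosen
   vectorization factor covers both types.  */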
14610 | |
14611 | opt_result |
14612 | vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info, |
14613 | tree *stmt_vectype_out, |
14614 | tree *nunits_vectype_out, |
14615 | unsigned int group_size) |
14616 | { |
14617 | gimple *stmt = stmt_info->stmt; |
14618 | |
14619 | /* For BB vectorization, we should always have a group size once we've |
14620 | constructed the SLP tree; the only valid uses of zero GROUP_SIZEs |
14621 | are tentative requests during things like early data reference |
14622 | analysis and pattern recognition. */ |
14623 | if (is_a <bb_vec_info> (p: vinfo)) |
14624 | gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0); |
14625 | else |
14626 | group_size = 0; |
14627 | |
14628 | *stmt_vectype_out = NULL_TREE; |
14629 | *nunits_vectype_out = NULL_TREE; |
14630 | |
14631 | if (gimple_get_lhs (stmt) == NULL_TREE |
14632 | /* Allow vector conditionals through here. */ |
14633 | && !is_a <gcond *> (p: stmt) |
14634 | /* MASK_STORE has no lhs, but is ok. */ |
14635 | && !gimple_call_internal_p (gs: stmt, fn: IFN_MASK_STORE)) |
14636 | { |
14637 | if (is_a <gcall *> (p: stmt)) |
14638 | { |
	  /* Ignore calls with no lhs.  These must be calls to
	     #pragma omp simd functions, and what vectorization factor
	     they really need can't be determined until
	     vectorizable_simd_clone_call.  */
14643 | if (dump_enabled_p ()) |
14644 | dump_printf_loc (MSG_NOTE, vect_location, |
14645 | "defer to SIMD clone analysis.\n" ); |
14646 | return opt_result::success (); |
14647 | } |
14648 | |
14649 | return opt_result::failure_at (loc: stmt, |
14650 | fmt: "not vectorized: irregular stmt: %G" , stmt); |
14651 | } |
14652 | |
14653 | tree vectype; |
14654 | tree scalar_type = NULL_TREE; |
14655 | if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info)) |
14656 | { |
14657 | vectype = STMT_VINFO_VECTYPE (stmt_info); |
14658 | if (dump_enabled_p ()) |
14659 | dump_printf_loc (MSG_NOTE, vect_location, |
14660 | "precomputed vectype: %T\n" , vectype); |
14661 | } |
14662 | else if (vect_use_mask_type_p (stmt_info)) |
14663 | { |
14664 | unsigned int precision = stmt_info->mask_precision; |
14665 | scalar_type = build_nonstandard_integer_type (precision, 1); |
14666 | vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size); |
14667 | if (!vectype) |
14668 | return opt_result::failure_at (loc: stmt, fmt: "not vectorized: unsupported" |
14669 | " data-type %T\n" , scalar_type); |
14670 | if (dump_enabled_p ()) |
14671 | dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n" , vectype); |
14672 | } |
  else
    {
      /* If we got here with a gcond, it means the target had no available
	 vector mode for the scalar type.  We can't vectorize, so abort.  */
      if (is_a <gcond *> (stmt))
	return opt_result::failure_at (stmt,
				       "not vectorized:"
				       " unsupported data-type for gcond %T\n",
				       scalar_type);

      if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
	scalar_type = TREE_TYPE (DR_REF (dr));
      else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
	scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
      else
	scalar_type = TREE_TYPE (gimple_get_lhs (stmt));

      if (dump_enabled_p ())
	{
	  if (group_size)
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for scalar type (group size %d):"
			     " %T\n", group_size, scalar_type);
	  else
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for scalar type: %T\n", scalar_type);
	}
      vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
      if (!vectype)
	return opt_result::failure_at (stmt,
				       "not vectorized:"
				       " unsupported data-type %T\n",
				       scalar_type);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }
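
  /* Illustrative note on the mask path above: a mask_precision of, say,
     16 yields a 16-bit unsigned scalar type, and the chosen vectype is
     whatever vector boolean type the target associates with 16-bit
     elements (e.g. the mask type paired with V8HI on a 128-bit target;
     the concrete types here are hypothetical and target-dependent).  */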

  if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
    return opt_result::failure_at (stmt,
				   "not vectorized: vector stmt in loop:%G",
				   stmt);

  *stmt_vectype_out = vectype;

  /* Don't try to compute scalar types if the stmt produces a boolean
     vector; use the existing vector type instead.  */
  tree nunits_vectype = vectype;
  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
    {
      /* The number of units is set according to the smallest scalar
	 type (or the largest vector size, but we only support one
	 vector size per vectorization).  */
      scalar_type = vect_get_smallest_scalar_type (stmt_info,
						   TREE_TYPE (vectype));
      if (scalar_type != TREE_TYPE (vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for smallest scalar type: %T\n",
			     scalar_type);
	  nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
							group_size);
	  if (!nunits_vectype)
	    return opt_result::failure_at
	      (stmt, "not vectorized: unsupported data-type %T\n",
	       scalar_type);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
			     nunits_vectype);
	}
    }

  if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
		   TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
    return opt_result::failure_at (stmt,
				   "Not vectorized: Incompatible number "
				   "of vector subparts between %T and %T\n",
				   nunits_vectype, *stmt_vectype_out);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
      dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
      dump_printf (MSG_NOTE, "\n");
    }

  *nunits_vectype_out = nunits_vectype;
  return opt_result::success ();
}

/* Generate and return a statement sequence that sets the vector length
   LEN, computed as:

     min_of_start_and_end = min (START_INDEX, END_INDEX);
     left_len = END_INDEX - min_of_start_and_end;
     rhs = min (left_len, LEN_LIMIT);
     LEN = rhs;

   Note: the cost of the code generated by this function is modeled
   by vect_estimate_min_profitable_iters, so changes here may need
   corresponding changes there.  */

gimple_seq
vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
{
  gimple_seq stmts = NULL;
  tree len_type = TREE_TYPE (len);
  gcc_assert (TREE_TYPE (start_index) == len_type);

  tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
  tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
  tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
  gimple *stmt = gimple_build_assign (len, rhs);
  gimple_seq_add_stmt (&stmts, stmt);

  return stmts;
}
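
/* Usage sketch with hypothetical operands: for START_INDEX = i,
   END_INDEX = n and LEN_LIMIT = 16, the sequence built above is
   equivalent to

     _1 = MIN_EXPR <i, n>;
     _2 = n - _1;
     len = MIN_EXPR <_2, 16>;

   i.e. LEN = MIN (n - i, 16) when i <= n, and 0 otherwise; the initial
   MIN against START_INDEX guards the subtraction against wrapping when
   i > n.  */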