1 | /* Data References Analysis and Manipulation Utilities for Vectorization. |
2 | Copyright (C) 2003-2024 Free Software Foundation, Inc. |
3 | Contributed by Dorit Naishlos <dorit@il.ibm.com> |
4 | and Ira Rosen <irar@il.ibm.com> |
5 | |
6 | This file is part of GCC. |
7 | |
8 | GCC is free software; you can redistribute it and/or modify it under |
9 | the terms of the GNU General Public License as published by the Free |
10 | Software Foundation; either version 3, or (at your option) any later |
11 | version. |
12 | |
13 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
16 | for more details. |
17 | |
18 | You should have received a copy of the GNU General Public License |
19 | along with GCC; see the file COPYING3. If not see |
20 | <http://www.gnu.org/licenses/>. */ |
21 | |
22 | #include "config.h" |
23 | #include "system.h" |
24 | #include "coretypes.h" |
25 | #include "backend.h" |
26 | #include "target.h" |
27 | #include "rtl.h" |
28 | #include "tree.h" |
29 | #include "gimple.h" |
30 | #include "predict.h" |
31 | #include "memmodel.h" |
32 | #include "tm_p.h" |
33 | #include "ssa.h" |
34 | #include "optabs-tree.h" |
35 | #include "cgraph.h" |
36 | #include "dumpfile.h" |
37 | #include "alias.h" |
38 | #include "fold-const.h" |
39 | #include "stor-layout.h" |
40 | #include "tree-eh.h" |
41 | #include "gimplify.h" |
42 | #include "gimple-iterator.h" |
43 | #include "gimplify-me.h" |
44 | #include "tree-ssa-loop-ivopts.h" |
45 | #include "tree-ssa-loop-manip.h" |
46 | #include "tree-ssa-loop.h" |
47 | #include "cfgloop.h" |
48 | #include "tree-scalar-evolution.h" |
49 | #include "tree-vectorizer.h" |
50 | #include "expr.h" |
51 | #include "builtins.h" |
52 | #include "tree-cfg.h" |
53 | #include "tree-hash-traits.h" |
54 | #include "vec-perm-indices.h" |
55 | #include "internal-fn.h" |
56 | #include "gimple-fold.h" |
57 | |
58 | /* Return true if load- or store-lanes optab OPTAB is implemented for |
59 | COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */ |
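/* As an illustrative sketch (the exact array modes and instructions are
   target specific): for COUNT == 2 and a V4SI VECTYPE this asks whether the
   target provides an array mode holding two V4SI vectors and a matching
   load-lanes/store-lanes optab entry converting between that array mode and
   V4SI; if not, the group cannot use the lanes-based access scheme.  */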
60 | |
61 | static bool |
62 | vect_lanes_optab_supported_p (const char *name, convert_optab optab, |
63 | tree vectype, unsigned HOST_WIDE_INT count) |
64 | { |
65 | machine_mode mode, array_mode; |
66 | bool limit_p; |
67 | |
68 | mode = TYPE_MODE (vectype); |
  if (!targetm.array_mode (mode, count).exists (&array_mode))
    {
      poly_uint64 bits = count * GET_MODE_BITSIZE (mode);
      limit_p = !targetm.array_mode_supported_p (mode, count);
      if (!int_mode_for_size (bits, limit_p).exists (&array_mode))
74 | { |
75 | if (dump_enabled_p ()) |
76 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
77 | "no array mode for %s[%wu]\n" , |
78 | GET_MODE_NAME (mode), count); |
79 | return false; |
80 | } |
81 | } |
82 | |
  if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
84 | { |
85 | if (dump_enabled_p ()) |
86 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
87 | "cannot use %s<%s><%s>\n" , name, |
88 | GET_MODE_NAME (array_mode), GET_MODE_NAME (mode)); |
89 | return false; |
90 | } |
91 | |
92 | if (dump_enabled_p ()) |
93 | dump_printf_loc (MSG_NOTE, vect_location, |
94 | "can use %s<%s><%s>\n" , name, GET_MODE_NAME (array_mode), |
95 | GET_MODE_NAME (mode)); |
96 | |
97 | return true; |
98 | } |
99 | |
100 | /* Helper function to identify a simd clone call. If this is a call to a |
101 | function with simd clones then return the corresponding cgraph_node, |
102 | otherwise return NULL. */ |
103 | |
104 | static cgraph_node* |
105 | simd_clone_call_p (gimple *stmt) |
106 | { |
  gcall *call = dyn_cast <gcall *> (stmt);
108 | if (!call) |
109 | return NULL; |
110 | |
111 | tree fndecl = NULL_TREE; |
  if (gimple_call_internal_p (call, IFN_MASK_CALL))
    fndecl = TREE_OPERAND (gimple_call_arg (stmt, 0), 0);
  else
    fndecl = gimple_call_fndecl (stmt);
116 | |
117 | if (fndecl == NULL_TREE) |
118 | return NULL; |
119 | |
  cgraph_node *node = cgraph_node::get (fndecl);
121 | if (node && node->simd_clones != NULL) |
122 | return node; |
123 | |
124 | return NULL; |
125 | } |
126 | |
127 | |
128 | |
129 | /* Return the smallest scalar part of STMT_INFO. |
130 | This is used to determine the vectype of the stmt. We generally set the |
131 | vectype according to the type of the result (lhs). For stmts whose |
132 | result-type is different than the type of the arguments (e.g., demotion, |
133 | promotion), vectype will be reset appropriately (later). Note that we have |
134 | to visit the smallest datatype in this function, because that determines the |
135 | VF. If the smallest datatype in the loop is present only as the rhs of a |
136 | promotion operation - we'd miss it. |
137 | Such a case, where a variable of this datatype does not appear in the lhs |
138 | anywhere in the loop, can only occur if it's an invariant: e.g.: |
139 | 'int_x = (int) short_inv', which we'd expect to have been optimized away by |
140 | invariant motion. However, we cannot rely on invariant motion to always |
141 | take invariants out of the loop, and so in the case of promotion we also |
142 | have to check the rhs. |
   SCALAR_TYPE is the caller's current candidate (e.g. the type of the data
   reference); the function returns the smallest scalar type found among it
   and the statement's operands.  */
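/* A small example of why the rhs matters: for 'int_y = (int) short_x' the
   lhs is 4 bytes but the rhs is 2 bytes, so SHORT is the smallest scalar
   type in the statement and (for a fixed vector size) it, not INT, is what
   determines the vectorization factor.  */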
145 | |
146 | tree |
147 | vect_get_smallest_scalar_type (stmt_vec_info stmt_info, tree scalar_type) |
148 | { |
149 | HOST_WIDE_INT lhs, rhs; |
150 | |
151 | /* During the analysis phase, this function is called on arbitrary |
152 | statements that might not have scalar results. */ |
153 | if (!tree_fits_uhwi_p (TYPE_SIZE_UNIT (scalar_type))) |
154 | return scalar_type; |
155 | |
156 | lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type)); |
157 | |
  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
159 | if (assign) |
160 | { |
161 | scalar_type = TREE_TYPE (gimple_assign_lhs (assign)); |
      if (gimple_assign_cast_p (assign)
          || gimple_assign_rhs_code (assign) == DOT_PROD_EXPR
          || gimple_assign_rhs_code (assign) == WIDEN_SUM_EXPR
          || gimple_assign_rhs_code (assign) == WIDEN_MULT_EXPR
          || gimple_assign_rhs_code (assign) == WIDEN_LSHIFT_EXPR
          || gimple_assign_rhs_code (assign) == FLOAT_EXPR)
168 | { |
169 | tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign)); |
170 | |
171 | rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type)); |
172 | if (rhs < lhs) |
173 | scalar_type = rhs_type; |
174 | } |
175 | } |
  else if (cgraph_node *node = simd_clone_call_p (stmt_info->stmt))
177 | { |
178 | auto clone = node->simd_clones->simdclone; |
179 | for (unsigned int i = 0; i < clone->nargs; ++i) |
180 | { |
181 | if (clone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR) |
182 | { |
183 | tree arg_scalar_type = TREE_TYPE (clone->args[i].vector_type); |
184 | rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (arg_scalar_type)); |
185 | if (rhs < lhs) |
186 | { |
187 | scalar_type = arg_scalar_type; |
188 | lhs = rhs; |
189 | } |
190 | } |
191 | } |
192 | } |
  else if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
194 | { |
195 | unsigned int i = 0; |
      if (gimple_call_internal_p (call))
        {
          internal_fn ifn = gimple_call_internal_fn (call);
199 | if (internal_load_fn_p (ifn)) |
200 | /* For loads the LHS type does the trick. */ |
201 | i = ~0U; |
202 | else if (internal_store_fn_p (ifn)) |
203 | { |
            /* For stores use the type of the stored value.  */
205 | i = internal_fn_stored_value_index (ifn); |
206 | scalar_type = TREE_TYPE (gimple_call_arg (call, i)); |
207 | i = ~0U; |
208 | } |
209 | else if (internal_fn_mask_index (ifn) == 0) |
210 | i = 1; |
211 | } |
      if (i < gimple_call_num_args (call))
213 | { |
214 | tree rhs_type = TREE_TYPE (gimple_call_arg (call, i)); |
215 | if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (rhs_type))) |
216 | { |
217 | rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type)); |
218 | if (rhs < lhs) |
219 | scalar_type = rhs_type; |
220 | } |
221 | } |
222 | } |
223 | |
224 | return scalar_type; |
225 | } |
226 | |
227 | |
228 | /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be |
229 | tested at run-time. Return TRUE if DDR was successfully inserted. |
230 | Return false if versioning is not supported. */ |
231 | |
232 | static opt_result |
233 | vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo) |
234 | { |
235 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
236 | |
237 | if ((unsigned) param_vect_max_version_for_alias_checks == 0) |
    return opt_result::failure_at (vect_location,
                                   "will not create alias checks, as"
                                   " --param vect-max-version-for-alias-checks"
                                   " == 0\n");
242 | |
243 | opt_result res |
244 | = runtime_alias_check_p (ddr, loop, |
245 | optimize_loop_nest_for_speed_p (loop)); |
246 | if (!res) |
247 | return res; |
248 | |
  LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
250 | return opt_result::success (); |
251 | } |
252 | |
253 | /* Record that loop LOOP_VINFO needs to check that VALUE is nonzero. */ |
254 | |
255 | static void |
256 | vect_check_nonzero_value (loop_vec_info loop_vinfo, tree value) |
257 | { |
258 | const vec<tree> &checks = LOOP_VINFO_CHECK_NONZERO (loop_vinfo); |
259 | for (unsigned int i = 0; i < checks.length(); ++i) |
260 | if (checks[i] == value) |
261 | return; |
262 | |
263 | if (dump_enabled_p ()) |
264 | dump_printf_loc (MSG_NOTE, vect_location, |
265 | "need run-time check that %T is nonzero\n" , |
266 | value); |
  LOOP_VINFO_CHECK_NONZERO (loop_vinfo).safe_push (value);
268 | } |
269 | |
270 | /* Return true if we know that the order of vectorized DR_INFO_A and |
271 | vectorized DR_INFO_B will be the same as the order of DR_INFO_A and |
272 | DR_INFO_B. At least one of the accesses is a write. */ |
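/* Concretely (a sketch): given
     ... = a[i];     <-- load group member 1
     a[i] = ...;     <-- single store
     ... = a[i+1];   <-- load group member 2
   the vectorized group load is emitted at the position of the first scalar
   load, i.e. before the store.  For member 1 that matches the scalar order,
   so the function returns true; for member 2, which scalar-wise follows the
   store, the vectorized order is reversed and the function returns false.  */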
273 | |
274 | static bool |
275 | vect_preserves_scalar_order_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b) |
276 | { |
277 | stmt_vec_info stmtinfo_a = dr_info_a->stmt; |
278 | stmt_vec_info stmtinfo_b = dr_info_b->stmt; |
279 | |
280 | /* Single statements are always kept in their original order. */ |
281 | if (!STMT_VINFO_GROUPED_ACCESS (stmtinfo_a) |
282 | && !STMT_VINFO_GROUPED_ACCESS (stmtinfo_b)) |
283 | return true; |
284 | |
285 | /* If there is a loop invariant read involved we might vectorize it in |
     the prologue, breaking scalar order with respect to the in-loop store.  */
287 | if ((DR_IS_READ (dr_info_a->dr) && integer_zerop (DR_STEP (dr_info_a->dr))) |
288 | || (DR_IS_READ (dr_info_b->dr) && integer_zerop (DR_STEP (dr_info_b->dr)))) |
289 | return false; |
290 | |
291 | /* STMT_A and STMT_B belong to overlapping groups. All loads are |
292 | emitted at the position of the first scalar load. |
293 | Stores in a group are emitted at the position of the last scalar store. |
294 | Compute that position and check whether the resulting order matches |
295 | the current one. */ |
296 | stmt_vec_info il_a = DR_GROUP_FIRST_ELEMENT (stmtinfo_a); |
297 | if (il_a) |
298 | { |
299 | if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_a))) |
300 | for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s; |
301 | s = DR_GROUP_NEXT_ELEMENT (s)) |
          il_a = get_later_stmt (il_a, s);
      else /* DR_IS_READ */
        for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
             s = DR_GROUP_NEXT_ELEMENT (s))
          if (get_later_stmt (il_a, s) == il_a)
307 | il_a = s; |
308 | } |
309 | else |
310 | il_a = stmtinfo_a; |
311 | stmt_vec_info il_b = DR_GROUP_FIRST_ELEMENT (stmtinfo_b); |
312 | if (il_b) |
313 | { |
314 | if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_b))) |
315 | for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s; |
316 | s = DR_GROUP_NEXT_ELEMENT (s)) |
          il_b = get_later_stmt (il_b, s);
      else /* DR_IS_READ */
        for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
             s = DR_GROUP_NEXT_ELEMENT (s))
          if (get_later_stmt (il_b, s) == il_b)
322 | il_b = s; |
323 | } |
324 | else |
325 | il_b = stmtinfo_b; |
  bool a_after_b = (get_later_stmt (stmtinfo_a, stmtinfo_b) == stmtinfo_a);
  return (get_later_stmt (il_a, il_b) == il_a) == a_after_b;
328 | } |
329 | |
330 | /* A subroutine of vect_analyze_data_ref_dependence. Handle |
331 | DDR_COULD_BE_INDEPENDENT_P ddr DDR that has a known set of dependence |
332 | distances. These distances are conservatively correct but they don't |
333 | reflect a guaranteed dependence. |
334 | |
335 | Return true if this function does all the work necessary to avoid |
336 | an alias or false if the caller should use the dependence distances |
337 | to limit the vectorization factor in the usual way. LOOP_DEPTH is |
338 | the depth of the loop described by LOOP_VINFO and the other arguments |
339 | are as for vect_analyze_data_ref_dependence. */ |
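/* For instance (a sketch), with a conservative dependence distance of 4 but a
   user assertion such as '#pragma omp simd safelen(16)' on the loop, the code
   below treats up to 16 consecutive iterations as independent instead of
   clamping the vectorization factor to 4; otherwise it prefers to emit a
   runtime alias check rather than limit the VF.  */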
340 | |
341 | static bool |
342 | vect_analyze_possibly_independent_ddr (data_dependence_relation *ddr, |
343 | loop_vec_info loop_vinfo, |
344 | int loop_depth, unsigned int *max_vf) |
345 | { |
346 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
347 | for (lambda_vector &dist_v : DDR_DIST_VECTS (ddr)) |
348 | { |
349 | int dist = dist_v[loop_depth]; |
350 | if (dist != 0 && !(dist > 0 && DDR_REVERSED_P (ddr))) |
351 | { |
352 | /* If the user asserted safelen >= DIST consecutive iterations |
353 | can be executed concurrently, assume independence. |
354 | |
355 | ??? An alternative would be to add the alias check even |
356 | in this case, and vectorize the fallback loop with the |
357 | maximum VF set to safelen. However, if the user has |
358 | explicitly given a length, it's less likely that that |
359 | would be a win. */ |
          if (loop->safelen >= 2 && abs_hwi (dist) <= loop->safelen)
361 | { |
362 | if ((unsigned int) loop->safelen < *max_vf) |
363 | *max_vf = loop->safelen; |
364 | LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false; |
365 | continue; |
366 | } |
367 | |
368 | /* For dependence distances of 2 or more, we have the option |
369 | of limiting VF or checking for an alias at runtime. |
370 | Prefer to check at runtime if we can, to avoid limiting |
371 | the VF unnecessarily when the bases are in fact independent. |
372 | |
373 | Note that the alias checks will be removed if the VF ends up |
374 | being small enough. */ |
375 | dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (DDR_A (ddr)); |
376 | dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr)); |
377 | return (!STMT_VINFO_GATHER_SCATTER_P (dr_info_a->stmt) |
378 | && !STMT_VINFO_GATHER_SCATTER_P (dr_info_b->stmt) |
379 | && vect_mark_for_runtime_alias_test (ddr, loop_vinfo)); |
380 | } |
381 | } |
382 | return true; |
383 | } |
384 | |
385 | |
386 | /* Function vect_analyze_data_ref_dependence. |
387 | |
388 | FIXME: I needed to change the sense of the returned flag. |
389 | |
390 | Return FALSE if there (might) exist a dependence between a memory-reference |
391 | DRA and a memory-reference DRB. When versioning for alias may check a |
392 | dependence at run-time, return TRUE. Adjust *MAX_VF according to |
393 | the data dependence. */ |
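/* As a worked example (a sketch): for a loop containing 'a[i+3] = a[i] + 1'
   the dependence distance is 3.  Vectorizing with VF 4 would load a[i..i+3]
   before storing a[i+3..i+6], so the read of a[i+3] would see a stale value;
   the code below therefore clamps *MAX_VF to 3 for such distances.  */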
394 | |
395 | static opt_result |
396 | vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr, |
397 | loop_vec_info loop_vinfo, |
398 | unsigned int *max_vf) |
399 | { |
400 | unsigned int i; |
401 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
402 | struct data_reference *dra = DDR_A (ddr); |
403 | struct data_reference *drb = DDR_B (ddr); |
404 | dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (dra); |
405 | dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (drb); |
406 | stmt_vec_info stmtinfo_a = dr_info_a->stmt; |
407 | stmt_vec_info stmtinfo_b = dr_info_b->stmt; |
408 | lambda_vector dist_v; |
409 | unsigned int loop_depth; |
410 | |
411 | /* If user asserted safelen consecutive iterations can be |
412 | executed concurrently, assume independence. */ |
413 | auto apply_safelen = [&]() |
414 | { |
415 | if (loop->safelen >= 2) |
416 | { |
417 | if ((unsigned int) loop->safelen < *max_vf) |
418 | *max_vf = loop->safelen; |
419 | LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false; |
420 | return true; |
421 | } |
422 | return false; |
423 | }; |
424 | |
425 | /* In loop analysis all data references should be vectorizable. */ |
426 | if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a) |
427 | || !STMT_VINFO_VECTORIZABLE (stmtinfo_b)) |
428 | gcc_unreachable (); |
429 | |
430 | /* Independent data accesses. */ |
431 | if (DDR_ARE_DEPENDENT (ddr) == chrec_known) |
432 | return opt_result::success (); |
433 | |
434 | if (dra == drb |
435 | || (DR_IS_READ (dra) && DR_IS_READ (drb))) |
436 | return opt_result::success (); |
437 | |
438 | /* We do not have to consider dependences between accesses that belong |
439 | to the same group, unless the stride could be smaller than the |
440 | group size. */ |
441 | if (DR_GROUP_FIRST_ELEMENT (stmtinfo_a) |
442 | && (DR_GROUP_FIRST_ELEMENT (stmtinfo_a) |
443 | == DR_GROUP_FIRST_ELEMENT (stmtinfo_b)) |
444 | && !STMT_VINFO_STRIDED_P (stmtinfo_a)) |
445 | return opt_result::success (); |
446 | |
447 | /* Even if we have an anti-dependence then, as the vectorized loop covers at |
448 | least two scalar iterations, there is always also a true dependence. |
449 | As the vectorizer does not re-order loads and stores we can ignore |
450 | the anti-dependence if TBAA can disambiguate both DRs similar to the |
451 | case with known negative distance anti-dependences (positive |
452 | distance anti-dependences would violate TBAA constraints). */ |
453 | if (((DR_IS_READ (dra) && DR_IS_WRITE (drb)) |
454 | || (DR_IS_WRITE (dra) && DR_IS_READ (drb))) |
455 | && !alias_sets_conflict_p (get_alias_set (DR_REF (dra)), |
456 | get_alias_set (DR_REF (drb)))) |
457 | return opt_result::success (); |
458 | |
459 | if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a) |
460 | || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b)) |
461 | { |
462 | if (apply_safelen ()) |
463 | return opt_result::success (); |
464 | |
      return opt_result::failure_at
        (stmtinfo_a->stmt,
         "possible alias involving gather/scatter between %T and %T\n",
         DR_REF (dra), DR_REF (drb));
469 | } |
470 | |
471 | /* Unknown data dependence. */ |
472 | if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) |
473 | { |
474 | if (apply_safelen ()) |
475 | return opt_result::success (); |
476 | |
477 | if (dump_enabled_p ()) |
478 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmtinfo_a->stmt, |
479 | "versioning for alias required: " |
480 | "can't determine dependence between %T and %T\n" , |
481 | DR_REF (dra), DR_REF (drb)); |
482 | |
483 | /* Add to list of ddrs that need to be tested at run-time. */ |
484 | return vect_mark_for_runtime_alias_test (ddr, loop_vinfo); |
485 | } |
486 | |
487 | /* Known data dependence. */ |
488 | if (DDR_NUM_DIST_VECTS (ddr) == 0) |
489 | { |
490 | if (apply_safelen ()) |
491 | return opt_result::success (); |
492 | |
493 | if (dump_enabled_p ()) |
494 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmtinfo_a->stmt, |
495 | "versioning for alias required: " |
496 | "bad dist vector for %T and %T\n" , |
497 | DR_REF (dra), DR_REF (drb)); |
498 | /* Add to list of ddrs that need to be tested at run-time. */ |
499 | return vect_mark_for_runtime_alias_test (ddr, loop_vinfo); |
500 | } |
501 | |
  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
503 | |
504 | if (DDR_COULD_BE_INDEPENDENT_P (ddr) |
505 | && vect_analyze_possibly_independent_ddr (ddr, loop_vinfo, |
506 | loop_depth, max_vf)) |
507 | return opt_result::success (); |
508 | |
509 | FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v) |
510 | { |
511 | int dist = dist_v[loop_depth]; |
512 | |
513 | if (dump_enabled_p ()) |
514 | dump_printf_loc (MSG_NOTE, vect_location, |
515 | "dependence distance = %d.\n" , dist); |
516 | |
517 | if (dist == 0) |
518 | { |
519 | if (dump_enabled_p ()) |
520 | dump_printf_loc (MSG_NOTE, vect_location, |
521 | "dependence distance == 0 between %T and %T\n" , |
522 | DR_REF (dra), DR_REF (drb)); |
523 | |
524 | /* When we perform grouped accesses and perform implicit CSE |
525 | by detecting equal accesses and doing disambiguation with |
526 | runtime alias tests like for |
527 | .. = a[i]; |
528 | .. = a[i+1]; |
529 | a[i] = ..; |
530 | a[i+1] = ..; |
531 | *p = ..; |
532 | .. = a[i]; |
533 | .. = a[i+1]; |
534 | where we will end up loading { a[i], a[i+1] } once, make |
535 | sure that inserting group loads before the first load and |
536 | stores after the last store will do the right thing. |
537 | Similar for groups like |
538 | a[i] = ...; |
539 | ... = a[i]; |
540 | a[i+1] = ...; |
541 | where loads from the group interleave with the store. */ |
542 | if (!vect_preserves_scalar_order_p (dr_info_a, dr_info_b)) |
            return opt_result::failure_at (stmtinfo_a->stmt,
                                           "READ_WRITE dependence"
                                           " in interleaving.\n");
546 | |
547 | if (loop->safelen < 2) |
548 | { |
549 | tree indicator = dr_zero_step_indicator (dra); |
550 | if (!indicator || integer_zerop (indicator)) |
                return opt_result::failure_at (stmtinfo_a->stmt,
                                               "access also has a zero step\n");
              else if (TREE_CODE (indicator) != INTEGER_CST)
                vect_check_nonzero_value (loop_vinfo, indicator);
555 | } |
556 | continue; |
557 | } |
558 | |
559 | if (dist > 0 && DDR_REVERSED_P (ddr)) |
560 | { |
561 | /* If DDR_REVERSED_P the order of the data-refs in DDR was |
562 | reversed (to make distance vector positive), and the actual |
563 | distance is negative. */ |
564 | if (dump_enabled_p ()) |
565 | dump_printf_loc (MSG_NOTE, vect_location, |
566 | "dependence distance negative.\n" ); |
567 | /* When doing outer loop vectorization, we need to check if there is |
568 | a backward dependence at the inner loop level if the dependence |
569 | at the outer loop is reversed. See PR81740. */ |
          if (nested_in_vect_loop_p (loop, stmtinfo_a)
              || nested_in_vect_loop_p (loop, stmtinfo_b))
            {
              unsigned inner_depth = index_in_loop_nest (loop->inner->num,
                                                         DDR_LOOP_NEST (ddr));
              if (dist_v[inner_depth] < 0)
                return opt_result::failure_at (stmtinfo_a->stmt,
                                               "not vectorized, dependence "
                                               "between data-refs %T and %T\n",
                                               DR_REF (dra), DR_REF (drb));
580 | } |
581 | /* Record a negative dependence distance to later limit the |
582 | amount of stmt copying / unrolling we can perform. |
583 | Only need to handle read-after-write dependence. */ |
584 | if (DR_IS_READ (drb) |
585 | && (STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) == 0 |
586 | || STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) > (unsigned)dist)) |
587 | STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) = dist; |
588 | continue; |
589 | } |
590 | |
      unsigned int abs_dist = abs (dist);
592 | if (abs_dist >= 2 && abs_dist < *max_vf) |
593 | { |
594 | /* The dependence distance requires reduction of the maximal |
595 | vectorization factor. */ |
596 | *max_vf = abs_dist; |
597 | if (dump_enabled_p ()) |
598 | dump_printf_loc (MSG_NOTE, vect_location, |
599 | "adjusting maximal vectorization factor to %i\n" , |
600 | *max_vf); |
601 | } |
602 | |
603 | if (abs_dist >= *max_vf) |
604 | { |
605 | /* Dependence distance does not create dependence, as far as |
606 | vectorization is concerned, in this case. */ |
607 | if (dump_enabled_p ()) |
608 | dump_printf_loc (MSG_NOTE, vect_location, |
609 | "dependence distance >= VF.\n" ); |
610 | continue; |
611 | } |
612 | |
      return opt_result::failure_at (stmtinfo_a->stmt,
                                     "not vectorized, possible dependence "
                                     "between data-refs %T and %T\n",
                                     DR_REF (dra), DR_REF (drb));
617 | } |
618 | |
619 | return opt_result::success (); |
620 | } |
621 | |
622 | /* Function vect_analyze_early_break_dependences. |
623 | |
624 | Examine all the data references in the loop and make sure that if we have |
625 | multiple exits that we are able to safely move stores such that they become |
626 | safe for vectorization. The function also calculates the place where to move |
627 | the instructions to and computes what the new vUSE chain should be. |
628 | |
629 | This works in tandem with the CFG that will be produced by |
630 | slpeel_tree_duplicate_loop_to_edge_cfg later on. |
631 | |
   This function tries to validate whether early break vectorization is
   possible for the current instruction sequence.  Returns true if
   possible, otherwise false.
635 | |
636 | Requirements: |
637 | - Any memory access must be to a fixed size buffer. |
638 | - There must not be any loads and stores to the same object. |
639 | - Multiple loads are allowed as long as they don't alias. |
640 | |
641 | NOTE: |
   This implementation is very conservative.  Any overlapping loads/stores
   that take place before the early break statement are rejected, aside from
   WAR dependencies.
645 | |
646 | i.e.: |
647 | |
648 | a[i] = 8 |
649 | c = a[i] |
650 | if (b[i]) |
651 | ... |
652 | |
653 | is not allowed, but |
654 | |
655 | c = a[i] |
656 | a[i] = 8 |
657 | if (b[i]) |
658 | ... |
659 | |
   is allowed, which is the common case.  */
661 | |
662 | static opt_result |
663 | vect_analyze_early_break_dependences (loop_vec_info loop_vinfo) |
664 | { |
665 | DUMP_VECT_SCOPE ("vect_analyze_early_break_dependences" ); |
666 | |
667 | /* List of all load data references found during traversal. */ |
668 | auto_vec<data_reference *> bases; |
669 | basic_block dest_bb = NULL; |
670 | |
671 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
672 | class loop *loop_nest = loop_outer (loop); |
673 | |
674 | if (dump_enabled_p ()) |
675 | dump_printf_loc (MSG_NOTE, vect_location, |
676 | "loop contains multiple exits, analyzing" |
677 | " statement dependencies.\n" ); |
678 | |
679 | if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)) |
680 | if (dump_enabled_p ()) |
681 | dump_printf_loc (MSG_NOTE, vect_location, |
682 | "alternate exit has been chosen as main exit.\n" ); |
683 | |
684 | /* Since we don't support general control flow, the location we'll move the |
685 | side-effects to is always the latch connected exit. When we support |
686 | general control flow we can do better but for now this is fine. Move |
687 | side-effects to the in-loop destination of the last early exit. For the |
688 | PEELED case we move the side-effects to the latch block as this is |
689 | guaranteed to be the last block to be executed when a vector iteration |
690 | finished. */ |
691 | if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)) |
692 | dest_bb = loop->latch; |
693 | else |
    dest_bb = single_pred (loop->latch);
695 | |
696 | /* We start looking from dest_bb, for the non-PEELED case we don't want to |
697 | move any stores already present, but we do want to read and validate the |
698 | loads. */ |
699 | basic_block bb = dest_bb; |
700 | |
701 | /* We move stores across all loads to the beginning of dest_bb, so |
702 | the first block processed below doesn't need dependence checking. */ |
703 | bool check_deps = false; |
704 | |
705 | do |
706 | { |
707 | gimple_stmt_iterator gsi = gsi_last_bb (bb); |
708 | |
709 | /* Now analyze all the remaining statements and try to determine which |
710 | instructions are allowed/needed to be moved. */ |
      while (!gsi_end_p (gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          gsi_prev (&gsi);
          if (is_gimple_debug (stmt))
716 | continue; |
717 | |
718 | stmt_vec_info stmt_vinfo = loop_vinfo->lookup_stmt (stmt); |
719 | auto dr_ref = STMT_VINFO_DATA_REF (stmt_vinfo); |
720 | if (!dr_ref) |
721 | continue; |
722 | |
723 | /* We know everything below dest_bb is safe since we know we |
724 | had a full vector iteration when reaching it. Either by |
725 | the loop entry / IV exit test being last or because this |
726 | is the loop latch itself. */ |
727 | if (!check_deps) |
728 | continue; |
729 | |
          /* Check whether vector accesses to the object will be within
             bounds.  The object size must be a constant, or we must assume
             the loop will be versioned or its niters bounded by VF so the
             accesses stay within range.  We only need to check the reads,
             since writes are moved to a safe place where, if we get there,
             we know they are safe to perform.  */
735 | if (DR_IS_READ (dr_ref) |
736 | && !ref_within_array_bound (stmt, DR_REF (dr_ref))) |
737 | { |
738 | if (dump_enabled_p ()) |
739 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
740 | "early breaks not supported: vectorization " |
741 | "would %s beyond size of obj.\n" , |
742 | DR_IS_READ (dr_ref) ? "read" : "write" ); |
              return opt_result::failure_at (stmt,
                                             "can't safely apply code motion to "
                                             "dependencies of %G to vectorize "
                                             "the early exit.\n", stmt);
747 | } |
748 | |
749 | if (DR_IS_READ (dr_ref)) |
            bases.safe_push (dr_ref);
751 | else if (DR_IS_WRITE (dr_ref)) |
752 | { |
753 | /* We are moving writes down in the CFG. To be sure that this |
754 | is valid after vectorization we have to check all the loads |
755 | we are sinking the stores past to see if any of them may |
756 | alias or are the same object. |
757 | |
758 | Same objects will not be an issue because unless the store |
759 | is marked volatile the value can be forwarded. If the |
760 | store is marked volatile we don't vectorize the loop |
761 | anyway. |
762 | |
763 | That leaves the check for aliasing. We don't really need |
764 | to care about the stores aliasing with each other since the |
765 | stores are moved in order so the effects are still observed |
766 | correctly. This leaves the check for WAR dependencies |
767 | which we would be introducing here if the DR can alias. |
768 | The check is quadratic in loads/stores but I have not found |
769 | a better API to do this. I believe all loads and stores |
770 | must be checked. We also must check them when we |
771 | encountered the store, since we don't care about loads past |
772 | the store. */ |
773 | |
774 | for (auto dr_read : bases) |
775 | if (dr_may_alias_p (dr_ref, dr_read, loop_nest)) |
776 | { |
777 | if (dump_enabled_p ()) |
778 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, |
779 | vect_location, |
780 | "early breaks not supported: " |
781 | "overlapping loads and stores " |
782 | "found before the break " |
783 | "statement.\n" ); |
784 | |
                  return opt_result::failure_at (stmt,
                             "can't safely apply code motion to dependencies"
                             " to vectorize the early exit. %G may alias with"
                             " %G\n", stmt, dr_read->stmt);
789 | } |
790 | } |
791 | |
          if (gimple_vdef (stmt))
793 | { |
794 | if (dump_enabled_p ()) |
795 | dump_printf_loc (MSG_NOTE, vect_location, |
796 | "==> recording stmt %G" , stmt); |
797 | |
              LOOP_VINFO_EARLY_BRK_STORES (loop_vinfo).safe_push (stmt);
799 | } |
          else if (gimple_vuse (stmt))
            {
              LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).safe_insert (0, stmt);
803 | if (dump_enabled_p ()) |
804 | dump_printf_loc (MSG_NOTE, vect_location, |
805 | "marked statement for vUSE update: %G" , stmt); |
806 | } |
807 | } |
808 | |
809 | if (!single_pred_p (bb)) |
810 | { |
811 | gcc_assert (bb == loop->header); |
812 | break; |
813 | } |
814 | |
815 | /* If we possibly sink through a virtual PHI make sure to elide that. */ |
816 | if (gphi *vphi = get_virtual_phi (bb)) |
        LOOP_VINFO_EARLY_BRK_STORES (loop_vinfo).safe_push (vphi);
818 | |
819 | /* All earlier blocks need dependence checking. */ |
820 | check_deps = true; |
821 | bb = single_pred (bb); |
822 | } |
823 | while (1); |
824 | |
825 | /* We don't allow outer -> inner loop transitions which should have been |
826 | trapped already during loop form analysis. */ |
827 | gcc_assert (dest_bb->loop_father == loop); |
828 | |
  /* Check that the destination block we picked has only one predecessor.
     To relax this we would have to take special care when moving the
     statements.  We don't currently support such control flow, and this
     check also simplifies how we handle labels that may be present anywhere
     in the IL, ensuring they are not significant for the CFG.  */
  if (!single_pred (dest_bb))
    return opt_result::failure_at (vect_location,
                                   "chosen loop exit block (BB %d) does not have a "
                                   "single predecessor which is currently not "
                                   "supported for early break vectorization.\n",
                                   dest_bb->index);
840 | |
841 | LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo) = dest_bb; |
842 | |
843 | if (!LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).is_empty ()) |
844 | { |
845 | /* All uses shall be updated to that of the first load. Entries are |
846 | stored in reverse order. */ |
847 | tree vuse = gimple_vuse (LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).last ()); |
848 | for (auto g : LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo)) |
849 | { |
850 | if (dump_enabled_p ()) |
851 | dump_printf_loc (MSG_NOTE, vect_location, |
852 | "will update use: %T, mem_ref: %G" , vuse, g); |
853 | } |
854 | } |
855 | |
856 | if (dump_enabled_p ()) |
857 | dump_printf_loc (MSG_NOTE, vect_location, |
858 | "recorded statements to be moved to BB %d\n" , |
859 | LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo)->index); |
860 | |
861 | return opt_result::success (); |
862 | } |
863 | |
864 | /* Function vect_analyze_data_ref_dependences. |
865 | |
866 | Examine all the data references in the loop, and make sure there do not |
867 | exist any data dependences between them. Set *MAX_VF according to |
868 | the maximum vectorization factor the data dependences allow. */ |
869 | |
870 | opt_result |
871 | vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo, |
872 | unsigned int *max_vf) |
873 | { |
874 | unsigned int i; |
875 | struct data_dependence_relation *ddr; |
876 | |
877 | DUMP_VECT_SCOPE ("vect_analyze_data_ref_dependences" ); |
878 | |
879 | if (!LOOP_VINFO_DDRS (loop_vinfo).exists ()) |
880 | { |
881 | LOOP_VINFO_DDRS (loop_vinfo) |
882 | .create (LOOP_VINFO_DATAREFS (loop_vinfo).length () |
883 | * LOOP_VINFO_DATAREFS (loop_vinfo).length ()); |
884 | /* We do not need read-read dependences. */ |
885 | bool res = compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo), |
886 | &LOOP_VINFO_DDRS (loop_vinfo), |
887 | LOOP_VINFO_LOOP_NEST (loop_vinfo), |
888 | false); |
889 | gcc_assert (res); |
890 | } |
891 | |
892 | LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = true; |
893 | |
894 | /* For epilogues we either have no aliases or alias versioning |
895 | was applied to original loop. Therefore we may just get max_vf |
896 | using VF of original loop. */ |
897 | if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)) |
898 | *max_vf = LOOP_VINFO_ORIG_MAX_VECT_FACTOR (loop_vinfo); |
899 | else |
900 | FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr) |
901 | { |
902 | opt_result res |
903 | = vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf); |
904 | if (!res) |
905 | return res; |
906 | } |
907 | |
  /* If we have early break statements in the loop, check to see if they
     are of a form we can vectorize.  */
910 | if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) |
911 | return vect_analyze_early_break_dependences (loop_vinfo); |
912 | |
913 | return opt_result::success (); |
914 | } |
915 | |
916 | |
/* Function vect_slp_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB for VINFO.  When versioning for alias
   may check a dependence at run-time, return FALSE.  */
923 | |
924 | static bool |
925 | vect_slp_analyze_data_ref_dependence (vec_info *vinfo, |
926 | struct data_dependence_relation *ddr) |
927 | { |
928 | struct data_reference *dra = DDR_A (ddr); |
929 | struct data_reference *drb = DDR_B (ddr); |
930 | dr_vec_info *dr_info_a = vinfo->lookup_dr (dra); |
931 | dr_vec_info *dr_info_b = vinfo->lookup_dr (drb); |
932 | |
933 | /* We need to check dependences of statements marked as unvectorizable |
934 | as well, they still can prohibit vectorization. */ |
935 | |
936 | /* Independent data accesses. */ |
937 | if (DDR_ARE_DEPENDENT (ddr) == chrec_known) |
938 | return false; |
939 | |
940 | if (dra == drb) |
941 | return false; |
942 | |
943 | /* Read-read is OK. */ |
944 | if (DR_IS_READ (dra) && DR_IS_READ (drb)) |
945 | return false; |
946 | |
947 | /* If dra and drb are part of the same interleaving chain consider |
948 | them independent. */ |
949 | if (STMT_VINFO_GROUPED_ACCESS (dr_info_a->stmt) |
950 | && (DR_GROUP_FIRST_ELEMENT (dr_info_a->stmt) |
951 | == DR_GROUP_FIRST_ELEMENT (dr_info_b->stmt))) |
952 | return false; |
953 | |
954 | /* Unknown data dependence. */ |
955 | if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) |
956 | { |
957 | if (dump_enabled_p ()) |
958 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
959 | "can't determine dependence between %T and %T\n" , |
960 | DR_REF (dra), DR_REF (drb)); |
961 | } |
962 | else if (dump_enabled_p ()) |
963 | dump_printf_loc (MSG_NOTE, vect_location, |
964 | "determined dependence between %T and %T\n" , |
965 | DR_REF (dra), DR_REF (drb)); |
966 | |
967 | return true; |
968 | } |
969 | |
970 | |
971 | /* Analyze dependences involved in the transform of a store SLP NODE. */ |
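/* For example (a sketch): for an SLP store group
     a[0] = x;
     *p = 1;
     a[1] = y;
   the vectorized store is emitted at the position of the last scalar store,
   so 'a[0] = x' is effectively sunk past '*p = 1'; the walk below checks
   that such intervening statements cannot alias the stored location.  */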
972 | |
973 | static bool |
974 | vect_slp_analyze_store_dependences (vec_info *vinfo, slp_tree node) |
975 | { |
976 | /* This walks over all stmts involved in the SLP store done |
977 | in NODE verifying we can sink them up to the last stmt in the |
978 | group. */ |
979 | stmt_vec_info last_access_info = vect_find_last_scalar_stmt_in_slp (node); |
980 | gcc_assert (DR_IS_WRITE (STMT_VINFO_DATA_REF (last_access_info))); |
981 | |
982 | for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k) |
983 | { |
984 | stmt_vec_info access_info |
985 | = vect_orig_stmt (SLP_TREE_SCALAR_STMTS (node)[k]); |
986 | if (access_info == last_access_info) |
987 | continue; |
988 | data_reference *dr_a = STMT_VINFO_DATA_REF (access_info); |
989 | ao_ref ref; |
990 | bool ref_initialized_p = false; |
991 | for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt); |
           gsi_stmt (gsi) != last_access_info->stmt; gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          if (! gimple_vuse (stmt))
996 | continue; |
997 | |
998 | /* If we couldn't record a (single) data reference for this |
999 | stmt we have to resort to the alias oracle. */ |
1000 | stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt); |
1001 | data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info); |
1002 | if (!dr_b) |
1003 | { |
1004 | /* We are moving a store - this means |
1005 | we cannot use TBAA for disambiguation. */ |
1006 | if (!ref_initialized_p) |
1007 | ao_ref_init (&ref, DR_REF (dr_a)); |
1008 | if (stmt_may_clobber_ref_p_1 (stmt, &ref, false) |
1009 | || ref_maybe_used_by_stmt_p (stmt, &ref, false)) |
1010 | return false; |
1011 | continue; |
1012 | } |
1013 | |
1014 | gcc_assert (!gimple_visited_p (stmt)); |
1015 | |
1016 | ddr_p ddr = initialize_data_dependence_relation (dr_a, |
1017 | dr_b, vNULL); |
1018 | bool dependent = vect_slp_analyze_data_ref_dependence (vinfo, ddr); |
1019 | free_dependence_relation (ddr); |
1020 | if (dependent) |
1021 | return false; |
1022 | } |
1023 | } |
1024 | return true; |
1025 | } |
1026 | |
1027 | /* Analyze dependences involved in the transform of a load SLP NODE. STORES |
1028 | contain the vector of scalar stores of this instance if we are |
1029 | disambiguating the loads. */ |
1030 | |
1031 | static bool |
1032 | vect_slp_analyze_load_dependences (vec_info *vinfo, slp_tree node, |
1033 | vec<stmt_vec_info> stores, |
1034 | stmt_vec_info last_store_info) |
1035 | { |
1036 | /* This walks over all stmts involved in the SLP load done |
1037 | in NODE verifying we can hoist them up to the first stmt in the |
1038 | group. */ |
1039 | stmt_vec_info first_access_info = vect_find_first_scalar_stmt_in_slp (node); |
1040 | gcc_assert (DR_IS_READ (STMT_VINFO_DATA_REF (first_access_info))); |
1041 | |
1042 | for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (node).length (); ++k) |
1043 | { |
1044 | stmt_vec_info access_info |
1045 | = vect_orig_stmt (SLP_TREE_SCALAR_STMTS (node)[k]); |
1046 | if (access_info == first_access_info) |
1047 | continue; |
1048 | data_reference *dr_a = STMT_VINFO_DATA_REF (access_info); |
1049 | ao_ref ref; |
1050 | bool ref_initialized_p = false; |
1051 | hash_set<stmt_vec_info> grp_visited; |
1052 | for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt); |
           gsi_stmt (gsi) != first_access_info->stmt; gsi_prev (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          if (! gimple_vdef (stmt))
1057 | continue; |
1058 | |
1059 | stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt); |
1060 | |
1061 | /* If we run into a store of this same instance (we've just |
1062 | marked those) then delay dependence checking until we run |
1063 | into the last store because this is where it will have |
1064 | been sunk to (and we verified that we can do that already). */ |
1065 | if (gimple_visited_p (stmt)) |
1066 | { |
1067 | if (stmt_info != last_store_info) |
1068 | continue; |
1069 | |
1070 | for (stmt_vec_info &store_info : stores) |
1071 | { |
1072 | data_reference *store_dr = STMT_VINFO_DATA_REF (store_info); |
1073 | ddr_p ddr = initialize_data_dependence_relation |
1074 | (dr_a, store_dr, vNULL); |
1075 | bool dependent |
1076 | = vect_slp_analyze_data_ref_dependence (vinfo, ddr); |
1077 | free_dependence_relation (ddr); |
1078 | if (dependent) |
1079 | return false; |
1080 | } |
1081 | continue; |
1082 | } |
1083 | |
1084 | auto check_hoist = [&] (stmt_vec_info stmt_info) -> bool |
1085 | { |
1086 | /* We are hoisting a load - this means we can use TBAA for |
1087 | disambiguation. */ |
1088 | if (!ref_initialized_p) |
1089 | ao_ref_init (&ref, DR_REF (dr_a)); |
1090 | if (stmt_may_clobber_ref_p_1 (stmt_info->stmt, &ref, true)) |
1091 | { |
1092 | /* If we couldn't record a (single) data reference for this |
1093 | stmt we have to give up now. */ |
1094 | data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info); |
1095 | if (!dr_b) |
1096 | return false; |
1097 | ddr_p ddr = initialize_data_dependence_relation (dr_a, |
1098 | dr_b, vNULL); |
1099 | bool dependent |
1100 | = vect_slp_analyze_data_ref_dependence (vinfo, ddr); |
1101 | free_dependence_relation (ddr); |
1102 | if (dependent) |
1103 | return false; |
1104 | } |
1105 | /* No dependence. */ |
1106 | return true; |
1107 | }; |
1108 | if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) |
1109 | { |
1110 | /* When we run into a store group we have to honor |
1111 | that earlier stores might be moved here. We don't |
1112 | know exactly which and where to since we lack a |
1113 | back-mapping from DR to SLP node, so assume all |
1114 | earlier stores are sunk here. It's enough to |
1115 | consider the last stmt of a group for this. |
1116 | ??? Both this and the fact that we disregard that |
1117 | the conflicting instance might be removed later |
1118 | is overly conservative. */ |
1119 | if (!grp_visited.add (DR_GROUP_FIRST_ELEMENT (stmt_info))) |
1120 | for (auto store_info = DR_GROUP_FIRST_ELEMENT (stmt_info); |
1121 | store_info != NULL; |
1122 | store_info = DR_GROUP_NEXT_ELEMENT (store_info)) |
1123 | if ((store_info == stmt_info |
                  || get_later_stmt (store_info, stmt_info) == stmt_info)
1125 | && !check_hoist (store_info)) |
1126 | return false; |
1127 | } |
1128 | else |
1129 | { |
1130 | if (!check_hoist (stmt_info)) |
1131 | return false; |
1132 | } |
1133 | } |
1134 | } |
1135 | return true; |
1136 | } |
1137 | |
1138 | |
/* Function vect_slp_analyze_instance_dependence.

   Examine all the data references in the SLP instance, and make sure there
   do not exist any data dependences between them that would prevent
   vectorization.  */
1144 | |
1145 | bool |
1146 | vect_slp_analyze_instance_dependence (vec_info *vinfo, slp_instance instance) |
1147 | { |
1148 | DUMP_VECT_SCOPE ("vect_slp_analyze_instance_dependence" ); |
1149 | |
1150 | /* The stores of this instance are at the root of the SLP tree. */ |
1151 | slp_tree store = NULL; |
1152 | if (SLP_INSTANCE_KIND (instance) == slp_inst_kind_store) |
1153 | store = SLP_INSTANCE_TREE (instance); |
1154 | |
1155 | /* Verify we can sink stores to the vectorized stmt insert location. */ |
1156 | stmt_vec_info last_store_info = NULL; |
1157 | if (store) |
1158 | { |
      if (! vect_slp_analyze_store_dependences (vinfo, store))
1160 | return false; |
1161 | |
1162 | /* Mark stores in this instance and remember the last one. */ |
1163 | last_store_info = vect_find_last_scalar_stmt_in_slp (store); |
1164 | for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (store).length (); ++k) |
        gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, true);
1166 | } |
1167 | |
1168 | bool res = true; |
1169 | |
1170 | /* Verify we can sink loads to the vectorized stmt insert location, |
1171 | special-casing stores of this instance. */ |
1172 | for (slp_tree &load : SLP_INSTANCE_LOADS (instance)) |
    if (! vect_slp_analyze_load_dependences (vinfo, load,
                                             store
                                             ? SLP_TREE_SCALAR_STMTS (store)
                                             : vNULL, last_store_info))
1177 | { |
1178 | res = false; |
1179 | break; |
1180 | } |
1181 | |
1182 | /* Unset the visited flag. */ |
1183 | if (store) |
1184 | for (unsigned k = 0; k < SLP_TREE_SCALAR_STMTS (store).length (); ++k) |
      gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, false);
1186 | |
1187 | return res; |
1188 | } |
1189 | |
1190 | /* Return the misalignment of DR_INFO accessed in VECTYPE with OFFSET |
1191 | applied. */ |
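/* A small worked example (a sketch): with a 16-byte target alignment, if the
   first element of a group has misalignment 8 and DR_INFO accesses the
   element 4 bytes further in (DIFF == 4) with OFFSET 0, the function returns
   (8 + 4 + 0) % 16 == 12.  */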
1192 | |
1193 | int |
1194 | dr_misalignment (dr_vec_info *dr_info, tree vectype, poly_int64 offset) |
1195 | { |
1196 | HOST_WIDE_INT diff = 0; |
1197 | /* Alignment is only analyzed for the first element of a DR group, |
1198 | use that but adjust misalignment by the offset of the access. */ |
1199 | if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt)) |
1200 | { |
1201 | dr_vec_info *first_dr |
1202 | = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt)); |
1203 | /* vect_analyze_data_ref_accesses guarantees that DR_INIT are |
1204 | INTEGER_CSTs and the first element in the group has the lowest |
1205 | address. */ |
1206 | diff = (TREE_INT_CST_LOW (DR_INIT (dr_info->dr)) |
1207 | - TREE_INT_CST_LOW (DR_INIT (first_dr->dr))); |
1208 | gcc_assert (diff >= 0); |
1209 | dr_info = first_dr; |
1210 | } |
1211 | |
1212 | int misalign = dr_info->misalignment; |
1213 | gcc_assert (misalign != DR_MISALIGNMENT_UNINITIALIZED); |
1214 | if (misalign == DR_MISALIGNMENT_UNKNOWN) |
1215 | return misalign; |
1216 | |
1217 | /* If the access is only aligned for a vector type with smaller alignment |
1218 | requirement the access has unknown misalignment. */ |
  if (maybe_lt (dr_info->target_alignment * BITS_PER_UNIT,
                targetm.vectorize.preferred_vector_alignment (vectype)))
1221 | return DR_MISALIGNMENT_UNKNOWN; |
1222 | |
1223 | /* Apply the offset from the DR group start and the externally supplied |
1224 | offset which can for example result from a negative stride access. */ |
1225 | poly_int64 misalignment = misalign + diff + offset; |
1226 | |
1227 | /* vect_compute_data_ref_alignment will have ensured that target_alignment |
1228 | is constant and otherwise set misalign to DR_MISALIGNMENT_UNKNOWN. */ |
1229 | unsigned HOST_WIDE_INT target_alignment_c |
1230 | = dr_info->target_alignment.to_constant (); |
  if (!known_misalignment (misalignment, target_alignment_c, &misalign))
1232 | return DR_MISALIGNMENT_UNKNOWN; |
1233 | return misalign; |
1234 | } |
1235 | |
1236 | /* Record the base alignment guarantee given by DRB, which occurs |
1237 | in STMT_INFO. */ |
1238 | |
1239 | static void |
1240 | vect_record_base_alignment (vec_info *vinfo, stmt_vec_info stmt_info, |
1241 | innermost_loop_behavior *drb) |
1242 | { |
1243 | bool existed; |
1244 | std::pair<stmt_vec_info, innermost_loop_behavior *> &entry |
    = vinfo->base_alignments.get_or_insert (drb->base_address, &existed);
1246 | if (!existed || entry.second->base_alignment < drb->base_alignment) |
1247 | { |
      entry = std::make_pair (stmt_info, drb);
1249 | if (dump_enabled_p ()) |
1250 | dump_printf_loc (MSG_NOTE, vect_location, |
1251 | "recording new base alignment for %T\n" |
1252 | " alignment: %d\n" |
1253 | " misalignment: %d\n" |
1254 | " based on: %G" , |
1255 | drb->base_address, |
1256 | drb->base_alignment, |
1257 | drb->base_misalignment, |
1258 | stmt_info->stmt); |
1259 | } |
1260 | } |
1261 | |
1262 | /* If the region we're going to vectorize is reached, all unconditional |
1263 | data references occur at least once. We can therefore pool the base |
1264 | alignment guarantees from each unconditional reference. Do this by |
1265 | going through all the data references in VINFO and checking whether |
1266 | the containing statement makes the reference unconditionally. If so, |
1267 | record the alignment of the base address in VINFO so that it can be |
1268 | used for all other references with the same base. */ |
1269 | |
1270 | void |
1271 | vect_record_base_alignments (vec_info *vinfo) |
1272 | { |
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1274 | class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL; |
1275 | for (data_reference *dr : vinfo->shared->datarefs) |
1276 | { |
1277 | dr_vec_info *dr_info = vinfo->lookup_dr (dr); |
1278 | stmt_vec_info stmt_info = dr_info->stmt; |
1279 | if (!DR_IS_CONDITIONAL_IN_STMT (dr) |
1280 | && STMT_VINFO_VECTORIZABLE (stmt_info) |
1281 | && !STMT_VINFO_GATHER_SCATTER_P (stmt_info)) |
1282 | { |
          vect_record_base_alignment (vinfo, stmt_info, &DR_INNERMOST (dr));
1284 | |
1285 | /* If DR is nested in the loop that is being vectorized, we can also |
1286 | record the alignment of the base wrt the outer loop. */ |
1287 | if (loop && nested_in_vect_loop_p (loop, stmt_info)) |
            vect_record_base_alignment
              (vinfo, stmt_info, &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info));
1290 | } |
1291 | } |
1292 | } |
1293 | |
1294 | /* Function vect_compute_data_ref_alignment |
1295 | |
1296 | Compute the misalignment of the data reference DR_INFO when vectorizing |
1297 | with VECTYPE. |
1298 | |
1299 | Output: |
1300 | 1. initialized misalignment info for DR_INFO |
1301 | |
1302 | FOR NOW: No analysis is actually performed. Misalignment is calculated |
1303 | only for trivial cases. TODO. */ |
1304 | |
1305 | static void |
1306 | vect_compute_data_ref_alignment (vec_info *vinfo, dr_vec_info *dr_info, |
1307 | tree vectype) |
1308 | { |
1309 | stmt_vec_info stmt_info = dr_info->stmt; |
1310 | vec_base_alignments *base_alignments = &vinfo->base_alignments; |
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1312 | class loop *loop = NULL; |
1313 | tree ref = DR_REF (dr_info->dr); |
1314 | |
1315 | if (dump_enabled_p ()) |
1316 | dump_printf_loc (MSG_NOTE, vect_location, |
1317 | "vect_compute_data_ref_alignment:\n" ); |
1318 | |
1319 | if (loop_vinfo) |
1320 | loop = LOOP_VINFO_LOOP (loop_vinfo); |
1321 | |
1322 | /* Initialize misalignment to unknown. */ |
1323 | SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN); |
1324 | |
1325 | if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) |
1326 | return; |
1327 | |
1328 | innermost_loop_behavior *drb = vect_dr_behavior (vinfo, dr_info); |
1329 | bool step_preserves_misalignment_p; |
1330 | |
1331 | poly_uint64 vector_alignment |
    = exact_div (targetm.vectorize.preferred_vector_alignment (vectype),
1333 | BITS_PER_UNIT); |
1334 | SET_DR_TARGET_ALIGNMENT (dr_info, vector_alignment); |
1335 | |
1336 | /* If the main loop has peeled for alignment we have no way of knowing |
1337 | whether the data accesses in the epilogues are aligned. We can't at |
1338 | compile time answer the question whether we have entered the main loop or |
1339 | not. Fixes PR 92351. */ |
1340 | if (loop_vinfo) |
1341 | { |
1342 | loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo); |
1343 | if (orig_loop_vinfo |
1344 | && LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo) != 0) |
1345 | return; |
1346 | } |
1347 | |
1348 | unsigned HOST_WIDE_INT vect_align_c; |
  if (!vector_alignment.is_constant (&vect_align_c))
1350 | return; |
1351 | |
1352 | /* No step for BB vectorization. */ |
1353 | if (!loop) |
1354 | { |
1355 | gcc_assert (integer_zerop (drb->step)); |
1356 | step_preserves_misalignment_p = true; |
1357 | } |
1358 | |
1359 | /* In case the dataref is in an inner-loop of the loop that is being |
1360 | vectorized (LOOP), we use the base and misalignment information |
1361 | relative to the outer-loop (LOOP). This is ok only if the misalignment |
1362 | stays the same throughout the execution of the inner-loop, which is why |
1363 | we have to check that the stride of the dataref in the inner-loop evenly |
1364 | divides by the vector alignment. */ |
1365 | else if (nested_in_vect_loop_p (loop, stmt_info)) |
1366 | { |
1367 | step_preserves_misalignment_p |
1368 | = (DR_STEP_ALIGNMENT (dr_info->dr) % vect_align_c) == 0; |
1369 | |
1370 | if (dump_enabled_p ()) |
1371 | { |
1372 | if (step_preserves_misalignment_p) |
1373 | dump_printf_loc (MSG_NOTE, vect_location, |
1374 | "inner step divides the vector alignment.\n" ); |
1375 | else |
1376 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1377 | "inner step doesn't divide the vector" |
1378 | " alignment.\n" ); |
1379 | } |
1380 | } |
1381 | |
1382 | /* Similarly we can only use base and misalignment information relative to |
1383 | an innermost loop if the misalignment stays the same throughout the |
1384 | execution of the loop. As above, this is the case if the stride of |
1385 | the dataref evenly divides by the alignment. */ |
1386 | else |
1387 | { |
1388 | poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
1389 | step_preserves_misalignment_p |
        = multiple_p (DR_STEP_ALIGNMENT (dr_info->dr) * vf, vect_align_c);
1391 | |
1392 | if (!step_preserves_misalignment_p && dump_enabled_p ()) |
1393 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1394 | "step doesn't divide the vector alignment.\n" ); |
1395 | } |
1396 | |
1397 | unsigned int base_alignment = drb->base_alignment; |
1398 | unsigned int base_misalignment = drb->base_misalignment; |
1399 | |
1400 | /* Calculate the maximum of the pooled base address alignment and the |
1401 | alignment that we can compute for DR itself. */ |
1402 | std::pair<stmt_vec_info, innermost_loop_behavior *> *entry |
    = base_alignments->get (drb->base_address);
1404 | if (entry |
1405 | && base_alignment < (*entry).second->base_alignment |
1406 | && (loop_vinfo |
          || (dominated_by_p (CDI_DOMINATORS, gimple_bb (stmt_info->stmt),
                              gimple_bb (entry->first->stmt))
              && (gimple_bb (stmt_info->stmt) != gimple_bb (entry->first->stmt)
                  || (entry->first->dr_aux.group <= dr_info->group)))))
1411 | { |
1412 | base_alignment = entry->second->base_alignment; |
1413 | base_misalignment = entry->second->base_misalignment; |
1414 | } |
1415 | |
1416 | if (drb->offset_alignment < vect_align_c |
1417 | || !step_preserves_misalignment_p |
1418 | /* We need to know whether the step wrt the vectorized loop is |
1419 | negative when computing the starting misalignment below. */ |
1420 | || TREE_CODE (drb->step) != INTEGER_CST) |
1421 | { |
1422 | if (dump_enabled_p ()) |
1423 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1424 | "Unknown alignment for access: %T\n" , ref); |
1425 | return; |
1426 | } |
1427 | |
1428 | if (base_alignment < vect_align_c) |
1429 | { |
1430 | unsigned int max_alignment; |
1431 | tree base = get_base_for_alignment (drb->base_address, &max_alignment); |
1432 | if (max_alignment < vect_align_c |
1433 | || !vect_can_force_dr_alignment_p (base, |
1434 | vect_align_c * BITS_PER_UNIT)) |
1435 | { |
1436 | if (dump_enabled_p ()) |
1437 | dump_printf_loc (MSG_NOTE, vect_location, |
1438 | "can't force alignment of ref: %T\n" , ref); |
1439 | return; |
1440 | } |
1441 | |
1442 | /* Force the alignment of the decl. |
1443 | NOTE: This is the only change to the code we make during |
1444 | the analysis phase, before deciding to vectorize the loop. */ |
1445 | if (dump_enabled_p ()) |
1446 | dump_printf_loc (MSG_NOTE, vect_location, |
1447 | "force alignment of %T\n" , ref); |
1448 | |
1449 | dr_info->base_decl = base; |
1450 | dr_info->base_misaligned = true; |
1451 | base_misalignment = 0; |
1452 | } |
1453 | poly_int64 misalignment |
1454 | = base_misalignment + wi::to_poly_offset (t: drb->init).force_shwi (); |
1455 | |
1456 | unsigned int const_misalignment; |
1457 | if (!known_misalignment (value: misalignment, align: vect_align_c, misalign: &const_misalignment)) |
1458 | { |
1459 | if (dump_enabled_p ()) |
1460 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1461 | "Non-constant misalignment for access: %T\n" , ref); |
1462 | return; |
1463 | } |
1464 | |
1465 | SET_DR_MISALIGNMENT (dr_info, const_misalignment); |
1466 | |
1467 | if (dump_enabled_p ()) |
1468 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1469 | "misalign = %d bytes of ref %T\n" , |
1470 | const_misalignment, ref); |
1471 | |
1472 | return; |
1473 | } |
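
/* Illustrative note (hypothetical numbers, not taken from the code above):
   with a vector alignment of 16 bytes, a pooled base misalignment of 8 and
   a DR_INIT of 20, the computation above yields

     misalignment = 8 + 20 = 28,   const_misalignment = 28 % 16 = 12

   so the access is recorded as sitting 12 bytes past a 16-byte boundary.  */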
1474 | |
1475 | /* Return whether DR_INFO, which is related to DR_PEEL_INFO in |
1476 | that it only differs in DR_INIT, is aligned if DR_PEEL_INFO |
1477 | is made aligned via peeling. */ |
1478 | |
1479 | static bool |
1480 | vect_dr_aligned_if_related_peeled_dr_is (dr_vec_info *dr_info, |
1481 | dr_vec_info *dr_peel_info) |
1482 | { |
1483 | if (multiple_p (DR_TARGET_ALIGNMENT (dr_peel_info), |
1484 | DR_TARGET_ALIGNMENT (dr_info))) |
1485 | { |
1486 | poly_offset_int diff |
1487 | = (wi::to_poly_offset (DR_INIT (dr_peel_info->dr)) |
1488 | - wi::to_poly_offset (DR_INIT (dr_info->dr))); |
1489 | if (known_eq (diff, 0) |
1490 | || multiple_p (a: diff, DR_TARGET_ALIGNMENT (dr_info))) |
1491 | return true; |
1492 | } |
1493 | return false; |
1494 | } |
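
/* Worked example for the checks above (hypothetical values): if
   DR_PEEL_INFO has target alignment 32 and DR_INFO has target alignment 16,
   the first test holds since 32 is a multiple of 16.  With DR_INITs of 48
   and 0 the difference is 48, a multiple of 16, so aligning DR_PEEL_INFO by
   peeling also aligns DR_INFO.  With a DR_INIT of 40 instead, the
   difference 8 is not a multiple of 16 and we conservatively return false.  */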
1495 | |
1496 | /* Return whether DR_INFO is aligned if DR_PEEL_INFO is made |
1497 | aligned via peeling. */ |
1498 | |
1499 | static bool |
1500 | vect_dr_aligned_if_peeled_dr_is (dr_vec_info *dr_info, |
1501 | dr_vec_info *dr_peel_info) |
1502 | { |
1503 | if (!operand_equal_p (DR_BASE_ADDRESS (dr_info->dr), |
1504 | DR_BASE_ADDRESS (dr_peel_info->dr), flags: 0) |
1505 | || !operand_equal_p (DR_OFFSET (dr_info->dr), |
1506 | DR_OFFSET (dr_peel_info->dr), flags: 0) |
1507 | || !operand_equal_p (DR_STEP (dr_info->dr), |
1508 | DR_STEP (dr_peel_info->dr), flags: 0)) |
1509 | return false; |
1510 | |
1511 | return vect_dr_aligned_if_related_peeled_dr_is (dr_info, dr_peel_info); |
1512 | } |
1513 | |
1514 | /* Compute the value for dr_info->misalign so that the access appears |
1515 | aligned. This is used by peeling to compensate for the offset that
1516 | dr_misalignment applies when the step is negative. */
1517 | |
1518 | int |
1519 | vect_dr_misalign_for_aligned_access (dr_vec_info *dr_info) |
1520 | { |
1521 | if (tree_int_cst_sgn (DR_STEP (dr_info->dr)) >= 0) |
1522 | return 0; |
1523 | |
1524 | tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt); |
1525 | poly_int64 misalignment |
1526 | = ((TYPE_VECTOR_SUBPARTS (node: vectype) - 1) |
1527 | * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))); |
1528 | |
1529 | unsigned HOST_WIDE_INT target_alignment_c; |
1530 | int misalign; |
1531 | if (!dr_info->target_alignment.is_constant (const_value: &target_alignment_c) |
1532 | || !known_misalignment (value: misalignment, align: target_alignment_c, misalign: &misalign)) |
1533 | return DR_MISALIGNMENT_UNKNOWN; |
1534 | return misalign; |
1535 | } |
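
/* Illustration (hypothetical type and alignment): for a negative step with
   a 4-element vector of 4-byte ints and a constant target alignment of 16,
   the value returned is

     (4 - 1) * 4 = 12,   12 % 16 = 12

   which compensates for the element offset that dr_misalignment applies to
   negative-step accesses; for a non-negative step the function returns 0.  */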
1536 | |
1537 | /* Function vect_update_misalignment_for_peel. |
1538 | Sets DR_INFO's misalignment |
1539 | - to 0 if it has the same alignment as DR_PEEL_INFO, |
1540 | - to the misalignment computed using NPEEL if DR_INFO's misalignment is known,
1541 | - to -1 (unknown) otherwise. |
1542 | |
1543 | DR_INFO - the data reference whose misalignment is to be adjusted. |
1544 | DR_PEEL_INFO - the data reference whose misalignment is being made |
1545 | zero in the vector loop by the peel. |
1546 | NPEEL - the number of iterations in the peel loop if the misalignment |
1547 | of DR_PEEL_INFO is known at compile time. */ |
1548 | |
1549 | static void |
1550 | vect_update_misalignment_for_peel (dr_vec_info *dr_info, |
1551 | dr_vec_info *dr_peel_info, int npeel) |
1552 | { |
1553 | /* If dr_info will be aligned whenever dr_peel_info is, then mark it so. */
1554 | if (vect_dr_aligned_if_peeled_dr_is (dr_info, dr_peel_info)) |
1555 | { |
1556 | SET_DR_MISALIGNMENT (dr_info, |
1557 | vect_dr_misalign_for_aligned_access (dr_peel_info)); |
1558 | return; |
1559 | } |
1560 | |
1561 | unsigned HOST_WIDE_INT alignment; |
1562 | if (DR_TARGET_ALIGNMENT (dr_info).is_constant (const_value: &alignment) |
1563 | && known_alignment_for_access_p (dr_info, |
1564 | STMT_VINFO_VECTYPE (dr_info->stmt)) |
1565 | && known_alignment_for_access_p (dr_info: dr_peel_info, |
1566 | STMT_VINFO_VECTYPE (dr_peel_info->stmt))) |
1567 | { |
1568 | int misal = dr_info->misalignment; |
1569 | misal += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr)); |
1570 | misal &= alignment - 1; |
1571 | set_dr_misalignment (dr_info, val: misal); |
1572 | return; |
1573 | } |
1574 | |
1575 | if (dump_enabled_p ()) |
1576 | dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment " \ |
1577 | "to unknown (-1).\n" ); |
1578 | SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN); |
1579 | } |
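
/* Small worked instance of the NPEEL update above (hypothetical numbers):
   with a constant target alignment of 16, a DR_STEP of 4 bytes, a current
   misalignment of 4 and NPEEL = 3, the new misalignment is

     (4 + 3 * 4) & (16 - 1) = 16 & 15 = 0

   i.e. this peel happens to align the reference as well; with NPEEL = 2 it
   would become (4 + 2 * 4) & 15 = 12.  */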
1580 | |
1581 | /* Return true if alignment is relevant for DR_INFO. */ |
1582 | |
1583 | static bool |
1584 | vect_relevant_for_alignment_p (dr_vec_info *dr_info) |
1585 | { |
1586 | stmt_vec_info stmt_info = dr_info->stmt; |
1587 | |
1588 | if (!STMT_VINFO_RELEVANT_P (stmt_info)) |
1589 | return false; |
1590 | |
1591 | /* For interleaving, only the alignment of the first access matters. */ |
1592 | if (STMT_VINFO_GROUPED_ACCESS (stmt_info) |
1593 | && DR_GROUP_FIRST_ELEMENT (stmt_info) != stmt_info) |
1594 | return false; |
1595 | |
1596 | /* Scatter-gather and invariant accesses continue to address individual |
1597 | scalars, so vector-level alignment is irrelevant. */ |
1598 | if (STMT_VINFO_GATHER_SCATTER_P (stmt_info) |
1599 | || integer_zerop (DR_STEP (dr_info->dr))) |
1600 | return false; |
1601 | |
1602 | /* Strided accesses perform only component accesses, alignment is |
1603 | irrelevant for them. */ |
1604 | if (STMT_VINFO_STRIDED_P (stmt_info) |
1605 | && !STMT_VINFO_GROUPED_ACCESS (stmt_info)) |
1606 | return false; |
1607 | |
1608 | return true; |
1609 | } |
1610 | |
1611 | /* Given a memory reference EXP, return whether its alignment is less
1612 | than its size. */ |
1613 | |
1614 | static bool |
1615 | not_size_aligned (tree exp) |
1616 | { |
1617 | if (!tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (exp)))) |
1618 | return true; |
1619 | |
1620 | return (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (exp))) |
1621 | > get_object_alignment (exp)); |
1622 | } |
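
/* For example (hypothetical case): a 4-byte int field of a struct declared
   with __attribute__((packed)) usually gets an object alignment of only
   1 byte, which is smaller than its 4-byte size, so it is treated as a
   packed (not size-aligned) reference.  */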
1623 | |
1624 | /* Function vector_alignment_reachable_p |
1625 | |
1626 | Return true if vector alignment for DR_INFO is reachable by peeling |
1627 | a few loop iterations. Return false otherwise. */ |
1628 | |
1629 | static bool |
1630 | vector_alignment_reachable_p (dr_vec_info *dr_info) |
1631 | { |
1632 | stmt_vec_info stmt_info = dr_info->stmt; |
1633 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
1634 | |
1635 | if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) |
1636 | { |
1637 | /* For interleaved access we peel only if the number of iterations in
1638 | the prologue loop ({VF - misalignment}) is a multiple of the
1639 | number of interleaved accesses (the group size).
1640 | int elem_size, mis_in_elements; |
1641 | |
1642 | /* FORNOW: handle only known alignment. */ |
1643 | if (!known_alignment_for_access_p (dr_info, vectype)) |
1644 | return false; |
1645 | |
1646 | poly_uint64 nelements = TYPE_VECTOR_SUBPARTS (node: vectype); |
1647 | poly_uint64 vector_size = GET_MODE_SIZE (TYPE_MODE (vectype)); |
1648 | elem_size = vector_element_size (vector_size, nelements); |
1649 | mis_in_elements = dr_misalignment (dr_info, vectype) / elem_size; |
1650 | |
1651 | if (!multiple_p (a: nelements - mis_in_elements, DR_GROUP_SIZE (stmt_info))) |
1652 | return false; |
1653 | } |
1654 | |
1655 | /* If the misalignment is known at compile time then allow peeling
1656 | only if natural alignment is reachable through peeling. */ |
1657 | if (known_alignment_for_access_p (dr_info, vectype) |
1658 | && !aligned_access_p (dr_info, vectype)) |
1659 | { |
1660 | HOST_WIDE_INT elmsize = |
1661 | int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype))); |
1662 | if (dump_enabled_p ()) |
1663 | { |
1664 | dump_printf_loc (MSG_NOTE, vect_location, |
1665 | "data size = %wd. misalignment = %d.\n" , elmsize, |
1666 | dr_misalignment (dr_info, vectype)); |
1667 | } |
1668 | if (dr_misalignment (dr_info, vectype) % elmsize) |
1669 | { |
1670 | if (dump_enabled_p ()) |
1671 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1672 | "data size does not divide the misalignment.\n" ); |
1673 | return false; |
1674 | } |
1675 | } |
1676 | |
1677 | if (!known_alignment_for_access_p (dr_info, vectype)) |
1678 | { |
1679 | tree type = TREE_TYPE (DR_REF (dr_info->dr)); |
1680 | bool is_packed = not_size_aligned (DR_REF (dr_info->dr)); |
1681 | if (dump_enabled_p ()) |
1682 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1683 | "Unknown misalignment, %snaturally aligned\n" , |
1684 | is_packed ? "not " : "" ); |
1685 | return targetm.vectorize.vector_alignment_reachable (type, is_packed); |
1686 | } |
1687 | |
1688 | return true; |
1689 | } |
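
/* Illustration of the divisibility check above (hypothetical numbers):
   with 4-byte elements, a known misalignment of 8 bytes can be removed by
   peeling two scalar iterations, whereas a misalignment of 3 bytes can
   never be removed by peeling whole iterations, because every iteration
   advances the address by a multiple of the 4-byte element size.  */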
1690 | |
1691 | |
1692 | /* Calculate the cost of the memory access represented by DR_INFO. */ |
1693 | |
1694 | static void |
1695 | vect_get_data_access_cost (vec_info *vinfo, dr_vec_info *dr_info, |
1696 | dr_alignment_support alignment_support_scheme, |
1697 | int misalignment, |
1698 | unsigned int *inside_cost, |
1699 | unsigned int *outside_cost, |
1700 | stmt_vector_for_cost *body_cost_vec, |
1701 | stmt_vector_for_cost *prologue_cost_vec) |
1702 | { |
1703 | stmt_vec_info stmt_info = dr_info->stmt; |
1704 | loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo); |
1705 | int ncopies; |
1706 | |
1707 | if (PURE_SLP_STMT (stmt_info)) |
1708 | ncopies = 1; |
1709 | else |
1710 | ncopies = vect_get_num_copies (loop_vinfo, STMT_VINFO_VECTYPE (stmt_info)); |
1711 | |
1712 | if (DR_IS_READ (dr_info->dr)) |
1713 | vect_get_load_cost (vinfo, stmt_info, ncopies, alignment_support_scheme, |
1714 | misalignment, true, inside_cost, |
1715 | outside_cost, prologue_cost_vec, body_cost_vec, false); |
1716 | else |
1717 | vect_get_store_cost (vinfo,stmt_info, ncopies, alignment_support_scheme, |
1718 | misalignment, inside_cost, body_cost_vec); |
1719 | |
1720 | if (dump_enabled_p ()) |
1721 | dump_printf_loc (MSG_NOTE, vect_location, |
1722 | "vect_get_data_access_cost: inside_cost = %d, " |
1723 | "outside_cost = %d.\n" , *inside_cost, *outside_cost); |
1724 | } |
1725 | |
1726 | |
1727 | typedef struct _vect_peel_info |
1728 | { |
1729 | dr_vec_info *dr_info; |
1730 | int npeel; |
1731 | unsigned int count; |
1732 | } *vect_peel_info; |
1733 | |
1734 | typedef struct _vect_peel_extended_info |
1735 | { |
1736 | vec_info *vinfo; |
1737 | struct _vect_peel_info peel_info; |
1738 | unsigned int inside_cost; |
1739 | unsigned int outside_cost; |
1740 | } *vect_peel_extended_info; |
1741 | |
1742 | |
1743 | /* Peeling hashtable helpers. */ |
1744 | |
1745 | struct peel_info_hasher : free_ptr_hash <_vect_peel_info> |
1746 | { |
1747 | static inline hashval_t hash (const _vect_peel_info *); |
1748 | static inline bool equal (const _vect_peel_info *, const _vect_peel_info *); |
1749 | }; |
1750 | |
1751 | inline hashval_t |
1752 | peel_info_hasher::hash (const _vect_peel_info *peel_info) |
1753 | { |
1754 | return (hashval_t) peel_info->npeel; |
1755 | } |
1756 | |
1757 | inline bool |
1758 | peel_info_hasher::equal (const _vect_peel_info *a, const _vect_peel_info *b) |
1759 | { |
1760 | return (a->npeel == b->npeel); |
1761 | } |
1762 | |
1763 | |
1764 | /* Insert DR_INFO into peeling hash table with NPEEL as key. */ |
1765 | |
1766 | static void |
1767 | vect_peeling_hash_insert (hash_table<peel_info_hasher> *peeling_htab, |
1768 | loop_vec_info loop_vinfo, dr_vec_info *dr_info, |
1769 | int npeel, bool supportable_if_not_aligned) |
1770 | { |
1771 | struct _vect_peel_info elem, *slot; |
1772 | _vect_peel_info **new_slot; |
1773 | |
1774 | elem.npeel = npeel; |
1775 | slot = peeling_htab->find (value: &elem); |
1776 | if (slot) |
1777 | slot->count++; |
1778 | else |
1779 | { |
1780 | slot = XNEW (struct _vect_peel_info); |
1781 | slot->npeel = npeel; |
1782 | slot->dr_info = dr_info; |
1783 | slot->count = 1; |
1784 | new_slot = peeling_htab->find_slot (value: slot, insert: INSERT); |
1785 | *new_slot = slot; |
1786 | } |
1787 | |
1788 | /* If this DR is not supportable when its misalignment is unknown, bias
1789 | this slot's count so it is preferred when the cost model is disabled. */
1790 | if (!supportable_if_not_aligned |
1791 | && unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))) |
1792 | slot->count += VECT_MAX_COST; |
1793 | } |
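
/* Sketch of how the table is used (hypothetical DRs): inserting DR A with
   NPEEL 2 creates the slot { npeel = 2, count = 1 }; inserting DR B with
   the same NPEEL merely bumps the count to 2.  When the cost model is
   disabled and a DR is not supportable while unaligned, the VECT_MAX_COST
   bias above makes its slot win in vect_peeling_hash_get_most_frequent.  */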
1794 | |
1795 | |
1796 | /* Traverse the peeling hash table to find the peeling option that aligns
1797 | the maximum number of data accesses. */
1798 | |
1799 | int |
1800 | vect_peeling_hash_get_most_frequent (_vect_peel_info **slot, |
1801 | _vect_peel_extended_info *max) |
1802 | { |
1803 | vect_peel_info elem = *slot; |
1804 | |
1805 | if (elem->count > max->peel_info.count |
1806 | || (elem->count == max->peel_info.count |
1807 | && max->peel_info.npeel > elem->npeel)) |
1808 | { |
1809 | max->peel_info.npeel = elem->npeel; |
1810 | max->peel_info.count = elem->count; |
1811 | max->peel_info.dr_info = elem->dr_info; |
1812 | } |
1813 | |
1814 | return 1; |
1815 | } |
1816 | |
1817 | /* Get the costs of peeling NPEEL iterations for LOOP_VINFO, checking
1818 | data access costs for all data refs. DR0_INFO is the data reference
1819 | the peeling is done for, or NULL if no peeling is performed; references
1820 | whose misalignment becomes zero because of that peel are costed as aligned. */
1821 | |
1822 | static void |
1823 | vect_get_peeling_costs_all_drs (loop_vec_info loop_vinfo, |
1824 | dr_vec_info *dr0_info, |
1825 | unsigned int *inside_cost, |
1826 | unsigned int *outside_cost, |
1827 | stmt_vector_for_cost *body_cost_vec, |
1828 | stmt_vector_for_cost *prologue_cost_vec, |
1829 | unsigned int npeel) |
1830 | { |
1831 | vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); |
1832 | |
1833 | bool dr0_alignment_known_p |
1834 | = (dr0_info |
1835 | && known_alignment_for_access_p (dr_info: dr0_info, |
1836 | STMT_VINFO_VECTYPE (dr0_info->stmt))); |
1837 | |
1838 | for (data_reference *dr : datarefs) |
1839 | { |
1840 | dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr); |
1841 | if (!vect_relevant_for_alignment_p (dr_info)) |
1842 | continue; |
1843 | |
1844 | tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt); |
1845 | dr_alignment_support alignment_support_scheme; |
1846 | int misalignment; |
1847 | unsigned HOST_WIDE_INT alignment; |
1848 | |
1849 | bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr), |
1850 | size_zero_node) < 0; |
1851 | poly_int64 off = 0; |
1852 | if (negative) |
1853 | off = ((TYPE_VECTOR_SUBPARTS (node: vectype) - 1) |
1854 | * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))); |
1855 | |
1856 | if (npeel == 0) |
1857 | misalignment = dr_misalignment (dr_info, vectype, offset: off); |
1858 | else if (dr_info == dr0_info |
1859 | || vect_dr_aligned_if_peeled_dr_is (dr_info, dr_peel_info: dr0_info)) |
1860 | misalignment = 0; |
1861 | else if (!dr0_alignment_known_p |
1862 | || !known_alignment_for_access_p (dr_info, vectype) |
1863 | || !DR_TARGET_ALIGNMENT (dr_info).is_constant (const_value: &alignment)) |
1864 | misalignment = DR_MISALIGNMENT_UNKNOWN; |
1865 | else |
1866 | { |
1867 | misalignment = dr_misalignment (dr_info, vectype, offset: off); |
1868 | misalignment += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr)); |
1869 | misalignment &= alignment - 1; |
1870 | } |
1871 | alignment_support_scheme |
1872 | = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype, |
1873 | misalignment); |
1874 | |
1875 | vect_get_data_access_cost (vinfo: loop_vinfo, dr_info, |
1876 | alignment_support_scheme, misalignment, |
1877 | inside_cost, outside_cost, |
1878 | body_cost_vec, prologue_cost_vec); |
1879 | } |
1880 | } |
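
/* Example of the classification above (hypothetical values): when costing
   a peel of NPEEL = 3 for DR0, DR0 itself and every DR accepted by
   vect_dr_aligned_if_peeled_dr_is are costed as aligned; a DR with known
   misalignment 4, a 4-byte step and target alignment 16 is costed with
   misalignment (4 + 3 * 4) & 15 = 0; a DR whose alignment cannot be
   determined at compile time is costed with DR_MISALIGNMENT_UNKNOWN.  */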
1881 | |
1882 | /* Traverse peeling hash table and calculate cost for each peeling option. |
1883 | Find the one with the lowest cost. */ |
1884 | |
1885 | int |
1886 | vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot, |
1887 | _vect_peel_extended_info *min) |
1888 | { |
1889 | vect_peel_info elem = *slot; |
1890 | int dummy; |
1891 | unsigned int inside_cost = 0, outside_cost = 0; |
1892 | loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: min->vinfo); |
1893 | stmt_vector_for_cost prologue_cost_vec, body_cost_vec, |
1894 | epilogue_cost_vec; |
1895 | |
1896 | prologue_cost_vec.create (nelems: 2); |
1897 | body_cost_vec.create (nelems: 2); |
1898 | epilogue_cost_vec.create (nelems: 2); |
1899 | |
1900 | vect_get_peeling_costs_all_drs (loop_vinfo, dr0_info: elem->dr_info, inside_cost: &inside_cost, |
1901 | outside_cost: &outside_cost, body_cost_vec: &body_cost_vec, |
1902 | prologue_cost_vec: &prologue_cost_vec, npeel: elem->npeel); |
1903 | |
1904 | body_cost_vec.release (); |
1905 | |
1906 | outside_cost += vect_get_known_peeling_cost |
1907 | (loop_vinfo, elem->npeel, &dummy, |
1908 | &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), |
1909 | &prologue_cost_vec, &epilogue_cost_vec); |
1910 | |
1911 | /* Prologue and epilogue costs are added to the target model later. |
1912 | These costs depend only on the scalar iteration cost, the |
1913 | number of peeling iterations finally chosen, and the number of |
1914 | misaligned statements. So discard the information found here. */ |
1915 | prologue_cost_vec.release (); |
1916 | epilogue_cost_vec.release (); |
1917 | |
1918 | if (inside_cost < min->inside_cost |
1919 | || (inside_cost == min->inside_cost |
1920 | && outside_cost < min->outside_cost)) |
1921 | { |
1922 | min->inside_cost = inside_cost; |
1923 | min->outside_cost = outside_cost; |
1924 | min->peel_info.dr_info = elem->dr_info; |
1925 | min->peel_info.npeel = elem->npeel; |
1926 | min->peel_info.count = elem->count; |
1927 | } |
1928 | |
1929 | return 1; |
1930 | } |
1931 | |
1932 | |
1933 | /* Choose best peeling option by traversing peeling hash table and either |
1934 | choosing an option with the lowest cost (if cost model is enabled) or the |
1935 | option that aligns as many accesses as possible. */ |
1936 | |
1937 | static struct _vect_peel_extended_info |
1938 | vect_peeling_hash_choose_best_peeling (hash_table<peel_info_hasher> *peeling_htab, |
1939 | loop_vec_info loop_vinfo) |
1940 | { |
1941 | struct _vect_peel_extended_info res; |
1942 | |
1943 | res.peel_info.dr_info = NULL; |
1944 | res.vinfo = loop_vinfo; |
1945 | |
1946 | if (!unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))) |
1947 | { |
1948 | res.inside_cost = INT_MAX; |
1949 | res.outside_cost = INT_MAX; |
1950 | peeling_htab->traverse <_vect_peel_extended_info *, |
1951 | vect_peeling_hash_get_lowest_cost> (argument: &res); |
1952 | } |
1953 | else |
1954 | { |
1955 | res.peel_info.count = 0; |
1956 | peeling_htab->traverse <_vect_peel_extended_info *, |
1957 | vect_peeling_hash_get_most_frequent> (argument: &res); |
1958 | res.inside_cost = 0; |
1959 | res.outside_cost = 0; |
1960 | } |
1961 | |
1962 | return res; |
1963 | } |
1964 | |
1965 | /* Return true if the new peeling NPEEL is supported. */ |
1966 | |
1967 | static bool |
1968 | vect_peeling_supportable (loop_vec_info loop_vinfo, dr_vec_info *dr0_info, |
1969 | unsigned npeel) |
1970 | { |
1971 | vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); |
1972 | enum dr_alignment_support supportable_dr_alignment; |
1973 | |
1974 | bool dr0_alignment_known_p |
1975 | = known_alignment_for_access_p (dr_info: dr0_info, |
1976 | STMT_VINFO_VECTYPE (dr0_info->stmt)); |
1977 | |
1978 | /* Ensure that all data refs can be vectorized after the peel. */ |
1979 | for (data_reference *dr : datarefs) |
1980 | { |
1981 | if (dr == dr0_info->dr) |
1982 | continue; |
1983 | |
1984 | dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr); |
1985 | if (!vect_relevant_for_alignment_p (dr_info) |
1986 | || vect_dr_aligned_if_peeled_dr_is (dr_info, dr_peel_info: dr0_info)) |
1987 | continue; |
1988 | |
1989 | tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt); |
1990 | int misalignment; |
1991 | unsigned HOST_WIDE_INT alignment; |
1992 | if (!dr0_alignment_known_p |
1993 | || !known_alignment_for_access_p (dr_info, vectype) |
1994 | || !DR_TARGET_ALIGNMENT (dr_info).is_constant (const_value: &alignment)) |
1995 | misalignment = DR_MISALIGNMENT_UNKNOWN; |
1996 | else |
1997 | { |
1998 | misalignment = dr_misalignment (dr_info, vectype); |
1999 | misalignment += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr)); |
2000 | misalignment &= alignment - 1; |
2001 | } |
2002 | supportable_dr_alignment |
2003 | = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype, |
2004 | misalignment); |
2005 | if (supportable_dr_alignment == dr_unaligned_unsupported) |
2006 | return false; |
2007 | } |
2008 | |
2009 | return true; |
2010 | } |
2011 | |
2012 | /* Compare two data-references DRA and DRB to group them into chunks |
2013 | with related alignment. */ |
2014 | |
2015 | static int |
2016 | dr_align_group_sort_cmp (const void *dra_, const void *drb_) |
2017 | { |
2018 | data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_); |
2019 | data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_); |
2020 | int cmp; |
2021 | |
2022 | /* Stabilize sort. */ |
2023 | if (dra == drb) |
2024 | return 0; |
2025 | |
2026 | /* Ordering of DRs according to base. */ |
2027 | cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra), |
2028 | DR_BASE_ADDRESS (drb)); |
2029 | if (cmp != 0) |
2030 | return cmp; |
2031 | |
2032 | /* And according to DR_OFFSET. */ |
2033 | cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)); |
2034 | if (cmp != 0) |
2035 | return cmp; |
2036 | |
2037 | /* And according to DR_STEP. */
2038 | cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)); |
2039 | if (cmp != 0) |
2040 | return cmp; |
2041 | |
2042 | /* Then sort by DR_INIT. In case of identical DRs sort by stmt UID. */
2043 | cmp = data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb)); |
2044 | if (cmp == 0) |
2045 | return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1; |
2046 | return cmp; |
2047 | } |
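
/* As an illustration (hypothetical references): after qsorting with this
   comparator, accesses like a[i], a[i+1] and a[i+4] (same base, offset and
   step, differing only in DR_INIT) become adjacent and form one subgroup,
   while accesses to b[i] end up in a separate subgroup; ties on all keys
   fall back to the statement UID so the sort stays stable.  */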
2048 | |
2049 | /* Function vect_enhance_data_refs_alignment |
2050 | |
2051 | This pass will use loop versioning and loop peeling in order to enhance |
2052 | the alignment of data references in the loop. |
2053 | |
2054 | FOR NOW: we assume that whatever versioning/peeling takes place, only the |
2055 | original loop is to be vectorized. Any other loops that are created by |
2056 | the transformations performed in this pass are not supposed to be
2057 | vectorized. This restriction will be relaxed. |
2058 | |
2059 | This pass will require a cost model to guide it whether to apply peeling |
2060 | or versioning or a combination of the two. For example, the scheme that |
2061 | Intel uses when given a loop with several memory accesses is as follows:
2062 | choose one memory access ('p') whose alignment you want to force by doing
2063 | peeling. Then, either (1) generate a loop in which 'p' is aligned and all |
2064 | other accesses are not necessarily aligned, or (2) use loop versioning to |
2065 | generate one loop in which all accesses are aligned, and another loop in |
2066 | which only 'p' is necessarily aligned. |
2067 | |
2068 | ("Automatic Intra-Register Vectorization for the Intel Architecture", |
2069 | Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
2070 | Journal of Parallel Programming, Vol. 30, No. 2, April 2002.) |
2071 | |
2072 | Devising a cost model is the most critical aspect of this work. It will |
2073 | guide us on which access to peel for, whether to use loop versioning, how |
2074 | many versions to create, etc. The cost model will probably consist of |
2075 | generic considerations as well as target specific considerations (on |
2076 | powerpc for example, misaligned stores are more painful than misaligned |
2077 | loads). |
2078 | |
2079 | Here are the general steps involved in alignment enhancements: |
2080 | |
2081 | -- original loop, before alignment analysis: |
2082 | for (i=0; i<N; i++){ |
2083 | x = q[i]; # DR_MISALIGNMENT(q) = unknown |
2084 | p[i] = y; # DR_MISALIGNMENT(p) = unknown |
2085 | } |
2086 | |
2087 | -- After vect_compute_data_refs_alignment: |
2088 | for (i=0; i<N; i++){ |
2089 | x = q[i]; # DR_MISALIGNMENT(q) = 3 |
2090 | p[i] = y; # DR_MISALIGNMENT(p) = unknown |
2091 | } |
2092 | |
2093 | -- Possibility 1: we do loop versioning: |
2094 | if (p is aligned) { |
2095 | for (i=0; i<N; i++){ # loop 1A |
2096 | x = q[i]; # DR_MISALIGNMENT(q) = 3 |
2097 | p[i] = y; # DR_MISALIGNMENT(p) = 0 |
2098 | } |
2099 | } |
2100 | else { |
2101 | for (i=0; i<N; i++){ # loop 1B |
2102 | x = q[i]; # DR_MISALIGNMENT(q) = 3 |
2103 | p[i] = y; # DR_MISALIGNMENT(p) = unaligned |
2104 | } |
2105 | } |
2106 | |
2107 | -- Possibility 2: we do loop peeling: |
2108 | for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized). |
2109 | x = q[i]; |
2110 | p[i] = y; |
2111 | } |
2112 | for (i = 3; i < N; i++){ # loop 2A |
2113 | x = q[i]; # DR_MISALIGNMENT(q) = 0 |
2114 | p[i] = y; # DR_MISALIGNMENT(p) = unknown |
2115 | } |
2116 | |
2117 | -- Possibility 3: combination of loop peeling and versioning: |
2118 | for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized). |
2119 | x = q[i]; |
2120 | p[i] = y; |
2121 | } |
2122 | if (p is aligned) { |
2123 | for (i = 3; i<N; i++){ # loop 3A |
2124 | x = q[i]; # DR_MISALIGNMENT(q) = 0 |
2125 | p[i] = y; # DR_MISALIGNMENT(p) = 0 |
2126 | } |
2127 | } |
2128 | else { |
2129 | for (i = 3; i<N; i++){ # loop 3B |
2130 | x = q[i]; # DR_MISALIGNMENT(q) = 0 |
2131 | p[i] = y; # DR_MISALIGNMENT(p) = unaligned |
2132 | } |
2133 | } |
2134 | |
2135 | These loops are later passed to loop_transform to be vectorized. The |
2136 | vectorizer will use the alignment information to guide the transformation |
2137 | (whether to generate regular loads/stores, or with special handling for |
2138 | misalignment). */ |
2139 | |
2140 | opt_result |
2141 | vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) |
2142 | { |
2143 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
2144 | dr_vec_info *first_store = NULL; |
2145 | dr_vec_info *dr0_info = NULL; |
2146 | struct data_reference *dr; |
2147 | unsigned int i; |
2148 | bool do_peeling = false; |
2149 | bool do_versioning = false; |
2150 | unsigned int npeel = 0; |
2151 | bool one_misalignment_known = false; |
2152 | bool one_misalignment_unknown = false; |
2153 | bool one_dr_unsupportable = false; |
2154 | dr_vec_info *unsupportable_dr_info = NULL; |
2155 | unsigned int dr0_same_align_drs = 0, first_store_same_align_drs = 0; |
2156 | hash_table<peel_info_hasher> peeling_htab (1); |
2157 | |
2158 | DUMP_VECT_SCOPE ("vect_enhance_data_refs_alignment" ); |
2159 | |
2160 | /* Reset data so we can safely be called multiple times. */ |
2161 | LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (size: 0); |
2162 | LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = 0; |
2163 | |
2164 | if (LOOP_VINFO_DATAREFS (loop_vinfo).is_empty ()) |
2165 | return opt_result::success (); |
2166 | |
2167 | /* Sort the vector of datarefs so DRs that have the same or dependent |
2168 | alignment are next to each other. */ |
2169 | auto_vec<data_reference_p> datarefs |
2170 | = LOOP_VINFO_DATAREFS (loop_vinfo).copy (); |
2171 | datarefs.qsort (dr_align_group_sort_cmp); |
2172 | |
2173 | /* For each dataref, compute how many other DRs become aligned when we
2174 | peel until that dataref itself is aligned. */
2175 | auto_vec<unsigned> n_same_align_refs (datarefs.length ()); |
2176 | n_same_align_refs.quick_grow_cleared (len: datarefs.length ()); |
2177 | unsigned i0; |
2178 | for (i0 = 0; i0 < datarefs.length (); ++i0) |
2179 | if (DR_BASE_ADDRESS (datarefs[i0])) |
2180 | break; |
2181 | for (i = i0 + 1; i <= datarefs.length (); ++i) |
2182 | { |
2183 | if (i == datarefs.length () |
2184 | || !operand_equal_p (DR_BASE_ADDRESS (datarefs[i0]), |
2185 | DR_BASE_ADDRESS (datarefs[i]), flags: 0) |
2186 | || !operand_equal_p (DR_OFFSET (datarefs[i0]), |
2187 | DR_OFFSET (datarefs[i]), flags: 0) |
2188 | || !operand_equal_p (DR_STEP (datarefs[i0]), |
2189 | DR_STEP (datarefs[i]), flags: 0)) |
2190 | { |
2191 | /* The subgroup [i0, i-1] now only differs in DR_INIT and |
2192 | possibly DR_TARGET_ALIGNMENT. Still the whole subgroup |
2193 | will get known misalignment if we align one of the refs |
2194 | with the largest DR_TARGET_ALIGNMENT. */ |
2195 | for (unsigned j = i0; j < i; ++j) |
2196 | { |
2197 | dr_vec_info *dr_infoj = loop_vinfo->lookup_dr (datarefs[j]); |
2198 | for (unsigned k = i0; k < i; ++k) |
2199 | { |
2200 | if (k == j) |
2201 | continue; |
2202 | dr_vec_info *dr_infok = loop_vinfo->lookup_dr (datarefs[k]); |
2203 | if (vect_dr_aligned_if_related_peeled_dr_is (dr_info: dr_infok, |
2204 | dr_peel_info: dr_infoj)) |
2205 | n_same_align_refs[j]++; |
2206 | } |
2207 | } |
2208 | i0 = i; |
2209 | } |
2210 | } |
2211 | |
2212 | /* While cost model enhancements are expected in the future, the high level |
2213 | view of the code at this time is as follows: |
2214 | |
2215 | A) If there is a misaligned access then see if peeling to align |
2216 | this access can make all data references satisfy |
2217 | vect_supportable_dr_alignment. If so, update data structures |
2218 | as needed and return true. |
2219 | |
2220 | B) If peeling wasn't possible and there is a data reference with an |
2221 | unknown misalignment that does not satisfy vect_supportable_dr_alignment |
2222 | then see if loop versioning checks can be used to make all data |
2223 | references satisfy vect_supportable_dr_alignment. If so, update |
2224 | data structures as needed and return true. |
2225 | |
2226 | C) If neither peeling nor versioning were successful then return false if |
2227 | any data reference does not satisfy vect_supportable_dr_alignment. |
2228 | |
2229 | D) Return true (all data references satisfy vect_supportable_dr_alignment). |
2230 | |
2231 | Note, Possibility 3 above (which is peeling and versioning together) is not |
2232 | being done at this time. */ |
2233 | |
2234 | /* (1) Peeling to force alignment. */ |
2235 | |
2236 | /* (1.1) Decide whether to perform peeling, and how many iterations to peel: |
2237 | Considerations: |
2238 | + How many accesses will become aligned due to the peeling |
2239 | - How many accesses will become unaligned due to the peeling, |
2240 | and the cost of misaligned accesses. |
2241 | - The cost of peeling (the extra runtime checks, the increase |
2242 | in code size). */ |
2243 | |
2244 | FOR_EACH_VEC_ELT (datarefs, i, dr) |
2245 | { |
2246 | dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr); |
2247 | if (!vect_relevant_for_alignment_p (dr_info)) |
2248 | continue; |
2249 | |
2250 | stmt_vec_info stmt_info = dr_info->stmt; |
2251 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
2252 | do_peeling = vector_alignment_reachable_p (dr_info); |
2253 | if (do_peeling) |
2254 | { |
2255 | if (known_alignment_for_access_p (dr_info, vectype)) |
2256 | { |
2257 | unsigned int npeel_tmp = 0; |
2258 | bool negative = tree_int_cst_compare (DR_STEP (dr), |
2259 | size_zero_node) < 0; |
2260 | |
2261 | /* If known_alignment_for_access_p then we have set |
2262 | DR_MISALIGNMENT which is only done if we know it at compile
2263 | time, so it is safe to assume target alignment is constant. |
2264 | */ |
2265 | unsigned int target_align = |
2266 | DR_TARGET_ALIGNMENT (dr_info).to_constant (); |
2267 | unsigned HOST_WIDE_INT dr_size = vect_get_scalar_dr_size (dr_info); |
2268 | poly_int64 off = 0; |
2269 | if (negative) |
2270 | off = (TYPE_VECTOR_SUBPARTS (node: vectype) - 1) * -dr_size; |
2271 | unsigned int mis = dr_misalignment (dr_info, vectype, offset: off); |
2272 | mis = negative ? mis : -mis; |
2273 | if (mis != 0) |
2274 | npeel_tmp = (mis & (target_align - 1)) / dr_size; |
2275 | |
2276 | /* For multiple types, it is possible that the bigger type access |
2277 | will have more than one peeling option. E.g., a loop with two |
2278 | types: one of size (vector size / 4), and the other one of |
2279 | size (vector size / 8). The vectorization factor will be 8. If both
2280 | accesses are misaligned by 3, the first one needs one scalar |
2281 | iteration to be aligned, and the second one needs 5. But the |
2282 | first one will be aligned also by peeling 5 scalar |
2283 | iterations, and in that case both accesses will be aligned. |
2284 | Hence, except for the immediate peeling amount, we also want |
2285 | to try to add full vector size, while we don't exceed |
2286 | vectorization factor. |
2287 | We do this automatically for cost model, since we calculate |
2288 | cost for every peeling option. */ |
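/* Concretely (hypothetical numbers): with a target alignment of 16, 4-byte
   elements and an initial peeling amount of 1, and with nscalars below
   ending up as VF = 8, the loop below records the candidates npeel = 1
   and npeel = 5; the next candidate, 9, already exceeds the vectorization
   factor and is not tried.  */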
2289 | poly_uint64 nscalars = npeel_tmp; |
2290 | if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))) |
2291 | { |
2292 | poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
2293 | nscalars = (STMT_SLP_TYPE (stmt_info) |
2294 | ? vf * DR_GROUP_SIZE (stmt_info) : vf); |
2295 | } |
2296 | |
2297 | /* Save info about DR in the hash table. Also include peeling |
2298 | amounts according to the explanation above. Indicate |
2299 | the alignment status when the ref is not aligned. |
2300 | ??? Rather than using unknown alignment here we should |
2301 | prune all entries from the peeling hashtable which cause |
2302 | DRs to be not supported. */ |
2303 | bool supportable_if_not_aligned |
2304 | = vect_supportable_dr_alignment |
2305 | (loop_vinfo, dr_info, vectype, DR_MISALIGNMENT_UNKNOWN); |
2306 | while (known_le (npeel_tmp, nscalars)) |
2307 | { |
2308 | vect_peeling_hash_insert (peeling_htab: &peeling_htab, loop_vinfo, |
2309 | dr_info, npeel: npeel_tmp, |
2310 | supportable_if_not_aligned); |
2311 | npeel_tmp += MAX (1, target_align / dr_size); |
2312 | } |
2313 | |
2314 | one_misalignment_known = true; |
2315 | } |
2316 | else |
2317 | { |
2318 | /* If we don't know any misalignment values, we prefer |
2319 | peeling for the data-ref that has the maximum number of data-refs
2320 | with the same alignment, unless the target prefers to align
2321 | stores over loads. */
2322 | unsigned same_align_drs = n_same_align_refs[i]; |
2323 | if (!dr0_info |
2324 | || dr0_same_align_drs < same_align_drs) |
2325 | { |
2326 | dr0_same_align_drs = same_align_drs; |
2327 | dr0_info = dr_info; |
2328 | } |
2329 | /* For data-refs with the same number of related |
2330 | accesses prefer the one where the misalign |
2331 | computation will be invariant in the outermost loop. */ |
2332 | else if (dr0_same_align_drs == same_align_drs) |
2333 | { |
2334 | class loop *ivloop0, *ivloop; |
2335 | ivloop0 = outermost_invariant_loop_for_expr |
2336 | (loop, DR_BASE_ADDRESS (dr0_info->dr)); |
2337 | ivloop = outermost_invariant_loop_for_expr |
2338 | (loop, DR_BASE_ADDRESS (dr)); |
2339 | if ((ivloop && !ivloop0) |
2340 | || (ivloop && ivloop0 |
2341 | && flow_loop_nested_p (ivloop, ivloop0))) |
2342 | dr0_info = dr_info; |
2343 | } |
2344 | |
2345 | one_misalignment_unknown = true; |
2346 | |
2347 | /* Check for data refs with unsupportable alignment that |
2348 | can be peeled. */ |
2349 | enum dr_alignment_support supportable_dr_alignment |
2350 | = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype, |
2351 | DR_MISALIGNMENT_UNKNOWN); |
2352 | if (supportable_dr_alignment == dr_unaligned_unsupported) |
2353 | { |
2354 | one_dr_unsupportable = true; |
2355 | unsupportable_dr_info = dr_info; |
2356 | } |
2357 | |
2358 | if (!first_store && DR_IS_WRITE (dr)) |
2359 | { |
2360 | first_store = dr_info; |
2361 | first_store_same_align_drs = same_align_drs; |
2362 | } |
2363 | } |
2364 | } |
2365 | else |
2366 | { |
2367 | if (!aligned_access_p (dr_info, vectype)) |
2368 | { |
2369 | if (dump_enabled_p ()) |
2370 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2371 | "vector alignment may not be reachable\n" ); |
2372 | break; |
2373 | } |
2374 | } |
2375 | } |
2376 | |
2377 | /* Check if we can possibly peel the loop. */ |
2378 | if (!vect_can_advance_ivs_p (loop_vinfo) |
2379 | || !slpeel_can_duplicate_loop_p (loop, LOOP_VINFO_IV_EXIT (loop_vinfo), |
2380 | loop_preheader_edge (loop)) |
2381 | || loop->inner |
2382 | || LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)) |
2383 | do_peeling = false; |
2384 | |
2385 | struct _vect_peel_extended_info peel_for_known_alignment; |
2386 | struct _vect_peel_extended_info peel_for_unknown_alignment; |
2387 | struct _vect_peel_extended_info best_peel; |
2388 | |
2389 | peel_for_unknown_alignment.inside_cost = INT_MAX; |
2390 | peel_for_unknown_alignment.outside_cost = INT_MAX; |
2391 | peel_for_unknown_alignment.peel_info.count = 0; |
2392 | |
2393 | if (do_peeling |
2394 | && one_misalignment_unknown) |
2395 | { |
2396 | /* Check if the target prefers to align stores over loads, i.e., if
2397 | misaligned stores are more expensive than misaligned loads (taking |
2398 | drs with same alignment into account). */ |
2399 | unsigned int load_inside_cost = 0; |
2400 | unsigned int load_outside_cost = 0; |
2401 | unsigned int store_inside_cost = 0; |
2402 | unsigned int store_outside_cost = 0; |
2403 | unsigned int estimated_npeels = vect_vf_for_cost (loop_vinfo) / 2; |
2404 | |
2405 | stmt_vector_for_cost dummy; |
2406 | dummy.create (nelems: 2); |
2407 | vect_get_peeling_costs_all_drs (loop_vinfo, dr0_info, |
2408 | inside_cost: &load_inside_cost, |
2409 | outside_cost: &load_outside_cost, |
2410 | body_cost_vec: &dummy, prologue_cost_vec: &dummy, npeel: estimated_npeels); |
2411 | dummy.release (); |
2412 | |
2413 | if (first_store) |
2414 | { |
2415 | dummy.create (nelems: 2); |
2416 | vect_get_peeling_costs_all_drs (loop_vinfo, dr0_info: first_store, |
2417 | inside_cost: &store_inside_cost, |
2418 | outside_cost: &store_outside_cost, |
2419 | body_cost_vec: &dummy, prologue_cost_vec: &dummy, |
2420 | npeel: estimated_npeels); |
2421 | dummy.release (); |
2422 | } |
2423 | else |
2424 | { |
2425 | store_inside_cost = INT_MAX; |
2426 | store_outside_cost = INT_MAX; |
2427 | } |
2428 | |
2429 | if (load_inside_cost > store_inside_cost |
2430 | || (load_inside_cost == store_inside_cost |
2431 | && load_outside_cost > store_outside_cost)) |
2432 | { |
2433 | dr0_info = first_store; |
2434 | dr0_same_align_drs = first_store_same_align_drs; |
2435 | peel_for_unknown_alignment.inside_cost = store_inside_cost; |
2436 | peel_for_unknown_alignment.outside_cost = store_outside_cost; |
2437 | } |
2438 | else |
2439 | { |
2440 | peel_for_unknown_alignment.inside_cost = load_inside_cost; |
2441 | peel_for_unknown_alignment.outside_cost = load_outside_cost; |
2442 | } |
2443 | |
2444 | stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec; |
2445 | prologue_cost_vec.create (nelems: 2); |
2446 | epilogue_cost_vec.create (nelems: 2); |
2447 | |
2448 | int dummy2; |
2449 | peel_for_unknown_alignment.outside_cost += vect_get_known_peeling_cost |
2450 | (loop_vinfo, estimated_npeels, &dummy2, |
2451 | &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), |
2452 | &prologue_cost_vec, &epilogue_cost_vec); |
2453 | |
2454 | prologue_cost_vec.release (); |
2455 | epilogue_cost_vec.release (); |
2456 | |
2457 | peel_for_unknown_alignment.peel_info.count = dr0_same_align_drs + 1; |
2458 | } |
2459 | |
2460 | peel_for_unknown_alignment.peel_info.npeel = 0; |
2461 | peel_for_unknown_alignment.peel_info.dr_info = dr0_info; |
2462 | |
2463 | best_peel = peel_for_unknown_alignment; |
2464 | |
2465 | peel_for_known_alignment.inside_cost = INT_MAX; |
2466 | peel_for_known_alignment.outside_cost = INT_MAX; |
2467 | peel_for_known_alignment.peel_info.count = 0; |
2468 | peel_for_known_alignment.peel_info.dr_info = NULL; |
2469 | |
2470 | if (do_peeling && one_misalignment_known) |
2471 | { |
2472 | /* Peeling is possible, but there is no data access that is not supported |
2473 | unless aligned. So we try to choose the best possible peeling from |
2474 | the hash table. */ |
2475 | peel_for_known_alignment = vect_peeling_hash_choose_best_peeling |
2476 | (peeling_htab: &peeling_htab, loop_vinfo); |
2477 | } |
2478 | |
2479 | /* Compare costs of peeling for known and unknown alignment. */ |
2480 | if (peel_for_known_alignment.peel_info.dr_info != NULL |
2481 | && peel_for_unknown_alignment.inside_cost |
2482 | >= peel_for_known_alignment.inside_cost) |
2483 | { |
2484 | best_peel = peel_for_known_alignment; |
2485 | |
2486 | /* If the best peeling for known alignment has NPEEL == 0, perform no |
2487 | peeling at all except if there is an unsupportable dr that we can |
2488 | align. */ |
2489 | if (best_peel.peel_info.npeel == 0 && !one_dr_unsupportable) |
2490 | do_peeling = false; |
2491 | } |
2492 | |
2493 | /* If there is an unsupportable data ref, prefer this over all choices so far |
2494 | since we'd have to discard a chosen peeling except when it accidentally |
2495 | aligned the unsupportable data ref. */ |
2496 | if (one_dr_unsupportable) |
2497 | dr0_info = unsupportable_dr_info; |
2498 | else if (do_peeling) |
2499 | { |
2500 | /* Calculate the penalty for no peeling, i.e. leaving everything as-is. |
2501 | TODO: Use nopeel_outside_cost or get rid of it? */ |
2502 | unsigned nopeel_inside_cost = 0; |
2503 | unsigned nopeel_outside_cost = 0; |
2504 | |
2505 | stmt_vector_for_cost dummy; |
2506 | dummy.create (nelems: 2); |
2507 | vect_get_peeling_costs_all_drs (loop_vinfo, NULL, inside_cost: &nopeel_inside_cost, |
2508 | outside_cost: &nopeel_outside_cost, body_cost_vec: &dummy, prologue_cost_vec: &dummy, npeel: 0); |
2509 | dummy.release (); |
2510 | |
2511 | /* Add epilogue costs. As we do not peel for alignment here, no prologue |
2512 | costs will be recorded. */ |
2513 | stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec; |
2514 | prologue_cost_vec.create (nelems: 2); |
2515 | epilogue_cost_vec.create (nelems: 2); |
2516 | |
2517 | int dummy2; |
2518 | nopeel_outside_cost += vect_get_known_peeling_cost |
2519 | (loop_vinfo, 0, &dummy2, |
2520 | &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), |
2521 | &prologue_cost_vec, &epilogue_cost_vec); |
2522 | |
2523 | prologue_cost_vec.release (); |
2524 | epilogue_cost_vec.release (); |
2525 | |
2526 | npeel = best_peel.peel_info.npeel; |
2527 | dr0_info = best_peel.peel_info.dr_info; |
2528 | |
2529 | /* If not peeling is no more expensive than the best peeling found
2530 | so far, don't perform any peeling. */
2531 | if (nopeel_inside_cost <= best_peel.inside_cost) |
2532 | do_peeling = false; |
2533 | } |
2534 | |
2535 | if (do_peeling) |
2536 | { |
2537 | stmt_vec_info stmt_info = dr0_info->stmt; |
2538 | if (known_alignment_for_access_p (dr_info: dr0_info, |
2539 | STMT_VINFO_VECTYPE (stmt_info))) |
2540 | { |
2541 | bool negative = tree_int_cst_compare (DR_STEP (dr0_info->dr), |
2542 | size_zero_node) < 0; |
2543 | if (!npeel) |
2544 | { |
2545 | /* Since it's known at compile time, compute the number of |
2546 | iterations in the peeled loop (the peeling factor) for use in |
2547 | updating DR_MISALIGNMENT values. The peeling factor is the |
2548 | vectorization factor minus the misalignment as an element |
2549 | count. */ |
2550 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
2551 | poly_int64 off = 0; |
2552 | if (negative) |
2553 | off = ((TYPE_VECTOR_SUBPARTS (node: vectype) - 1) |
2554 | * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))); |
2555 | unsigned int mis |
2556 | = dr_misalignment (dr_info: dr0_info, vectype, offset: off); |
2557 | mis = negative ? mis : -mis; |
2558 | /* If known_alignment_for_access_p then we have set |
2559 | DR_MISALIGNMENT which is only done if we know it at compile
2560 | time, so it is safe to assume target alignment is constant. |
2561 | */ |
2562 | unsigned int target_align = |
2563 | DR_TARGET_ALIGNMENT (dr0_info).to_constant (); |
2564 | npeel = ((mis & (target_align - 1)) |
2565 | / vect_get_scalar_dr_size (dr_info: dr0_info)); |
2566 | } |
2567 | |
2568 | /* For interleaved data access every iteration accesses all the |
2569 | members of the group, therefore we divide the number of iterations |
2570 | by the group size. */ |
2571 | if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) |
2572 | npeel /= DR_GROUP_SIZE (stmt_info); |
2573 | |
2574 | if (dump_enabled_p ()) |
2575 | dump_printf_loc (MSG_NOTE, vect_location, |
2576 | "Try peeling by %d\n" , npeel); |
2577 | } |
2578 | |
2579 | /* Ensure that all datarefs can be vectorized after the peel. */ |
2580 | if (!vect_peeling_supportable (loop_vinfo, dr0_info, npeel)) |
2581 | do_peeling = false; |
2582 | |
2583 | /* Check if all datarefs are supportable and log. */ |
2584 | if (do_peeling |
2585 | && npeel == 0 |
2586 | && known_alignment_for_access_p (dr_info: dr0_info, |
2587 | STMT_VINFO_VECTYPE (stmt_info))) |
2588 | return opt_result::success (); |
2589 | |
2590 | /* Cost model #1 - honor --param vect-max-peeling-for-alignment. */ |
2591 | if (do_peeling) |
2592 | { |
2593 | unsigned max_allowed_peel |
2594 | = param_vect_max_peeling_for_alignment; |
2595 | if (loop_cost_model (loop) <= VECT_COST_MODEL_CHEAP) |
2596 | max_allowed_peel = 0; |
2597 | if (max_allowed_peel != (unsigned)-1) |
2598 | { |
2599 | unsigned max_peel = npeel; |
2600 | if (max_peel == 0) |
2601 | { |
2602 | poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr0_info); |
2603 | unsigned HOST_WIDE_INT target_align_c; |
2604 | if (target_align.is_constant (const_value: &target_align_c)) |
2605 | max_peel = |
2606 | target_align_c / vect_get_scalar_dr_size (dr_info: dr0_info) - 1; |
2607 | else |
2608 | { |
2609 | do_peeling = false; |
2610 | if (dump_enabled_p ()) |
2611 | dump_printf_loc (MSG_NOTE, vect_location, |
2612 | "Disable peeling, max peels set and vector" |
2613 | " alignment unknown\n" ); |
2614 | } |
2615 | } |
2616 | if (max_peel > max_allowed_peel) |
2617 | { |
2618 | do_peeling = false; |
2619 | if (dump_enabled_p ()) |
2620 | dump_printf_loc (MSG_NOTE, vect_location, |
2621 | "Disable peeling, max peels reached: %d\n" , max_peel); |
2622 | } |
2623 | } |
2624 | } |
2625 | |
2626 | /* Cost model #2 - if peeling may result in a remaining loop not |
2627 | iterating enough to be vectorized then do not peel. Since this |
2628 | is a cost heuristic rather than a correctness decision, use the |
2629 | most likely runtime value for variable vectorization factors. */ |
2630 | if (do_peeling |
2631 | && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) |
2632 | { |
2633 | unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo); |
2634 | unsigned int max_peel = npeel == 0 ? assumed_vf - 1 : npeel; |
2635 | if ((unsigned HOST_WIDE_INT) LOOP_VINFO_INT_NITERS (loop_vinfo) |
2636 | < assumed_vf + max_peel) |
2637 | do_peeling = false; |
2638 | } |
2639 | |
2640 | if (do_peeling) |
2641 | { |
2642 | /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i. |
2643 | If the misalignment of DR_i is identical to that of dr0 then set |
2644 | DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and |
2645 | dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i) |
2646 | by the peeling factor times the element size of DR_i (MOD the |
2647 | vectorization factor times the size). Otherwise, the |
2648 | misalignment of DR_i must be set to unknown. */ |
2649 | FOR_EACH_VEC_ELT (datarefs, i, dr) |
2650 | if (dr != dr0_info->dr) |
2651 | { |
2652 | dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr); |
2653 | if (!vect_relevant_for_alignment_p (dr_info)) |
2654 | continue; |
2655 | |
2656 | vect_update_misalignment_for_peel (dr_info, dr_peel_info: dr0_info, npeel); |
2657 | } |
2658 | |
2659 | LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0_info; |
2660 | if (npeel) |
2661 | LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel; |
2662 | else |
2663 | LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = -1; |
2664 | SET_DR_MISALIGNMENT (dr0_info, |
2665 | vect_dr_misalign_for_aligned_access (dr0_info)); |
2666 | if (dump_enabled_p ()) |
2667 | { |
2668 | dump_printf_loc (MSG_NOTE, vect_location, |
2669 | "Alignment of access forced using peeling.\n" ); |
2670 | dump_printf_loc (MSG_NOTE, vect_location, |
2671 | "Peeling for alignment will be applied.\n" ); |
2672 | } |
2673 | |
2674 | /* The inside-loop cost will be accounted for in vectorizable_load |
2675 | and vectorizable_store correctly with adjusted alignments. |
2676 | Drop the body_cost_vec on the floor here. */
2677 | return opt_result::success (); |
2678 | } |
2679 | } |
2680 | |
2681 | /* (2) Versioning to force alignment. */ |
2682 | |
2683 | /* Try versioning if: |
2684 | 1) optimize loop for speed and the cost-model is not cheap |
2685 | 2) there is at least one unsupported misaligned data ref with an unknown |
2686 | misalignment, and |
2687 | 3) all misaligned data refs with a known misalignment are supported, and |
2688 | 4) the number of runtime alignment checks is within reason. */ |
2689 | |
2690 | do_versioning |
2691 | = (optimize_loop_nest_for_speed_p (loop) |
2692 | && !loop->inner /* FORNOW */ |
2693 | && loop_cost_model (loop) > VECT_COST_MODEL_CHEAP); |
2694 | |
2695 | if (do_versioning) |
2696 | { |
2697 | FOR_EACH_VEC_ELT (datarefs, i, dr) |
2698 | { |
2699 | dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr); |
2700 | if (!vect_relevant_for_alignment_p (dr_info)) |
2701 | continue; |
2702 | |
2703 | stmt_vec_info stmt_info = dr_info->stmt; |
2704 | if (STMT_VINFO_STRIDED_P (stmt_info)) |
2705 | { |
2706 | do_versioning = false; |
2707 | break; |
2708 | } |
2709 | |
2710 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
2711 | bool negative = tree_int_cst_compare (DR_STEP (dr), |
2712 | size_zero_node) < 0; |
2713 | poly_int64 off = 0; |
2714 | if (negative) |
2715 | off = ((TYPE_VECTOR_SUBPARTS (node: vectype) - 1) |
2716 | * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))); |
2717 | int misalignment; |
2718 | if ((misalignment = dr_misalignment (dr_info, vectype, offset: off)) == 0) |
2719 | continue; |
2720 | |
2721 | enum dr_alignment_support supportable_dr_alignment |
2722 | = vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype, |
2723 | misalignment); |
2724 | if (supportable_dr_alignment == dr_unaligned_unsupported) |
2725 | { |
2726 | if (misalignment != DR_MISALIGNMENT_UNKNOWN |
2727 | || (LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length () |
2728 | >= (unsigned) param_vect_max_version_for_alignment_checks)) |
2729 | { |
2730 | do_versioning = false; |
2731 | break; |
2732 | } |
2733 | |
2734 | /* At present we don't support versioning for alignment |
2735 | with variable VF, since there's no guarantee that the |
2736 | VF is a power of two. We could relax this if we added |
2737 | a way of enforcing a power-of-two size. */ |
2738 | unsigned HOST_WIDE_INT size; |
2739 | if (!GET_MODE_SIZE (TYPE_MODE (vectype)).is_constant (const_value: &size)) |
2740 | { |
2741 | do_versioning = false; |
2742 | break; |
2743 | } |
2744 | |
2745 | /* Forcing alignment in the first iteration is no good if |
2746 | we don't keep it across iterations. For now, just disable |
2747 | versioning in this case. |
2748 | ?? We could actually unroll the loop to achieve the required |
2749 | overall step alignment, and forcing the alignment could be |
2750 | done by doing some iterations of the non-vectorized loop. */ |
2751 | if (!multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo) |
2752 | * DR_STEP_ALIGNMENT (dr), |
2753 | DR_TARGET_ALIGNMENT (dr_info))) |
2754 | { |
2755 | do_versioning = false; |
2756 | break; |
2757 | } |
2758 | |
2759 | /* The rightmost bits of an aligned address must be zeros. |
2760 | Construct the mask needed for this test. For example, |
2761 | GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the |
2762 | mask must be 15 = 0xf. */ |
2763 | int mask = size - 1; |
2764 | |
2765 | /* FORNOW: use the same mask to test all potentially unaligned |
2766 | references in the loop. */ |
2767 | if (LOOP_VINFO_PTR_MASK (loop_vinfo) |
2768 | && LOOP_VINFO_PTR_MASK (loop_vinfo) != mask) |
2769 | { |
2770 | do_versioning = false; |
2771 | break; |
2772 | } |
2773 | |
2774 | LOOP_VINFO_PTR_MASK (loop_vinfo) = mask; |
2775 | LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (obj: stmt_info); |
2776 | } |
2777 | } |
2778 | |
2779 | /* Versioning requires at least one misaligned data reference. */ |
2780 | if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)) |
2781 | do_versioning = false; |
2782 | else if (!do_versioning) |
2783 | LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (size: 0); |
2784 | } |
2785 | |
2786 | if (do_versioning) |
2787 | { |
2788 | const vec<stmt_vec_info> &may_misalign_stmts |
2789 | = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo); |
2790 | stmt_vec_info stmt_info; |
2791 | |
2792 | /* It can now be assumed that the data references in the statements |
2793 | in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version |
2794 | of the loop being vectorized. */ |
2795 | FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info) |
2796 | { |
2797 | dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); |
2798 | SET_DR_MISALIGNMENT (dr_info, |
2799 | vect_dr_misalign_for_aligned_access (dr_info)); |
2800 | if (dump_enabled_p ()) |
2801 | dump_printf_loc (MSG_NOTE, vect_location, |
2802 | "Alignment of access forced using versioning.\n" ); |
2803 | } |
2804 | |
2805 | if (dump_enabled_p ()) |
2806 | dump_printf_loc (MSG_NOTE, vect_location, |
2807 | "Versioning for alignment will be applied.\n" ); |
2808 | |
2809 | /* Peeling and versioning can't be done together at this time. */ |
2810 | gcc_assert (! (do_peeling && do_versioning)); |
2811 | |
2812 | return opt_result::success (); |
2813 | } |
2814 | |
2815 | /* This point is reached if neither peeling nor versioning is being done. */ |
2816 | gcc_assert (! (do_peeling || do_versioning)); |
2817 | |
2818 | return opt_result::success (); |
2819 | } |
2820 | |
2821 | |
2822 | /* Function vect_analyze_data_refs_alignment |
2823 | |
2824 | Analyze the alignment of the data-references in the loop. |
2825 | Return FALSE if a data reference is found that cannot be vectorized. */ |
2826 | |
2827 | opt_result |
2828 | vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo) |
2829 | { |
2830 | DUMP_VECT_SCOPE ("vect_analyze_data_refs_alignment" ); |
2831 | |
2832 | vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); |
2833 | struct data_reference *dr; |
2834 | unsigned int i; |
2835 | |
2836 | vect_record_base_alignments (vinfo: loop_vinfo); |
2837 | FOR_EACH_VEC_ELT (datarefs, i, dr) |
2838 | { |
2839 | dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr); |
2840 | if (STMT_VINFO_VECTORIZABLE (dr_info->stmt)) |
2841 | { |
2842 | if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt) |
2843 | && DR_GROUP_FIRST_ELEMENT (dr_info->stmt) != dr_info->stmt) |
2844 | continue; |
2845 | vect_compute_data_ref_alignment (vinfo: loop_vinfo, dr_info, |
2846 | STMT_VINFO_VECTYPE (dr_info->stmt)); |
2847 | } |
2848 | } |
2849 | |
2850 | return opt_result::success (); |
2851 | } |
2852 | |
2853 | |
2854 | /* Analyze alignment of DRs of stmts in NODE. */ |
2855 | |
2856 | static bool |
2857 | vect_slp_analyze_node_alignment (vec_info *vinfo, slp_tree node) |
2858 | { |
2859 | /* Alignment is maintained in the first element of the group. */ |
2860 | stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0]; |
2861 | first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info); |
2862 | dr_vec_info *dr_info = STMT_VINFO_DR_INFO (first_stmt_info); |
2863 | tree vectype = SLP_TREE_VECTYPE (node); |
2864 | poly_uint64 vector_alignment |
2865 | = exact_div (a: targetm.vectorize.preferred_vector_alignment (vectype), |
2866 | BITS_PER_UNIT); |
2867 | if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED) |
2868 | vect_compute_data_ref_alignment (vinfo, dr_info, SLP_TREE_VECTYPE (node)); |
2869 | /* Re-analyze alignment when we're facing a vectorization with a bigger |
2870 | alignment requirement. */ |
2871 | else if (known_lt (dr_info->target_alignment, vector_alignment)) |
2872 | { |
2873 | poly_uint64 old_target_alignment = dr_info->target_alignment; |
2874 | int old_misalignment = dr_info->misalignment; |
2875 | vect_compute_data_ref_alignment (vinfo, dr_info, SLP_TREE_VECTYPE (node)); |
2876 | /* But keep knowledge about a smaller alignment. */ |
2877 | if (old_misalignment != DR_MISALIGNMENT_UNKNOWN |
2878 | && dr_info->misalignment == DR_MISALIGNMENT_UNKNOWN) |
2879 | { |
2880 | dr_info->target_alignment = old_target_alignment; |
2881 | dr_info->misalignment = old_misalignment; |
2882 | } |
2883 | } |
2884 |   /* If we ever face unordered target alignments the first one analyzed wins
2885 |      and the other will become unknown in dr_misalignment.  */
2886 | return true; |
2887 | } |
2888 | |
2889 | /* Function vect_slp_analyze_instance_alignment |
2890 | |
2891 | Analyze the alignment of the data-references in the SLP instance. |
2892 | Return FALSE if a data reference is found that cannot be vectorized. */ |
2893 | |
2894 | bool |
2895 | vect_slp_analyze_instance_alignment (vec_info *vinfo, |
2896 | slp_instance instance) |
2897 | { |
2898 | DUMP_VECT_SCOPE ("vect_slp_analyze_instance_alignment" ); |
2899 | |
2900 | slp_tree node; |
2901 | unsigned i; |
2902 | FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, node) |
2903 | if (! vect_slp_analyze_node_alignment (vinfo, node)) |
2904 | return false; |
2905 | |
2906 | if (SLP_INSTANCE_KIND (instance) == slp_inst_kind_store |
2907 | && ! vect_slp_analyze_node_alignment |
2908 | (vinfo, SLP_INSTANCE_TREE (instance))) |
2909 | return false; |
2910 | |
2911 | return true; |
2912 | } |
2913 | |
2914 | |
2915 | /* Analyze groups of accesses: check that DR_INFO belongs to a group of |
2916 | accesses of legal size, step, etc. Detect gaps, single element |
2917 | interleaving, and other special cases. Set grouped access info. |
2918 | Collect groups of strided stores for further use in SLP analysis. |
2919 | Worker for vect_analyze_group_access. */ |
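
/* As a purely illustrative (hypothetical) example, for the loop

     for (i = 0; i < n; i++)
       sum += a[3 * i];

   the step is three elements, so GROUPSIZE is 3 and the load below is
   recorded as single element interleaving with DR_GROUP_GAP of 2,
   i.e. two unused elements follow each accessed element.  */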
2920 | |
2921 | static bool |
2922 | vect_analyze_group_access_1 (vec_info *vinfo, dr_vec_info *dr_info) |
2923 | { |
2924 | data_reference *dr = dr_info->dr; |
2925 | tree step = DR_STEP (dr); |
2926 | tree scalar_type = TREE_TYPE (DR_REF (dr)); |
2927 | HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type)); |
2928 | stmt_vec_info stmt_info = dr_info->stmt; |
2929 | loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo); |
2930 | bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo); |
2931 | HOST_WIDE_INT dr_step = -1; |
2932 | HOST_WIDE_INT groupsize, last_accessed_element = 1; |
2933 | bool slp_impossible = false; |
2934 | |
2935 | /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the |
2936 | size of the interleaving group (including gaps). */ |
2937 | if (tree_fits_shwi_p (step)) |
2938 | { |
2939 | dr_step = tree_to_shwi (step); |
2940 | /* Check that STEP is a multiple of type size. Otherwise there is |
2941 | a non-element-sized gap at the end of the group which we |
2942 | cannot represent in DR_GROUP_GAP or DR_GROUP_SIZE. |
2943 | ??? As we can handle non-constant step fine here we should |
2944 | simply remove uses of DR_GROUP_GAP between the last and first |
2945 | element and instead rely on DR_STEP. DR_GROUP_SIZE then would |
2946 | simply not include that gap. */ |
2947 | if ((dr_step % type_size) != 0) |
2948 | { |
2949 | if (dump_enabled_p ()) |
2950 | dump_printf_loc (MSG_NOTE, vect_location, |
2951 | "Step %T is not a multiple of the element size" |
2952 | " for %T\n" , |
2953 | step, DR_REF (dr)); |
2954 | return false; |
2955 | } |
2956 | groupsize = absu_hwi (x: dr_step) / type_size; |
2957 | } |
2958 | else |
2959 | groupsize = 0; |
2960 | |
2961 |   /* A non-consecutive access is possible only as part of interleaving.  */
2962 | if (!DR_GROUP_FIRST_ELEMENT (stmt_info)) |
2963 | { |
2964 |       /* Check if this DR is part of an interleaving group of which it is
2965 |          the single element accessed in the loop.  */
2966 | |
2967 | /* Gaps are supported only for loads. STEP must be a multiple of the type |
2968 | size. */ |
2969 | if (DR_IS_READ (dr) |
2970 | && (dr_step % type_size) == 0 |
2971 | && groupsize > 0 |
2972 | /* This could be UINT_MAX but as we are generating code in a very |
2973 | inefficient way we have to cap earlier. |
2974 | See PR91403 for example. */ |
2975 | && groupsize <= 4096) |
2976 | { |
2977 | DR_GROUP_FIRST_ELEMENT (stmt_info) = stmt_info; |
2978 | DR_GROUP_SIZE (stmt_info) = groupsize; |
2979 | DR_GROUP_GAP (stmt_info) = groupsize - 1; |
2980 | if (dump_enabled_p ()) |
2981 | dump_printf_loc (MSG_NOTE, vect_location, |
2982 | "Detected single element interleaving %T" |
2983 | " step %T\n" , |
2984 | DR_REF (dr), step); |
2985 | |
2986 | return true; |
2987 | } |
2988 | |
2989 | if (dump_enabled_p ()) |
2990 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2991 | "not consecutive access %G" , stmt_info->stmt); |
2992 | |
2993 | if (bb_vinfo) |
2994 | { |
2995 | /* Mark the statement as unvectorizable. */ |
2996 | STMT_VINFO_VECTORIZABLE (stmt_info) = false; |
2997 | return true; |
2998 | } |
2999 | |
3000 | if (dump_enabled_p ()) |
3001 | dump_printf_loc (MSG_NOTE, vect_location, "using strided accesses\n" ); |
3002 | STMT_VINFO_STRIDED_P (stmt_info) = true; |
3003 | return true; |
3004 | } |
3005 | |
3006 | if (DR_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info) |
3007 | { |
3008 | /* First stmt in the interleaving chain. Check the chain. */ |
3009 | stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info); |
3010 | struct data_reference *data_ref = dr; |
3011 | unsigned int count = 1; |
3012 | tree prev_init = DR_INIT (data_ref); |
3013 | HOST_WIDE_INT diff, gaps = 0; |
3014 | |
3015 | /* By construction, all group members have INTEGER_CST DR_INITs. */ |
3016 | while (next) |
3017 | { |
3018 | /* We never have the same DR multiple times. */ |
3019 | gcc_assert (tree_int_cst_compare (DR_INIT (data_ref), |
3020 | DR_INIT (STMT_VINFO_DATA_REF (next))) != 0); |
3021 | |
3022 | data_ref = STMT_VINFO_DATA_REF (next); |
3023 | |
3024 | /* All group members have the same STEP by construction. */ |
3025 | gcc_checking_assert (operand_equal_p (DR_STEP (data_ref), step, 0)); |
3026 | |
3027 | /* Check that the distance between two accesses is equal to the type |
3028 | size. Otherwise, we have gaps. */ |
3029 | diff = (TREE_INT_CST_LOW (DR_INIT (data_ref)) |
3030 | - TREE_INT_CST_LOW (prev_init)) / type_size; |
3031 | if (diff < 1 || diff > UINT_MAX) |
3032 | { |
3033 | /* For artificial testcases with array accesses with large |
3034 | constant indices we can run into overflow issues which |
3035 | can end up fooling the groupsize constraint below so |
3036 | check the individual gaps (which are represented as |
3037 | unsigned int) as well. */ |
3038 | if (dump_enabled_p ()) |
3039 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3040 | "interleaved access with gap larger " |
3041 | "than representable\n" ); |
3042 | return false; |
3043 | } |
3044 | if (diff != 1) |
3045 | { |
3046 | /* FORNOW: SLP of accesses with gaps is not supported. */ |
3047 | slp_impossible = true; |
3048 | if (DR_IS_WRITE (data_ref)) |
3049 | { |
3050 | if (dump_enabled_p ()) |
3051 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3052 | "interleaved store with gaps\n" ); |
3053 | return false; |
3054 | } |
3055 | |
3056 | gaps += diff - 1; |
3057 | } |
3058 | |
3059 | last_accessed_element += diff; |
3060 | |
3061 | /* Store the gap from the previous member of the group. If there is no |
3062 | gap in the access, DR_GROUP_GAP is always 1. */ |
3063 | DR_GROUP_GAP (next) = diff; |
3064 | |
3065 | prev_init = DR_INIT (data_ref); |
3066 | next = DR_GROUP_NEXT_ELEMENT (next); |
3067 | /* Count the number of data-refs in the chain. */ |
3068 | count++; |
3069 | } |
3070 | |
3071 | if (groupsize == 0) |
3072 | groupsize = count + gaps; |
3073 | |
3074 | /* This could be UINT_MAX but as we are generating code in a very |
3075 | inefficient way we have to cap earlier. See PR78699 for example. */ |
3076 | if (groupsize > 4096) |
3077 | { |
3078 | if (dump_enabled_p ()) |
3079 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3080 | "group is too large\n" ); |
3081 | return false; |
3082 | } |
3083 | |
3084 | /* Check that the size of the interleaving is equal to count for stores, |
3085 | i.e., that there are no gaps. */ |
3086 | if (groupsize != count |
3087 | && !DR_IS_READ (dr)) |
3088 | { |
3089 | groupsize = count; |
3090 | STMT_VINFO_STRIDED_P (stmt_info) = true; |
3091 | } |
3092 | |
3093 | /* If there is a gap after the last load in the group it is the |
3094 | difference between the groupsize and the last accessed |
3095 | element. |
3096 | When there is no gap, this difference should be 0. */ |
3097 | DR_GROUP_GAP (stmt_info) = groupsize - last_accessed_element; |
3098 | |
3099 | DR_GROUP_SIZE (stmt_info) = groupsize; |
3100 | if (dump_enabled_p ()) |
3101 | { |
3102 | dump_printf_loc (MSG_NOTE, vect_location, |
3103 | "Detected interleaving " ); |
3104 | if (DR_IS_READ (dr)) |
3105 | dump_printf (MSG_NOTE, "load " ); |
3106 | else if (STMT_VINFO_STRIDED_P (stmt_info)) |
3107 | dump_printf (MSG_NOTE, "strided store " ); |
3108 | else |
3109 | dump_printf (MSG_NOTE, "store " ); |
3110 | dump_printf (MSG_NOTE, "of size %u\n" , |
3111 | (unsigned)groupsize); |
3112 | dump_printf_loc (MSG_NOTE, vect_location, "\t%G" , stmt_info->stmt); |
3113 | next = DR_GROUP_NEXT_ELEMENT (stmt_info); |
3114 | while (next) |
3115 | { |
3116 | if (DR_GROUP_GAP (next) != 1) |
3117 | dump_printf_loc (MSG_NOTE, vect_location, |
3118 | "\t<gap of %d elements>\n" , |
3119 | DR_GROUP_GAP (next) - 1); |
3120 | dump_printf_loc (MSG_NOTE, vect_location, "\t%G" , next->stmt); |
3121 | next = DR_GROUP_NEXT_ELEMENT (next); |
3122 | } |
3123 | if (DR_GROUP_GAP (stmt_info) != 0) |
3124 | dump_printf_loc (MSG_NOTE, vect_location, |
3125 | "\t<gap of %d elements>\n" , |
3126 | DR_GROUP_GAP (stmt_info)); |
3127 | } |
3128 | |
3129 |       /* SLP: create an SLP data structure for every interleaving group of
3130 |          stores for further analysis in vect_analyze_slp.  */
3131 | if (DR_IS_WRITE (dr) && !slp_impossible) |
3132 | { |
3133 | if (loop_vinfo) |
3134 | LOOP_VINFO_GROUPED_STORES (loop_vinfo).safe_push (obj: stmt_info); |
3135 | if (bb_vinfo) |
3136 | BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (obj: stmt_info); |
3137 | } |
3138 | } |
3139 | |
3140 | return true; |
3141 | } |
3142 | |
3143 | /* Analyze groups of accesses: check that DR_INFO belongs to a group of |
3144 | accesses of legal size, step, etc. Detect gaps, single element |
3145 | interleaving, and other special cases. Set grouped access info. |
3146 | Collect groups of strided stores for further use in SLP analysis. */ |
3147 | |
3148 | static bool |
3149 | vect_analyze_group_access (vec_info *vinfo, dr_vec_info *dr_info) |
3150 | { |
3151 | if (!vect_analyze_group_access_1 (vinfo, dr_info)) |
3152 | { |
3153 | /* Dissolve the group if present. */ |
3154 | stmt_vec_info stmt_info = DR_GROUP_FIRST_ELEMENT (dr_info->stmt); |
3155 | while (stmt_info) |
3156 | { |
3157 | stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info); |
3158 | DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL; |
3159 | DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL; |
3160 | stmt_info = next; |
3161 | } |
3162 | return false; |
3163 | } |
3164 | return true; |
3165 | } |
3166 | |
3167 | /* Analyze the access pattern of the data-reference DR_INFO. |
3168 | In case of non-consecutive accesses call vect_analyze_group_access() to |
3169 | analyze groups of accesses. */ |
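
/* For example (purely illustrative): with 4-byte elements, a load from
   a[i] has DR_STEP 4 and is treated as a simple consecutive access below,
   whereas a load from a[2 * i] has DR_STEP 8 and is handed to
   vect_analyze_group_access to look for an interleaving group.  */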
3170 | |
3171 | static bool |
3172 | vect_analyze_data_ref_access (vec_info *vinfo, dr_vec_info *dr_info) |
3173 | { |
3174 | data_reference *dr = dr_info->dr; |
3175 | tree step = DR_STEP (dr); |
3176 | tree scalar_type = TREE_TYPE (DR_REF (dr)); |
3177 | stmt_vec_info stmt_info = dr_info->stmt; |
3178 | loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo); |
3179 | class loop *loop = NULL; |
3180 | |
3181 | if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) |
3182 | return true; |
3183 | |
3184 | if (loop_vinfo) |
3185 | loop = LOOP_VINFO_LOOP (loop_vinfo); |
3186 | |
3187 | if (loop_vinfo && !step) |
3188 | { |
3189 | if (dump_enabled_p ()) |
3190 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3191 | "bad data-ref access in loop\n" ); |
3192 | return false; |
3193 | } |
3194 | |
3195 | /* Allow loads with zero step in inner-loop vectorization. */ |
3196 | if (loop_vinfo && integer_zerop (step)) |
3197 | { |
3198 | DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL; |
3199 | if (!nested_in_vect_loop_p (loop, stmt_info)) |
3200 | return DR_IS_READ (dr); |
3201 | /* Allow references with zero step for outer loops marked |
3202 | with pragma omp simd only - it guarantees absence of |
3203 | loop-carried dependencies between inner loop iterations. */ |
3204 | if (loop->safelen < 2) |
3205 | { |
3206 | if (dump_enabled_p ()) |
3207 | dump_printf_loc (MSG_NOTE, vect_location, |
3208 | "zero step in inner loop of nest\n" ); |
3209 | return false; |
3210 | } |
3211 | } |
3212 | |
3213 | if (loop && nested_in_vect_loop_p (loop, stmt_info)) |
3214 | { |
3215 | /* Interleaved accesses are not yet supported within outer-loop |
3216 | vectorization for references in the inner-loop. */ |
3217 | DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL; |
3218 | |
3219 | /* For the rest of the analysis we use the outer-loop step. */ |
3220 | step = STMT_VINFO_DR_STEP (stmt_info); |
3221 | if (integer_zerop (step)) |
3222 | { |
3223 | if (dump_enabled_p ()) |
3224 | dump_printf_loc (MSG_NOTE, vect_location, |
3225 | "zero step in outer loop.\n" ); |
3226 | return DR_IS_READ (dr); |
3227 | } |
3228 | } |
3229 | |
3230 | /* Consecutive? */ |
3231 | if (TREE_CODE (step) == INTEGER_CST) |
3232 | { |
3233 | HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step); |
3234 | if (!tree_int_cst_compare (t1: step, TYPE_SIZE_UNIT (scalar_type)) |
3235 | || (dr_step < 0 |
3236 | && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step))) |
3237 | { |
3238 | /* Mark that it is not interleaving. */ |
3239 | DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL; |
3240 | return true; |
3241 | } |
3242 | } |
3243 | |
3244 | if (loop && nested_in_vect_loop_p (loop, stmt_info)) |
3245 | { |
3246 | if (dump_enabled_p ()) |
3247 | dump_printf_loc (MSG_NOTE, vect_location, |
3248 | "grouped access in outer loop.\n" ); |
3249 | return false; |
3250 | } |
3251 | |
3252 | |
3253 | /* Assume this is a DR handled by non-constant strided load case. */ |
3254 | if (TREE_CODE (step) != INTEGER_CST) |
3255 | return (STMT_VINFO_STRIDED_P (stmt_info) |
3256 | && (!STMT_VINFO_GROUPED_ACCESS (stmt_info) |
3257 | || vect_analyze_group_access (vinfo, dr_info))); |
3258 | |
3259 | /* Not consecutive access - check if it's a part of interleaving group. */ |
3260 | return vect_analyze_group_access (vinfo, dr_info); |
3261 | } |
3262 | |
3263 | /* Comparison function to sort data-references DRA and DRB into chunks
3264 |    suitable for grouping.  */
3265 | |
3266 | static int |
3267 | dr_group_sort_cmp (const void *dra_, const void *drb_) |
3268 | { |
3269 | dr_vec_info *dra_info = *(dr_vec_info **)const_cast<void *>(dra_); |
3270 | dr_vec_info *drb_info = *(dr_vec_info **)const_cast<void *>(drb_); |
3271 | data_reference_p dra = dra_info->dr; |
3272 | data_reference_p drb = drb_info->dr; |
3273 | int cmp; |
3274 | |
3275 | /* Stabilize sort. */ |
3276 | if (dra == drb) |
3277 | return 0; |
3278 | |
3279 |   /* DRs with different group IDs never belong to the same group.  */
3280 | if (dra_info->group != drb_info->group) |
3281 | return dra_info->group < drb_info->group ? -1 : 1; |
3282 | |
3283 | /* Ordering of DRs according to base. */ |
3284 | cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra), |
3285 | DR_BASE_ADDRESS (drb)); |
3286 | if (cmp != 0) |
3287 | return cmp; |
3288 | |
3289 | /* And according to DR_OFFSET. */ |
3290 | cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)); |
3291 | if (cmp != 0) |
3292 | return cmp; |
3293 | |
3294 | /* Put reads before writes. */ |
3295 | if (DR_IS_READ (dra) != DR_IS_READ (drb)) |
3296 | return DR_IS_READ (dra) ? -1 : 1; |
3297 | |
3298 |   /* Then sort by access size.  */
3299 | cmp = data_ref_compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))), |
3300 | TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)))); |
3301 | if (cmp != 0) |
3302 | return cmp; |
3303 | |
3304 |   /* And then by step.  */
3305 | cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)); |
3306 | if (cmp != 0) |
3307 | return cmp; |
3308 | |
3309 |   /* Then sort by DR_INIT.  In case of identical DRs sort by stmt UID.  */
3310 | cmp = data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb)); |
3311 | if (cmp == 0) |
3312 | return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1; |
3313 | return cmp; |
3314 | } |
3315 | |
3316 | /* If OP is the result of a conversion, return the unconverted value, |
3317 | otherwise return null. */ |
3318 | |
3319 | static tree |
3320 | strip_conversion (tree op) |
3321 | { |
3322 | if (TREE_CODE (op) != SSA_NAME) |
3323 | return NULL_TREE; |
3324 | gimple *stmt = SSA_NAME_DEF_STMT (op); |
3325 | if (!is_gimple_assign (gs: stmt) |
3326 | || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))) |
3327 | return NULL_TREE; |
3328 | return gimple_assign_rhs1 (gs: stmt); |
3329 | } |
3330 | |
3331 | /* Return true if vectorizable_* routines can handle statements STMT1_INFO |
3332 | and STMT2_INFO being in a single group. When ALLOW_SLP_P, masked loads can |
3333 | be grouped in SLP mode. */ |
3334 | |
3335 | static bool |
3336 | can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info, |
3337 | bool allow_slp_p) |
3338 | { |
3339 | if (gimple_assign_single_p (gs: stmt1_info->stmt)) |
3340 | return gimple_assign_single_p (gs: stmt2_info->stmt); |
3341 | |
3342 | gcall *call1 = dyn_cast <gcall *> (p: stmt1_info->stmt); |
3343 | if (call1 && gimple_call_internal_p (gs: call1)) |
3344 | { |
3345 | /* Check for two masked loads or two masked stores. */ |
3346 | gcall *call2 = dyn_cast <gcall *> (p: stmt2_info->stmt); |
3347 | if (!call2 || !gimple_call_internal_p (gs: call2)) |
3348 | return false; |
3349 | internal_fn ifn = gimple_call_internal_fn (gs: call1); |
3350 | if (ifn != IFN_MASK_LOAD && ifn != IFN_MASK_STORE) |
3351 | return false; |
3352 | if (ifn != gimple_call_internal_fn (gs: call2)) |
3353 | return false; |
3354 | |
3355 | /* Check that the masks are the same. Cope with casts of masks, |
3356 | like those created by build_mask_conversion. */ |
3357 | tree mask1 = gimple_call_arg (gs: call1, index: 2); |
3358 | tree mask2 = gimple_call_arg (gs: call2, index: 2); |
3359 | if (!operand_equal_p (mask1, mask2, flags: 0) && !allow_slp_p) |
3360 | { |
3361 | mask1 = strip_conversion (op: mask1); |
3362 | if (!mask1) |
3363 | return false; |
3364 | mask2 = strip_conversion (op: mask2); |
3365 | if (!mask2) |
3366 | return false; |
3367 | if (!operand_equal_p (mask1, mask2, flags: 0)) |
3368 | return false; |
3369 | } |
3370 | return true; |
3371 | } |
3372 | |
3373 | return false; |
3374 | } |
3375 | |
3376 | /* Function vect_analyze_data_ref_accesses. |
3377 | |
3378 | Analyze the access pattern of all the data references in the loop. |
3379 | |
3380 | FORNOW: the only access pattern that is considered vectorizable is a |
3381 | simple step 1 (consecutive) access. |
3382 | |
3383 | FORNOW: handle only arrays and pointer accesses. */ |
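
/* As a hypothetical illustration, the two stores in

     for (i = 0; i < n; i++)
       {
         p[2 * i] = x;
         p[2 * i + 1] = y;
       }

   share base, offset, step and element size and differ only in DR_INIT,
   so the code below links them into one interleaving chain with the
   first store as DR_GROUP_FIRST_ELEMENT of both.  */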
3384 | |
3385 | opt_result |
3386 | vect_analyze_data_ref_accesses (vec_info *vinfo, |
3387 | vec<int> *dataref_groups) |
3388 | { |
3389 | unsigned int i; |
3390 | vec<data_reference_p> datarefs = vinfo->shared->datarefs; |
3391 | |
3392 | DUMP_VECT_SCOPE ("vect_analyze_data_ref_accesses" ); |
3393 | |
3394 | if (datarefs.is_empty ()) |
3395 | return opt_result::success (); |
3396 | |
3397 |   /* Sort the array of datarefs to make building the interleaving chains
3398 |      linear.  Don't modify the original vector's order; it is needed for
3399 |      determining what dependencies are reversed.  */
3400 | vec<dr_vec_info *> datarefs_copy; |
3401 | datarefs_copy.create (nelems: datarefs.length ()); |
3402 | for (unsigned i = 0; i < datarefs.length (); i++) |
3403 | { |
3404 | dr_vec_info *dr_info = vinfo->lookup_dr (datarefs[i]); |
3405 | /* If the caller computed DR grouping use that, otherwise group by |
3406 | basic blocks. */ |
3407 | if (dataref_groups) |
3408 | dr_info->group = (*dataref_groups)[i]; |
3409 | else |
3410 | dr_info->group = gimple_bb (DR_STMT (datarefs[i]))->index; |
3411 | datarefs_copy.quick_push (obj: dr_info); |
3412 | } |
3413 | datarefs_copy.qsort (dr_group_sort_cmp); |
3414 | hash_set<stmt_vec_info> to_fixup; |
3415 | |
3416 | /* Build the interleaving chains. */ |
3417 | for (i = 0; i < datarefs_copy.length () - 1;) |
3418 | { |
3419 | dr_vec_info *dr_info_a = datarefs_copy[i]; |
3420 | data_reference_p dra = dr_info_a->dr; |
3421 | int dra_group_id = dr_info_a->group; |
3422 | stmt_vec_info stmtinfo_a = dr_info_a->stmt; |
3423 | stmt_vec_info lastinfo = NULL; |
3424 | if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a) |
3425 | || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)) |
3426 | { |
3427 | ++i; |
3428 | continue; |
3429 | } |
3430 | for (i = i + 1; i < datarefs_copy.length (); ++i) |
3431 | { |
3432 | dr_vec_info *dr_info_b = datarefs_copy[i]; |
3433 | data_reference_p drb = dr_info_b->dr; |
3434 | int drb_group_id = dr_info_b->group; |
3435 | stmt_vec_info stmtinfo_b = dr_info_b->stmt; |
3436 | if (!STMT_VINFO_VECTORIZABLE (stmtinfo_b) |
3437 | || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b)) |
3438 | break; |
3439 | |
3440 |       /* ??? Imperfect sorting (non-compatible types, non-modulo
3441 |          accesses, same accesses) can lead to a group being artificially
3442 |          split here as we don't just skip over those.  If it really
3443 |          matters we can push those to a worklist and re-iterate
3444 |          over them.  Then we can just skip ahead to the next DR here.  */
3445 | |
3446 | /* DRs in a different DR group should not be put into the same |
3447 | interleaving group. */ |
3448 | if (dra_group_id != drb_group_id) |
3449 | break; |
3450 | |
3451 | /* Check that the data-refs have same first location (except init) |
3452 | and they are both either store or load (not load and store, |
3453 | not masked loads or stores). */ |
3454 | if (DR_IS_READ (dra) != DR_IS_READ (drb) |
3455 | || data_ref_compare_tree (DR_BASE_ADDRESS (dra), |
3456 | DR_BASE_ADDRESS (drb)) != 0 |
3457 | || data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)) != 0 |
3458 | || !can_group_stmts_p (stmt1_info: stmtinfo_a, stmt2_info: stmtinfo_b, allow_slp_p: true)) |
3459 | break; |
3460 | |
3461 | /* Check that the data-refs have the same constant size. */ |
3462 | tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))); |
3463 | tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))); |
3464 | if (!tree_fits_uhwi_p (sza) |
3465 | || !tree_fits_uhwi_p (szb) |
3466 | || !tree_int_cst_equal (sza, szb)) |
3467 | break; |
3468 | |
3469 | /* Check that the data-refs have the same step. */ |
3470 | if (data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)) != 0) |
3471 | break; |
3472 | |
3473 | /* Check the types are compatible. |
3474 | ??? We don't distinguish this during sorting. */ |
3475 | if (!types_compatible_p (TREE_TYPE (DR_REF (dra)), |
3476 | TREE_TYPE (DR_REF (drb)))) |
3477 | break; |
3478 | |
3479 | /* Check that the DR_INITs are compile-time constants. */ |
3480 | if (!tree_fits_shwi_p (DR_INIT (dra)) |
3481 | || !tree_fits_shwi_p (DR_INIT (drb))) |
3482 | break; |
3483 | |
3484 | /* Different .GOMP_SIMD_LANE calls still give the same lane, |
3485 | just hold extra information. */ |
3486 | if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_a) |
3487 | && STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_b) |
3488 | && data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb)) == 0) |
3489 | break; |
3490 | |
3491 | /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb). */ |
3492 | HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra)); |
3493 | HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb)); |
3494 | HOST_WIDE_INT init_prev |
3495 | = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1]->dr)); |
3496 | gcc_assert (init_a <= init_b |
3497 | && init_a <= init_prev |
3498 | && init_prev <= init_b); |
3499 | |
3500 | /* Do not place the same access in the interleaving chain twice. */ |
3501 | if (init_b == init_prev) |
3502 | { |
3503 | gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1]->dr)) |
3504 | < gimple_uid (DR_STMT (drb))); |
3505 | /* Simply link in duplicates and fix up the chain below. */ |
3506 | } |
3507 | else |
3508 | { |
3509 | /* If init_b == init_a + the size of the type * k, we have an |
3510 | interleaving, and DRA is accessed before DRB. */ |
3511 | unsigned HOST_WIDE_INT type_size_a = tree_to_uhwi (sza); |
3512 | if (type_size_a == 0 |
3513 | || (((unsigned HOST_WIDE_INT)init_b - init_a) |
3514 | % type_size_a != 0)) |
3515 | break; |
3516 | |
3517 | /* If we have a store, the accesses are adjacent. This splits |
3518 | groups into chunks we support (we don't support vectorization |
3519 | of stores with gaps). */ |
3520 | if (!DR_IS_READ (dra) |
3521 | && (((unsigned HOST_WIDE_INT)init_b - init_prev) |
3522 | != type_size_a)) |
3523 | break; |
3524 | |
3525 |           /* If the step (when constant and nonzero) is smaller than the
3526 |              difference between the data-refs' inits this splits groups
3527 |              into suitable sizes.  */
3528 | if (tree_fits_shwi_p (DR_STEP (dra))) |
3529 | { |
3530 | unsigned HOST_WIDE_INT step |
3531 | = absu_hwi (x: tree_to_shwi (DR_STEP (dra))); |
3532 | if (step != 0 |
3533 | && step <= ((unsigned HOST_WIDE_INT)init_b - init_a)) |
3534 | break; |
3535 | } |
3536 | } |
3537 | |
3538 | if (dump_enabled_p ()) |
3539 | dump_printf_loc (MSG_NOTE, vect_location, |
3540 | DR_IS_READ (dra) |
3541 | ? "Detected interleaving load %T and %T\n" |
3542 | : "Detected interleaving store %T and %T\n" , |
3543 | DR_REF (dra), DR_REF (drb)); |
3544 | |
3545 | /* Link the found element into the group list. */ |
3546 | if (!DR_GROUP_FIRST_ELEMENT (stmtinfo_a)) |
3547 | { |
3548 | DR_GROUP_FIRST_ELEMENT (stmtinfo_a) = stmtinfo_a; |
3549 | lastinfo = stmtinfo_a; |
3550 | } |
3551 | DR_GROUP_FIRST_ELEMENT (stmtinfo_b) = stmtinfo_a; |
3552 | DR_GROUP_NEXT_ELEMENT (lastinfo) = stmtinfo_b; |
3553 | lastinfo = stmtinfo_b; |
3554 | |
3555 | STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a) |
3556 | = !can_group_stmts_p (stmt1_info: stmtinfo_a, stmt2_info: stmtinfo_b, allow_slp_p: false); |
3557 | |
3558 | if (dump_enabled_p () && STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a)) |
3559 | dump_printf_loc (MSG_NOTE, vect_location, |
3560 | "Load suitable for SLP vectorization only.\n" ); |
3561 | |
3562 | if (init_b == init_prev |
3563 | && !to_fixup.add (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)) |
3564 | && dump_enabled_p ()) |
3565 | dump_printf_loc (MSG_NOTE, vect_location, |
3566 | "Queuing group with duplicate access for fixup\n" ); |
3567 | } |
3568 | } |
3569 | |
3570 |   /* Fixup groups with duplicate entries by splitting them.  */
3571 | while (1) |
3572 | { |
3573 | hash_set<stmt_vec_info>::iterator it = to_fixup.begin (); |
3574 | if (!(it != to_fixup.end ())) |
3575 | break; |
3576 | stmt_vec_info grp = *it; |
3577 | to_fixup.remove (k: grp); |
3578 | |
3579 | /* Find the earliest duplicate group member. */ |
3580 | unsigned first_duplicate = -1u; |
3581 | stmt_vec_info next, g = grp; |
3582 | while ((next = DR_GROUP_NEXT_ELEMENT (g))) |
3583 | { |
3584 | if (tree_int_cst_equal (DR_INIT (STMT_VINFO_DR_INFO (next)->dr), |
3585 | DR_INIT (STMT_VINFO_DR_INFO (g)->dr)) |
3586 | && gimple_uid (STMT_VINFO_STMT (next)) < first_duplicate) |
3587 | first_duplicate = gimple_uid (STMT_VINFO_STMT (next)); |
3588 | g = next; |
3589 | } |
3590 | if (first_duplicate == -1U) |
3591 | continue; |
3592 | |
3593 | /* Then move all stmts after the first duplicate to a new group. |
3594 | Note this is a heuristic but one with the property that *it |
3595 | is fixed up completely. */ |
3596 | g = grp; |
3597 | stmt_vec_info newgroup = NULL, ng = grp; |
3598 | while ((next = DR_GROUP_NEXT_ELEMENT (g))) |
3599 | { |
3600 | if (gimple_uid (STMT_VINFO_STMT (next)) >= first_duplicate) |
3601 | { |
3602 | DR_GROUP_NEXT_ELEMENT (g) = DR_GROUP_NEXT_ELEMENT (next); |
3603 | if (!newgroup) |
3604 | newgroup = next; |
3605 | else |
3606 | DR_GROUP_NEXT_ELEMENT (ng) = next; |
3607 | ng = next; |
3608 | DR_GROUP_FIRST_ELEMENT (ng) = newgroup; |
3609 | } |
3610 | else |
3611 | g = DR_GROUP_NEXT_ELEMENT (g); |
3612 | } |
3613 | DR_GROUP_NEXT_ELEMENT (ng) = NULL; |
3614 | |
3615 | /* Fixup the new group which still may contain duplicates. */ |
3616 | to_fixup.add (k: newgroup); |
3617 | } |
3618 | |
3619 | dr_vec_info *dr_info; |
3620 | FOR_EACH_VEC_ELT (datarefs_copy, i, dr_info) |
3621 | { |
3622 | if (STMT_VINFO_VECTORIZABLE (dr_info->stmt) |
3623 | && !vect_analyze_data_ref_access (vinfo, dr_info)) |
3624 | { |
3625 | if (dump_enabled_p ()) |
3626 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3627 | "not vectorized: complicated access pattern.\n" ); |
3628 | |
3629 | if (is_a <bb_vec_info> (p: vinfo)) |
3630 | { |
3631 | /* Mark the statement as not vectorizable. */ |
3632 | STMT_VINFO_VECTORIZABLE (dr_info->stmt) = false; |
3633 | continue; |
3634 | } |
3635 | else |
3636 | { |
3637 | datarefs_copy.release (); |
3638 | return opt_result::failure_at (loc: dr_info->stmt->stmt, |
3639 | fmt: "not vectorized:" |
3640 | " complicated access pattern.\n" ); |
3641 | } |
3642 | } |
3643 | } |
3644 | |
3645 | datarefs_copy.release (); |
3646 | return opt_result::success (); |
3647 | } |
3648 | |
3649 | /* Function vect_vfa_segment_size. |
3650 | |
3651 | Input: |
3652 | DR_INFO: The data reference. |
3653 | LENGTH_FACTOR: segment length to consider. |
3654 | |
3655 | Return a value suitable for the dr_with_seg_len::seg_len field. |
3656 | This is the "distance travelled" by the pointer from the first |
3657 | iteration in the segment to the last. Note that it does not include |
3658 | the size of the access; in effect it only describes the first byte. */ |
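
/* For instance, with a DR_STEP of 4 bytes and a LENGTH_FACTOR of 8
   iterations (illustrative numbers), the segment length is
   (8 - 1) * 4 = 28 bytes: the pointer in the last iteration of the
   segment is 28 bytes past the pointer in the first one.  */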
3659 | |
3660 | static tree |
3661 | vect_vfa_segment_size (dr_vec_info *dr_info, tree length_factor) |
3662 | { |
3663 | length_factor = size_binop (MINUS_EXPR, |
3664 | fold_convert (sizetype, length_factor), |
3665 | size_one_node); |
3666 | return size_binop (MULT_EXPR, fold_convert (sizetype, DR_STEP (dr_info->dr)), |
3667 | length_factor); |
3668 | } |
3669 | |
3670 | /* Return a value that, when added to abs (vect_vfa_segment_size (DR_INFO)), |
3671 | gives the worst-case number of bytes covered by the segment. */ |
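
/* E.g. (hypothetical numbers) for a group of four 4-byte loads with a
   gap of one element at the end of the group, the access size is
   4 * (4 - 1) = 12 bytes.  */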
3672 | |
3673 | static unsigned HOST_WIDE_INT |
3674 | vect_vfa_access_size (vec_info *vinfo, dr_vec_info *dr_info) |
3675 | { |
3676 | stmt_vec_info stmt_vinfo = dr_info->stmt; |
3677 | tree ref_type = TREE_TYPE (DR_REF (dr_info->dr)); |
3678 | unsigned HOST_WIDE_INT ref_size = tree_to_uhwi (TYPE_SIZE_UNIT (ref_type)); |
3679 | unsigned HOST_WIDE_INT access_size = ref_size; |
3680 | if (DR_GROUP_FIRST_ELEMENT (stmt_vinfo)) |
3681 | { |
3682 | gcc_assert (DR_GROUP_FIRST_ELEMENT (stmt_vinfo) == stmt_vinfo); |
3683 | access_size *= DR_GROUP_SIZE (stmt_vinfo) - DR_GROUP_GAP (stmt_vinfo); |
3684 | } |
3685 | tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); |
3686 | int misalignment; |
3687 | if (STMT_VINFO_VEC_STMTS (stmt_vinfo).exists () |
3688 | && ((misalignment = dr_misalignment (dr_info, vectype)), true) |
3689 | && (vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment) |
3690 | == dr_explicit_realign_optimized)) |
3691 | { |
3692 | /* We might access a full vector's worth. */ |
3693 | access_size += tree_to_uhwi (TYPE_SIZE_UNIT (vectype)) - ref_size; |
3694 | } |
3695 | return access_size; |
3696 | } |
3697 | |
3698 | /* Get the minimum alignment for all the scalar accesses that DR_INFO |
3699 | describes. */ |
3700 | |
3701 | static unsigned int |
3702 | vect_vfa_align (dr_vec_info *dr_info) |
3703 | { |
3704 | return dr_alignment (dr: dr_info->dr); |
3705 | } |
3706 | |
3707 | /* Function vect_compile_time_alias.
3708 | |
3709 | Given data references A and B with equal base and offset, see whether |
3710 | the alias relation can be decided at compilation time. Return 1 if |
3711 | it can and the references alias, 0 if it can and the references do |
3712 | not alias, and -1 if we cannot decide at compile time. SEGMENT_LENGTH_A, |
3713 | SEGMENT_LENGTH_B, ACCESS_SIZE_A and ACCESS_SIZE_B are the equivalent |
3714 | of dr_with_seg_len::{seg_len,access_size} for A and B. */ |
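
/* A worked illustration with positive steps: if DR_INIT (A) is 0 with
   segment length 16 and access size 4, and DR_INIT (B) is 32 with the
   same lengths, the byte ranges [0, 20) and [32, 52) cannot overlap and
   0 is returned; had DR_INIT (B) been 8 instead, the ranges would be
   known to overlap and 1 would be returned.  */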
3715 | |
3716 | static int |
3717 | vect_compile_time_alias (dr_vec_info *a, dr_vec_info *b, |
3718 | tree segment_length_a, tree segment_length_b, |
3719 | unsigned HOST_WIDE_INT access_size_a, |
3720 | unsigned HOST_WIDE_INT access_size_b) |
3721 | { |
3722 | poly_offset_int offset_a = wi::to_poly_offset (DR_INIT (a->dr)); |
3723 | poly_offset_int offset_b = wi::to_poly_offset (DR_INIT (b->dr)); |
3724 | poly_uint64 const_length_a; |
3725 | poly_uint64 const_length_b; |
3726 | |
3727 | /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT |
3728 | bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of |
3729 | [a, a+12) */ |
3730 | if (tree_int_cst_compare (DR_STEP (a->dr), size_zero_node) < 0) |
3731 | { |
3732 | const_length_a = (-wi::to_poly_wide (t: segment_length_a)).force_uhwi (); |
3733 | offset_a -= const_length_a; |
3734 | } |
3735 | else |
3736 | const_length_a = tree_to_poly_uint64 (segment_length_a); |
3737 | if (tree_int_cst_compare (DR_STEP (b->dr), size_zero_node) < 0) |
3738 | { |
3739 | const_length_b = (-wi::to_poly_wide (t: segment_length_b)).force_uhwi (); |
3740 | offset_b -= const_length_b; |
3741 | } |
3742 | else |
3743 | const_length_b = tree_to_poly_uint64 (segment_length_b); |
3744 | |
3745 | const_length_a += access_size_a; |
3746 | const_length_b += access_size_b; |
3747 | |
3748 | if (ranges_known_overlap_p (pos1: offset_a, size1: const_length_a, |
3749 | pos2: offset_b, size2: const_length_b)) |
3750 | return 1; |
3751 | |
3752 | if (!ranges_maybe_overlap_p (pos1: offset_a, size1: const_length_a, |
3753 | pos2: offset_b, size2: const_length_b)) |
3754 | return 0; |
3755 | |
3756 | return -1; |
3757 | } |
3758 | |
3759 | /* Return true if the minimum nonzero dependence distance for loop LOOP_DEPTH |
3760 | in DDR is >= VF. */ |
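
/* For instance (illustrative): with distance vectors {0} and {8} at
   LOOP_DEPTH and a VF of 4, every nonzero distance is at least the VF,
   so vectorizing cannot violate the dependence and true is returned.  */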
3761 | |
3762 | static bool |
3763 | dependence_distance_ge_vf (data_dependence_relation *ddr, |
3764 | unsigned int loop_depth, poly_uint64 vf) |
3765 | { |
3766 | if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE |
3767 | || DDR_NUM_DIST_VECTS (ddr) == 0) |
3768 | return false; |
3769 | |
3770 | /* If the dependence is exact, we should have limited the VF instead. */ |
3771 | gcc_checking_assert (DDR_COULD_BE_INDEPENDENT_P (ddr)); |
3772 | |
3773 | unsigned int i; |
3774 | lambda_vector dist_v; |
3775 | FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v) |
3776 | { |
3777 | HOST_WIDE_INT dist = dist_v[loop_depth]; |
3778 | if (dist != 0 |
3779 | && !(dist > 0 && DDR_REVERSED_P (ddr)) |
3780 | && maybe_lt (a: (unsigned HOST_WIDE_INT) abs_hwi (x: dist), b: vf)) |
3781 | return false; |
3782 | } |
3783 | |
3784 | if (dump_enabled_p ()) |
3785 | dump_printf_loc (MSG_NOTE, vect_location, |
3786 | "dependence distance between %T and %T is >= VF\n" , |
3787 | DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr))); |
3788 | |
3789 | return true; |
3790 | } |
3791 | |
3792 | /* Dump LOWER_BOUND using flags DUMP_KIND. Dumps are known to be enabled. */ |
3793 | |
3794 | static void |
3795 | dump_lower_bound (dump_flags_t dump_kind, const vec_lower_bound &lower_bound) |
3796 | { |
3797 | dump_printf (dump_kind, "%s (%T) >= " , |
3798 | lower_bound.unsigned_p ? "unsigned" : "abs" , |
3799 | lower_bound.expr); |
3800 | dump_dec (dump_kind, lower_bound.min_value); |
3801 | } |
3802 | |
3803 | /* Record that the vectorized loop requires the vec_lower_bound described |
3804 | by EXPR, UNSIGNED_P and MIN_VALUE. */ |
3805 | |
3806 | static void |
3807 | vect_check_lower_bound (loop_vec_info loop_vinfo, tree expr, bool unsigned_p, |
3808 | poly_uint64 min_value) |
3809 | { |
3810 | vec<vec_lower_bound> &lower_bounds |
3811 | = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo); |
3812 | for (unsigned int i = 0; i < lower_bounds.length (); ++i) |
3813 | if (operand_equal_p (lower_bounds[i].expr, expr, flags: 0)) |
3814 | { |
3815 | unsigned_p &= lower_bounds[i].unsigned_p; |
3816 | min_value = upper_bound (a: lower_bounds[i].min_value, b: min_value); |
3817 | if (lower_bounds[i].unsigned_p != unsigned_p |
3818 | || maybe_lt (a: lower_bounds[i].min_value, b: min_value)) |
3819 | { |
3820 | lower_bounds[i].unsigned_p = unsigned_p; |
3821 | lower_bounds[i].min_value = min_value; |
3822 | if (dump_enabled_p ()) |
3823 | { |
3824 | dump_printf_loc (MSG_NOTE, vect_location, |
3825 | "updating run-time check to " ); |
3826 | dump_lower_bound (dump_kind: MSG_NOTE, lower_bound: lower_bounds[i]); |
3827 | dump_printf (MSG_NOTE, "\n" ); |
3828 | } |
3829 | } |
3830 | return; |
3831 | } |
3832 | |
3833 | vec_lower_bound lower_bound (expr, unsigned_p, min_value); |
3834 | if (dump_enabled_p ()) |
3835 | { |
3836 | dump_printf_loc (MSG_NOTE, vect_location, "need a run-time check that " ); |
3837 | dump_lower_bound (dump_kind: MSG_NOTE, lower_bound); |
3838 | dump_printf (MSG_NOTE, "\n" ); |
3839 | } |
3840 | LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).safe_push (obj: lower_bound); |
3841 | } |
3842 | |
3843 | /* Return true if it's unlikely that the step of the vectorized form of DR_INFO |
3844 | will span fewer than GAP bytes. */ |
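
/* E.g. (hypothetical numbers) with an estimated VF of 4, a group size
   of 2 and 4-byte scalar accesses, gaps of up to 4 * 2 * 4 = 32 bytes
   are considered small.  */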
3845 | |
3846 | static bool |
3847 | vect_small_gap_p (loop_vec_info loop_vinfo, dr_vec_info *dr_info, |
3848 | poly_int64 gap) |
3849 | { |
3850 | stmt_vec_info stmt_info = dr_info->stmt; |
3851 | HOST_WIDE_INT count |
3852 | = estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo)); |
3853 | if (DR_GROUP_FIRST_ELEMENT (stmt_info)) |
3854 | count *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info)); |
3855 | return (estimated_poly_value (x: gap) |
3856 | <= count * vect_get_scalar_dr_size (dr_info)); |
3857 | } |
3858 | |
3859 | /* Return true if we know that there is no alias between DR_INFO_A and |
3860 | DR_INFO_B when abs (DR_STEP (DR_INFO_A->dr)) >= N for some N. |
3861 | When returning true, set *LOWER_BOUND_OUT to this N. */ |
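
/* For example (hypothetical): for accesses a[i * s] and a[i * s + 1]
   with 4-byte elements, INIT_A is 0, INIT_B is 4 and each scalar access
   spans 4 bytes, so *LOWER_BOUND_OUT becomes 4 + 4 - 0 = 8, i.e. the
   accesses cannot alias once abs (DR_STEP) is at least 8 bytes.  */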
3862 | |
3863 | static bool |
3864 | vectorizable_with_step_bound_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b, |
3865 | poly_uint64 *lower_bound_out) |
3866 | { |
3867 | /* Check that there is a constant gap of known sign between DR_A |
3868 | and DR_B. */ |
3869 | data_reference *dr_a = dr_info_a->dr; |
3870 | data_reference *dr_b = dr_info_b->dr; |
3871 | poly_int64 init_a, init_b; |
3872 | if (!operand_equal_p (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b), flags: 0) |
3873 | || !operand_equal_p (DR_OFFSET (dr_a), DR_OFFSET (dr_b), flags: 0) |
3874 | || !operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), flags: 0) |
3875 | || !poly_int_tree_p (DR_INIT (dr_a), value: &init_a) |
3876 | || !poly_int_tree_p (DR_INIT (dr_b), value: &init_b) |
3877 | || !ordered_p (a: init_a, b: init_b)) |
3878 | return false; |
3879 | |
3880 | /* Sort DR_A and DR_B by the address they access. */ |
3881 | if (maybe_lt (a: init_b, b: init_a)) |
3882 | { |
3883 | std::swap (a&: init_a, b&: init_b); |
3884 | std::swap (a&: dr_info_a, b&: dr_info_b); |
3885 | std::swap (a&: dr_a, b&: dr_b); |
3886 | } |
3887 | |
3888 | /* If the two accesses could be dependent within a scalar iteration, |
3889 | make sure that we'd retain their order. */ |
3890 | if (maybe_gt (init_a + vect_get_scalar_dr_size (dr_info_a), init_b) |
3891 | && !vect_preserves_scalar_order_p (dr_info_a, dr_info_b)) |
3892 | return false; |
3893 | |
3894 | /* There is no alias if abs (DR_STEP) is greater than or equal to |
3895 | the bytes spanned by the combination of the two accesses. */ |
3896 | *lower_bound_out = init_b + vect_get_scalar_dr_size (dr_info: dr_info_b) - init_a; |
3897 | return true; |
3898 | } |
3899 | |
3900 | /* Function vect_prune_runtime_alias_test_list. |
3901 | |
3902 | Prune a list of ddrs to be tested at run-time by versioning for alias. |
3903 | Merge several alias checks into one if possible. |
3904 |    Return FALSE if the resulting list of ddrs is longer than allowed by
3905 |    PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE.  */
3906 | |
3907 | opt_result |
3908 | vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) |
3909 | { |
3910 | typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_pair_hash; |
3911 | hash_set <tree_pair_hash> compared_objects; |
3912 | |
3913 | const vec<ddr_p> &may_alias_ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo); |
3914 | vec<dr_with_seg_len_pair_t> &comp_alias_ddrs |
3915 | = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo); |
3916 | const vec<vec_object_pair> &check_unequal_addrs |
3917 | = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo); |
3918 | poly_uint64 vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
3919 | tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo); |
3920 | |
3921 | ddr_p ddr; |
3922 | unsigned int i; |
3923 | tree length_factor; |
3924 | |
3925 | DUMP_VECT_SCOPE ("vect_prune_runtime_alias_test_list" ); |
3926 | |
3927 | /* Step values are irrelevant for aliasing if the number of vector |
3928 | iterations is equal to the number of scalar iterations (which can |
3929 | happen for fully-SLP loops). */ |
3930 | bool vf_one_p = known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 1U); |
3931 | |
3932 | if (!vf_one_p) |
3933 | { |
3934 | /* Convert the checks for nonzero steps into bound tests. */ |
3935 | tree value; |
3936 | FOR_EACH_VEC_ELT (LOOP_VINFO_CHECK_NONZERO (loop_vinfo), i, value) |
3937 | vect_check_lower_bound (loop_vinfo, expr: value, unsigned_p: true, min_value: 1); |
3938 | } |
3939 | |
3940 | if (may_alias_ddrs.is_empty ()) |
3941 | return opt_result::success (); |
3942 | |
3943 | comp_alias_ddrs.create (nelems: may_alias_ddrs.length ()); |
3944 | |
3945 | unsigned int loop_depth |
3946 | = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num, |
3947 | LOOP_VINFO_LOOP_NEST (loop_vinfo)); |
3948 | |
3949 | /* First, we collect all data ref pairs for aliasing checks. */ |
3950 | FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr) |
3951 | { |
3952 | poly_uint64 lower_bound; |
3953 | tree segment_length_a, segment_length_b; |
3954 | unsigned HOST_WIDE_INT access_size_a, access_size_b; |
3955 | unsigned int align_a, align_b; |
3956 | |
3957 | /* Ignore the alias if the VF we chose ended up being no greater |
3958 | than the dependence distance. */ |
3959 | if (dependence_distance_ge_vf (ddr, loop_depth, vf: vect_factor)) |
3960 | continue; |
3961 | |
3962 | if (DDR_OBJECT_A (ddr)) |
3963 | { |
3964 | vec_object_pair new_pair (DDR_OBJECT_A (ddr), DDR_OBJECT_B (ddr)); |
3965 | if (!compared_objects.add (k: new_pair)) |
3966 | { |
3967 | if (dump_enabled_p ()) |
3968 | dump_printf_loc (MSG_NOTE, vect_location, |
3969 | "checking that %T and %T" |
3970 | " have different addresses\n" , |
3971 | new_pair.first, new_pair.second); |
3972 | LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).safe_push (obj: new_pair); |
3973 | } |
3974 | continue; |
3975 | } |
3976 | |
3977 | dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (DDR_A (ddr)); |
3978 | stmt_vec_info stmt_info_a = dr_info_a->stmt; |
3979 | |
3980 | dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr)); |
3981 | stmt_vec_info stmt_info_b = dr_info_b->stmt; |
3982 | |
3983 | bool preserves_scalar_order_p |
3984 | = vect_preserves_scalar_order_p (dr_info_a, dr_info_b); |
3985 | bool ignore_step_p |
3986 | = (vf_one_p |
3987 | && (preserves_scalar_order_p |
3988 | || operand_equal_p (DR_STEP (dr_info_a->dr), |
3989 | DR_STEP (dr_info_b->dr)))); |
3990 | |
3991 | /* Skip the pair if inter-iteration dependencies are irrelevant |
3992 | and intra-iteration dependencies are guaranteed to be honored. */ |
3993 | if (ignore_step_p |
3994 | && (preserves_scalar_order_p |
3995 | || vectorizable_with_step_bound_p (dr_info_a, dr_info_b, |
3996 | lower_bound_out: &lower_bound))) |
3997 | { |
3998 | if (dump_enabled_p ()) |
3999 | dump_printf_loc (MSG_NOTE, vect_location, |
4000 | "no need for alias check between " |
4001 | "%T and %T when VF is 1\n" , |
4002 | DR_REF (dr_info_a->dr), DR_REF (dr_info_b->dr)); |
4003 | continue; |
4004 | } |
4005 | |
4006 | /* See whether we can handle the alias using a bounds check on |
4007 | the step, and whether that's likely to be the best approach. |
4008 | (It might not be, for example, if the minimum step is much larger |
4009 | than the number of bytes handled by one vector iteration.) */ |
4010 | if (!ignore_step_p |
4011 | && TREE_CODE (DR_STEP (dr_info_a->dr)) != INTEGER_CST |
4012 | && vectorizable_with_step_bound_p (dr_info_a, dr_info_b, |
4013 | lower_bound_out: &lower_bound) |
4014 | && (vect_small_gap_p (loop_vinfo, dr_info: dr_info_a, gap: lower_bound) |
4015 | || vect_small_gap_p (loop_vinfo, dr_info: dr_info_b, gap: lower_bound))) |
4016 | { |
4017 | bool unsigned_p = dr_known_forward_stride_p (dr_info_a->dr); |
4018 | if (dump_enabled_p ()) |
4019 | { |
4020 | dump_printf_loc (MSG_NOTE, vect_location, "no alias between " |
4021 | "%T and %T when the step %T is outside " , |
4022 | DR_REF (dr_info_a->dr), |
4023 | DR_REF (dr_info_b->dr), |
4024 | DR_STEP (dr_info_a->dr)); |
4025 | if (unsigned_p) |
4026 | dump_printf (MSG_NOTE, "[0" ); |
4027 | else |
4028 | { |
4029 | dump_printf (MSG_NOTE, "(" ); |
4030 | dump_dec (MSG_NOTE, poly_int64 (-lower_bound)); |
4031 | } |
4032 | dump_printf (MSG_NOTE, ", " ); |
4033 | dump_dec (MSG_NOTE, lower_bound); |
4034 | dump_printf (MSG_NOTE, ")\n" ); |
4035 | } |
4036 | vect_check_lower_bound (loop_vinfo, DR_STEP (dr_info_a->dr), |
4037 | unsigned_p, min_value: lower_bound); |
4038 | continue; |
4039 | } |
4040 | |
4041 | stmt_vec_info dr_group_first_a = DR_GROUP_FIRST_ELEMENT (stmt_info_a); |
4042 | if (dr_group_first_a) |
4043 | { |
4044 | stmt_info_a = dr_group_first_a; |
4045 | dr_info_a = STMT_VINFO_DR_INFO (stmt_info_a); |
4046 | } |
4047 | |
4048 | stmt_vec_info dr_group_first_b = DR_GROUP_FIRST_ELEMENT (stmt_info_b); |
4049 | if (dr_group_first_b) |
4050 | { |
4051 | stmt_info_b = dr_group_first_b; |
4052 | dr_info_b = STMT_VINFO_DR_INFO (stmt_info_b); |
4053 | } |
4054 | |
4055 | if (ignore_step_p) |
4056 | { |
4057 | segment_length_a = size_zero_node; |
4058 | segment_length_b = size_zero_node; |
4059 | } |
4060 | else |
4061 | { |
4062 | if (!operand_equal_p (DR_STEP (dr_info_a->dr), |
4063 | DR_STEP (dr_info_b->dr), flags: 0)) |
4064 | length_factor = scalar_loop_iters; |
4065 | else |
4066 | length_factor = size_int (vect_factor); |
4067 | segment_length_a = vect_vfa_segment_size (dr_info: dr_info_a, length_factor); |
4068 | segment_length_b = vect_vfa_segment_size (dr_info: dr_info_b, length_factor); |
4069 | } |
4070 | access_size_a = vect_vfa_access_size (vinfo: loop_vinfo, dr_info: dr_info_a); |
4071 | access_size_b = vect_vfa_access_size (vinfo: loop_vinfo, dr_info: dr_info_b); |
4072 | align_a = vect_vfa_align (dr_info: dr_info_a); |
4073 | align_b = vect_vfa_align (dr_info: dr_info_b); |
4074 | |
4075 | /* See whether the alias is known at compilation time. */ |
4076 | if (operand_equal_p (DR_BASE_ADDRESS (dr_info_a->dr), |
4077 | DR_BASE_ADDRESS (dr_info_b->dr), flags: 0) |
4078 | && operand_equal_p (DR_OFFSET (dr_info_a->dr), |
4079 | DR_OFFSET (dr_info_b->dr), flags: 0) |
4080 | && TREE_CODE (DR_STEP (dr_info_a->dr)) == INTEGER_CST |
4081 | && TREE_CODE (DR_STEP (dr_info_b->dr)) == INTEGER_CST |
4082 | && poly_int_tree_p (t: segment_length_a) |
4083 | && poly_int_tree_p (t: segment_length_b)) |
4084 | { |
4085 | int res = vect_compile_time_alias (a: dr_info_a, b: dr_info_b, |
4086 | segment_length_a, |
4087 | segment_length_b, |
4088 | access_size_a, |
4089 | access_size_b); |
4090 | if (res >= 0 && dump_enabled_p ()) |
4091 | { |
4092 | dump_printf_loc (MSG_NOTE, vect_location, |
4093 | "can tell at compile time that %T and %T" , |
4094 | DR_REF (dr_info_a->dr), DR_REF (dr_info_b->dr)); |
4095 | if (res == 0) |
4096 | dump_printf (MSG_NOTE, " do not alias\n" ); |
4097 | else |
4098 | dump_printf (MSG_NOTE, " alias\n" ); |
4099 | } |
4100 | |
4101 | if (res == 0) |
4102 | continue; |
4103 | |
4104 | if (res == 1) |
4105 | return opt_result::failure_at (loc: stmt_info_b->stmt, |
4106 | fmt: "not vectorized:" |
4107 | " compilation time alias: %G%G" , |
4108 | stmt_info_a->stmt, |
4109 | stmt_info_b->stmt); |
4110 | } |
4111 | |
4112 | dr_with_seg_len dr_a (dr_info_a->dr, segment_length_a, |
4113 | access_size_a, align_a); |
4114 | dr_with_seg_len dr_b (dr_info_b->dr, segment_length_b, |
4115 | access_size_b, align_b); |
4116 | /* Canonicalize the order to be the one that's needed for accurate |
4117 | RAW, WAR and WAW flags, in cases where the data references are |
4118 | well-ordered. The order doesn't really matter otherwise, |
4119 | but we might as well be consistent. */ |
4120 | if (get_later_stmt (stmt1_info: stmt_info_a, stmt2_info: stmt_info_b) == stmt_info_a) |
4121 | std::swap (a&: dr_a, b&: dr_b); |
4122 | |
4123 | dr_with_seg_len_pair_t dr_with_seg_len_pair |
4124 | (dr_a, dr_b, (preserves_scalar_order_p |
4125 | ? dr_with_seg_len_pair_t::WELL_ORDERED |
4126 | : dr_with_seg_len_pair_t::REORDERED)); |
4127 | |
4128 | comp_alias_ddrs.safe_push (obj: dr_with_seg_len_pair); |
4129 | } |
4130 | |
4131 | prune_runtime_alias_test_list (&comp_alias_ddrs, vect_factor); |
4132 | |
4133 | unsigned int count = (comp_alias_ddrs.length () |
4134 | + check_unequal_addrs.length ()); |
4135 | |
4136 | if (count |
4137 | && (loop_cost_model (LOOP_VINFO_LOOP (loop_vinfo)) |
4138 | == VECT_COST_MODEL_VERY_CHEAP)) |
4139 | return opt_result::failure_at |
4140 | (loc: vect_location, fmt: "would need a runtime alias check\n" ); |
4141 | |
4142 | if (dump_enabled_p ()) |
4143 | dump_printf_loc (MSG_NOTE, vect_location, |
4144 | "improved number of alias checks from %d to %d\n" , |
4145 | may_alias_ddrs.length (), count); |
4146 | unsigned limit = param_vect_max_version_for_alias_checks; |
4147 | if (loop_cost_model (LOOP_VINFO_LOOP (loop_vinfo)) == VECT_COST_MODEL_CHEAP) |
4148 | limit = param_vect_max_version_for_alias_checks * 6 / 10; |
4149 | if (count > limit) |
4150 | return opt_result::failure_at |
4151 | (loc: vect_location, |
4152 | fmt: "number of versioning for alias run-time tests exceeds %d " |
4153 | "(--param vect-max-version-for-alias-checks)\n" , limit); |
4154 | |
4155 | return opt_result::success (); |
4156 | } |
4157 | |
4158 | /* Check whether we can use an internal function for a gather load |
4159 | or scatter store. READ_P is true for loads and false for stores. |
4160 | MASKED_P is true if the load or store is conditional. MEMORY_TYPE is |
4161 | the type of the memory elements being loaded or stored. OFFSET_TYPE |
4162 | is the type of the offset that is being applied to the invariant |
4163 | base address. SCALE is the amount by which the offset should |
4164 | be multiplied *after* it has been converted to address width. |
4165 | |
4166 | Return true if the function is supported, storing the function id in |
4167 | *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT. */ |
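
/* For example (illustration only), for a gather like

     for (i = 0; i < n; i++)
       x[i] = y[idx[i]];

   with 32-bit IDX elements, if the target only provides gathers with
   64-bit offsets then the loop below widens OFFSET_TYPE from 32 to 64
   bits and retries before giving up once the offset precision reaches
   both the pointer size and the element width.  */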
4168 | |
4169 | bool |
4170 | vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p, |
4171 | tree vectype, tree memory_type, tree offset_type, |
4172 | int scale, internal_fn *ifn_out, |
4173 | tree *offset_vectype_out) |
4174 | { |
4175 | unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type)); |
4176 | unsigned int element_bits = vector_element_bits (vectype); |
4177 | if (element_bits != memory_bits) |
4178 | /* For now the vector elements must be the same width as the |
4179 | memory elements. */ |
4180 | return false; |
4181 | |
4182 | /* Work out which function we need. */ |
4183 | internal_fn ifn, alt_ifn, alt_ifn2; |
4184 | if (read_p) |
4185 | { |
4186 | ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD; |
4187 | alt_ifn = IFN_MASK_GATHER_LOAD; |
4188 |       /* When the target supports MASK_LEN_GATHER_LOAD, we always
4189 |          use MASK_LEN_GATHER_LOAD regardless of whether len and
4190 |          mask are valid or not.  */
4191 | alt_ifn2 = IFN_MASK_LEN_GATHER_LOAD; |
4192 | } |
4193 | else |
4194 | { |
4195 | ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE; |
4196 | alt_ifn = IFN_MASK_SCATTER_STORE; |
4197 |       /* When the target supports MASK_LEN_SCATTER_STORE, we always
4198 |          use MASK_LEN_SCATTER_STORE regardless of whether len and
4199 |          mask are valid or not.  */
4200 | alt_ifn2 = IFN_MASK_LEN_SCATTER_STORE; |
4201 | } |
4202 | |
4203 | for (;;) |
4204 | { |
4205 | tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type); |
4206 | if (!offset_vectype) |
4207 | return false; |
4208 | |
4209 | /* Test whether the target supports this combination. */ |
4210 | if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type, |
4211 | offset_vectype, scale)) |
4212 | { |
4213 | *ifn_out = ifn; |
4214 | *offset_vectype_out = offset_vectype; |
4215 | return true; |
4216 | } |
4217 | else if (!masked_p |
4218 | && internal_gather_scatter_fn_supported_p (alt_ifn, vectype, |
4219 | memory_type, |
4220 | offset_vectype, |
4221 | scale)) |
4222 | { |
4223 | *ifn_out = alt_ifn; |
4224 | *offset_vectype_out = offset_vectype; |
4225 | return true; |
4226 | } |
4227 | else if (internal_gather_scatter_fn_supported_p (alt_ifn2, vectype, |
4228 | memory_type, |
4229 | offset_vectype, scale)) |
4230 | { |
4231 | *ifn_out = alt_ifn2; |
4232 | *offset_vectype_out = offset_vectype; |
4233 | return true; |
4234 | } |
4235 | |
4236 | if (TYPE_PRECISION (offset_type) >= POINTER_SIZE |
4237 | && TYPE_PRECISION (offset_type) >= element_bits) |
4238 | return false; |
4239 | |
4240 | offset_type = build_nonstandard_integer_type |
4241 | (TYPE_PRECISION (offset_type) * 2, TYPE_UNSIGNED (offset_type)); |
4242 | } |
4243 | } |
4244 | |
4245 | /* STMT_INFO is a call to an internal gather load or scatter store function. |
4246 | Describe the operation in INFO. */ |
4247 | |
4248 | static void |
4249 | vect_describe_gather_scatter_call (stmt_vec_info stmt_info, |
4250 | gather_scatter_info *info) |
4251 | { |
4252 | gcall *call = as_a <gcall *> (p: stmt_info->stmt); |
4253 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
4254 | data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); |
4255 | |
4256 | info->ifn = gimple_call_internal_fn (gs: call); |
4257 | info->decl = NULL_TREE; |
4258 | info->base = gimple_call_arg (gs: call, index: 0); |
4259 | info->offset = gimple_call_arg (gs: call, index: 1); |
4260 | info->offset_dt = vect_unknown_def_type; |
4261 | info->offset_vectype = NULL_TREE; |
4262 | info->scale = TREE_INT_CST_LOW (gimple_call_arg (call, 2)); |
4263 | info->element_type = TREE_TYPE (vectype); |
4264 | info->memory_type = TREE_TYPE (DR_REF (dr)); |
4265 | } |
4266 | |
4267 | /* Return true if a non-affine read or write in STMT_INFO is suitable for a |
4268 | gather load or scatter store. Describe the operation in *INFO if so. */ |
4269 | |
4270 | bool |
4271 | vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, |
4272 | gather_scatter_info *info) |
4273 | { |
4274 | HOST_WIDE_INT scale = 1; |
4275 | poly_int64 pbitpos, pbitsize; |
4276 | class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
4277 | struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); |
4278 | tree offtype = NULL_TREE; |
4279 | tree decl = NULL_TREE, base, off; |
4280 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
4281 | tree memory_type = TREE_TYPE (DR_REF (dr)); |
4282 | machine_mode pmode; |
4283 | int punsignedp, reversep, pvolatilep = 0; |
4284 | internal_fn ifn; |
4285 | tree offset_vectype; |
4286 | bool masked_p = false; |
4287 | |
4288 | /* See whether this is already a call to a gather/scatter internal function. |
4289 | If not, see whether it's a masked load or store. */ |
4290 | gcall *call = dyn_cast <gcall *> (p: stmt_info->stmt); |
4291 | if (call && gimple_call_internal_p (gs: call)) |
4292 | { |
4293 | ifn = gimple_call_internal_fn (gs: call); |
4294 | if (internal_gather_scatter_fn_p (ifn)) |
4295 | { |
4296 | vect_describe_gather_scatter_call (stmt_info, info); |
4297 | return true; |
4298 | } |
4299 | masked_p = (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE); |
4300 | } |
4301 | |
4302 | /* True if we should aim to use internal functions rather than |
4303 | built-in functions. */ |
4304 | bool use_ifn_p = (DR_IS_READ (dr) |
4305 | ? supports_vec_gather_load_p (TYPE_MODE (vectype)) |
4306 | : supports_vec_scatter_store_p (TYPE_MODE (vectype))); |
4307 | |
4308 | base = DR_REF (dr); |
4309 | /* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF, |
4310 | see if we can use the def stmt of the address. */ |
4311 | if (masked_p |
4312 | && TREE_CODE (base) == MEM_REF |
4313 | && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME |
4314 | && integer_zerop (TREE_OPERAND (base, 1)) |
4315 | && !expr_invariant_in_loop_p (loop, TREE_OPERAND (base, 0))) |
4316 | { |
4317 | gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base, 0)); |
4318 | if (is_gimple_assign (gs: def_stmt) |
4319 | && gimple_assign_rhs_code (gs: def_stmt) == ADDR_EXPR) |
4320 | base = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0); |
4321 | } |
4322 | |
4323 | /* The gather and scatter builtins need address of the form |
4324 | loop_invariant + vector * {1, 2, 4, 8} |
4325 | or |
4326 | loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }. |
4327 | Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture |
4328 | of loop invariants/SSA_NAMEs defined in the loop, with casts, |
4329 | multiplications and additions in it. To get a vector, we need |
4330 | a single SSA_NAME that will be defined in the loop and will |
4331 | contain everything that is not loop invariant and that can be |
4332 | vectorized. The following code attempts to find such a preexisting |
4333 | SSA_NAME OFF and put the loop invariants into a tree BASE |
4334 | that can be gimplified before the loop. */ |
4335 | base = get_inner_reference (base, &pbitsize, &pbitpos, &off, &pmode, |
4336 | &punsignedp, &reversep, &pvolatilep); |
4337 | if (reversep) |
4338 | return false; |
4339 | |
4340 | /* PR 107346. Packed structs can have fields at offsets that are not |
4341 | multiples of BITS_PER_UNIT. Do not use gather/scatters in such cases. */ |
4342 | if (!multiple_p (a: pbitpos, BITS_PER_UNIT)) |
4343 | return false; |
4344 | |
4345 | /* We need to be able to form an address to the base which for example |
4346 | isn't possible for hard registers. */ |
4347 | if (may_be_nonaddressable_p (expr: base)) |
4348 | return false; |
4349 | |
4350 | poly_int64 pbytepos = exact_div (a: pbitpos, BITS_PER_UNIT); |
4351 | |
4352 | if (TREE_CODE (base) == MEM_REF) |
4353 | { |
4354 | if (!integer_zerop (TREE_OPERAND (base, 1))) |
4355 | { |
4356 | if (off == NULL_TREE) |
4357 | off = wide_int_to_tree (sizetype, cst: mem_ref_offset (base)); |
4358 | else |
4359 | off = size_binop (PLUS_EXPR, off, |
4360 | fold_convert (sizetype, TREE_OPERAND (base, 1))); |
4361 | } |
4362 | base = TREE_OPERAND (base, 0); |
4363 | } |
4364 | else |
4365 | base = build_fold_addr_expr (base); |
4366 | |
4367 | if (off == NULL_TREE) |
4368 | off = size_zero_node; |
4369 | |
4370 | /* If base is not loop invariant then, if off is 0, we start with just |
4371 | the constant offset in the loop invariant BASE and continue with base |
4372 | as OFF; otherwise give up. |
4373 | We could handle that case by gimplifying the addition of base + off |
4374 | into some SSA_NAME and using that as off, but for now punt. */ |
4375 | if (!expr_invariant_in_loop_p (loop, base)) |
4376 | { |
4377 | if (!integer_zerop (off)) |
4378 | return false; |
4379 | off = base; |
4380 | base = size_int (pbytepos); |
4381 | } |
4382 | /* Otherwise put base + constant offset into the loop invariant BASE |
4383 | and continue with OFF. */ |
4384 | else |
4385 | { |
4386 | base = fold_convert (sizetype, base); |
4387 | base = size_binop (PLUS_EXPR, base, size_int (pbytepos)); |
4388 | } |
4389 | |
4390 | /* OFF at this point may be either a SSA_NAME or some tree expression |
4391 | from get_inner_reference. Try to peel off loop invariants from it |
4392 | into BASE as long as possible. */ |
4393 | STRIP_NOPS (off); |
4394 | while (offtype == NULL_TREE) |
4395 | { |
4396 | enum tree_code code; |
4397 | tree op0, op1, add = NULL_TREE; |
4398 | |
4399 | if (TREE_CODE (off) == SSA_NAME) |
4400 | { |
4401 | gimple *def_stmt = SSA_NAME_DEF_STMT (off); |
4402 | |
4403 | if (expr_invariant_in_loop_p (loop, off)) |
4404 | return false; |
4405 | |
4406 | if (gimple_code (g: def_stmt) != GIMPLE_ASSIGN) |
4407 | break; |
4408 | |
4409 | op0 = gimple_assign_rhs1 (gs: def_stmt); |
4410 | code = gimple_assign_rhs_code (gs: def_stmt); |
4411 | op1 = gimple_assign_rhs2 (gs: def_stmt); |
4412 | } |
4413 | else |
4414 | { |
4415 | if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS) |
4416 | return false; |
4417 | code = TREE_CODE (off); |
4418 | extract_ops_from_tree (expr: off, code: &code, op0: &op0, op1: &op1); |
4419 | } |
4420 | switch (code) |
4421 | { |
4422 | case POINTER_PLUS_EXPR: |
4423 | case PLUS_EXPR: |
4424 | if (expr_invariant_in_loop_p (loop, op0)) |
4425 | { |
4426 | add = op0; |
4427 | off = op1; |
4428 | do_add: |
4429 | add = fold_convert (sizetype, add); |
4430 | if (scale != 1) |
4431 | add = size_binop (MULT_EXPR, add, size_int (scale)); |
4432 | base = size_binop (PLUS_EXPR, base, add); |
4433 | continue; |
4434 | } |
4435 | if (expr_invariant_in_loop_p (loop, op1)) |
4436 | { |
4437 | add = op1; |
4438 | off = op0; |
4439 | goto do_add; |
4440 | } |
4441 | break; |
4442 | case MINUS_EXPR: |
4443 | if (expr_invariant_in_loop_p (loop, op1)) |
4444 | { |
4445 | add = fold_convert (sizetype, op1); |
4446 | add = size_binop (MINUS_EXPR, size_zero_node, add); |
4447 | off = op0; |
4448 | goto do_add; |
4449 | } |
4450 | break; |
4451 | case MULT_EXPR: |
4452 | if (scale == 1 && tree_fits_shwi_p (op1)) |
4453 | { |
4454 | int new_scale = tree_to_shwi (op1); |
4455 | /* Only treat this as a scaling operation if the target |
4456 | supports it for at least some offset type. */ |
4457 | if (use_ifn_p |
4458 | && !vect_gather_scatter_fn_p (vinfo: loop_vinfo, DR_IS_READ (dr), |
4459 | masked_p, vectype, memory_type, |
4460 | signed_char_type_node, |
4461 | scale: new_scale, ifn_out: &ifn, |
4462 | offset_vectype_out: &offset_vectype) |
4463 | && !vect_gather_scatter_fn_p (vinfo: loop_vinfo, DR_IS_READ (dr), |
4464 | masked_p, vectype, memory_type, |
4465 | unsigned_char_type_node, |
4466 | scale: new_scale, ifn_out: &ifn, |
4467 | offset_vectype_out: &offset_vectype)) |
4468 | break; |
4469 | scale = new_scale; |
4470 | off = op0; |
4471 | continue; |
4472 | } |
4473 | break; |
4474 | case SSA_NAME: |
4475 | off = op0; |
4476 | continue; |
4477 | CASE_CONVERT: |
4478 | if (!POINTER_TYPE_P (TREE_TYPE (op0)) |
4479 | && !INTEGRAL_TYPE_P (TREE_TYPE (op0))) |
4480 | break; |
4481 | |
4482 | /* Don't include the conversion if the target is happy with |
4483 | the current offset type. */ |
4484 | if (use_ifn_p |
4485 | && TREE_CODE (off) == SSA_NAME |
4486 | && !POINTER_TYPE_P (TREE_TYPE (off)) |
4487 | && vect_gather_scatter_fn_p (vinfo: loop_vinfo, DR_IS_READ (dr), |
4488 | masked_p, vectype, memory_type, |
4489 | TREE_TYPE (off), scale, ifn_out: &ifn, |
4490 | offset_vectype_out: &offset_vectype)) |
4491 | break; |
4492 | |
4493 | if (TYPE_PRECISION (TREE_TYPE (op0)) |
4494 | == TYPE_PRECISION (TREE_TYPE (off))) |
4495 | { |
4496 | off = op0; |
4497 | continue; |
4498 | } |
4499 | |
4500 | /* Include the conversion if it is widening and either we're using |
4501 | the IFN path, or the target can handle the converted-from |
4502 | offset, or the current size is not already the same as the |
4503 | data vector element size. */ |
4504 | if ((TYPE_PRECISION (TREE_TYPE (op0)) |
4505 | < TYPE_PRECISION (TREE_TYPE (off))) |
4506 | && (use_ifn_p |
4507 | || (DR_IS_READ (dr) |
4508 | ? (targetm.vectorize.builtin_gather |
4509 | && targetm.vectorize.builtin_gather (vectype, |
4510 | TREE_TYPE (op0), |
4511 | scale)) |
4512 | : (targetm.vectorize.builtin_scatter |
4513 | && targetm.vectorize.builtin_scatter (vectype, |
4514 | TREE_TYPE (op0), |
4515 | scale))) |
4516 | || !operand_equal_p (TYPE_SIZE (TREE_TYPE (off)), |
4517 | TYPE_SIZE (TREE_TYPE (vectype)), flags: 0))) |
4518 | { |
4519 | off = op0; |
4520 | offtype = TREE_TYPE (off); |
4521 | STRIP_NOPS (off); |
4522 | continue; |
4523 | } |
4524 | break; |
4525 | default: |
4526 | break; |
4527 | } |
4528 | break; |
4529 | } |
4530 | |
4531 | /* If at the end OFF still isn't a SSA_NAME or isn't |
4532 | defined in the loop, punt. */ |
4533 | if (TREE_CODE (off) != SSA_NAME |
4534 | || expr_invariant_in_loop_p (loop, off)) |
4535 | return false; |
4536 | |
4537 | if (offtype == NULL_TREE) |
4538 | offtype = TREE_TYPE (off); |
4539 | |
4540 | if (use_ifn_p) |
4541 | { |
4542 | if (!vect_gather_scatter_fn_p (vinfo: loop_vinfo, DR_IS_READ (dr), masked_p, |
4543 | vectype, memory_type, offset_type: offtype, scale, |
4544 | ifn_out: &ifn, offset_vectype_out: &offset_vectype)) |
4545 | ifn = IFN_LAST; |
4546 | decl = NULL_TREE; |
4547 | } |
4548 | else |
4549 | { |
4550 | if (DR_IS_READ (dr)) |
4551 | { |
4552 | if (targetm.vectorize.builtin_gather) |
4553 | decl = targetm.vectorize.builtin_gather (vectype, offtype, scale); |
4554 | } |
4555 | else |
4556 | { |
4557 | if (targetm.vectorize.builtin_scatter) |
4558 | decl = targetm.vectorize.builtin_scatter (vectype, offtype, scale); |
4559 | } |
4560 | ifn = IFN_LAST; |
4561 | /* The offset vector type will be read from DECL when needed. */ |
4562 | offset_vectype = NULL_TREE; |
4563 | } |
4564 | |
4565 | info->ifn = ifn; |
4566 | info->decl = decl; |
4567 | info->base = base; |
4568 | info->offset = off; |
4569 | info->offset_dt = vect_unknown_def_type; |
4570 | info->offset_vectype = offset_vectype; |
4571 | info->scale = scale; |
4572 | info->element_type = TREE_TYPE (vectype); |
4573 | info->memory_type = memory_type; |
4574 | return true; |
4575 | } |
4576 | |
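/* Illustrative example (hypothetical code, not taken from the GCC
   testsuite) of the decomposition vect_check_gather_scatter performs:
   for the non-affine read in

     void g (double *restrict out, double *data, int *idx, int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = data[idx[i]];
     }

   the access data[idx[i]] would typically be described with BASE being
   the loop-invariant pointer "data", OFF the SSA_NAME holding (a possibly
   widened copy of) idx[i], SCALE sizeof (double) == 8, and element and
   memory type double - i.e. the loop_invariant + vector * {1, 2, 4, 8}
   form required by the gather and scatter builtins.  */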
4577 | /* Find the data references in STMT, analyze them with respect to LOOP and |
4578 | append them to DATAREFS. Return a failure result if the datarefs in this |
4579 | stmt cannot be handled. */ |
4580 | |
4581 | opt_result |
4582 | vect_find_stmt_data_reference (loop_p loop, gimple *stmt, |
4583 | vec<data_reference_p> *datarefs, |
4584 | vec<int> *dataref_groups, int group_id) |
4585 | { |
4586 | /* We can ignore clobbers for dataref analysis - they are removed during |
4587 | loop vectorization and BB vectorization checks dependences with a |
4588 | stmt walk. */ |
4589 | if (gimple_clobber_p (s: stmt)) |
4590 | return opt_result::success (); |
4591 | |
4592 | if (gimple_has_volatile_ops (stmt)) |
4593 | return opt_result::failure_at (loc: stmt, fmt: "not vectorized: volatile type: %G" , |
4594 | stmt); |
4595 | |
4596 | if (stmt_can_throw_internal (cfun, stmt)) |
4597 | return opt_result::failure_at (loc: stmt, |
4598 | fmt: "not vectorized:" |
4599 | " statement can throw an exception: %G" , |
4600 | stmt); |
4601 | |
4602 | auto_vec<data_reference_p, 2> refs; |
4603 | opt_result res = find_data_references_in_stmt (loop, stmt, &refs); |
4604 | if (!res) |
4605 | return res; |
4606 | |
4607 | if (refs.is_empty ()) |
4608 | return opt_result::success (); |
4609 | |
4610 | if (refs.length () > 1) |
4611 | { |
4612 | while (!refs.is_empty ()) |
4613 | free_data_ref (refs.pop ()); |
4614 | return opt_result::failure_at (loc: stmt, |
4615 | fmt: "not vectorized: more than one " |
4616 | "data ref in stmt: %G" , stmt); |
4617 | } |
4618 | |
4619 | data_reference_p dr = refs.pop (); |
4620 | if (gcall *call = dyn_cast <gcall *> (p: stmt)) |
4621 | if (!gimple_call_internal_p (gs: call) |
4622 | || (gimple_call_internal_fn (gs: call) != IFN_MASK_LOAD |
4623 | && gimple_call_internal_fn (gs: call) != IFN_MASK_STORE)) |
4624 | { |
4625 | free_data_ref (dr); |
4626 | return opt_result::failure_at (loc: stmt, |
4627 | fmt: "not vectorized: dr in a call %G" , stmt); |
4628 | } |
4629 | |
4630 | if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF |
4631 | && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1))) |
4632 | { |
4633 | free_data_ref (dr); |
4634 | return opt_result::failure_at (loc: stmt, |
4635 | fmt: "not vectorized:" |
4636 | " statement is an unsupported" |
4637 | " bitfield access %G" , stmt); |
4638 | } |
4639 | |
4640 | if (DR_BASE_ADDRESS (dr) |
4641 | && TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST) |
4642 | { |
4643 | free_data_ref (dr); |
4644 | return opt_result::failure_at (loc: stmt, |
4645 | fmt: "not vectorized:" |
4646 | " base addr of dr is a constant\n" ); |
4647 | } |
4648 | |
4649 | /* Check whether this may be a SIMD lane access and adjust the |
4650 | DR to make it easier for us to handle it. */ |
4651 | if (loop |
4652 | && loop->simduid |
4653 | && (!DR_BASE_ADDRESS (dr) |
4654 | || !DR_OFFSET (dr) |
4655 | || !DR_INIT (dr) |
4656 | || !DR_STEP (dr))) |
4657 | { |
4658 | struct data_reference *newdr |
4659 | = create_data_ref (NULL, loop_containing_stmt (stmt), DR_REF (dr), stmt, |
4660 | DR_IS_READ (dr), DR_IS_CONDITIONAL_IN_STMT (dr)); |
4661 | if (DR_BASE_ADDRESS (newdr) |
4662 | && DR_OFFSET (newdr) |
4663 | && DR_INIT (newdr) |
4664 | && DR_STEP (newdr) |
4665 | && TREE_CODE (DR_INIT (newdr)) == INTEGER_CST |
4666 | && integer_zerop (DR_STEP (newdr))) |
4667 | { |
4668 | tree base_address = DR_BASE_ADDRESS (newdr); |
4669 | tree off = DR_OFFSET (newdr); |
4670 | tree step = ssize_int (1); |
4671 | if (integer_zerop (off) |
4672 | && TREE_CODE (base_address) == POINTER_PLUS_EXPR) |
4673 | { |
4674 | off = TREE_OPERAND (base_address, 1); |
4675 | base_address = TREE_OPERAND (base_address, 0); |
4676 | } |
4677 | STRIP_NOPS (off); |
4678 | if (TREE_CODE (off) == MULT_EXPR |
4679 | && tree_fits_uhwi_p (TREE_OPERAND (off, 1))) |
4680 | { |
4681 | step = TREE_OPERAND (off, 1); |
4682 | off = TREE_OPERAND (off, 0); |
4683 | STRIP_NOPS (off); |
4684 | } |
4685 | if (CONVERT_EXPR_P (off) |
4686 | && (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off, 0))) |
4687 | < TYPE_PRECISION (TREE_TYPE (off)))) |
4688 | off = TREE_OPERAND (off, 0); |
4689 | if (TREE_CODE (off) == SSA_NAME) |
4690 | { |
4691 | gimple *def = SSA_NAME_DEF_STMT (off); |
4692 | /* Look through widening conversion. */ |
4693 | if (is_gimple_assign (gs: def) |
4694 | && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))) |
4695 | { |
4696 | tree rhs1 = gimple_assign_rhs1 (gs: def); |
4697 | if (TREE_CODE (rhs1) == SSA_NAME |
4698 | && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)) |
4699 | && (TYPE_PRECISION (TREE_TYPE (off)) |
4700 | > TYPE_PRECISION (TREE_TYPE (rhs1)))) |
4701 | def = SSA_NAME_DEF_STMT (rhs1); |
4702 | } |
4703 | if (is_gimple_call (gs: def) |
4704 | && gimple_call_internal_p (gs: def) |
4705 | && (gimple_call_internal_fn (gs: def) == IFN_GOMP_SIMD_LANE)) |
4706 | { |
4707 | tree arg = gimple_call_arg (gs: def, index: 0); |
4708 | tree reft = TREE_TYPE (DR_REF (newdr)); |
4709 | gcc_assert (TREE_CODE (arg) == SSA_NAME); |
4710 | arg = SSA_NAME_VAR (arg); |
4711 | if (arg == loop->simduid |
4712 | /* For now. */ |
4713 | && tree_int_cst_equal (TYPE_SIZE_UNIT (reft), step)) |
4714 | { |
4715 | DR_BASE_ADDRESS (newdr) = base_address; |
4716 | DR_OFFSET (newdr) = ssize_int (0); |
4717 | DR_STEP (newdr) = step; |
4718 | DR_OFFSET_ALIGNMENT (newdr) = BIGGEST_ALIGNMENT; |
4719 | DR_STEP_ALIGNMENT (newdr) = highest_pow2_factor (step); |
4720 | /* Mark as simd-lane access. */ |
4721 | tree arg2 = gimple_call_arg (gs: def, index: 1); |
4722 | newdr->aux = (void *) (-1 - tree_to_uhwi (arg2)); |
4723 | free_data_ref (dr); |
4724 | datarefs->safe_push (obj: newdr); |
4725 | if (dataref_groups) |
4726 | dataref_groups->safe_push (obj: group_id); |
4727 | return opt_result::success (); |
4728 | } |
4729 | } |
4730 | } |
4731 | } |
4732 | free_data_ref (newdr); |
4733 | } |
4734 | |
4735 | datarefs->safe_push (obj: dr); |
4736 | if (dataref_groups) |
4737 | dataref_groups->safe_push (obj: group_id); |
4738 | return opt_result::success (); |
4739 | } |
4740 | |
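/* Illustrative example (hypothetical code, not taken from the GCC
   testsuite) of a SIMD-lane access as detected above: privatized
   variables in an OpenMP simd loop are expanded into per-lane arrays
   indexed by the lane number produced by IFN_GOMP_SIMD_LANE, e.g.

     #pragma omp simd
     for (int i = 0; i < n; i++)
       {
         float tmp;        // privatized: becomes a per-lane array element
         tmp = a[i] * b[i];
         c[i] = tmp + 1.0f;
       }

   Such a reference initially fails the standard DR analysis, but once the
   IFN_GOMP_SIMD_LANE pattern is recognized the DR is rewritten with a zero
   offset and a step equal to the element size, so later phases can treat
   it like an ordinary linear access.  */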
4741 | /* Function vect_analyze_data_refs. |
4742 | |
4743 | Find all the data references in the loop or basic block. |
4744 | |
4745 | The general structure of the analysis of data refs in the vectorizer is as |
4746 | follows: |
4747 | 1- vect_analyze_data_refs(loop/bb): call |
4748 | compute_data_dependences_for_loop/bb to find and analyze all data-refs |
4749 | in the loop/bb and their dependences. |
4750 | 2- vect_analyze_dependences(): apply dependence testing using ddrs. |
4751 | 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok. |
4752 | 4- vect_analyze_drs_access(): check that ref_stmt.step is ok. |
4753 | |
4754 | */ |
4755 | |
4756 | opt_result |
4757 | vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf, bool *fatal) |
4758 | { |
4759 | class loop *loop = NULL; |
4760 | unsigned int i; |
4761 | struct data_reference *dr; |
4762 | tree scalar_type; |
4763 | |
4764 | DUMP_VECT_SCOPE ("vect_analyze_data_refs" ); |
4765 | |
4766 | if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo)) |
4767 | loop = LOOP_VINFO_LOOP (loop_vinfo); |
4768 | |
4769 | /* Go through the data-refs, check that the analysis succeeded. Update |
4770 | pointer from stmt_vec_info struct to DR and vectype. */ |
4771 | |
4772 | vec<data_reference_p> datarefs = vinfo->shared->datarefs; |
4773 | FOR_EACH_VEC_ELT (datarefs, i, dr) |
4774 | { |
4775 | enum { SG_NONE, GATHER, SCATTER } gatherscatter = SG_NONE; |
4776 | poly_uint64 vf; |
4777 | |
4778 | gcc_assert (DR_REF (dr)); |
4779 | stmt_vec_info stmt_info = vinfo->lookup_stmt (DR_STMT (dr)); |
4780 | gcc_assert (!stmt_info->dr_aux.dr); |
4781 | stmt_info->dr_aux.dr = dr; |
4782 | stmt_info->dr_aux.stmt = stmt_info; |
4783 | |
4784 | /* Check that analysis of the data-ref succeeded. */ |
4785 | if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr) |
4786 | || !DR_STEP (dr)) |
4787 | { |
4788 | bool maybe_gather |
4789 | = DR_IS_READ (dr) |
4790 | && !TREE_THIS_VOLATILE (DR_REF (dr)); |
4791 | bool maybe_scatter |
4792 | = DR_IS_WRITE (dr) |
4793 | && !TREE_THIS_VOLATILE (DR_REF (dr)); |
4794 | |
4795 | /* If target supports vector gather loads or scatter stores, |
4796 | see if they can be used. */ |
4797 | if (is_a <loop_vec_info> (p: vinfo) |
4798 | && !nested_in_vect_loop_p (loop, stmt_info)) |
4799 | { |
4800 | if (maybe_gather || maybe_scatter) |
4801 | { |
4802 | if (maybe_gather) |
4803 | gatherscatter = GATHER; |
4804 | else |
4805 | gatherscatter = SCATTER; |
4806 | } |
4807 | } |
4808 | |
4809 | if (gatherscatter == SG_NONE) |
4810 | { |
4811 | if (dump_enabled_p ()) |
4812 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
4813 | "not vectorized: data ref analysis " |
4814 | "failed %G" , stmt_info->stmt); |
4815 | if (is_a <bb_vec_info> (p: vinfo)) |
4816 | { |
4817 | /* In BB vectorization the ref can still participate |
4818 | in dependence analysis, we just can't vectorize it. */ |
4819 | STMT_VINFO_VECTORIZABLE (stmt_info) = false; |
4820 | continue; |
4821 | } |
4822 | return opt_result::failure_at (loc: stmt_info->stmt, |
4823 | fmt: "not vectorized:" |
4824 | " data ref analysis failed: %G" , |
4825 | stmt_info->stmt); |
4826 | } |
4827 | } |
4828 | |
4829 | /* See if this was detected as SIMD lane access. */ |
4830 | if (dr->aux == (void *)-1 |
4831 | || dr->aux == (void *)-2 |
4832 | || dr->aux == (void *)-3 |
4833 | || dr->aux == (void *)-4) |
4834 | { |
4835 | if (nested_in_vect_loop_p (loop, stmt_info)) |
4836 | return opt_result::failure_at (loc: stmt_info->stmt, |
4837 | fmt: "not vectorized:" |
4838 | " data ref analysis failed: %G" , |
4839 | stmt_info->stmt); |
4840 | STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) |
4841 | = -(uintptr_t) dr->aux; |
4842 | } |
4843 | |
4844 | tree base = get_base_address (DR_REF (dr)); |
4845 | if (base && VAR_P (base) && DECL_NONALIASED (base)) |
4846 | { |
4847 | if (dump_enabled_p ()) |
4848 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
4849 | "not vectorized: base object not addressable " |
4850 | "for stmt: %G" , stmt_info->stmt); |
4851 | if (is_a <bb_vec_info> (p: vinfo)) |
4852 | { |
4853 | /* In BB vectorization the ref can still participate |
4854 | in dependence analysis, we just can't vectorize it. */ |
4855 | STMT_VINFO_VECTORIZABLE (stmt_info) = false; |
4856 | continue; |
4857 | } |
4858 | return opt_result::failure_at (loc: stmt_info->stmt, |
4859 | fmt: "not vectorized: base object not" |
4860 | " addressable for stmt: %G" , |
4861 | stmt_info->stmt); |
4862 | } |
4863 | |
4864 | if (is_a <loop_vec_info> (p: vinfo) |
4865 | && DR_STEP (dr) |
4866 | && TREE_CODE (DR_STEP (dr)) != INTEGER_CST) |
4867 | { |
4868 | if (nested_in_vect_loop_p (loop, stmt_info)) |
4869 | return opt_result::failure_at (loc: stmt_info->stmt, |
4870 | fmt: "not vectorized: " |
4871 | "not suitable for strided load %G" , |
4872 | stmt_info->stmt); |
4873 | STMT_VINFO_STRIDED_P (stmt_info) = true; |
4874 | } |
4875 | |
4876 | /* Update DR field in stmt_vec_info struct. */ |
4877 | |
4878 | /* If the dataref is in an inner-loop of the loop that is considered for |
4879 | vectorization, we also want to analyze the access relative to |
4880 | the outer-loop (DR contains information only relative to the |
4881 | inner-most enclosing loop). We do that by building a reference to the |
4882 | first location accessed by the inner-loop, and analyze it relative to |
4883 | the outer-loop. */ |
4884 | if (loop && nested_in_vect_loop_p (loop, stmt_info)) |
4885 | { |
4886 | /* Build a reference to the first location accessed by the |
4887 | inner loop: *(BASE + INIT + OFFSET). By construction, |
4888 | this address must be invariant in the inner loop, so we |
4889 | can consider it as being used in the outer loop. */ |
4890 | tree base = unshare_expr (DR_BASE_ADDRESS (dr)); |
4891 | tree offset = unshare_expr (DR_OFFSET (dr)); |
4892 | tree init = unshare_expr (DR_INIT (dr)); |
4893 | tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), |
4894 | init, offset); |
4895 | tree init_addr = fold_build_pointer_plus (base, init_offset); |
4896 | tree init_ref = build_fold_indirect_ref (init_addr); |
4897 | |
4898 | if (dump_enabled_p ()) |
4899 | dump_printf_loc (MSG_NOTE, vect_location, |
4900 | "analyze in outer loop: %T\n" , init_ref); |
4901 | |
4902 | opt_result res |
4903 | = dr_analyze_innermost (&STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info), |
4904 | init_ref, loop, stmt_info->stmt); |
4905 | if (!res) |
4906 | /* dr_analyze_innermost already explained the failure. */ |
4907 | return res; |
4908 | |
4909 | if (dump_enabled_p ()) |
4910 | dump_printf_loc (MSG_NOTE, vect_location, |
4911 | "\touter base_address: %T\n" |
4912 | "\touter offset from base address: %T\n" |
4913 | "\touter constant offset from base address: %T\n" |
4914 | "\touter step: %T\n" |
4915 | "\touter base alignment: %d\n\n" |
4916 | "\touter base misalignment: %d\n" |
4917 | "\touter offset alignment: %d\n" |
4918 | "\touter step alignment: %d\n" , |
4919 | STMT_VINFO_DR_BASE_ADDRESS (stmt_info), |
4920 | STMT_VINFO_DR_OFFSET (stmt_info), |
4921 | STMT_VINFO_DR_INIT (stmt_info), |
4922 | STMT_VINFO_DR_STEP (stmt_info), |
4923 | STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info), |
4924 | STMT_VINFO_DR_BASE_MISALIGNMENT (stmt_info), |
4925 | STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info), |
4926 | STMT_VINFO_DR_STEP_ALIGNMENT (stmt_info)); |
4927 | } |
4928 | |
4929 | /* Set vectype for STMT. */ |
4930 | scalar_type = TREE_TYPE (DR_REF (dr)); |
4931 | tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type); |
4932 | if (!vectype) |
4933 | { |
4934 | if (dump_enabled_p ()) |
4935 | { |
4936 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
4937 | "not vectorized: no vectype for stmt: %G" , |
4938 | stmt_info->stmt); |
4939 | dump_printf (MSG_MISSED_OPTIMIZATION, " scalar_type: " ); |
4940 | dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_DETAILS, |
4941 | scalar_type); |
4942 | dump_printf (MSG_MISSED_OPTIMIZATION, "\n" ); |
4943 | } |
4944 | |
4945 | if (is_a <bb_vec_info> (p: vinfo)) |
4946 | { |
4947 | /* No vector type is fine, the ref can still participate |
4948 | in dependence analysis, we just can't vectorize it. */ |
4949 | STMT_VINFO_VECTORIZABLE (stmt_info) = false; |
4950 | continue; |
4951 | } |
4952 | if (fatal) |
4953 | *fatal = false; |
4954 | return opt_result::failure_at (loc: stmt_info->stmt, |
4955 | fmt: "not vectorized:" |
4956 | " no vectype for stmt: %G" |
4957 | " scalar_type: %T\n" , |
4958 | stmt_info->stmt, scalar_type); |
4959 | } |
4960 | else |
4961 | { |
4962 | if (dump_enabled_p ()) |
4963 | dump_printf_loc (MSG_NOTE, vect_location, |
4964 | "got vectype for stmt: %G%T\n" , |
4965 | stmt_info->stmt, vectype); |
4966 | } |
4967 | |
4968 | /* Adjust the minimal vectorization factor according to the |
4969 | vector type. */ |
4970 | vf = TYPE_VECTOR_SUBPARTS (node: vectype); |
4971 | *min_vf = upper_bound (a: *min_vf, b: vf); |
4972 | |
4973 | /* Leave the BB vectorizer to pick the vector type later, based on |
4974 | the final dataref group size and SLP node size. */ |
4975 | if (is_a <loop_vec_info> (p: vinfo)) |
4976 | STMT_VINFO_VECTYPE (stmt_info) = vectype; |
4977 | |
4978 | if (gatherscatter != SG_NONE) |
4979 | { |
4980 | gather_scatter_info gs_info; |
4981 | if (!vect_check_gather_scatter (stmt_info, |
4982 | loop_vinfo: as_a <loop_vec_info> (p: vinfo), |
4983 | info: &gs_info) |
4984 | || !get_vectype_for_scalar_type (vinfo, |
4985 | TREE_TYPE (gs_info.offset))) |
4986 | { |
4987 | if (fatal) |
4988 | *fatal = false; |
4989 | return opt_result::failure_at |
4990 | (loc: stmt_info->stmt, |
4991 | fmt: (gatherscatter == GATHER) |
4992 | ? "not vectorized: not suitable for gather load %G" |
4993 | : "not vectorized: not suitable for scatter store %G" , |
4994 | stmt_info->stmt); |
4995 | } |
4996 | STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter; |
4997 | } |
4998 | } |
4999 | |
5000 | /* We used to stop processing and prune the list here. Verify we no |
5001 | longer need to. */ |
5002 | gcc_assert (i == datarefs.length ()); |
5003 | |
5004 | return opt_result::success (); |
5005 | } |
5006 | |
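/* Illustrative classification (hypothetical code, not taken from the GCC
   testsuite) of the accesses handled above, assuming s is loop invariant
   but not a compile-time constant:

     for (int i = 0; i < n; i++)
       {
         x += a[i];      // affine access, analyzed as a normal DR
         y += a[i * s];  // non-constant DR_STEP -> STMT_VINFO_STRIDED_P
         z += a[b[i]];   // DR analysis fails -> GATHER (or SCATTER when
                         // written), if vect_check_gather_scatter agrees
       }
*/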
5007 | |
5008 | /* Function vect_get_new_vect_var. |
5009 | |
5010 | Returns a new variable of type TYPE. The current naming scheme uses a |
5011 | prefix that depends on VAR_KIND ("vect", "vectp", "stmp" or "mask") |
5012 | for vectorizer-generated variables, and appends "_" followed by NAME |
5013 | to that prefix if NAME is provided. */ |
5014 | |
5015 | tree |
5016 | vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name) |
5017 | { |
5018 | const char *prefix; |
5019 | tree new_vect_var; |
5020 | |
5021 | switch (var_kind) |
5022 | { |
5023 | case vect_simple_var: |
5024 | prefix = "vect" ; |
5025 | break; |
5026 | case vect_scalar_var: |
5027 | prefix = "stmp" ; |
5028 | break; |
5029 | case vect_mask_var: |
5030 | prefix = "mask" ; |
5031 | break; |
5032 | case vect_pointer_var: |
5033 | prefix = "vectp" ; |
5034 | break; |
5035 | default: |
5036 | gcc_unreachable (); |
5037 | } |
5038 | |
5039 | if (name) |
5040 | { |
5041 | char* tmp = concat (prefix, "_" , name, NULL); |
5042 | new_vect_var = create_tmp_reg (type, tmp); |
5043 | free (ptr: tmp); |
5044 | } |
5045 | else |
5046 | new_vect_var = create_tmp_reg (type, prefix); |
5047 | |
5048 | return new_vect_var; |
5049 | } |
5050 | |
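/* For illustration only (the names and numeric suffixes below are made up;
   the real ones are assigned by the temporary and SSA machinery): for a
   load from an array "in", the -fdump-tree-vect dumps would typically show
   variables along the lines of

     vectp_in.7    // vect_pointer_var: pointer to the vector type
     vect__4.12    // vect_simple_var:  a vector temporary
     stmp_sum.3    // vect_scalar_var:  a scalar temporary
     mask__5.9     // vect_mask_var:    a vector mask temporary

   i.e. the VAR_KIND prefix, "_", and the scalar name when one exists.  */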
5051 | /* Like vect_get_new_vect_var but return an SSA name. */ |
5052 | |
5053 | tree |
5054 | vect_get_new_ssa_name (tree type, enum vect_var_kind var_kind, const char *name) |
5055 | { |
5056 | const char *prefix; |
5057 | tree new_vect_var; |
5058 | |
5059 | switch (var_kind) |
5060 | { |
5061 | case vect_simple_var: |
5062 | prefix = "vect" ; |
5063 | break; |
5064 | case vect_scalar_var: |
5065 | prefix = "stmp" ; |
5066 | break; |
5067 | case vect_pointer_var: |
5068 | prefix = "vectp" ; |
5069 | break; |
5070 | default: |
5071 | gcc_unreachable (); |
5072 | } |
5073 | |
5074 | if (name) |
5075 | { |
5076 | char* tmp = concat (prefix, "_" , name, NULL); |
5077 | new_vect_var = make_temp_ssa_name (type, NULL, name: tmp); |
5078 | free (ptr: tmp); |
5079 | } |
5080 | else |
5081 | new_vect_var = make_temp_ssa_name (type, NULL, name: prefix); |
5082 | |
5083 | return new_vect_var; |
5084 | } |
5085 | |
5086 | /* Duplicate points-to info on NAME from DR_INFO. */ |
5087 | |
5088 | static void |
5089 | vect_duplicate_ssa_name_ptr_info (tree name, dr_vec_info *dr_info) |
5090 | { |
5091 | duplicate_ssa_name_ptr_info (name, DR_PTR_INFO (dr_info->dr)); |
5092 | /* DR_PTR_INFO is for a base SSA name, not including constant or |
5093 | variable offsets in the ref so its alignment info does not apply. */ |
5094 | mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (name)); |
5095 | } |
5096 | |
5097 | /* Function vect_create_addr_base_for_vector_ref. |
5098 | |
5099 | Create an expression that computes the address of the first memory location |
5100 | that will be accessed for a data reference. |
5101 | |
5102 | Input: |
5103 | STMT_INFO: The statement containing the data reference. |
5104 | NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list. |
5105 | OFFSET: Optional. If supplied, it is added to the initial address. |
5106 | LOOP: Specify relative to which loop-nest should the address be computed. |
5107 | For example, when the dataref is in an inner-loop nested in an |
5108 | outer-loop that is now being vectorized, LOOP can be either the |
5109 | outer-loop, or the inner-loop. The first memory location accessed |
5110 | by the following dataref ('in' points to short): |
5111 | |
5112 | for (i=0; i<N; i++) |
5113 | for (j=0; j<M; j++) |
5114 | s += in[i+j] |
5115 | |
5116 | is as follows: |
5117 | if LOOP=i_loop: &in (relative to i_loop) |
5118 | if LOOP=j_loop: &in+i*2B (relative to j_loop) |
5119 | |
5120 | Output: |
5121 | 1. Return an SSA_NAME whose value is the address of the memory location of |
5122 | the first vector of the data reference. |
5123 | 2. If new_stmt_list is not NULL_TREE after return then the caller must insert |
5124 | these statement(s) which define the returned SSA_NAME. |
5125 | |
5126 | FORNOW: We are only handling array accesses with step 1. */ |
5127 | |
5128 | tree |
5129 | vect_create_addr_base_for_vector_ref (vec_info *vinfo, stmt_vec_info stmt_info, |
5130 | gimple_seq *new_stmt_list, |
5131 | tree offset) |
5132 | { |
5133 | dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); |
5134 | struct data_reference *dr = dr_info->dr; |
5135 | const char *base_name; |
5136 | tree addr_base; |
5137 | tree dest; |
5138 | gimple_seq seq = NULL; |
5139 | tree vect_ptr_type; |
5140 | loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo); |
5141 | innermost_loop_behavior *drb = vect_dr_behavior (vinfo, dr_info); |
5142 | |
5143 | tree data_ref_base = unshare_expr (drb->base_address); |
5144 | tree base_offset = unshare_expr (get_dr_vinfo_offset (vinfo, dr_info, check_outer: true)); |
5145 | tree init = unshare_expr (drb->init); |
5146 | |
5147 | if (loop_vinfo) |
5148 | base_name = get_name (data_ref_base); |
5149 | else |
5150 | { |
5151 | base_offset = ssize_int (0); |
5152 | init = ssize_int (0); |
5153 | base_name = get_name (DR_REF (dr)); |
5154 | } |
5155 | |
5156 | /* Create base_offset */ |
5157 | base_offset = size_binop (PLUS_EXPR, |
5158 | fold_convert (sizetype, base_offset), |
5159 | fold_convert (sizetype, init)); |
5160 | |
5161 | if (offset) |
5162 | { |
5163 | offset = fold_convert (sizetype, offset); |
5164 | base_offset = fold_build2 (PLUS_EXPR, sizetype, |
5165 | base_offset, offset); |
5166 | } |
5167 | |
5168 | /* base + base_offset */ |
5169 | if (loop_vinfo) |
5170 | addr_base = fold_build_pointer_plus (data_ref_base, base_offset); |
5171 | else |
5172 | addr_base = build1 (ADDR_EXPR, |
5173 | build_pointer_type (TREE_TYPE (DR_REF (dr))), |
5174 | /* Strip zero offset components since we don't need |
5175 | them and they can confuse late diagnostics if |
5176 | we CSE them wrongly. See PR106904 for example. */ |
5177 | unshare_expr (strip_zero_offset_components |
5178 | (DR_REF (dr)))); |
5179 | |
5180 | vect_ptr_type = build_pointer_type (TREE_TYPE (DR_REF (dr))); |
5181 | dest = vect_get_new_vect_var (type: vect_ptr_type, var_kind: vect_pointer_var, name: base_name); |
5182 | addr_base = force_gimple_operand (addr_base, &seq, true, dest); |
5183 | gimple_seq_add_seq (new_stmt_list, seq); |
5184 | |
5185 | if (DR_PTR_INFO (dr) |
5186 | && TREE_CODE (addr_base) == SSA_NAME |
5187 | /* We should only duplicate pointer info to newly created SSA names. */ |
5188 | && SSA_NAME_VAR (addr_base) == dest) |
5189 | { |
5190 | gcc_assert (!SSA_NAME_PTR_INFO (addr_base)); |
5191 | vect_duplicate_ssa_name_ptr_info (name: addr_base, dr_info); |
5192 | } |
5193 | |
5194 | if (dump_enabled_p ()) |
5195 | dump_printf_loc (MSG_NOTE, vect_location, "created %T\n" , addr_base); |
5196 | |
5197 | return addr_base; |
5198 | } |
5199 | |
5200 | |
5201 | /* Function vect_create_data_ref_ptr. |
5202 | |
5203 | Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first |
5204 | location accessed in the loop by STMT_INFO, along with the def-use update |
5205 | chain to appropriately advance the pointer through the loop iterations. |
5206 | Also set aliasing information for the pointer. This pointer is used by |
5207 | the callers to this function to create a memory reference expression for |
5208 | vector load/store access. |
5209 | |
5210 | Input: |
5211 | 1. STMT_INFO: a stmt that references memory. Expected to be of the form |
5212 | GIMPLE_ASSIGN <name, data-ref> or |
5213 | GIMPLE_ASSIGN <data-ref, name>. |
5214 | 2. AGGR_TYPE: the type of the reference, which should be either a vector |
5215 | or an array. |
5216 | 3. AT_LOOP: the loop where the vector memref is to be created. |
5217 | 4. OFFSET (optional): a byte offset to be added to the initial address |
5218 | accessed by the data-ref in STMT_INFO. |
5219 | 5. BSI: location where the new stmts are to be placed if there is no loop |
5220 | 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain |
5221 | pointing to the initial address. |
5222 | 7. IV_STEP (optional, defaults to NULL): the amount that should be added |
5223 | to the IV during each iteration of the loop. NULL says to move |
5224 | by one copy of AGGR_TYPE up or down, depending on the step of the |
5225 | data reference. |
5226 | |
5227 | Output: |
5228 | 1. Declare a new ptr to vector_type, and have it point to the base of the |
5229 | data reference (initial address accessed by the data reference). |
5230 | For example, for vector of type V8HI, the following code is generated: |
5231 | |
5232 | v8hi *ap; |
5233 | ap = (v8hi *)initial_address; |
5234 | |
5235 | if OFFSET is not supplied: |
5236 | initial_address = &a[init]; |
5237 | if OFFSET is supplied: |
5238 | initial_address = &a[init] + OFFSET; |
5241 | |
5242 | Return the initial_address in INITIAL_ADDRESS. |
5243 | |
5244 | 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also |
5245 | update the pointer in each iteration of the loop. |
5246 | |
5247 | Return the increment stmt that updates the pointer in PTR_INCR. |
5248 | |
5249 | 3. Return the pointer. */ |
5250 | |
5251 | tree |
5252 | vect_create_data_ref_ptr (vec_info *vinfo, stmt_vec_info stmt_info, |
5253 | tree aggr_type, class loop *at_loop, tree offset, |
5254 | tree *initial_address, gimple_stmt_iterator *gsi, |
5255 | gimple **ptr_incr, bool only_init, |
5256 | tree iv_step) |
5257 | { |
5258 | const char *base_name; |
5259 | loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo); |
5260 | class loop *loop = NULL; |
5261 | bool nested_in_vect_loop = false; |
5262 | class loop *containing_loop = NULL; |
5263 | tree aggr_ptr_type; |
5264 | tree aggr_ptr; |
5265 | tree new_temp; |
5266 | gimple_seq new_stmt_list = NULL; |
5267 | edge pe = NULL; |
5268 | basic_block new_bb; |
5269 | tree aggr_ptr_init; |
5270 | dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); |
5271 | struct data_reference *dr = dr_info->dr; |
5272 | tree aptr; |
5273 | gimple_stmt_iterator incr_gsi; |
5274 | bool insert_after; |
5275 | tree indx_before_incr, indx_after_incr; |
5276 | gimple *incr; |
5277 | bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (p: vinfo); |
5278 | |
5279 | gcc_assert (iv_step != NULL_TREE |
5280 | || TREE_CODE (aggr_type) == ARRAY_TYPE |
5281 | || TREE_CODE (aggr_type) == VECTOR_TYPE); |
5282 | |
5283 | if (loop_vinfo) |
5284 | { |
5285 | loop = LOOP_VINFO_LOOP (loop_vinfo); |
5286 | nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info); |
5287 | containing_loop = (gimple_bb (g: stmt_info->stmt))->loop_father; |
5288 | pe = loop_preheader_edge (loop); |
5289 | } |
5290 | else |
5291 | { |
5292 | gcc_assert (bb_vinfo); |
5293 | only_init = true; |
5294 | *ptr_incr = NULL; |
5295 | } |
5296 | |
5297 | /* Create an expression for the first address accessed by this load |
5298 | in LOOP. */ |
5299 | base_name = get_name (DR_BASE_ADDRESS (dr)); |
5300 | |
5301 | if (dump_enabled_p ()) |
5302 | { |
5303 | tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr)); |
5304 | dump_printf_loc (MSG_NOTE, vect_location, |
5305 | "create %s-pointer variable to type: %T" , |
5306 | get_tree_code_name (TREE_CODE (aggr_type)), |
5307 | aggr_type); |
5308 | if (TREE_CODE (dr_base_type) == ARRAY_TYPE) |
5309 | dump_printf (MSG_NOTE, " vectorizing an array ref: " ); |
5310 | else if (TREE_CODE (dr_base_type) == VECTOR_TYPE) |
5311 | dump_printf (MSG_NOTE, " vectorizing a vector ref: " ); |
5312 | else if (TREE_CODE (dr_base_type) == RECORD_TYPE) |
5313 | dump_printf (MSG_NOTE, " vectorizing a record based array ref: " ); |
5314 | else |
5315 | dump_printf (MSG_NOTE, " vectorizing a pointer ref: " ); |
5316 | dump_printf (MSG_NOTE, "%T\n" , DR_BASE_OBJECT (dr)); |
5317 | } |
5318 | |
5319 | /* (1) Create the new aggregate-pointer variable. |
5320 | Vector and array types inherit the alias set of their component |
5321 | type by default so we need to use a ref-all pointer if the data |
5322 | reference does not conflict with the created aggregated data |
5323 | reference because it is not addressable. */ |
5324 | bool need_ref_all = false; |
5325 | if (!alias_sets_conflict_p (get_alias_set (aggr_type), |
5326 | get_alias_set (DR_REF (dr)))) |
5327 | need_ref_all = true; |
5328 | /* Likewise for any of the data references in the stmt group. */ |
5329 | else if (DR_GROUP_SIZE (stmt_info) > 1) |
5330 | { |
5331 | stmt_vec_info sinfo = DR_GROUP_FIRST_ELEMENT (stmt_info); |
5332 | do |
5333 | { |
5334 | struct data_reference *sdr = STMT_VINFO_DATA_REF (sinfo); |
5335 | if (!alias_sets_conflict_p (get_alias_set (aggr_type), |
5336 | get_alias_set (DR_REF (sdr)))) |
5337 | { |
5338 | need_ref_all = true; |
5339 | break; |
5340 | } |
5341 | sinfo = DR_GROUP_NEXT_ELEMENT (sinfo); |
5342 | } |
5343 | while (sinfo); |
5344 | } |
5345 | aggr_ptr_type = build_pointer_type_for_mode (aggr_type, VOIDmode, |
5346 | need_ref_all); |
5347 | aggr_ptr = vect_get_new_vect_var (type: aggr_ptr_type, var_kind: vect_pointer_var, name: base_name); |
5348 | |
5349 | |
5350 | /* Note: If the dataref is in an inner-loop nested in LOOP, and we are |
5351 | vectorizing LOOP (i.e., outer-loop vectorization), we need to create two |
5352 | def-use update cycles for the pointer: one relative to the outer-loop |
5353 | (LOOP), which is what steps (3) and (4) below do. The other is relative |
5354 | to the inner-loop (which is the inner-most loop containing the dataref), |
5355 | and this is done by step (5) below. |
5356 | |
5357 | When vectorizing inner-most loops, the vectorized loop (LOOP) is also the |
5358 | inner-most loop, and so steps (3),(4) work the same, and step (5) is |
5359 | redundant. Steps (3),(4) create the following: |
5360 | |
5361 | vp0 = &base_addr; |
5362 | LOOP: vp1 = phi(vp0,vp2) |
5363 | ... |
5364 | ... |
5365 | vp2 = vp1 + step |
5366 | goto LOOP |
5367 | |
5368 | If there is an inner-loop nested in loop, then step (5) will also be |
5369 | applied, and an additional update in the inner-loop will be created: |
5370 | |
5371 | vp0 = &base_addr; |
5372 | LOOP: vp1 = phi(vp0,vp2) |
5373 | ... |
5374 | inner: vp3 = phi(vp1,vp4) |
5375 | vp4 = vp3 + inner_step |
5376 | if () goto inner |
5377 | ... |
5378 | vp2 = vp1 + step |
5379 | if () goto LOOP */ |
5380 | |
5381 | /* (2) Calculate the initial address of the aggregate-pointer, and set |
5382 | the aggregate-pointer to point to it before the loop. */ |
5383 | |
5384 | /* Create: (&(base[init_val]+offset) in the loop preheader. */ |
5385 | |
5386 | new_temp = vect_create_addr_base_for_vector_ref (vinfo, |
5387 | stmt_info, new_stmt_list: &new_stmt_list, |
5388 | offset); |
5389 | if (new_stmt_list) |
5390 | { |
5391 | if (pe) |
5392 | { |
5393 | new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list); |
5394 | gcc_assert (!new_bb); |
5395 | } |
5396 | else |
5397 | gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT); |
5398 | } |
5399 | |
5400 | *initial_address = new_temp; |
5401 | aggr_ptr_init = new_temp; |
5402 | |
5403 | /* (3) Handle the updating of the aggregate-pointer inside the loop. |
5404 | This is needed when ONLY_INIT is false, and also when AT_LOOP is the |
5405 | inner-loop nested in LOOP (during outer-loop vectorization). */ |
5406 | |
5407 | /* No update in loop is required. */ |
5408 | if (only_init && (!loop_vinfo || at_loop == loop)) |
5409 | aptr = aggr_ptr_init; |
5410 | else |
5411 | { |
5412 | /* Accesses to invariant addresses should be handled specially |
5413 | by the caller. */ |
5414 | tree step = vect_dr_behavior (vinfo, dr_info)->step; |
5415 | gcc_assert (!integer_zerop (step)); |
5416 | |
5417 | if (iv_step == NULL_TREE) |
5418 | { |
5419 | /* The step of the aggregate pointer is the type size, |
5420 | negated for downward accesses. */ |
5421 | iv_step = TYPE_SIZE_UNIT (aggr_type); |
5422 | if (tree_int_cst_sgn (step) == -1) |
5423 | iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step); |
5424 | } |
5425 | |
5426 | standard_iv_increment_position (loop, &incr_gsi, &insert_after); |
5427 | |
5428 | create_iv (aggr_ptr_init, PLUS_EXPR, |
5429 | fold_convert (aggr_ptr_type, iv_step), |
5430 | aggr_ptr, loop, &incr_gsi, insert_after, |
5431 | &indx_before_incr, &indx_after_incr); |
5432 | incr = gsi_stmt (i: incr_gsi); |
5433 | |
5434 | /* Copy the points-to information if it exists. */ |
5435 | if (DR_PTR_INFO (dr)) |
5436 | { |
5437 | vect_duplicate_ssa_name_ptr_info (name: indx_before_incr, dr_info); |
5438 | vect_duplicate_ssa_name_ptr_info (name: indx_after_incr, dr_info); |
5439 | } |
5440 | if (ptr_incr) |
5441 | *ptr_incr = incr; |
5442 | |
5443 | aptr = indx_before_incr; |
5444 | } |
5445 | |
5446 | if (!nested_in_vect_loop || only_init) |
5447 | return aptr; |
5448 | |
5449 | |
5450 | /* (4) Handle the updating of the aggregate-pointer inside the inner-loop |
5451 | nested in LOOP, if it exists. */ |
5452 | |
5453 | gcc_assert (nested_in_vect_loop); |
5454 | if (!only_init) |
5455 | { |
5456 | standard_iv_increment_position (containing_loop, &incr_gsi, |
5457 | &insert_after); |
5458 | create_iv (aptr, PLUS_EXPR, fold_convert (aggr_ptr_type, DR_STEP (dr)), |
5459 | aggr_ptr, containing_loop, &incr_gsi, insert_after, |
5460 | &indx_before_incr, &indx_after_incr); |
5461 | incr = gsi_stmt (i: incr_gsi); |
5462 | |
5463 | /* Copy the points-to information if it exists. */ |
5464 | if (DR_PTR_INFO (dr)) |
5465 | { |
5466 | vect_duplicate_ssa_name_ptr_info (name: indx_before_incr, dr_info); |
5467 | vect_duplicate_ssa_name_ptr_info (name: indx_after_incr, dr_info); |
5468 | } |
5469 | if (ptr_incr) |
5470 | *ptr_incr = incr; |
5471 | |
5472 | return indx_before_incr; |
5473 | } |
5474 | else |
5475 | gcc_unreachable (); |
5476 | } |
5477 | |
5478 | |
5479 | /* Function bump_vector_ptr |
5480 | |
5481 | Increment a pointer (to a vector type) by vector-size. If requested, |
5482 | i.e. if PTR_INCR is given, then also connect the new increment stmt |
5483 | to the existing def-use update-chain of the pointer, by modifying |
5484 | the PTR_INCR as illustrated below: |
5485 | |
5486 | The pointer def-use update-chain before this function: |
5487 | DATAREF_PTR = phi (p_0, p_2) |
5488 | .... |
5489 | PTR_INCR: p_2 = DATAREF_PTR + step |
5490 | |
5491 | The pointer def-use update-chain after this function: |
5492 | DATAREF_PTR = phi (p_0, p_2) |
5493 | .... |
5494 | NEW_DATAREF_PTR = DATAREF_PTR + BUMP |
5495 | .... |
5496 | PTR_INCR: p_2 = NEW_DATAREF_PTR + step |
5497 | |
5498 | Input: |
5499 | DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated |
5500 | in the loop. |
5501 | PTR_INCR - optional. The stmt that updates the pointer in each iteration of |
5502 | the loop. The increment amount across iterations is expected |
5503 | to be vector_size. |
5504 | BSI - location where the new update stmt is to be placed. |
5505 | STMT_INFO - the original scalar memory-access stmt that is being vectorized. |
5506 | BUMP - optional. The offset by which to bump the pointer. If not given, |
5507 | the offset is assumed to be vector_size. |
5508 | |
5509 | Output: Return NEW_DATAREF_PTR as illustrated above. |
5510 | |
5511 | */ |
5512 | |
5513 | tree |
5514 | bump_vector_ptr (vec_info *vinfo, |
5515 | tree dataref_ptr, gimple *ptr_incr, gimple_stmt_iterator *gsi, |
5516 | stmt_vec_info stmt_info, tree bump) |
5517 | { |
5518 | struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); |
5519 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
5520 | tree update = TYPE_SIZE_UNIT (vectype); |
5521 | gimple *incr_stmt; |
5522 | ssa_op_iter iter; |
5523 | use_operand_p use_p; |
5524 | tree new_dataref_ptr; |
5525 | |
5526 | if (bump) |
5527 | update = bump; |
5528 | |
5529 | if (TREE_CODE (dataref_ptr) == SSA_NAME) |
5530 | new_dataref_ptr = copy_ssa_name (var: dataref_ptr); |
5531 | else if (is_gimple_min_invariant (dataref_ptr)) |
5532 | /* When possible avoid emitting a separate increment stmt that will |
5533 | force the addressed object to become addressable. */ |
5534 | return build1 (ADDR_EXPR, TREE_TYPE (dataref_ptr), |
5535 | fold_build2 (MEM_REF, |
5536 | TREE_TYPE (TREE_TYPE (dataref_ptr)), |
5537 | dataref_ptr, |
5538 | fold_convert (ptr_type_node, update))); |
5539 | else |
5540 | new_dataref_ptr = make_ssa_name (TREE_TYPE (dataref_ptr)); |
5541 | incr_stmt = gimple_build_assign (new_dataref_ptr, POINTER_PLUS_EXPR, |
5542 | dataref_ptr, update); |
5543 | vect_finish_stmt_generation (vinfo, stmt_info, incr_stmt, gsi); |
5544 | /* Fold the increment to avoid building excessive use-def chains, which |
5545 | lead to compile-time issues for passes run before the next forwprop |
5546 | pass, which would otherwise perform this folding as well. */ |
5547 | gimple_stmt_iterator fold_gsi = gsi_for_stmt (incr_stmt); |
5548 | if (fold_stmt (&fold_gsi, follow_all_ssa_edges)) |
5549 | { |
5550 | incr_stmt = gsi_stmt (i: fold_gsi); |
5551 | update_stmt (s: incr_stmt); |
5552 | } |
5553 | |
5554 | /* Copy the points-to information if it exists. */ |
5555 | if (DR_PTR_INFO (dr)) |
5556 | { |
5557 | duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr)); |
5558 | mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr)); |
5559 | } |
5560 | |
5561 | if (!ptr_incr) |
5562 | return new_dataref_ptr; |
5563 | |
5564 | /* Update the vector-pointer's cross-iteration increment. */ |
5565 | FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE) |
5566 | { |
5567 | tree use = USE_FROM_PTR (use_p); |
5568 | |
5569 | if (use == dataref_ptr) |
5570 | SET_USE (use_p, new_dataref_ptr); |
5571 | else |
5572 | gcc_assert (operand_equal_p (use, update, 0)); |
5573 | } |
5574 | |
5575 | return new_dataref_ptr; |
5576 | } |
5577 | |
5578 | |
5579 | /* Copy memory reference info such as base/clique from the SRC reference |
5580 | to the DEST MEM_REF. */ |
5581 | |
5582 | void |
5583 | vect_copy_ref_info (tree dest, tree src) |
5584 | { |
5585 | if (TREE_CODE (dest) != MEM_REF) |
5586 | return; |
5587 | |
5588 | tree src_base = src; |
5589 | while (handled_component_p (t: src_base)) |
5590 | src_base = TREE_OPERAND (src_base, 0); |
5591 | if (TREE_CODE (src_base) != MEM_REF |
5592 | && TREE_CODE (src_base) != TARGET_MEM_REF) |
5593 | return; |
5594 | |
5595 | MR_DEPENDENCE_CLIQUE (dest) = MR_DEPENDENCE_CLIQUE (src_base); |
5596 | MR_DEPENDENCE_BASE (dest) = MR_DEPENDENCE_BASE (src_base); |
5597 | } |
5598 | |
5599 | |
5600 | /* Function vect_create_destination_var. |
5601 | |
5602 | Create a new temporary of type VECTYPE. */ |
5603 | |
5604 | tree |
5605 | vect_create_destination_var (tree scalar_dest, tree vectype) |
5606 | { |
5607 | tree vec_dest; |
5608 | const char *name; |
5609 | char *new_name; |
5610 | tree type; |
5611 | enum vect_var_kind kind; |
5612 | |
5613 | kind = vectype |
5614 | ? VECTOR_BOOLEAN_TYPE_P (vectype) |
5615 | ? vect_mask_var |
5616 | : vect_simple_var |
5617 | : vect_scalar_var; |
5618 | type = vectype ? vectype : TREE_TYPE (scalar_dest); |
5619 | |
5620 | gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME); |
5621 | |
5622 | name = get_name (scalar_dest); |
5623 | if (name) |
5624 | new_name = xasprintf ("%s_%u" , name, SSA_NAME_VERSION (scalar_dest)); |
5625 | else |
5626 | new_name = xasprintf ("_%u" , SSA_NAME_VERSION (scalar_dest)); |
5627 | vec_dest = vect_get_new_vect_var (type, var_kind: kind, name: new_name); |
5628 | free (ptr: new_name); |
5629 | |
5630 | return vec_dest; |
5631 | } |
5632 | |
5633 | /* Function vect_grouped_store_supported. |
5634 | |
5635 | Returns TRUE if interleave high and interleave low permutations |
5636 | are supported, and FALSE otherwise. */ |
5637 | |
5638 | bool |
5639 | vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count) |
5640 | { |
5641 | machine_mode mode = TYPE_MODE (vectype); |
5642 | |
5643 | /* vect_permute_store_chain requires the group size to be equal to 3 or |
5644 | be a power of two. */ |
5645 | if (count != 3 && exact_log2 (x: count) == -1) |
5646 | { |
5647 | if (dump_enabled_p ()) |
5648 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
5649 | "the size of the group of accesses" |
5650 | " is not a power of 2 or not equal to 3\n" ); |
5651 | return false; |
5652 | } |
5653 | |
5654 | /* Check that the permutation is supported. */ |
5655 | if (VECTOR_MODE_P (mode)) |
5656 | { |
5657 | unsigned int i; |
5658 | if (count == 3) |
5659 | { |
5660 | unsigned int j0 = 0, j1 = 0, j2 = 0; |
5661 | unsigned int i, j; |
5662 | |
5663 | unsigned int nelt; |
5664 | if (!GET_MODE_NUNITS (mode).is_constant (const_value: &nelt)) |
5665 | { |
5666 | if (dump_enabled_p ()) |
5667 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
5668 | "cannot handle groups of 3 stores for" |
5669 | " variable-length vectors\n" ); |
5670 | return false; |
5671 | } |
5672 | |
5673 | vec_perm_builder sel (nelt, nelt, 1); |
5674 | sel.quick_grow (len: nelt); |
5675 | vec_perm_indices indices; |
5676 | for (j = 0; j < 3; j++) |
5677 | { |
5678 | int nelt0 = ((3 - j) * nelt) % 3; |
5679 | int nelt1 = ((3 - j) * nelt + 1) % 3; |
5680 | int nelt2 = ((3 - j) * nelt + 2) % 3; |
5681 | for (i = 0; i < nelt; i++) |
5682 | { |
5683 | if (3 * i + nelt0 < nelt) |
5684 | sel[3 * i + nelt0] = j0++; |
5685 | if (3 * i + nelt1 < nelt) |
5686 | sel[3 * i + nelt1] = nelt + j1++; |
5687 | if (3 * i + nelt2 < nelt) |
5688 | sel[3 * i + nelt2] = 0; |
5689 | } |
5690 | indices.new_vector (sel, 2, nelt); |
5691 | if (!can_vec_perm_const_p (mode, mode, indices)) |
5692 | { |
5693 | if (dump_enabled_p ()) |
5694 | dump_printf (MSG_MISSED_OPTIMIZATION, |
5695 | "permutation op not supported by target.\n" ); |
5696 | return false; |
5697 | } |
5698 | |
5699 | for (i = 0; i < nelt; i++) |
5700 | { |
5701 | if (3 * i + nelt0 < nelt) |
5702 | sel[3 * i + nelt0] = 3 * i + nelt0; |
5703 | if (3 * i + nelt1 < nelt) |
5704 | sel[3 * i + nelt1] = 3 * i + nelt1; |
5705 | if (3 * i + nelt2 < nelt) |
5706 | sel[3 * i + nelt2] = nelt + j2++; |
5707 | } |
5708 | indices.new_vector (sel, 2, nelt); |
5709 | if (!can_vec_perm_const_p (mode, mode, indices)) |
5710 | { |
5711 | if (dump_enabled_p ()) |
5712 | dump_printf (MSG_MISSED_OPTIMIZATION, |
5713 | "permutation op not supported by target.\n" ); |
5714 | return false; |
5715 | } |
5716 | } |
5717 | return true; |
5718 | } |
5719 | else |
5720 | { |
5721 | /* If the length is not equal to 3 then only a power of 2 is supported. */ |
5722 | gcc_assert (pow2p_hwi (count)); |
5723 | poly_uint64 nelt = GET_MODE_NUNITS (mode); |
5724 | |
5725 | /* The encoding has 2 interleaved stepped patterns. */ |
5726 | if(!multiple_p (a: nelt, b: 2)) |
5727 | return false; |
5728 | vec_perm_builder sel (nelt, 2, 3); |
5729 | sel.quick_grow (len: 6); |
5730 | for (i = 0; i < 3; i++) |
5731 | { |
5732 | sel[i * 2] = i; |
5733 | sel[i * 2 + 1] = i + nelt; |
5734 | } |
5735 | vec_perm_indices indices (sel, 2, nelt); |
5736 | if (can_vec_perm_const_p (mode, mode, indices)) |
5737 | { |
5738 | for (i = 0; i < 6; i++) |
5739 | sel[i] += exact_div (a: nelt, b: 2); |
5740 | indices.new_vector (sel, 2, nelt); |
5741 | if (can_vec_perm_const_p (mode, mode, indices)) |
5742 | return true; |
5743 | } |
5744 | } |
5745 | } |
5746 | |
5747 | if (dump_enabled_p ()) |
5748 | dump_printf (MSG_MISSED_OPTIMIZATION, |
5749 | "permutation op not supported by target.\n" ); |
5750 | return false; |
5751 | } |
5752 | |
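/* Worked example (illustrative): for a group of two V4SI stores, with
   input vectors {a0,a1,a2,a3} and {b0,b1,b2,b3}, the masks checked above
   expand to

     high: { 0, 4, 1, 5 }   ->  { a0, b0, a1, b1 }
     low:  { 2, 6, 3, 7 }   ->  { a2, b2, a3, b3 }

   The builder encodes them as 2 interleaved stepped patterns of length 3,
   which describes the same selectors for variable-length vectors too.  */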
5753 | /* Return FN if vec_{mask_,mask_len_}store_lanes is available for COUNT vectors |
5754 | of type VECTYPE. MASKED_P says whether the masked form is needed. */ |
5755 | |
5756 | internal_fn |
5757 | vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count, |
5758 | bool masked_p) |
5759 | { |
5760 | if (vect_lanes_optab_supported_p (name: "vec_mask_len_store_lanes" , |
5761 | optab: vec_mask_len_store_lanes_optab, vectype, |
5762 | count)) |
5763 | return IFN_MASK_LEN_STORE_LANES; |
5764 | else if (masked_p) |
5765 | { |
5766 | if (vect_lanes_optab_supported_p (name: "vec_mask_store_lanes" , |
5767 | optab: vec_mask_store_lanes_optab, vectype, |
5768 | count)) |
5769 | return IFN_MASK_STORE_LANES; |
5770 | } |
5771 | else |
5772 | { |
5773 | if (vect_lanes_optab_supported_p (name: "vec_store_lanes" , |
5774 | optab: vec_store_lanes_optab, vectype, count)) |
5775 | return IFN_STORE_LANES; |
5776 | } |
5777 | return IFN_LAST; |
5778 | } |
5779 | |
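/* For illustration: a store-lanes operation writes COUNT vectors so that
   their elements end up interleaved in memory.  With COUNT == 2 and V4SI
   inputs {a0,a1,a2,a3} and {b0,b1,b2,b3} the resulting memory image is

     a0 b0 a1 b1 a2 b2 a3 b3

   (what e.g. AArch64 ST2 provides directly).  When such an optab exists
   the vectorizer can use it instead of the explicit interleaving
   permutations built by vect_permute_store_chain below.  */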
5780 | |
5781 | /* Function vect_permute_store_chain. |
5782 | |
5783 | Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be |
5784 | a power of 2 or equal to 3, generate interleave_high/low stmts to reorder |
5785 | the data correctly for the stores. Return the final references for stores |
5786 | in RESULT_CHAIN. |
5787 | |
5788 | E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8. |
5789 | The input is 4 vectors each containing 8 elements. We assign a number to |
5790 | each element, the input sequence is: |
5791 | |
5792 | 1st vec: 0 1 2 3 4 5 6 7 |
5793 | 2nd vec: 8 9 10 11 12 13 14 15 |
5794 | 3rd vec: 16 17 18 19 20 21 22 23 |
5795 | 4th vec: 24 25 26 27 28 29 30 31 |
5796 | |
5797 | The output sequence should be: |
5798 | |
5799 | 1st vec: 0 8 16 24 1 9 17 25 |
5800 | 2nd vec: 2 10 18 26 3 11 19 27 |
5801 | 3rd vec: 4 12 20 28 5 13 21 29 |
5802 | 4th vec: 6 14 22 30 7 15 23 31 |
5803 | |
5804 | i.e., we interleave the contents of the four vectors in their order. |
5805 | |
5806 | We use interleave_high/low instructions to create such output. The input of |
5807 | each interleave_high/low operation is two vectors: |
5808 | 1st vec 2nd vec |
5809 | 0 1 2 3 4 5 6 7 |
5810 | the even elements of the result vector are obtained left-to-right from the |
5811 | high/low elements of the first vector. The odd elements of the result are |
5812 | obtained left-to-right from the high/low elements of the second vector. |
5813 | The output of interleave_high will be: 0 4 1 5 |
5814 | and of interleave_low: 2 6 3 7 |
5815 | |
5816 | |
5817 | The permutation is done in log LENGTH stages. In each stage interleave_high |
5818 | and interleave_low stmts are created for each pair of vectors in DR_CHAIN, |
5819 | where the first argument is taken from the first half of DR_CHAIN and the |
5820 | second argument from its second half. |
5821 | In our example, |
5822 | |
5823 | I1: interleave_high (1st vec, 3rd vec) |
5824 | I2: interleave_low (1st vec, 3rd vec) |
5825 | I3: interleave_high (2nd vec, 4th vec) |
5826 | I4: interleave_low (2nd vec, 4th vec) |
5827 | |
5828 | The output for the first stage is: |
5829 | |
5830 | I1: 0 16 1 17 2 18 3 19 |
5831 | I2: 4 20 5 21 6 22 7 23 |
5832 | I3: 8 24 9 25 10 26 11 27 |
5833 | I4: 12 28 13 29 14 30 15 31 |
5834 | |
5835 | The output of the second stage, i.e. the final result is: |
5836 | |
5837 | I1: 0 8 16 24 1 9 17 25 |
5838 | I2: 2 10 18 26 3 11 19 27 |
5839 | I3: 4 12 20 28 5 13 21 29 |
5840 | I4: 6 14 22 30 7 15 23 31. */ |
5841 | |
5842 | void |
5843 | vect_permute_store_chain (vec_info *vinfo, vec<tree> &dr_chain, |
5844 | unsigned int length, |
5845 | stmt_vec_info stmt_info, |
5846 | gimple_stmt_iterator *gsi, |
5847 | vec<tree> *result_chain) |
5848 | { |
5849 | tree vect1, vect2, high, low; |
5850 | gimple *perm_stmt; |
5851 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
5852 | tree perm_mask_low, perm_mask_high; |
5853 | tree data_ref; |
5854 | tree perm3_mask_low, perm3_mask_high; |
5855 | unsigned int i, j, n, log_length = exact_log2 (x: length); |
5856 | |
5857 | result_chain->quick_grow (len: length); |
5858 | memcpy (dest: result_chain->address (), src: dr_chain.address (), |
5859 | n: length * sizeof (tree)); |
5860 | |
5861 | if (length == 3) |
5862 | { |
5863 | /* vect_grouped_store_supported ensures that this is constant. */ |
5864 | unsigned int nelt = TYPE_VECTOR_SUBPARTS (node: vectype).to_constant (); |
5865 | unsigned int j0 = 0, j1 = 0, j2 = 0; |
5866 | |
5867 | vec_perm_builder sel (nelt, nelt, 1); |
5868 | sel.quick_grow (len: nelt); |
5869 | vec_perm_indices indices; |
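/* Build output vector J in two permutes: the first gathers elements from
DR_CHAIN[0] and DR_CHAIN[1], the second fills the remaining lanes from
DR_CHAIN[2]. */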
5870 | for (j = 0; j < 3; j++) |
5871 | { |
5872 | int nelt0 = ((3 - j) * nelt) % 3; |
5873 | int nelt1 = ((3 - j) * nelt + 1) % 3; |
5874 | int nelt2 = ((3 - j) * nelt + 2) % 3; |
5875 | |
5876 | for (i = 0; i < nelt; i++) |
5877 | { |
5878 | if (3 * i + nelt0 < nelt) |
5879 | sel[3 * i + nelt0] = j0++; |
5880 | if (3 * i + nelt1 < nelt) |
5881 | sel[3 * i + nelt1] = nelt + j1++; |
5882 | if (3 * i + nelt2 < nelt) |
5883 | sel[3 * i + nelt2] = 0; |
5884 | } |
5885 | indices.new_vector (sel, 2, nelt); |
5886 | perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices); |
5887 | |
5888 | for (i = 0; i < nelt; i++) |
5889 | { |
5890 | if (3 * i + nelt0 < nelt) |
5891 | sel[3 * i + nelt0] = 3 * i + nelt0; |
5892 | if (3 * i + nelt1 < nelt) |
5893 | sel[3 * i + nelt1] = 3 * i + nelt1; |
5894 | if (3 * i + nelt2 < nelt) |
5895 | sel[3 * i + nelt2] = nelt + j2++; |
5896 | } |
5897 | indices.new_vector (sel, 2, nelt); |
5898 | perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices); |
5899 | |
5900 | vect1 = dr_chain[0]; |
5901 | vect2 = dr_chain[1]; |
5902 | |
5903 | /* Create interleaving stmt: |
5904 | low = VEC_PERM_EXPR <vect1, vect2, |
5905 | {j, nelt, *, j + 1, nelt + j + 1, *, |
5906 | j + 2, nelt + j + 2, *, ...}> */ |
5907 | data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_shuffle3_low" ); |
5908 | perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1, |
5909 | vect2, perm3_mask_low); |
5910 | vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); |
5911 | |
5912 | vect1 = data_ref; |
5913 | vect2 = dr_chain[2]; |
5914 | /* Create interleaving stmt: |
5915 | low = VEC_PERM_EXPR <vect1, vect2, |
5916 | {0, 1, nelt + j, 3, 4, nelt + j + 1, |
5917 | 6, 7, nelt + j + 2, ...}> */ |
5918 | data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_shuffle3_high" ); |
5919 | perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1, |
5920 | vect2, perm3_mask_high); |
5921 | vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); |
5922 | (*result_chain)[j] = data_ref; |
5923 | } |
5924 | } |
5925 | else |
5926 | { |
5927 | /* If length is not equal to 3 then only power of 2 is supported. */ |
5928 | gcc_assert (pow2p_hwi (length)); |
5929 | |
5930 | /* The encoding has 2 interleaved stepped patterns. */ |
5931 | poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (node: vectype); |
5932 | vec_perm_builder sel (nelt, 2, 3); |
5933 | sel.quick_grow (len: 6); |
5934 | for (i = 0; i < 3; i++) |
5935 | { |
5936 | sel[i * 2] = i; |
5937 | sel[i * 2 + 1] = i + nelt; |
5938 | } |
5939 | vec_perm_indices indices (sel, 2, nelt); |
5940 | perm_mask_high = vect_gen_perm_mask_checked (vectype, indices); |
5941 | |
5942 | for (i = 0; i < 6; i++) |
5943 | sel[i] += exact_div (a: nelt, b: 2); |
5944 | indices.new_vector (sel, 2, nelt); |
5945 | perm_mask_low = vect_gen_perm_mask_checked (vectype, indices); |
5946 | |
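/* Each of the log2 (LENGTH) stages interleaves vector J from the first half
of DR_CHAIN with vector J from the second half; the high/low results go to
adjacent slots of RESULT_CHAIN, which is then copied back to DR_CHAIN as
the input of the next stage. */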
5947 | for (i = 0, n = log_length; i < n; i++) |
5948 | { |
5949 | for (j = 0; j < length/2; j++) |
5950 | { |
5951 | vect1 = dr_chain[j]; |
5952 | vect2 = dr_chain[j+length/2]; |
5953 | |
5954 | /* Create interleaving stmt: |
5955 | high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, |
5956 | ...}> */ |
5957 | high = make_temp_ssa_name (type: vectype, NULL, name: "vect_inter_high" ); |
5958 | perm_stmt = gimple_build_assign (high, VEC_PERM_EXPR, vect1, |
5959 | vect2, perm_mask_high); |
5960 | vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); |
5961 | (*result_chain)[2*j] = high; |
5962 | |
5963 | /* Create interleaving stmt: |
5964 | low = VEC_PERM_EXPR <vect1, vect2, |
5965 | {nelt/2, nelt*3/2, nelt/2+1, nelt*3/2+1, |
5966 | ...}> */ |
5967 | low = make_temp_ssa_name (type: vectype, NULL, name: "vect_inter_low" ); |
5968 | perm_stmt = gimple_build_assign (low, VEC_PERM_EXPR, vect1, |
5969 | vect2, perm_mask_low); |
5970 | vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); |
5971 | (*result_chain)[2*j+1] = low; |
5972 | } |
5973 | memcpy (dest: dr_chain.address (), src: result_chain->address (), |
5974 | n: length * sizeof (tree)); |
5975 | } |
5976 | } |
5977 | } |
5978 | |
5979 | /* Function vect_setup_realignment |
5980 | |
5981 | This function is called when vectorizing an unaligned load using |
5982 | the dr_explicit_realign[_optimized] scheme. |
5983 | This function generates the following code at the loop prolog: |
5984 | |
5985 | p = initial_addr; |
5986 | x msq_init = *(floor(p)); # prolog load |
5987 | realignment_token = call target_builtin; |
5988 | loop: |
5989 | x msq = phi (msq_init, ---) |
5990 | |
5991 | The stmts marked with x are generated only for the case of |
5992 | dr_explicit_realign_optimized. |
5993 | |
5994 | The code above sets up a new (vector) pointer, pointing to the first |
5995 | location accessed by STMT_INFO, and a "floor-aligned" load using that |
5996 | pointer. It also generates code to compute the "realignment-token" |
5997 | (if the relevant target hook was defined), and creates a phi-node at the |
5998 | loop-header bb whose arguments are the result of the prolog-load (created |
5999 | by this function) and the result of a load that takes place in the loop |
6000 | (to be created by the caller to this function). |
6001 | |
6002 | For the case of dr_explicit_realign_optimized: |
6003 | The caller to this function uses the phi-result (msq) to create the |
6004 | realignment code inside the loop, and sets up the missing phi argument, |
6005 | as follows: |
6006 | loop: |
6007 | msq = phi (msq_init, lsq) |
6008 | lsq = *(floor(p')); # load in loop |
6009 | result = realign_load (msq, lsq, realignment_token); |
6010 | |
6011 | For the case of dr_explicit_realign: |
6012 | loop: |
6013 | msq = *(floor(p)); # load in loop |
6014 | p' = p + (VS-1); |
6015 | lsq = *(floor(p')); # load in loop |
6016 | result = realign_load (msq, lsq, realignment_token); |
6017 | |
6018 | Input: |
6019 | STMT_INFO - (scalar) load stmt to be vectorized. This load accesses |
6020 | a memory location that may be unaligned. |
6021 | BSI - place where new code is to be inserted. |
6022 | ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes |
6023 | is used. |
6024 | |
6025 | Output: |
6026 | REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load |
6027 | target hook, if defined. |
6028 | Return value - the result of the loop-header phi node. */ |
6029 | |
6030 | tree |
6031 | vect_setup_realignment (vec_info *vinfo, stmt_vec_info stmt_info, |
6032 | gimple_stmt_iterator *gsi, tree *realignment_token, |
6033 | enum dr_alignment_support alignment_support_scheme, |
6034 | tree init_addr, |
6035 | class loop **at_loop) |
6036 | { |
6037 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
6038 | loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo); |
6039 | dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); |
6040 | struct data_reference *dr = dr_info->dr; |
6041 | class loop *loop = NULL; |
6042 | edge pe = NULL; |
6043 | tree scalar_dest = gimple_assign_lhs (gs: stmt_info->stmt); |
6044 | tree vec_dest; |
6045 | gimple *inc; |
6046 | tree ptr; |
6047 | tree data_ref; |
6048 | basic_block new_bb; |
6049 | tree msq_init = NULL_TREE; |
6050 | tree new_temp; |
6051 | gphi *phi_stmt; |
6052 | tree msq = NULL_TREE; |
6053 | gimple_seq stmts = NULL; |
6054 | bool compute_in_loop = false; |
6055 | bool nested_in_vect_loop = false; |
6056 | class loop *containing_loop = (gimple_bb (g: stmt_info->stmt))->loop_father; |
6057 | class loop *loop_for_initial_load = NULL; |
6058 | |
6059 | if (loop_vinfo) |
6060 | { |
6061 | loop = LOOP_VINFO_LOOP (loop_vinfo); |
6062 | nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info); |
6063 | } |
6064 | |
6065 | gcc_assert (alignment_support_scheme == dr_explicit_realign |
6066 | || alignment_support_scheme == dr_explicit_realign_optimized); |
6067 | |
6068 | /* We need to generate three things: |
6069 | 1. the misalignment computation |
6070 | 2. the extra vector load (for the optimized realignment scheme). |
6071 | 3. the phi node for the two vectors from which the realignment is |
6072 | done (for the optimized realignment scheme). */ |
6073 | |
6074 | /* 1. Determine where to generate the misalignment computation. |
6075 | |
6076 | If INIT_ADDR is NULL_TREE, this indicates that the misalignment |
6077 | calculation will be generated by this function, outside the loop (in the |
6078 | preheader). Otherwise, INIT_ADDR had already been computed for us by the |
6079 | caller, inside the loop. |
6080 | |
6081 | Background: If the misalignment remains fixed throughout the iterations of |
6082 | the loop, then both realignment schemes are applicable, and also the |
6083 | misalignment computation can be done outside LOOP. This is because we are |
6084 | vectorizing LOOP, and so the memory accesses in LOOP advance in steps that |
6085 | are a multiple of VS (the Vector Size), and therefore the misalignment in |
6086 | different vectorized LOOP iterations is always the same. |
6087 | The problem arises only if the memory access is in an inner-loop nested |
6088 | inside LOOP, which is now being vectorized using outer-loop vectorization. |
6089 | This is the only case when the misalignment of the memory access may not |
6090 | remain fixed throughout the iterations of the inner-loop (as explained in |
6091 | detail in vect_supportable_dr_alignment). In this case, not only is the |
6092 | optimized realignment scheme not applicable, but also the misalignment |
6093 | computation (and generation of the realignment token that is passed to |
6094 | REALIGN_LOAD) have to be done inside the loop. |
6095 | |
6096 | In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode |
6097 | or not, which in turn determines if the misalignment is computed inside |
6098 | the inner-loop, or outside LOOP. */ |
6099 | |
6100 | if (init_addr != NULL_TREE || !loop_vinfo) |
6101 | { |
6102 | compute_in_loop = true; |
6103 | gcc_assert (alignment_support_scheme == dr_explicit_realign); |
6104 | } |
6105 | |
6106 | |
6107 | /* 2. Determine where to generate the extra vector load. |
6108 | |
6109 | For the optimized realignment scheme, instead of generating two vector |
6110 | loads in each iteration, we generate a single extra vector load in the |
6111 | preheader of the loop, and in each iteration reuse the result of the |
6112 | vector load from the previous iteration. In case the memory access is in |
6113 | an inner-loop nested inside LOOP, which is now being vectorized using |
6114 | outer-loop vectorization, we need to determine whether this initial vector |
6115 | load should be generated at the preheader of the inner-loop, or can be |
6116 | generated at the preheader of LOOP. If the memory access has no evolution |
6117 | in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has |
6118 | to be generated inside LOOP (in the preheader of the inner-loop). */ |
6119 | |
6120 | if (nested_in_vect_loop) |
6121 | { |
6122 | tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info); |
6123 | bool invariant_in_outerloop = |
6124 | (tree_int_cst_compare (t1: outerloop_step, size_zero_node) == 0); |
6125 | loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner); |
6126 | } |
6127 | else |
6128 | loop_for_initial_load = loop; |
6129 | if (at_loop) |
6130 | *at_loop = loop_for_initial_load; |
6131 | |
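/* Determine the virtual use for the prolog load: prefer the virtual
operand live on the preheader edge of the loop the load is emitted in,
falling back to the VUSE of the statement at GSI. */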
6132 | tree vuse = NULL_TREE; |
6133 | if (loop_for_initial_load) |
6134 | { |
6135 | pe = loop_preheader_edge (loop_for_initial_load); |
6136 | if (gphi *vphi = get_virtual_phi (loop_for_initial_load->header)) |
6137 | vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe); |
6138 | } |
6139 | if (!vuse) |
6140 | vuse = gimple_vuse (g: gsi_stmt (i: *gsi)); |
6141 | |
6142 | /* 3. For the case of the optimized realignment, create the first vector |
6143 | load at the loop preheader. */ |
6144 | |
6145 | if (alignment_support_scheme == dr_explicit_realign_optimized) |
6146 | { |
6147 | /* Create msq_init = *(floor(p1)) in the loop preheader */ |
6148 | gassign *new_stmt; |
6149 | |
6150 | gcc_assert (!compute_in_loop); |
6151 | vec_dest = vect_create_destination_var (scalar_dest, vectype); |
6152 | ptr = vect_create_data_ref_ptr (vinfo, stmt_info, aggr_type: vectype, |
6153 | at_loop: loop_for_initial_load, NULL_TREE, |
6154 | initial_address: &init_addr, NULL, ptr_incr: &inc, only_init: true); |
6155 | if (TREE_CODE (ptr) == SSA_NAME) |
6156 | new_temp = copy_ssa_name (var: ptr); |
6157 | else |
6158 | new_temp = make_ssa_name (TREE_TYPE (ptr)); |
6159 | poly_uint64 align = DR_TARGET_ALIGNMENT (dr_info); |
6160 | tree type = TREE_TYPE (ptr); |
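/* Round the pointer down to the target alignment boundary (ptr & -align),
giving the "floor (p)" address for the aligned prolog load. */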
6161 | new_stmt = gimple_build_assign |
6162 | (new_temp, BIT_AND_EXPR, ptr, |
6163 | fold_build2 (MINUS_EXPR, type, |
6164 | build_int_cst (type, 0), |
6165 | build_int_cst (type, align))); |
6166 | new_bb = gsi_insert_on_edge_immediate (pe, new_stmt); |
6167 | gcc_assert (!new_bb); |
6168 | data_ref |
6169 | = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp, |
6170 | build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0)); |
6171 | vect_copy_ref_info (dest: data_ref, DR_REF (dr)); |
6172 | new_stmt = gimple_build_assign (vec_dest, data_ref); |
6173 | new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt); |
6174 | gimple_assign_set_lhs (gs: new_stmt, lhs: new_temp); |
6175 | gimple_set_vuse (g: new_stmt, vuse); |
6176 | if (pe) |
6177 | { |
6178 | new_bb = gsi_insert_on_edge_immediate (pe, new_stmt); |
6179 | gcc_assert (!new_bb); |
6180 | } |
6181 | else |
6182 | gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); |
6183 | |
6184 | msq_init = gimple_assign_lhs (gs: new_stmt); |
6185 | } |
6186 | |
6187 | /* 4. Create realignment token using a target builtin, if available. |
6188 | It is done either inside the containing loop, or before LOOP (as |
6189 | determined above). */ |
6190 | |
6191 | if (targetm.vectorize.builtin_mask_for_load) |
6192 | { |
6193 | gcall *new_stmt; |
6194 | tree builtin_decl; |
6195 | |
/* Compute INIT_ADDR - the initial address accessed by this memref. */
6197 | if (!init_addr) |
6198 | { |
6199 | /* Generate the INIT_ADDR computation outside LOOP. */ |
6200 | init_addr = vect_create_addr_base_for_vector_ref (vinfo, |
6201 | stmt_info, new_stmt_list: &stmts, |
6202 | NULL_TREE); |
6203 | if (loop) |
6204 | { |
6205 | pe = loop_preheader_edge (loop); |
6206 | new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); |
6207 | gcc_assert (!new_bb); |
6208 | } |
6209 | else |
6210 | gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); |
6211 | } |
6212 | |
6213 | builtin_decl = targetm.vectorize.builtin_mask_for_load (); |
6214 | new_stmt = gimple_build_call (builtin_decl, 1, init_addr); |
6215 | vec_dest = |
6216 | vect_create_destination_var (scalar_dest, |
6217 | vectype: gimple_call_return_type (gs: new_stmt)); |
6218 | new_temp = make_ssa_name (var: vec_dest, stmt: new_stmt); |
6219 | gimple_call_set_lhs (gs: new_stmt, lhs: new_temp); |
6220 | |
6221 | if (compute_in_loop) |
6222 | gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); |
6223 | else |
6224 | { |
6225 | /* Generate the misalignment computation outside LOOP. */ |
6226 | pe = loop_preheader_edge (loop); |
6227 | new_bb = gsi_insert_on_edge_immediate (pe, new_stmt); |
6228 | gcc_assert (!new_bb); |
6229 | } |
6230 | |
6231 | *realignment_token = gimple_call_lhs (gs: new_stmt); |
6232 | |
6233 | /* The result of the CALL_EXPR to this builtin is determined from |
6234 | the value of the parameter and no global variables are touched |
6235 | which makes the builtin a "const" function. Requiring the |
6236 | builtin to have the "const" attribute makes it unnecessary |
6237 | to call mark_call_clobbered. */ |
6238 | gcc_assert (TREE_READONLY (builtin_decl)); |
6239 | } |
6240 | |
6241 | if (alignment_support_scheme == dr_explicit_realign) |
6242 | return msq; |
6243 | |
6244 | gcc_assert (!compute_in_loop); |
6245 | gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized); |
6246 | |
6247 | |
6248 | /* 5. Create msq = phi <msq_init, lsq> in loop */ |
6249 | |
6250 | pe = loop_preheader_edge (containing_loop); |
6251 | vec_dest = vect_create_destination_var (scalar_dest, vectype); |
6252 | msq = make_ssa_name (var: vec_dest); |
6253 | phi_stmt = create_phi_node (msq, containing_loop->header); |
6254 | add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION); |
6255 | |
6256 | return msq; |
6257 | } |
6258 | |
6259 | |
6260 | /* Function vect_grouped_load_supported. |
6261 | |
6262 | COUNT is the size of the load group (the number of statements plus the |
6263 | number of gaps). SINGLE_ELEMENT_P is true if there is actually |
6264 | only one statement, with a gap of COUNT - 1. |
6265 | |
6266 | Returns true if a suitable permute exists. */ |
6267 | |
6268 | bool |
6269 | vect_grouped_load_supported (tree vectype, bool single_element_p, |
6270 | unsigned HOST_WIDE_INT count) |
6271 | { |
6272 | machine_mode mode = TYPE_MODE (vectype); |
6273 | |
/* If this is single-element interleaving with an element distance
that leaves unused vector loads around, punt - we at least create
very sub-optimal code in that case (and blow up memory;
see PR65518). */
6278 | if (single_element_p && maybe_gt (count, TYPE_VECTOR_SUBPARTS (vectype))) |
6279 | { |
6280 | if (dump_enabled_p ()) |
6281 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6282 | "single-element interleaving not supported " |
6283 | "for not adjacent vector loads\n" ); |
6284 | return false; |
6285 | } |
6286 | |
6287 | /* vect_permute_load_chain requires the group size to be equal to 3 or |
6288 | be a power of two. */ |
6289 | if (count != 3 && exact_log2 (x: count) == -1) |
6290 | { |
6291 | if (dump_enabled_p ()) |
6292 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6293 | "the size of the group of accesses" |
6294 | " is not a power of 2 or not equal to 3\n" ); |
6295 | return false; |
6296 | } |
6297 | |
6298 | /* Check that the permutation is supported. */ |
6299 | if (VECTOR_MODE_P (mode)) |
6300 | { |
6301 | unsigned int i, j; |
6302 | if (count == 3) |
6303 | { |
6304 | unsigned int nelt; |
6305 | if (!GET_MODE_NUNITS (mode).is_constant (const_value: &nelt)) |
6306 | { |
6307 | if (dump_enabled_p ()) |
6308 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6309 | "cannot handle groups of 3 loads for" |
6310 | " variable-length vectors\n" ); |
6311 | return false; |
6312 | } |
6313 | |
6314 | vec_perm_builder sel (nelt, nelt, 1); |
6315 | sel.quick_grow (len: nelt); |
6316 | vec_perm_indices indices; |
6317 | unsigned int k; |
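/* For each of the three output vectors check both permutes that
vect_permute_load_chain would use: one gathering elements from the first
two input vectors and one filling the remaining lanes from the third. */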
6318 | for (k = 0; k < 3; k++) |
6319 | { |
6320 | for (i = 0; i < nelt; i++) |
6321 | if (3 * i + k < 2 * nelt) |
6322 | sel[i] = 3 * i + k; |
6323 | else |
6324 | sel[i] = 0; |
6325 | indices.new_vector (sel, 2, nelt); |
6326 | if (!can_vec_perm_const_p (mode, mode, indices)) |
6327 | { |
6328 | if (dump_enabled_p ()) |
6329 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6330 | "shuffle of 3 loads is not supported by" |
6331 | " target\n" ); |
6332 | return false; |
6333 | } |
6334 | for (i = 0, j = 0; i < nelt; i++) |
6335 | if (3 * i + k < 2 * nelt) |
6336 | sel[i] = i; |
6337 | else |
6338 | sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++); |
6339 | indices.new_vector (sel, 2, nelt); |
6340 | if (!can_vec_perm_const_p (mode, mode, indices)) |
6341 | { |
6342 | if (dump_enabled_p ()) |
6343 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6344 | "shuffle of 3 loads is not supported by" |
6345 | " target\n" ); |
6346 | return false; |
6347 | } |
6348 | } |
6349 | return true; |
6350 | } |
6351 | else |
6352 | { |
6353 | /* If length is not equal to 3 then only power of 2 is supported. */ |
6354 | gcc_assert (pow2p_hwi (count)); |
6355 | poly_uint64 nelt = GET_MODE_NUNITS (mode); |
6356 | |
6357 | /* The encoding has a single stepped pattern. */ |
6358 | vec_perm_builder sel (nelt, 1, 3); |
6359 | sel.quick_grow (len: 3); |
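/* {0, 2, 4, ...} extracts the even elements; the second permute checked
below, {1, 3, 5, ...}, extracts the odd elements. */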
6360 | for (i = 0; i < 3; i++) |
6361 | sel[i] = i * 2; |
6362 | vec_perm_indices indices (sel, 2, nelt); |
6363 | if (can_vec_perm_const_p (mode, mode, indices)) |
6364 | { |
6365 | for (i = 0; i < 3; i++) |
6366 | sel[i] = i * 2 + 1; |
6367 | indices.new_vector (sel, 2, nelt); |
6368 | if (can_vec_perm_const_p (mode, mode, indices)) |
6369 | return true; |
6370 | } |
6371 | } |
6372 | } |
6373 | |
6374 | if (dump_enabled_p ()) |
6375 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6376 | "extract even/odd not supported by target\n" ); |
6377 | return false; |
6378 | } |
6379 | |
6380 | /* Return FN if vec_{masked_,mask_len_}load_lanes is available for COUNT vectors |
6381 | of type VECTYPE. MASKED_P says whether the masked form is needed. */ |
6382 | |
6383 | internal_fn |
6384 | vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count, |
6385 | bool masked_p) |
6386 | { |
6387 | if (vect_lanes_optab_supported_p (name: "vec_mask_len_load_lanes" , |
6388 | optab: vec_mask_len_load_lanes_optab, vectype, |
6389 | count)) |
6390 | return IFN_MASK_LEN_LOAD_LANES; |
6391 | else if (masked_p) |
6392 | { |
6393 | if (vect_lanes_optab_supported_p (name: "vec_mask_load_lanes" , |
6394 | optab: vec_mask_load_lanes_optab, vectype, |
6395 | count)) |
6396 | return IFN_MASK_LOAD_LANES; |
6397 | } |
6398 | else |
6399 | { |
6400 | if (vect_lanes_optab_supported_p (name: "vec_load_lanes" , optab: vec_load_lanes_optab, |
6401 | vectype, count)) |
6402 | return IFN_LOAD_LANES; |
6403 | } |
6404 | return IFN_LAST; |
6405 | } |
6406 | |
6407 | /* Function vect_permute_load_chain. |
6408 | |
6409 | Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be |
6410 | a power of 2 or equal to 3, generate extract_even/odd stmts to reorder |
6411 | the input data correctly. Return the final references for loads in |
6412 | RESULT_CHAIN. |
6413 | |
6414 | E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8. |
6415 | The input is 4 vectors each containing 8 elements. We assign a number to each |
6416 | element, the input sequence is: |
6417 | |
6418 | 1st vec: 0 1 2 3 4 5 6 7 |
6419 | 2nd vec: 8 9 10 11 12 13 14 15 |
6420 | 3rd vec: 16 17 18 19 20 21 22 23 |
6421 | 4th vec: 24 25 26 27 28 29 30 31 |
6422 | |
6423 | The output sequence should be: |
6424 | |
6425 | 1st vec: 0 4 8 12 16 20 24 28 |
6426 | 2nd vec: 1 5 9 13 17 21 25 29 |
6427 | 3rd vec: 2 6 10 14 18 22 26 30 |
6428 | 4th vec: 3 7 11 15 19 23 27 31 |
6429 | |
6430 | i.e., the first output vector should contain the first elements of each |
6431 | interleaving group, etc. |
6432 | |
6433 | We use extract_even/odd instructions to create such output. The input of |
6434 | each extract_even/odd operation is two vectors |
6435 | 1st vec 2nd vec |
6436 | 0 1 2 3 4 5 6 7 |
6437 | |
6438 | and the output is the vector of extracted even/odd elements. The output of |
6439 | extract_even will be: 0 2 4 6 |
6440 | and of extract_odd: 1 3 5 7 |
6441 | |
6442 | |
6443 | The permutation is done in log LENGTH stages. In each stage extract_even |
6444 | and extract_odd stmts are created for each pair of vectors in DR_CHAIN in |
6445 | their order. In our example, |
6446 | |
6447 | E1: extract_even (1st vec, 2nd vec) |
6448 | E2: extract_odd (1st vec, 2nd vec) |
6449 | E3: extract_even (3rd vec, 4th vec) |
6450 | E4: extract_odd (3rd vec, 4th vec) |
6451 | |
6452 | The output for the first stage will be: |
6453 | |
6454 | E1: 0 2 4 6 8 10 12 14 |
6455 | E2: 1 3 5 7 9 11 13 15 |
6456 | E3: 16 18 20 22 24 26 28 30 |
6457 | E4: 17 19 21 23 25 27 29 31 |
6458 | |
6459 | In order to proceed and create the correct sequence for the next stage (or |
6460 | for the correct output, if the second stage is the last one, as in our |
6461 | example), we first put the output of extract_even operation and then the |
6462 | output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN). |
6463 | The input for the second stage is: |
6464 | |
6465 | 1st vec (E1): 0 2 4 6 8 10 12 14 |
6466 | 2nd vec (E3): 16 18 20 22 24 26 28 30 |
6467 | 3rd vec (E2): 1 3 5 7 9 11 13 15 |
6468 | 4th vec (E4): 17 19 21 23 25 27 29 31 |
6469 | |
6470 | The output of the second stage: |
6471 | |
6472 | E1: 0 4 8 12 16 20 24 28 |
6473 | E2: 2 6 10 14 18 22 26 30 |
6474 | E3: 1 5 9 13 17 21 25 29 |
6475 | E4: 3 7 11 15 19 23 27 31 |
6476 | |
6477 | And RESULT_CHAIN after reordering: |
6478 | |
6479 | 1st vec (E1): 0 4 8 12 16 20 24 28 |
6480 | 2nd vec (E3): 1 5 9 13 17 21 25 29 |
6481 | 3rd vec (E2): 2 6 10 14 18 22 26 30 |
6482 | 4th vec (E4): 3 7 11 15 19 23 27 31. */ |
6483 | |
6484 | static void |
6485 | vect_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain, |
6486 | unsigned int length, |
6487 | stmt_vec_info stmt_info, |
6488 | gimple_stmt_iterator *gsi, |
6489 | vec<tree> *result_chain) |
6490 | { |
6491 | tree data_ref, first_vect, second_vect; |
6492 | tree perm_mask_even, perm_mask_odd; |
6493 | tree perm3_mask_low, perm3_mask_high; |
6494 | gimple *perm_stmt; |
6495 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
6496 | unsigned int i, j, log_length = exact_log2 (x: length); |
6497 | |
6498 | result_chain->quick_grow (len: length); |
6499 | memcpy (dest: result_chain->address (), src: dr_chain.address (), |
6500 | n: length * sizeof (tree)); |
6501 | |
6502 | if (length == 3) |
6503 | { |
6504 | /* vect_grouped_load_supported ensures that this is constant. */ |
6505 | unsigned nelt = TYPE_VECTOR_SUBPARTS (node: vectype).to_constant (); |
6506 | unsigned int k; |
6507 | |
6508 | vec_perm_builder sel (nelt, nelt, 1); |
6509 | sel.quick_grow (len: nelt); |
6510 | vec_perm_indices indices; |
6511 | for (k = 0; k < 3; k++) |
6512 | { |
6513 | for (i = 0; i < nelt; i++) |
6514 | if (3 * i + k < 2 * nelt) |
6515 | sel[i] = 3 * i + k; |
6516 | else |
6517 | sel[i] = 0; |
6518 | indices.new_vector (sel, 2, nelt); |
6519 | perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices); |
6520 | |
6521 | for (i = 0, j = 0; i < nelt; i++) |
6522 | if (3 * i + k < 2 * nelt) |
6523 | sel[i] = i; |
6524 | else |
6525 | sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++); |
6526 | indices.new_vector (sel, 2, nelt); |
6527 | perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices); |
6528 | |
6529 | first_vect = dr_chain[0]; |
6530 | second_vect = dr_chain[1]; |
6531 | |
6532 | /* Create interleaving stmt (low part of): |
low = VEC_PERM_EXPR <first_vect, second_vect, {k, 3 + k, 6 + k,
6534 | ...}> */ |
6535 | data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_shuffle3_low" ); |
6536 | perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect, |
6537 | second_vect, perm3_mask_low); |
6538 | vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); |
6539 | |
/* Create interleaving stmt (high part of): keep the lanes already
gathered by the first permute and fill the remaining lanes from the
third vector (dr_chain[2]). */
6543 | first_vect = data_ref; |
6544 | second_vect = dr_chain[2]; |
6545 | data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_shuffle3_high" ); |
6546 | perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect, |
6547 | second_vect, perm3_mask_high); |
6548 | vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); |
6549 | (*result_chain)[k] = data_ref; |
6550 | } |
6551 | } |
6552 | else |
6553 | { |
6554 | /* If length is not equal to 3 then only power of 2 is supported. */ |
6555 | gcc_assert (pow2p_hwi (length)); |
6556 | |
6557 | /* The encoding has a single stepped pattern. */ |
6558 | poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (node: vectype); |
6559 | vec_perm_builder sel (nelt, 1, 3); |
6560 | sel.quick_grow (len: 3); |
6561 | for (i = 0; i < 3; ++i) |
6562 | sel[i] = i * 2; |
6563 | vec_perm_indices indices (sel, 2, nelt); |
6564 | perm_mask_even = vect_gen_perm_mask_checked (vectype, indices); |
6565 | |
6566 | for (i = 0; i < 3; ++i) |
6567 | sel[i] = i * 2 + 1; |
6568 | indices.new_vector (sel, 2, nelt); |
6569 | perm_mask_odd = vect_gen_perm_mask_checked (vectype, indices); |
6570 | |
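/* Each of the log2 (LENGTH) stages extracts the even and the odd elements
of every pair of adjacent vectors in DR_CHAIN; the even results fill the
first half of RESULT_CHAIN, the odd results the second half, and the
stage output becomes the input of the next stage. */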
6571 | for (i = 0; i < log_length; i++) |
6572 | { |
6573 | for (j = 0; j < length; j += 2) |
6574 | { |
6575 | first_vect = dr_chain[j]; |
6576 | second_vect = dr_chain[j+1]; |
6577 | |
6578 | /* data_ref = permute_even (first_data_ref, second_data_ref); */ |
6579 | data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_perm_even" ); |
6580 | perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, |
6581 | first_vect, second_vect, |
6582 | perm_mask_even); |
6583 | vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); |
6584 | (*result_chain)[j/2] = data_ref; |
6585 | |
6586 | /* data_ref = permute_odd (first_data_ref, second_data_ref); */ |
6587 | data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_perm_odd" ); |
6588 | perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, |
6589 | first_vect, second_vect, |
6590 | perm_mask_odd); |
6591 | vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); |
6592 | (*result_chain)[j/2+length/2] = data_ref; |
6593 | } |
6594 | memcpy (dest: dr_chain.address (), src: result_chain->address (), |
6595 | n: length * sizeof (tree)); |
6596 | } |
6597 | } |
6598 | } |
6599 | |
6600 | /* Function vect_shift_permute_load_chain. |
6601 | |
6602 | Given a chain of loads in DR_CHAIN of LENGTH 2 or 3, generate |
6603 | sequence of stmts to reorder the input data accordingly. |
6604 | Return the final references for loads in RESULT_CHAIN. |
Return true if successful, false otherwise.
6606 | |
6607 | E.g., LENGTH is 3 and the scalar type is short, i.e., VF is 8. |
6608 | The input is 3 vectors each containing 8 elements. We assign a |
6609 | number to each element, the input sequence is: |
6610 | |
6611 | 1st vec: 0 1 2 3 4 5 6 7 |
6612 | 2nd vec: 8 9 10 11 12 13 14 15 |
6613 | 3rd vec: 16 17 18 19 20 21 22 23 |
6614 | |
6615 | The output sequence should be: |
6616 | |
6617 | 1st vec: 0 3 6 9 12 15 18 21 |
6618 | 2nd vec: 1 4 7 10 13 16 19 22 |
6619 | 3rd vec: 2 5 8 11 14 17 20 23 |
6620 | |
6621 | We use 3 shuffle instructions and 3 * 3 - 1 shifts to create such output. |
6622 | |
6623 | First we shuffle all 3 vectors to get correct elements order: |
6624 | |
6625 | 1st vec: ( 0 3 6) ( 1 4 7) ( 2 5) |
6626 | 2nd vec: ( 8 11 14) ( 9 12 15) (10 13) |
6627 | 3rd vec: (16 19 22) (17 20 23) (18 21) |
6628 | |
6629 | Next we unite and shift vector 3 times: |
6630 | |
6631 | 1st step: |
6632 | shift right by 6 the concatenation of: |
6633 | "1st vec" and "2nd vec" |
6634 | ( 0 3 6) ( 1 4 7) |( 2 5) _ ( 8 11 14) ( 9 12 15)| (10 13) |
6635 | "2nd vec" and "3rd vec" |
6636 | ( 8 11 14) ( 9 12 15) |(10 13) _ (16 19 22) (17 20 23)| (18 21) |
6637 | "3rd vec" and "1st vec" |
6638 | (16 19 22) (17 20 23) |(18 21) _ ( 0 3 6) ( 1 4 7)| ( 2 5) |
6639 | | New vectors | |
6640 | |
6641 | So that now new vectors are: |
6642 | |
6643 | 1st vec: ( 2 5) ( 8 11 14) ( 9 12 15) |
6644 | 2nd vec: (10 13) (16 19 22) (17 20 23) |
6645 | 3rd vec: (18 21) ( 0 3 6) ( 1 4 7) |
6646 | |
6647 | 2nd step: |
6648 | shift right by 5 the concatenation of: |
6649 | "1st vec" and "3rd vec" |
6650 | ( 2 5) ( 8 11 14) |( 9 12 15) _ (18 21) ( 0 3 6)| ( 1 4 7) |
6651 | "2nd vec" and "1st vec" |
6652 | (10 13) (16 19 22) |(17 20 23) _ ( 2 5) ( 8 11 14)| ( 9 12 15) |
6653 | "3rd vec" and "2nd vec" |
6654 | (18 21) ( 0 3 6) |( 1 4 7) _ (10 13) (16 19 22)| (17 20 23) |
6655 | | New vectors | |
6656 | |
6657 | So that now new vectors are: |
6658 | |
6659 | 1st vec: ( 9 12 15) (18 21) ( 0 3 6) |
6660 | 2nd vec: (17 20 23) ( 2 5) ( 8 11 14) |
6661 | 3rd vec: ( 1 4 7) (10 13) (16 19 22) READY |
6662 | |
6663 | 3rd step: |
6664 | shift right by 5 the concatenation of: |
6665 | "1st vec" and "1st vec" |
6666 | ( 9 12 15) (18 21) |( 0 3 6) _ ( 9 12 15) (18 21)| ( 0 3 6) |
6667 | shift right by 3 the concatenation of: |
6668 | "2nd vec" and "2nd vec" |
6669 | (17 20 23) |( 2 5) ( 8 11 14) _ (17 20 23)| ( 2 5) ( 8 11 14) |
6670 | | New vectors | |
6671 | |
6672 | So that now all vectors are READY: |
6673 | 1st vec: ( 0 3 6) ( 9 12 15) (18 21) |
6674 | 2nd vec: ( 2 5) ( 8 11 14) (17 20 23) |
6675 | 3rd vec: ( 1 4 7) (10 13) (16 19 22) |
6676 | |
This algorithm is faster than the one in vect_permute_load_chain if:
1. "shift of a concatenation" is faster than a general permutation.
This is usually so.
2. The TARGET machine can't execute vector instructions in parallel.
This is because each step of the algorithm depends on the previous one.
The algorithm in vect_permute_load_chain is much more parallel.
6683 | |
6684 | The algorithm is applicable only for LOAD CHAIN LENGTH less than VF. |
6685 | */ |
6686 | |
6687 | static bool |
6688 | vect_shift_permute_load_chain (vec_info *vinfo, vec<tree> dr_chain, |
6689 | unsigned int length, |
6690 | stmt_vec_info stmt_info, |
6691 | gimple_stmt_iterator *gsi, |
6692 | vec<tree> *result_chain) |
6693 | { |
6694 | tree vect[3], vect_shift[3], data_ref, first_vect, second_vect; |
6695 | tree perm2_mask1, perm2_mask2, perm3_mask; |
6696 | tree select_mask, shift1_mask, shift2_mask, shift3_mask, shift4_mask; |
6697 | gimple *perm_stmt; |
6698 | |
6699 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
6700 | machine_mode vmode = TYPE_MODE (vectype); |
6701 | unsigned int i; |
6702 | loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo); |
6703 | |
6704 | unsigned HOST_WIDE_INT nelt, vf; |
6705 | if (!TYPE_VECTOR_SUBPARTS (node: vectype).is_constant (const_value: &nelt) |
6706 | || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (const_value: &vf)) |
6707 | /* Not supported for variable-length vectors. */ |
6708 | return false; |
6709 | |
6710 | vec_perm_builder sel (nelt, nelt, 1); |
6711 | sel.quick_grow (len: nelt); |
6712 | |
6713 | result_chain->quick_grow (len: length); |
6714 | memcpy (dest: result_chain->address (), src: dr_chain.address (), |
6715 | n: length * sizeof (tree)); |
6716 | |
6717 | if (pow2p_hwi (x: length) && vf > 4) |
6718 | { |
6719 | unsigned int j, log_length = exact_log2 (x: length); |
6720 | for (i = 0; i < nelt / 2; ++i) |
6721 | sel[i] = i * 2; |
6722 | for (i = 0; i < nelt / 2; ++i) |
6723 | sel[nelt / 2 + i] = i * 2 + 1; |
6724 | vec_perm_indices indices (sel, 2, nelt); |
6725 | if (!can_vec_perm_const_p (vmode, vmode, indices)) |
6726 | { |
6727 | if (dump_enabled_p ()) |
6728 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6729 | "shuffle of 2 fields structure is not \ |
6730 | supported by target\n" ); |
6731 | return false; |
6732 | } |
6733 | perm2_mask1 = vect_gen_perm_mask_checked (vectype, indices); |
6734 | |
6735 | for (i = 0; i < nelt / 2; ++i) |
6736 | sel[i] = i * 2 + 1; |
6737 | for (i = 0; i < nelt / 2; ++i) |
6738 | sel[nelt / 2 + i] = i * 2; |
6739 | indices.new_vector (sel, 2, nelt); |
6740 | if (!can_vec_perm_const_p (vmode, vmode, indices)) |
6741 | { |
6742 | if (dump_enabled_p ()) |
6743 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6744 | "shuffle of 2 fields structure is not \ |
6745 | supported by target\n" ); |
6746 | return false; |
6747 | } |
6748 | perm2_mask2 = vect_gen_perm_mask_checked (vectype, indices); |
6749 | |
6750 | /* Generating permutation constant to shift all elements. |
6751 | For vector length 8 it is {4 5 6 7 8 9 10 11}. */ |
6752 | for (i = 0; i < nelt; i++) |
6753 | sel[i] = nelt / 2 + i; |
6754 | indices.new_vector (sel, 2, nelt); |
6755 | if (!can_vec_perm_const_p (vmode, vmode, indices)) |
6756 | { |
6757 | if (dump_enabled_p ()) |
6758 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6759 | "shift permutation is not supported by target\n" ); |
6760 | return false; |
6761 | } |
6762 | shift1_mask = vect_gen_perm_mask_checked (vectype, indices); |
6763 | |
6764 | /* Generating permutation constant to select vector from 2. |
6765 | For vector length 8 it is {0 1 2 3 12 13 14 15}. */ |
6766 | for (i = 0; i < nelt / 2; i++) |
6767 | sel[i] = i; |
6768 | for (i = nelt / 2; i < nelt; i++) |
6769 | sel[i] = nelt + i; |
6770 | indices.new_vector (sel, 2, nelt); |
6771 | if (!can_vec_perm_const_p (vmode, vmode, indices)) |
6772 | { |
6773 | if (dump_enabled_p ()) |
6774 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6775 | "select is not supported by target\n" ); |
6776 | return false; |
6777 | } |
6778 | select_mask = vect_gen_perm_mask_checked (vectype, indices); |
6779 | |
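/* For each pair (FIRST_VECT, SECOND_VECT): shuffle each vector so that its
even elements land in one half and its odd elements in the other, then
combine the two odd halves with the shift permutation and the two even
halves with the select permutation. */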
6780 | for (i = 0; i < log_length; i++) |
6781 | { |
6782 | for (j = 0; j < length; j += 2) |
6783 | { |
6784 | first_vect = dr_chain[j]; |
6785 | second_vect = dr_chain[j + 1]; |
6786 | |
6787 | data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_shuffle2" ); |
6788 | perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, |
6789 | first_vect, first_vect, |
6790 | perm2_mask1); |
6791 | vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); |
6792 | vect[0] = data_ref; |
6793 | |
6794 | data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_shuffle2" ); |
6795 | perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, |
6796 | second_vect, second_vect, |
6797 | perm2_mask2); |
6798 | vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); |
6799 | vect[1] = data_ref; |
6800 | |
6801 | data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_shift" ); |
6802 | perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, |
6803 | vect[0], vect[1], shift1_mask); |
6804 | vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); |
6805 | (*result_chain)[j/2 + length/2] = data_ref; |
6806 | |
6807 | data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_select" ); |
6808 | perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, |
6809 | vect[0], vect[1], select_mask); |
6810 | vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); |
6811 | (*result_chain)[j/2] = data_ref; |
6812 | } |
6813 | memcpy (dest: dr_chain.address (), src: result_chain->address (), |
6814 | n: length * sizeof (tree)); |
6815 | } |
6816 | return true; |
6817 | } |
6818 | if (length == 3 && vf > 2) |
6819 | { |
6820 | unsigned int k = 0, l = 0; |
6821 | |
/* Generating permutation constant to get all elements in right order.
6823 | For vector length 8 it is {0 3 6 1 4 7 2 5}. */ |
6824 | for (i = 0; i < nelt; i++) |
6825 | { |
6826 | if (3 * k + (l % 3) >= nelt) |
6827 | { |
6828 | k = 0; |
6829 | l += (3 - (nelt % 3)); |
6830 | } |
6831 | sel[i] = 3 * k + (l % 3); |
6832 | k++; |
6833 | } |
6834 | vec_perm_indices indices (sel, 2, nelt); |
6835 | if (!can_vec_perm_const_p (vmode, vmode, indices)) |
6836 | { |
6837 | if (dump_enabled_p ()) |
6838 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6839 | "shuffle of 3 fields structure is not \ |
6840 | supported by target\n" ); |
6841 | return false; |
6842 | } |
6843 | perm3_mask = vect_gen_perm_mask_checked (vectype, indices); |
6844 | |
6845 | /* Generating permutation constant to shift all elements. |
6846 | For vector length 8 it is {6 7 8 9 10 11 12 13}. */ |
6847 | for (i = 0; i < nelt; i++) |
6848 | sel[i] = 2 * (nelt / 3) + (nelt % 3) + i; |
6849 | indices.new_vector (sel, 2, nelt); |
6850 | if (!can_vec_perm_const_p (vmode, vmode, indices)) |
6851 | { |
6852 | if (dump_enabled_p ()) |
6853 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6854 | "shift permutation is not supported by target\n" ); |
6855 | return false; |
6856 | } |
6857 | shift1_mask = vect_gen_perm_mask_checked (vectype, indices); |
6858 | |
6859 | /* Generating permutation constant to shift all elements. |
6860 | For vector length 8 it is {5 6 7 8 9 10 11 12}. */ |
6861 | for (i = 0; i < nelt; i++) |
6862 | sel[i] = 2 * (nelt / 3) + 1 + i; |
6863 | indices.new_vector (sel, 2, nelt); |
6864 | if (!can_vec_perm_const_p (vmode, vmode, indices)) |
6865 | { |
6866 | if (dump_enabled_p ()) |
6867 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6868 | "shift permutation is not supported by target\n" ); |
6869 | return false; |
6870 | } |
6871 | shift2_mask = vect_gen_perm_mask_checked (vectype, indices); |
6872 | |
6873 | /* Generating permutation constant to shift all elements. |
6874 | For vector length 8 it is {3 4 5 6 7 8 9 10}. */ |
6875 | for (i = 0; i < nelt; i++) |
6876 | sel[i] = (nelt / 3) + (nelt % 3) / 2 + i; |
6877 | indices.new_vector (sel, 2, nelt); |
6878 | if (!can_vec_perm_const_p (vmode, vmode, indices)) |
6879 | { |
6880 | if (dump_enabled_p ()) |
6881 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6882 | "shift permutation is not supported by target\n" ); |
6883 | return false; |
6884 | } |
6885 | shift3_mask = vect_gen_perm_mask_checked (vectype, indices); |
6886 | |
6887 | /* Generating permutation constant to shift all elements. |
6888 | For vector length 8 it is {5 6 7 8 9 10 11 12}. */ |
6889 | for (i = 0; i < nelt; i++) |
6890 | sel[i] = 2 * (nelt / 3) + (nelt % 3) / 2 + i; |
6891 | indices.new_vector (sel, 2, nelt); |
6892 | if (!can_vec_perm_const_p (vmode, vmode, indices)) |
6893 | { |
6894 | if (dump_enabled_p ()) |
6895 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6896 | "shift permutation is not supported by target\n" ); |
6897 | return false; |
6898 | } |
6899 | shift4_mask = vect_gen_perm_mask_checked (vectype, indices); |
6900 | |
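/* First shuffle each input vector with PERM3_MASK so that its own elements
appear in the strided order described in the function comment, then apply
the shift rounds that rotate elements between the three vectors and
extract the final output vectors. */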
6901 | for (k = 0; k < 3; k++) |
6902 | { |
6903 | data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_shuffle3" ); |
6904 | perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, |
6905 | dr_chain[k], dr_chain[k], |
6906 | perm3_mask); |
6907 | vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); |
6908 | vect[k] = data_ref; |
6909 | } |
6910 | |
6911 | for (k = 0; k < 3; k++) |
6912 | { |
6913 | data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_shift1" ); |
6914 | perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, |
6915 | vect[k % 3], vect[(k + 1) % 3], |
6916 | shift1_mask); |
6917 | vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); |
6918 | vect_shift[k] = data_ref; |
6919 | } |
6920 | |
6921 | for (k = 0; k < 3; k++) |
6922 | { |
6923 | data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_shift2" ); |
6924 | perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, |
6925 | vect_shift[(4 - k) % 3], |
6926 | vect_shift[(3 - k) % 3], |
6927 | shift2_mask); |
6928 | vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); |
6929 | vect[k] = data_ref; |
6930 | } |
6931 | |
6932 | (*result_chain)[3 - (nelt % 3)] = vect[2]; |
6933 | |
6934 | data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_shift3" ); |
6935 | perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[0], |
6936 | vect[0], shift3_mask); |
6937 | vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); |
6938 | (*result_chain)[nelt % 3] = data_ref; |
6939 | |
6940 | data_ref = make_temp_ssa_name (type: vectype, NULL, name: "vect_shift4" ); |
6941 | perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[1], |
6942 | vect[1], shift4_mask); |
6943 | vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); |
6944 | (*result_chain)[0] = data_ref; |
6945 | return true; |
6946 | } |
6947 | return false; |
6948 | } |
6949 | |
6950 | /* Function vect_transform_grouped_load. |
6951 | |
6952 | Given a chain of input interleaved data-refs (in DR_CHAIN), build statements |
to perform their permutation and assign the resulting vectorized statements
to the corresponding scalar statements. */
6956 | |
6957 | void |
6958 | vect_transform_grouped_load (vec_info *vinfo, stmt_vec_info stmt_info, |
6959 | vec<tree> dr_chain, |
6960 | int size, gimple_stmt_iterator *gsi) |
6961 | { |
6962 | machine_mode mode; |
6963 | vec<tree> result_chain = vNULL; |
6964 | |
6965 | /* DR_CHAIN contains input data-refs that are a part of the interleaving. |
6966 | RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted |
6967 | vectors, that are ready for vector computation. */ |
6968 | result_chain.create (nelems: size); |
6969 | |
/* If the reassociation width for the vector type is 2 or greater, the target
machine can execute 2 or more vector instructions in parallel. Otherwise
try to get the chain for the load group using
vect_shift_permute_load_chain. */
6973 | mode = TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)); |
6974 | if (targetm.sched.reassociation_width (VEC_PERM_EXPR, mode) > 1 |
6975 | || pow2p_hwi (x: size) |
6976 | || !vect_shift_permute_load_chain (vinfo, dr_chain, length: size, stmt_info, |
6977 | gsi, result_chain: &result_chain)) |
6978 | vect_permute_load_chain (vinfo, dr_chain, |
6979 | length: size, stmt_info, gsi, result_chain: &result_chain); |
6980 | vect_record_grouped_load_vectors (vinfo, stmt_info, result_chain); |
6981 | result_chain.release (); |
6982 | } |
6983 | |
6984 | /* RESULT_CHAIN contains the output of a group of grouped loads that were |
6985 | generated as part of the vectorization of STMT_INFO. Assign the statement |
6986 | for each vector to the associated scalar statement. */ |
6987 | |
6988 | void |
6989 | vect_record_grouped_load_vectors (vec_info *, stmt_vec_info stmt_info, |
6990 | vec<tree> result_chain) |
6991 | { |
6992 | stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); |
6993 | unsigned int i, gap_count; |
6994 | tree tmp_data_ref; |
6995 | |
6996 | /* Put a permuted data-ref in the VECTORIZED_STMT field. |
Since we scan the chain starting from its first node, their order
corresponds to the order of data-refs in RESULT_CHAIN. */
6999 | stmt_vec_info next_stmt_info = first_stmt_info; |
7000 | gap_count = 1; |
7001 | FOR_EACH_VEC_ELT (result_chain, i, tmp_data_ref) |
7002 | { |
7003 | if (!next_stmt_info) |
7004 | break; |
7005 | |
/* Skip the gaps. Loads created for the gaps will be removed by the dead
code elimination pass later. No need to check for the first stmt in
7008 | the group, since it always exists. |
7009 | DR_GROUP_GAP is the number of steps in elements from the previous |
7010 | access (if there is no gap DR_GROUP_GAP is 1). We skip loads that |
7011 | correspond to the gaps. */ |
7012 | if (next_stmt_info != first_stmt_info |
7013 | && gap_count < DR_GROUP_GAP (next_stmt_info)) |
7014 | { |
7015 | gap_count++; |
7016 | continue; |
7017 | } |
7018 | |
7019 | /* ??? The following needs cleanup after the removal of |
7020 | DR_GROUP_SAME_DR_STMT. */ |
7021 | if (next_stmt_info) |
7022 | { |
7023 | gimple *new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref); |
7024 | /* We assume that if VEC_STMT is not NULL, this is a case of multiple |
7025 | copies, and we put the new vector statement last. */ |
7026 | STMT_VINFO_VEC_STMTS (next_stmt_info).safe_push (obj: new_stmt); |
7027 | |
7028 | next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); |
7029 | gap_count = 1; |
7030 | } |
7031 | } |
7032 | } |
7033 | |
7034 | /* Function vect_force_dr_alignment_p. |
7035 | |
Return whether the alignment of DECL can be forced to an ALIGNMENT-bit
boundary. */
7038 | |
7039 | bool |
7040 | vect_can_force_dr_alignment_p (const_tree decl, poly_uint64 alignment) |
7041 | { |
7042 | if (!VAR_P (decl)) |
7043 | return false; |
7044 | |
7045 | if (decl_in_symtab_p (decl) |
7046 | && !symtab_node::get (decl)->can_increase_alignment_p ()) |
7047 | return false; |
7048 | |
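/* Variables with static storage are limited by the maximum supported
object-file alignment, automatic variables by the maximum supported
stack alignment. */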
7049 | if (TREE_STATIC (decl)) |
7050 | return (known_le (alignment, |
7051 | (unsigned HOST_WIDE_INT) MAX_OFILE_ALIGNMENT)); |
7052 | else |
7053 | return (known_le (alignment, (unsigned HOST_WIDE_INT) MAX_STACK_ALIGNMENT)); |
7054 | } |
7055 | |
7056 | /* Return whether the data reference DR_INFO is supported with respect to its |
7057 | alignment. |
VECTYPE is the vector type used for the access and MISALIGNMENT its known
misalignment: zero when the access is known to be aligned, or
DR_MISALIGNMENT_UNKNOWN when the misalignment is not known at compile
time. */
7061 | |
7062 | enum dr_alignment_support |
7063 | vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info, |
7064 | tree vectype, int misalignment) |
7065 | { |
7066 | data_reference *dr = dr_info->dr; |
7067 | stmt_vec_info stmt_info = dr_info->stmt; |
7068 | machine_mode mode = TYPE_MODE (vectype); |
7069 | loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (p: vinfo); |
7070 | class loop *vect_loop = NULL; |
7071 | bool nested_in_vect_loop = false; |
7072 | |
7073 | if (misalignment == 0) |
7074 | return dr_aligned; |
7075 | |
7076 | /* For now assume all conditional loads/stores support unaligned |
7077 | access without any special code. */ |
7078 | if (gcall *stmt = dyn_cast <gcall *> (p: stmt_info->stmt)) |
7079 | if (gimple_call_internal_p (gs: stmt) |
7080 | && (gimple_call_internal_fn (gs: stmt) == IFN_MASK_LOAD |
7081 | || gimple_call_internal_fn (gs: stmt) == IFN_MASK_STORE)) |
7082 | return dr_unaligned_supported; |
7083 | |
7084 | if (loop_vinfo) |
7085 | { |
7086 | vect_loop = LOOP_VINFO_LOOP (loop_vinfo); |
7087 | nested_in_vect_loop = nested_in_vect_loop_p (loop: vect_loop, stmt_info); |
7088 | } |
7089 | |
7090 | /* Possibly unaligned access. */ |
7091 | |
7092 | /* We can choose between using the implicit realignment scheme (generating |
7093 | a misaligned_move stmt) and the explicit realignment scheme (generating |
7094 | aligned loads with a REALIGN_LOAD). There are two variants to the |
7095 | explicit realignment scheme: optimized, and unoptimized. |
7096 | We can optimize the realignment only if the step between consecutive |
7097 | vector loads is equal to the vector size. Since the vector memory |
7098 | accesses advance in steps of VS (Vector Size) in the vectorized loop, it |
7099 | is guaranteed that the misalignment amount remains the same throughout the |
7100 | execution of the vectorized loop. Therefore, we can create the |
7101 | "realignment token" (the permutation mask that is passed to REALIGN_LOAD) |
7102 | at the loop preheader. |
7103 | |
7104 | However, in the case of outer-loop vectorization, when vectorizing a |
7105 | memory access in the inner-loop nested within the LOOP that is now being |
7106 | vectorized, while it is guaranteed that the misalignment of the |
7107 | vectorized memory access will remain the same in different outer-loop |
iterations, it is *not* guaranteed that it will remain the same throughout
7109 | the execution of the inner-loop. This is because the inner-loop advances |
7110 | with the original scalar step (and not in steps of VS). If the inner-loop |
7111 | step happens to be a multiple of VS, then the misalignment remains fixed |
7112 | and we can use the optimized realignment scheme. For example: |
7113 | |
7114 | for (i=0; i<N; i++) |
7115 | for (j=0; j<M; j++) |
7116 | s += a[i+j]; |
7117 | |
7118 | When vectorizing the i-loop in the above example, the step between |
7119 | consecutive vector loads is 1, and so the misalignment does not remain |
7120 | fixed across the execution of the inner-loop, and the realignment cannot |
7121 | be optimized (as illustrated in the following pseudo vectorized loop): |
7122 | |
7123 | for (i=0; i<N; i+=4) |
7124 | for (j=0; j<M; j++){ |
7125 | vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...} |
7126 | // when j is {0,1,2,3,4,5,6,7,...} respectively. |
7127 | // (assuming that we start from an aligned address). |
7128 | } |
7129 | |
7130 | We therefore have to use the unoptimized realignment scheme: |
7131 | |
7132 | for (i=0; i<N; i+=4) |
7133 | for (j=k; j<M; j+=4) |
7134 | vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming |
7135 | // that the misalignment of the initial address is |
7136 | // 0). |
7137 | |
7138 | The loop can then be vectorized as follows: |
7139 | |
7140 | for (k=0; k<4; k++){ |
7141 | rt = get_realignment_token (&vp[k]); |
7142 | for (i=0; i<N; i+=4){ |
7143 | v1 = vp[i+k]; |
7144 | for (j=k; j<M; j+=4){ |
7145 | v2 = vp[i+j+VS-1]; |
7146 | va = REALIGN_LOAD <v1,v2,rt>; |
7147 | vs += va; |
7148 | v1 = v2; |
7149 | } |
7150 | } |
7151 | } */ |
7152 | |
7153 | if (DR_IS_READ (dr)) |
7154 | { |
7155 | if (optab_handler (op: vec_realign_load_optab, mode) != CODE_FOR_nothing |
7156 | && (!targetm.vectorize.builtin_mask_for_load |
7157 | || targetm.vectorize.builtin_mask_for_load ())) |
7158 | { |
7159 | /* If we are doing SLP then the accesses need not have the |
same alignment; instead it depends on the SLP group size. */
7161 | if (loop_vinfo |
7162 | && STMT_SLP_TYPE (stmt_info) |
7163 | && (!STMT_VINFO_GROUPED_ACCESS (stmt_info) |
7164 | || !multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo) |
7165 | * (DR_GROUP_SIZE |
7166 | (DR_GROUP_FIRST_ELEMENT (stmt_info))), |
7167 | b: TYPE_VECTOR_SUBPARTS (node: vectype)))) |
7168 | ; |
7169 | else if (!loop_vinfo |
7170 | || (nested_in_vect_loop |
7171 | && maybe_ne (TREE_INT_CST_LOW (DR_STEP (dr)), |
7172 | b: GET_MODE_SIZE (TYPE_MODE (vectype))))) |
7173 | return dr_explicit_realign; |
7174 | else |
7175 | return dr_explicit_realign_optimized; |
7176 | } |
7177 | } |
7178 | |
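/* Otherwise ask the target whether a plain misaligned vector access of
this type and misalignment is supported. */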
7179 | bool is_packed = false; |
7180 | tree type = TREE_TYPE (DR_REF (dr)); |
7181 | if (misalignment == DR_MISALIGNMENT_UNKNOWN) |
7182 | is_packed = not_size_aligned (DR_REF (dr)); |
7183 | if (targetm.vectorize.support_vector_misalignment (mode, type, misalignment, |
7184 | is_packed)) |
7185 | return dr_unaligned_supported; |
7186 | |
7187 | /* Unsupported. */ |
7188 | return dr_unaligned_unsupported; |
7189 | } |
7190 | |