//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCCCState.h"
#include "PPCCallingConv.h"
#include "PPCFrameLowering.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCRegisterInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <list>
#include <optional>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "ppc-lowering"

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableSCO("disable-ppc-sco",
cl::desc("disable sibling call optimization on ppc"), cl::Hidden);

static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);

static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
cl::desc("use absolute jump tables on ppc"), cl::Hidden);

static cl::opt<bool>
    DisablePerfectShuffle("ppc-disable-perfect-shuffle",
                          cl::desc("disable vector permute decomposition"),
                          cl::init(true), cl::Hidden);

cl::opt<bool> DisableAutoPairedVecSt(
    "disable-auto-paired-vec-st",
    cl::desc("disable automatically generated 32byte paired vector stores"),
    cl::init(true), cl::Hidden);

static cl::opt<unsigned> PPCMinimumJumpTableEntries(
    "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
    cl::desc("Set minimum number of entries to use a jump table on PPC"));

static cl::opt<unsigned> PPCGatherAllAliasesMaxDepth(
    "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
    cl::desc("max depth when checking alias info in GatherAllAliases()"));

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
STATISTIC(ShufflesHandledWithVPERM,
          "Number of shuffles lowered to a VPERM or XXPERM");
STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");

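// Forward declarations of static helpers defined later in this file.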
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);

static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);

static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";

// A faster local-[exec|dynamic] TLS access sequence (enabled with the
// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
// variables; consistent with the IBM XL compiler, we apply a max size of
// slightly under 32KB.
constexpr uint64_t AIXSmallTlsPolicySizeLimit = 32751;

// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // Initialize the map that relates the PPC addressing modes to the computed
  // flags of a load/store instruction. The map is used to determine the
  // optimal addressing mode when selecting loads and stores.
  initializeAddrModeMap();
  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  if (!useSoftFloat()) {
    if (hasSPE()) {
      addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
      // The EFPU2 APU only supports f32.
      if (!Subtarget.hasEFPU2())
        addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
    } else {
      addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
      addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
    }
  }

  // Match BITREVERSE to the customized fast code sequence in the td file.
  setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
  setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);

  // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);

  // Custom lower inline assembly to check for special registers.
  setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
  setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
  }

  if (Subtarget.isISA3_0()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
    setTruncStoreAction(MVT::f64, MVT::f16, Legal);
    setTruncStoreAction(MVT::f32, MVT::f16, Legal);
  } else {
    // No extending loads from f16 or HW conversions back and forth.
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  }

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
  if (!Subtarget.hasSPE()) {
    setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
    setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
  }

  // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Legal);
    setOperationAction(ISD::ADDE, VT, Legal);
    setOperationAction(ISD::SUBC, VT, Legal);
    setOperationAction(ISD::SUBE, VT, Legal);
  }

  if (Subtarget.useCRBits()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

    if (isPPC64 || Subtarget.hasFPCVT()) {
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);

      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
                         isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);

      setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i1, Promote);
      AddPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i1, Promote);
      AddPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);

      setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
      AddPromotedToType(ISD::FP_TO_SINT, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
      AddPromotedToType(ISD::FP_TO_UINT, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
    } else {
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
    }

    // PowerPC does not support direct load/store of condition registers.
    setOperationAction(ISD::LOAD, MVT::i1, Custom);
    setOperationAction(ISD::STORE, MVT::i1, Custom);

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)
      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);

    for (MVT VT : MVT::integer_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
      setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
      setTruncStoreAction(VT, MVT::i1, Expand);
    }

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).
  setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
  setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom);
  setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions unless we are on P9.
  // On P9 we may use a hardware instruction to compute the remainder.
  // When the result of both the remainder and the division is required, it is
  // more efficient to compute the remainder from the result of the division
  // rather than use the remainder instruction. The instructions are legalized
  // directly because the DivRemPairsPass performs the transformation at the IR
  // level.
  if (Subtarget.isISA3_0()) {
    setOperationAction(ISD::SREM, MVT::i32, Legal);
    setOperationAction(ISD::UREM, MVT::i32, Legal);
    setOperationAction(ISD::SREM, MVT::i64, Legal);
    setOperationAction(ISD::UREM, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::SREM, MVT::i32, Expand);
    setOperationAction(ISD::UREM, MVT::i32, Expand);
    setOperationAction(ISD::SREM, MVT::i64, Expand);
    setOperationAction(ISD::UREM, MVT::i64, Expand);
  }

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // Handle constrained floating-point operations on scalar types.
  // TODO: Handle SPE-specific operations.
  setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
  setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
  setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
  setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
  setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);

  setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
  setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
  setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
  setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);

  if (!Subtarget.hasSPE()) {
    setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
  }

  if (Subtarget.hasVSX()) {
    setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal);
  }

  if (Subtarget.hasFSQRT()) {
    setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
  }

  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FCEIL, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal);

    setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FCEIL, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal);
  }

  // We don't support sin/cos/sqrt/fmod/pow.
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FPOW , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  setOperationAction(ISD::FPOW , MVT::f32, Expand);

  // MASS transformation for LLVM intrinsics with replicating fast-math flag
  // to be consistent with the PPCGenScalarMASSEntries pass.
  if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
    setOperationAction(ISD::FSIN , MVT::f64, Custom);
    setOperationAction(ISD::FCOS , MVT::f64, Custom);
    setOperationAction(ISD::FPOW , MVT::f64, Custom);
    setOperationAction(ISD::FLOG, MVT::f64, Custom);
    setOperationAction(ISD::FLOG10, MVT::f64, Custom);
    setOperationAction(ISD::FEXP, MVT::f64, Custom);
    setOperationAction(ISD::FSIN , MVT::f32, Custom);
    setOperationAction(ISD::FCOS , MVT::f32, Custom);
    setOperationAction(ISD::FPOW , MVT::f32, Custom);
    setOperationAction(ISD::FLOG, MVT::f32, Custom);
    setOperationAction(ISD::FLOG10, MVT::f32, Custom);
    setOperationAction(ISD::FEXP, MVT::f32, Custom);
  }

  if (Subtarget.hasSPE()) {
    setOperationAction(ISD::FMA , MVT::f64, Expand);
    setOperationAction(ISD::FMA , MVT::f32, Expand);
  } else {
    setOperationAction(ISD::FMA , MVT::f64, Legal);
    setOperationAction(ISD::FMA , MVT::f32, Legal);
  }

  if (Subtarget.hasSPE())
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);

  setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);

  // If we're enabling GP optimizations, use hardware square root.
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
        Subtarget.hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
        Subtarget.hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);

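  // FP copysign maps directly to the fcpsgn instruction when the subtarget
  // has it; otherwise let the generic legalizer expand it.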
  if (Subtarget.hasFCPSGN()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
  } else {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  }

  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FROUND, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
  }

  // Prior to P10, PowerPC does not have BSWAP, but we can use the vector
  // BSWAP instruction xxbrd to speed up scalar BSWAP64.
  if (Subtarget.isISA3_1()) {
    setOperationAction(ISD::BSWAP, MVT::i32, Legal);
    setOperationAction(ISD::BSWAP, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);
    setOperationAction(
        ISD::BSWAP, MVT::i64,
        (Subtarget.hasP9Vector() && Subtarget.isPPC64()) ? Custom : Expand);
  }

  // CTPOP and CTTZ were introduced in P8 and P9, respectively.
  if (Subtarget.isISA3_0()) {
    setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
    setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
  } else {
    setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
    setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
  }

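  // Use popcntw/popcntd only on subtargets where they are fast; otherwise
  // fall back to the generic bit-twiddling expansion.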
  if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
    setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
    setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
    setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
  }

  // PowerPC does not have ROTR
  setOperationAction(ISD::ROTR, MVT::i32 , Expand);
  setOperationAction(ISD::ROTR, MVT::i64 , Expand);

  if (!Subtarget.useCRBits()) {
    // PowerPC does not have Select
    setOperationAction(ISD::SELECT, MVT::i32, Expand);
    setOperationAction(ISD::SELECT, MVT::i64, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Expand);
  }

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::SETCC, MVT::i32, Custom);

  if (Subtarget.hasFPU()) {
    setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal);

    setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal);
  }

  // PowerPC does not have BRCOND which requires SetCC
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  if (Subtarget.hasSPE()) {
    // SPE has built-in conversions
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);

    // SPE supports signaling compare of f32/f64.
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
  } else {
    // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

    // PowerPC does not have [U|S]INT_TO_FP
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
  }

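  // Direct-move instructions (e.g. mtvsrd/mfvsrd) make bitcasts between
  // GPRs and FP/vector registers cheap on 64-bit subtargets.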
  if (Subtarget.hasDirectMove() && isPPC64) {
    setOperationAction(ISD::BITCAST, MVT::f32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i64, Legal);
    setOperationAction(ISD::BITCAST, MVT::f64, Legal);
    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::LRINT, MVT::f64, Legal);
      setOperationAction(ISD::LRINT, MVT::f32, Legal);
      setOperationAction(ISD::LLRINT, MVT::f64, Legal);
      setOperationAction(ISD::LLRINT, MVT::f32, Legal);
      setOperationAction(ISD::LROUND, MVT::f64, Legal);
      setOperationAction(ISD::LROUND, MVT::f32, Legal);
      setOperationAction(ISD::LLROUND, MVT::f64, Legal);
      setOperationAction(ISD::LLROUND, MVT::f32, Legal);
    }
  } else {
    setOperationAction(ISD::BITCAST, MVT::f32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i64, Expand);
    setOperationAction(ISD::BITCAST, MVT::f64, Expand);
  }

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuations, user-level threading, etc. As a result, no
  // other SjLj exception interfaces are implemented; please don't build
  // your own exception handling based on them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART , MVT::Other, Custom);

  if (Subtarget.is64BitELFABI()) {
    // VAARG always uses double-word chunks, so promote anything smaller.
    setOperationAction(ISD::VAARG, MVT::i1, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::i8, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::i16, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::i32, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::Other, Expand);
  } else if (Subtarget.is32BitELFABI()) {
    // VAARG is custom lowered with the 32-bit SVR4 ABI.
    setOperationAction(ISD::VAARG, MVT::Other, Custom);
    setOperationAction(ISD::VAARG, MVT::i64, Custom);
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  // VACOPY is custom lowered with the 32-bit SVR4 ABI.
  if (Subtarget.is32BitELFABI())
    setOperationAction(ISD::VACOPY , MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY , MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom);

  // To handle counter-based loop conditions.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  // Comparisons that require checking two conditions.
  if (Subtarget.hasSPE()) {
    setCondCodeAction(ISD::SETO, MVT::f32, Expand);
    setCondCodeAction(ISD::SETO, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
  }
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

  setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
  setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);

  if (Subtarget.has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
    }
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    if (Subtarget.hasSPE()) {
      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
    } else {
      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand);
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
    }
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
      setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly.
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or.
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  // PowerPC has better expansions for funnel shifts than the generic
  // TargetLowering::expandFunnelShift.
  if (Subtarget.has64BitSupport()) {
    setOperationAction(ISD::FSHL, MVT::i64, Custom);
    setOperationAction(ISD::FSHR, MVT::i64, Custom);
  }
  setOperationAction(ISD::FSHL, MVT::i32, Custom);
  setOperationAction(ISD::FSHR, MVT::i32, Custom);

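  // VSX has scalar floating-point minimum/maximum instructions, so the
  // IEEE-semantics min/max nodes are legal for f32 and f64.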
  if (Subtarget.hasVSX()) {
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
  }

  if (Subtarget.hasAltivec()) {
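    // Altivec has native saturating add/subtract instructions (e.g.
    // vaddsbs/vsubuhs) for all three integer element widths.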
    for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
      setOperationAction(ISD::SADDSAT, VT, Legal);
      setOperationAction(ISD::SSUBSAT, VT, Legal);
      setOperationAction(ISD::UADDSAT, VT, Legal);
      setOperationAction(ISD::USUBSAT, VT, Legal);
    }
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);

      // For v2i64, these are only valid with P8Vector. This is corrected after
      // the loop.
      if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
        setOperationAction(ISD::SMAX, VT, Legal);
        setOperationAction(ISD::SMIN, VT, Legal);
        setOperationAction(ISD::UMAX, VT, Legal);
        setOperationAction(ISD::UMIN, VT, Legal);
      } else {
        setOperationAction(ISD::SMAX, VT, Expand);
        setOperationAction(ISD::SMIN, VT, Expand);
        setOperationAction(ISD::UMAX, VT, Expand);
        setOperationAction(ISD::UMIN, VT, Expand);
      }

      if (Subtarget.hasVSX()) {
        setOperationAction(ISD::FMAXNUM, VT, Legal);
        setOperationAction(ISD::FMINNUM, VT, Legal);
      }

      // Vector instructions introduced in P8
      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
        setOperationAction(ISD::CTPOP, VT, Legal);
        setOperationAction(ISD::CTLZ, VT, Legal);
      } else {
        setOperationAction(ISD::CTPOP, VT, Expand);
        setOperationAction(ISD::CTLZ, VT, Expand);
      }

      // Vector instructions introduced in P9
      if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
        setOperationAction(ISD::CTTZ, VT, Legal);
      else
        setOperationAction(ISD::CTTZ, VT, Expand);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND , VT, Promote);
      AddPromotedToType (ISD::AND , VT, MVT::v4i32);
      setOperationAction(ISD::OR , VT, Promote);
      AddPromotedToType (ISD::OR , VT, MVT::v4i32);
      setOperationAction(ISD::XOR , VT, Promote);
      AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
      setOperationAction(ISD::LOAD , VT, Promote);
      AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::SELECT_CC, VT, Promote);
      AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL, VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT, VT, Expand);
      setOperationAction(ISD::FLDEXP, VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::BSWAP, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }
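    // SELECT_CC was promoted to v4i32 for all vector types above; a type
    // cannot promote to itself, so v4i32 is expanded here instead.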
    setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
    if (!Subtarget.hasP8Vector()) {
      setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
      setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
      setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
      setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    // Vector truncates to sub-word integers that fit in an Altivec/VSX
    // register are cheap, so handle them before they get expanded to scalar.
    setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);

    setOperationAction(ISD::AND , MVT::v4i32, Legal);
    setOperationAction(ISD::OR , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
    setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
    // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
    if (Subtarget.hasAltivec())
      for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
        setOperationAction(ISD::ROTL, VT, Legal);
    // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::ROTL, MVT::v2i64, Legal);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (Subtarget.hasVSX()) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
    }

    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::MUL, MVT::v4i32, Legal);
    else
      setOperationAction(ISD::MUL, MVT::v4i32, Custom);

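    // ISA 3.1 (Power10) adds vector integer multiply, divide, and modulo
    // instructions for word, doubleword, and quadword elements.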
    if (Subtarget.isISA3_1()) {
      setOperationAction(ISD::MUL, MVT::v2i64, Legal);
      setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
      setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
      setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
      setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
      setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
      setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
      setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
      setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
      setOperationAction(ISD::UREM, MVT::v2i64, Legal);
      setOperationAction(ISD::SREM, MVT::v2i64, Legal);
      setOperationAction(ISD::UREM, MVT::v4i32, Legal);
      setOperationAction(ISD::SREM, MVT::v4i32, Legal);
      setOperationAction(ISD::UREM, MVT::v1i128, Legal);
      setOperationAction(ISD::SREM, MVT::v1i128, Legal);
      setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
      setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
      setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
    }

    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not contain unordered floating-point compare instructions
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);

    if (Subtarget.hasVSX()) {
      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
      if (Subtarget.hasP8Vector()) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
      }
      if (Subtarget.hasDirectMove() && isPPC64) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
      }
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);

      // The nearbyint variants are not allowed to raise the inexact exception
      // so we can only code-gen them with unsafe math.
      if (TM.Options.UnsafeFPMath) {
        setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
        setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
      }

      setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
      setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
      setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
      setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
      setOperationAction(ISD::FROUND, MVT::f64, Legal);
      setOperationAction(ISD::FRINT, MVT::f64, Legal);

      setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
      setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
      setOperationAction(ISD::FROUND, MVT::f32, Legal);
      setOperationAction(ISD::FRINT, MVT::f32, Legal);

      setOperationAction(ISD::MUL, MVT::v2f64, Legal);
      setOperationAction(ISD::FMA, MVT::v2f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);

      // Share the Altivec comparison restrictions.
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
      setOperationAction(ISD::STORE, MVT::v2f64, Legal);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);

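      // P8 Vector adds single-precision scalar support to VSX, so f32 can
      // join f64 in the full 64-entry VSX register file.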
      if (Subtarget.hasP8Vector())
        addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      if (Subtarget.hasP8Altivec()) {
        setOperationAction(ISD::SHL, MVT::v2i64, Legal);
        setOperationAction(ISD::SRA, MVT::v2i64, Legal);
        setOperationAction(ISD::SRL, MVT::v2i64, Legal);

        // 128 bit shifts can be accomplished via 3 instructions for SHL and
        // SRL, but not for SRA because of the instructions available:
        // VS{RL} and VS{RL}O. However, due to direct move costs, it's not
        // worth doing.
        setOperationAction(ISD::SHL, MVT::v1i128, Expand);
        setOperationAction(ISD::SRL, MVT::v1i128, Expand);
        setOperationAction(ISD::SRA, MVT::v1i128, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
      } else {
        setOperationAction(ISD::SHL, MVT::v2i64, Expand);
        setOperationAction(ISD::SRA, MVT::v2i64, Expand);
        setOperationAction(ISD::SRL, MVT::v2i64, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Custom);

        // VSX v2i64 only supports non-arithmetic operations.
        setOperationAction(ISD::ADD, MVT::v2i64, Expand);
        setOperationAction(ISD::SUB, MVT::v2i64, Expand);
      }

      if (Subtarget.isISA3_1())
        setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
      else
        setOperationAction(ISD::SETCC, MVT::v1i128, Expand);

      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
      AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
      setOperationAction(ISD::STORE, MVT::v2i64, Promote);
      AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);

      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
      setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);

      // Custom handling for partial vectors of integers converted to
      // floating point. We already have optimal handling for v2i32 through
      // the DAG combine, so those aren't necessary.
      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i8, Custom);
      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i8, Custom);
      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i16, Custom);
      setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i16, Custom);
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i8, Custom);
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i8, Custom);
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i16, Custom);
      setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i16, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);

      setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
      setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
      setOperationAction(ISD::FABS, MVT::v4f32, Legal);
      setOperationAction(ISD::FABS, MVT::v2f64, Legal);
      setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
      setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);

      setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
      setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);

      // Handle constrained floating-point operations on vectors. The
      // predicate is `hasVSX` because Altivec instructions do not raise
      // floating-point exceptions, while VSX vector instructions do.
      setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
      setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
      setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
      setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
      setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
      setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
      setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
      setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
      setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
      setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
      setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
      setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);

      setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
      setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
      setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
      setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
      setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
      setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
      setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
      setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);
      setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
      setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
      setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
      setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
      setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);

      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
      addRegisterClass(MVT::f128, &PPC::VRRCRegClass);

      for (MVT FPT : MVT::fp_valuetypes())
        setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);

      // Expand the SELECT to SELECT_CC.
      setOperationAction(ISD::SELECT, MVT::f128, Expand);

      setTruncStoreAction(MVT::f128, MVT::f64, Expand);
      setTruncStoreAction(MVT::f128, MVT::f32, Expand);

      // No implementation for these ops for PowerPC.
      setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
      setOperationAction(ISD::FSIN, MVT::f128, Expand);
      setOperationAction(ISD::FCOS, MVT::f128, Expand);
      setOperationAction(ISD::FPOW, MVT::f128, Expand);
      setOperationAction(ISD::FPOWI, MVT::f128, Expand);
      setOperationAction(ISD::FREM, MVT::f128, Expand);
    }

1192 | if (Subtarget.hasP8Altivec()) { |
1193 | addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass); |
1194 | addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass); |
1195 | } |
1196 | |
1197 | if (Subtarget.hasP9Vector()) { |
1198 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); |
1199 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); |
1200 | |
1201 | // Test data class instructions store results in CR bits. |
1202 | if (Subtarget.useCRBits()) { |
1203 | setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom); |
1204 | setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom); |
1205 | setOperationAction(ISD::IS_FPCLASS, MVT::f128, Custom); |
1206 | } |
1207 | |
    // 128-bit shifts can be accomplished via 3 instructions for SHL and
    // SRL, but not for SRA, because the available instructions
    // (VSL/VSR and VSLO/VSRO) have no arithmetic counterpart.
1211 | setOperationAction(ISD::SHL, MVT::v1i128, Legal); |
1212 | setOperationAction(ISD::SRL, MVT::v1i128, Legal); |
1213 | setOperationAction(ISD::SRA, MVT::v1i128, Expand); |
1214 | |
1215 | setOperationAction(ISD::FADD, MVT::f128, Legal); |
1216 | setOperationAction(ISD::FSUB, MVT::f128, Legal); |
1217 | setOperationAction(ISD::FDIV, MVT::f128, Legal); |
1218 | setOperationAction(ISD::FMUL, MVT::f128, Legal); |
1219 | setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal); |
1220 | |
1221 | setOperationAction(ISD::FMA, MVT::f128, Legal); |
1222 | setCondCodeAction(ISD::SETULT, MVT::f128, Expand); |
1223 | setCondCodeAction(ISD::SETUGT, MVT::f128, Expand); |
1224 | setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand); |
1225 | setCondCodeAction(ISD::SETOGE, MVT::f128, Expand); |
1226 | setCondCodeAction(ISD::SETOLE, MVT::f128, Expand); |
1227 | setCondCodeAction(ISD::SETONE, MVT::f128, Expand); |
1228 | |
1229 | setOperationAction(ISD::FTRUNC, MVT::f128, Legal); |
1230 | setOperationAction(ISD::FRINT, MVT::f128, Legal); |
1231 | setOperationAction(ISD::FFLOOR, MVT::f128, Legal); |
1232 | setOperationAction(ISD::FCEIL, MVT::f128, Legal); |
1233 | setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal); |
1234 | setOperationAction(ISD::FROUND, MVT::f128, Legal); |
1235 | |
1236 | setOperationAction(ISD::FP_ROUND, MVT::f64, Legal); |
1237 | setOperationAction(ISD::FP_ROUND, MVT::f32, Legal); |
1238 | setOperationAction(ISD::BITCAST, MVT::i128, Custom); |
1239 | |
1240 | // Handle constrained floating-point operations of fp128 |
1241 | setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal); |
1242 | setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal); |
1243 | setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal); |
1244 | setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal); |
1245 | setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal); |
1246 | setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal); |
1247 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Legal); |
1248 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal); |
1249 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal); |
1250 | setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal); |
1251 | setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal); |
1252 | setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal); |
1253 | setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal); |
1254 | setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal); |
1255 | setOperationAction(ISD::STRICT_FROUND, MVT::f128, Legal); |
1256 | setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom); |
1257 | setOperationAction(ISD::BSWAP, MVT::v8i16, Legal); |
1258 | setOperationAction(ISD::BSWAP, MVT::v4i32, Legal); |
1259 | setOperationAction(ISD::BSWAP, MVT::v2i64, Legal); |
1260 | setOperationAction(ISD::BSWAP, MVT::v1i128, Legal); |
1261 | } else if (Subtarget.hasVSX()) { |
1262 | setOperationAction(ISD::LOAD, MVT::f128, Promote); |
1263 | setOperationAction(ISD::STORE, MVT::f128, Promote); |
1264 | |
1265 | AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32); |
1266 | AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32); |
1267 | |
    // Set FADD/FSUB as libcall to keep the legalizer from expanding
    // fp_to_uint and int_to_fp.
1270 | setOperationAction(ISD::FADD, MVT::f128, LibCall); |
1271 | setOperationAction(ISD::FSUB, MVT::f128, LibCall); |
1272 | |
1273 | setOperationAction(ISD::FMUL, MVT::f128, Expand); |
1274 | setOperationAction(ISD::FDIV, MVT::f128, Expand); |
1275 | setOperationAction(ISD::FNEG, MVT::f128, Expand); |
1276 | setOperationAction(ISD::FABS, MVT::f128, Expand); |
1277 | setOperationAction(ISD::FSQRT, MVT::f128, Expand); |
1278 | setOperationAction(ISD::FMA, MVT::f128, Expand); |
1279 | setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); |
1280 | |
1281 | // Expand the fp_extend if the target type is fp128. |
1282 | setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand); |
1283 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Expand); |
1284 | |
1285 | // Expand the fp_round if the source type is fp128. |
1286 | for (MVT VT : {MVT::f32, MVT::f64}) { |
1287 | setOperationAction(ISD::FP_ROUND, VT, Custom); |
1288 | setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom); |
1289 | } |
1290 | |
1291 | setOperationAction(ISD::SETCC, MVT::f128, Custom); |
1292 | setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom); |
1293 | setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom); |
1294 | setOperationAction(ISD::BR_CC, MVT::f128, Expand); |
1295 | |
1296 | // Lower following f128 select_cc pattern: |
1297 | // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE |
1298 | setOperationAction(ISD::SELECT_CC, MVT::f128, Custom); |
1299 | |
1300 | // We need to handle f128 SELECT_CC with integer result type. |
1301 | setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); |
1302 | setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand); |
1303 | } |
1304 | |
1305 | if (Subtarget.hasP9Altivec()) { |
1306 | if (Subtarget.isISA3_1()) { |
1307 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal); |
1308 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Legal); |
1309 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Legal); |
1310 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal); |
1311 | } else { |
1312 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); |
1313 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom); |
1314 | } |
1315 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal); |
1316 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal); |
1317 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal); |
1318 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Legal); |
1319 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal); |
1320 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal); |
1321 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal); |
1322 | |
1323 | setOperationAction(ISD::ABDU, MVT::v16i8, Legal); |
1324 | setOperationAction(ISD::ABDU, MVT::v8i16, Legal); |
1325 | setOperationAction(ISD::ABDU, MVT::v4i32, Legal); |
1326 | setOperationAction(ISD::ABDS, MVT::v4i32, Legal); |
1327 | } |
1328 | |
1329 | if (Subtarget.hasP10Vector()) { |
1330 | setOperationAction(ISD::SELECT_CC, MVT::f128, Custom); |
1331 | } |
1332 | } |
1333 | |
1334 | if (Subtarget.pairedVectorMemops()) { |
1335 | addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass); |
1336 | setOperationAction(ISD::LOAD, MVT::v256i1, Custom); |
1337 | setOperationAction(ISD::STORE, MVT::v256i1, Custom); |
1338 | } |
1339 | if (Subtarget.hasMMA()) { |
1340 | if (Subtarget.isISAFuture()) |
1341 | addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass); |
1342 | else |
1343 | addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass); |
1344 | setOperationAction(ISD::LOAD, MVT::v512i1, Custom); |
1345 | setOperationAction(ISD::STORE, MVT::v512i1, Custom); |
1346 | setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom); |
1347 | } |
1348 | |
1349 | if (Subtarget.has64BitSupport()) |
1350 | setOperationAction(ISD::PREFETCH, MVT::Other, Legal); |
1351 | |
1352 | if (Subtarget.isISA3_1()) |
1353 | setOperationAction(ISD::SRA, MVT::v1i128, Legal); |
1354 | |
1355 | setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom); |
1356 | |
1357 | if (!isPPC64) { |
1358 | setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); |
1359 | setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); |
1360 | } |
1361 | |
1362 | if (shouldInlineQuadwordAtomics()) { |
1363 | setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom); |
1364 | setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom); |
1365 | setOperationAction(ISD::INTRINSIC_VOID, MVT::i128, Custom); |
1366 | } |
1367 | |
1368 | setBooleanContents(ZeroOrOneBooleanContent); |
1369 | |
1370 | if (Subtarget.hasAltivec()) { |
1371 | // Altivec instructions set fields to all zeros or all ones. |
1372 | setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); |
1373 | } |
1374 | |
  setLibcallName(RTLIB::MULO_I128, nullptr);
1376 | if (!isPPC64) { |
1377 | // These libcalls are not available in 32-bit. |
    setLibcallName(RTLIB::SHL_I128, nullptr);
    setLibcallName(RTLIB::SRL_I128, nullptr);
    setLibcallName(RTLIB::SRA_I128, nullptr);
    setLibcallName(RTLIB::MUL_I128, nullptr);
    setLibcallName(RTLIB::MULO_I64, nullptr);
1383 | } |
1384 | |
1385 | if (shouldInlineQuadwordAtomics()) |
1386 | setMaxAtomicSizeInBitsSupported(128); |
1387 | else if (isPPC64) |
1388 | setMaxAtomicSizeInBitsSupported(64); |
1389 | else |
1390 | setMaxAtomicSizeInBitsSupported(32); |
1391 | |
1392 | setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1); |
1393 | |
1394 | // We have target-specific dag combine patterns for the following nodes: |
1395 | setTargetDAGCombine({ISD::AND, ISD::ADD, ISD::SHL, ISD::SRA, ISD::SRL, |
1396 | ISD::MUL, ISD::FMA, ISD::SINT_TO_FP, ISD::BUILD_VECTOR}); |
1397 | if (Subtarget.hasFPCVT()) |
1398 | setTargetDAGCombine(ISD::UINT_TO_FP); |
1399 | setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC}); |
1400 | if (Subtarget.useCRBits()) |
1401 | setTargetDAGCombine(ISD::BRCOND); |
1402 | setTargetDAGCombine({ISD::BSWAP, ISD::INTRINSIC_WO_CHAIN, |
1403 | ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID}); |
1404 | |
1405 | setTargetDAGCombine({ISD::SIGN_EXTEND, ISD::ZERO_EXTEND, ISD::ANY_EXTEND}); |
1406 | |
1407 | setTargetDAGCombine({ISD::TRUNCATE, ISD::VECTOR_SHUFFLE}); |
1408 | |
1409 | if (Subtarget.useCRBits()) { |
1410 | setTargetDAGCombine({ISD::TRUNCATE, ISD::SETCC, ISD::SELECT_CC}); |
1411 | } |
1412 | |
  setLibcallName(RTLIB::LOG_F128, "logf128");
  setLibcallName(RTLIB::LOG2_F128, "log2f128");
  setLibcallName(RTLIB::LOG10_F128, "log10f128");
  setLibcallName(RTLIB::EXP_F128, "expf128");
  setLibcallName(RTLIB::EXP2_F128, "exp2f128");
  setLibcallName(RTLIB::SIN_F128, "sinf128");
  setLibcallName(RTLIB::COS_F128, "cosf128");
  setLibcallName(RTLIB::SINCOS_F128, "sincosf128");
  setLibcallName(RTLIB::POW_F128, "powf128");
  setLibcallName(RTLIB::FMIN_F128, "fminf128");
  setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
  setLibcallName(RTLIB::REM_F128, "fmodf128");
  setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
  setLibcallName(RTLIB::CEIL_F128, "ceilf128");
  setLibcallName(RTLIB::FLOOR_F128, "floorf128");
  setLibcallName(RTLIB::TRUNC_F128, "truncf128");
  setLibcallName(RTLIB::ROUND_F128, "roundf128");
  setLibcallName(RTLIB::LROUND_F128, "lroundf128");
  setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
  setLibcallName(RTLIB::RINT_F128, "rintf128");
  setLibcallName(RTLIB::LRINT_F128, "lrintf128");
  setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
  setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
  setLibcallName(RTLIB::FMA_F128, "fmaf128");
  setLibcallName(RTLIB::FREXP_F128, "frexpf128");
1438 | |
1439 | if (Subtarget.isAIXABI()) { |
    setLibcallName(RTLIB::MEMCPY, isPPC64 ? "___memmove64" : "___memmove");
    setLibcallName(RTLIB::MEMMOVE, isPPC64 ? "___memmove64" : "___memmove");
    setLibcallName(RTLIB::MEMSET, isPPC64 ? "___memset64" : "___memset");
    setLibcallName(RTLIB::BZERO, isPPC64 ? "___bzero64" : "___bzero");
1444 | } |
1445 | |
1446 | // With 32 condition bits, we don't need to sink (and duplicate) compares |
1447 | // aggressively in CodeGenPrep. |
1448 | if (Subtarget.useCRBits()) { |
1449 | setHasMultipleConditionRegisters(); |
1450 | setJumpIsExpensive(); |
1451 | } |
1452 | |
  // TODO: The default entry number is set to 64. This stops most jump table
  // generation on PPC. But it is good for current PPC hardware because the
  // indirect branch via mtctr to the jump table may lead to poor branch
  // prediction. Re-evaluate this value on future hardware that can do better
  // with mtctr.
1457 | setMinimumJumpTableEntries(PPCMinimumJumpTableEntries); |
1458 | |
1459 | setMinFunctionAlignment(Align(4)); |
1460 | |
1461 | switch (Subtarget.getCPUDirective()) { |
1462 | default: break; |
1463 | case PPC::DIR_970: |
1464 | case PPC::DIR_A2: |
1465 | case PPC::DIR_E500: |
1466 | case PPC::DIR_E500mc: |
1467 | case PPC::DIR_E5500: |
1468 | case PPC::DIR_PWR4: |
1469 | case PPC::DIR_PWR5: |
1470 | case PPC::DIR_PWR5X: |
1471 | case PPC::DIR_PWR6: |
1472 | case PPC::DIR_PWR6X: |
1473 | case PPC::DIR_PWR7: |
1474 | case PPC::DIR_PWR8: |
1475 | case PPC::DIR_PWR9: |
1476 | case PPC::DIR_PWR10: |
1477 | case PPC::DIR_PWR_FUTURE: |
1478 | setPrefLoopAlignment(Align(16)); |
1479 | setPrefFunctionAlignment(Align(16)); |
1480 | break; |
1481 | } |
1482 | |
1483 | if (Subtarget.enableMachineScheduler()) |
1484 | setSchedulingPreference(Sched::Source); |
1485 | else |
1486 | setSchedulingPreference(Sched::Hybrid); |
1487 | |
1488 | computeRegisterProperties(STI.getRegisterInfo()); |
1489 | |
1490 | // The Freescale cores do better with aggressive inlining of memcpy and |
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1492 | if (Subtarget.getCPUDirective() == PPC::DIR_E500mc || |
1493 | Subtarget.getCPUDirective() == PPC::DIR_E5500) { |
1494 | MaxStoresPerMemset = 32; |
1495 | MaxStoresPerMemsetOptSize = 16; |
1496 | MaxStoresPerMemcpy = 32; |
1497 | MaxStoresPerMemcpyOptSize = 8; |
1498 | MaxStoresPerMemmove = 32; |
1499 | MaxStoresPerMemmoveOptSize = 8; |
1500 | } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) { |
1501 | // The A2 also benefits from (very) aggressive inlining of memcpy and |
    // friends. The overhead of the function call, even when warm, can be
1503 | // over one hundred cycles. |
1504 | MaxStoresPerMemset = 128; |
1505 | MaxStoresPerMemcpy = 128; |
1506 | MaxStoresPerMemmove = 128; |
1507 | MaxLoadsPerMemcmp = 128; |
1508 | } else { |
1509 | MaxLoadsPerMemcmp = 8; |
1510 | MaxLoadsPerMemcmpOptSize = 4; |
1511 | } |
1512 | |
1513 | IsStrictFPEnabled = true; |
1514 | |
1515 | // Let the subtarget (CPU) decide if a predictable select is more expensive |
1516 | // than the corresponding branch. This information is used in CGP to decide |
1517 | // when to convert selects into branches. |
1518 | PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive(); |
1519 | |
1520 | GatherAllAliasesMaxDepth = PPCGatherAllAliasesMaxDepth; |
1521 | } |
1522 | |
1523 | // *********************************** NOTE ************************************ |
1524 | // For selecting load and store instructions, the addressing modes are defined |
1525 | // as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD |
// patterns to match the load and store instructions.
1527 | // |
1528 | // The TD definitions for the addressing modes correspond to their respective |
1529 | // Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely |
1530 | // on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the |
1531 | // address mode flags of a particular node. Afterwards, the computed address |
1532 | // flags are passed into getAddrModeForFlags() in order to retrieve the optimal |
1533 | // addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement |
1534 | // accordingly, based on the preferred addressing mode. |
1535 | // |
1536 | // Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode. |
1537 | // MemOpFlags contains all the possible flags that can be used to compute the |
1538 | // optimal addressing mode for load and store instructions. |
1539 | // AddrMode contains all the possible load and store addressing modes available |
1540 | // on Power (such as DForm, DSForm, DQForm, XForm, etc.) |
1541 | // |
1542 | // When adding new load and store instructions, it is possible that new address |
1543 | // flags may need to be added into MemOpFlags, and a new addressing mode will |
1544 | // need to be added to AddrMode. An entry of the new addressing mode (consisting |
1545 | // of the minimal and main distinguishing address flags for the new load/store |
1546 | // instructions) will need to be added into initializeAddrModeMap() below. |
// Finally, when adding a new addressing mode, getAddrModeForFlags() will need
// to be updated to select the optimal addressing mode.
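//
// As a concrete illustration drawn from the entries below: the DS-Form LD and
// STD instructions require a displacement that is a multiple of 4, so the
// AM_DSForm entries use MOF_RPlusSImm16Mult4, whereas the D-Form LWZ and STW
// entries use the plain MOF_RPlusSImm16 flag.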
1549 | // ***************************************************************************** |
1550 | /// Initialize the map that relates the different addressing modes of the load |
1551 | /// and store instructions to a set of flags. This ensures the load/store |
1552 | /// instruction is correctly matched during instruction selection. |
1553 | void PPCTargetLowering::initializeAddrModeMap() { |
1554 | AddrModesMap[PPC::AM_DForm] = { |
1555 | // LWZ, STW |
1556 | PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_WordInt, |
1557 | PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_WordInt, |
1558 | PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt, |
1559 | PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt, |
1560 | // LBZ, LHZ, STB, STH |
1561 | PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt, |
1562 | PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt, |
1563 | PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt, |
1564 | PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt, |
1565 | // LHA |
1566 | PPC::MOF_SExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt, |
1567 | PPC::MOF_SExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt, |
1568 | PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt, |
1569 | PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt, |
1570 | // LFS, LFD, STFS, STFD |
1571 | PPC::MOF_RPlusSImm16 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9, |
1572 | PPC::MOF_RPlusLo | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9, |
1573 | PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9, |
1574 | PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9, |
1575 | }; |
1576 | AddrModesMap[PPC::AM_DSForm] = { |
1577 | // LWA |
1578 | PPC::MOF_SExt | PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_WordInt, |
1579 | PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt, |
1580 | PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt, |
1581 | // LD, STD |
1582 | PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_DoubleWordInt, |
1583 | PPC::MOF_NotAddNorCst | PPC::MOF_DoubleWordInt, |
1584 | PPC::MOF_AddrIsSImm32 | PPC::MOF_DoubleWordInt, |
1585 | // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64 |
1586 | PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9, |
1587 | PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9, |
1588 | PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9, |
1589 | }; |
1590 | AddrModesMap[PPC::AM_DQForm] = { |
1591 | // LXV, STXV |
1592 | PPC::MOF_RPlusSImm16Mult16 | PPC::MOF_Vector | PPC::MOF_SubtargetP9, |
1593 | PPC::MOF_NotAddNorCst | PPC::MOF_Vector | PPC::MOF_SubtargetP9, |
1594 | PPC::MOF_AddrIsSImm32 | PPC::MOF_Vector | PPC::MOF_SubtargetP9, |
1595 | }; |
1596 | AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 | |
1597 | PPC::MOF_SubtargetP10}; |
1598 | // TODO: Add mapping for quadword load/store. |
1599 | } |
1600 | |
1601 | /// getMaxByValAlign - Helper for getByValTypeAlignment to determine |
1602 | /// the desired ByVal argument alignment. |
1603 | static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) { |
1604 | if (MaxAlign == MaxMaxAlign) |
1605 | return; |
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1607 | if (MaxMaxAlign >= 32 && |
1608 | VTy->getPrimitiveSizeInBits().getFixedValue() >= 256) |
1609 | MaxAlign = Align(32); |
1610 | else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 && |
1611 | MaxAlign < 16) |
1612 | MaxAlign = Align(16); |
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    Align EltAlign;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1616 | if (EltAlign > MaxAlign) |
1617 | MaxAlign = EltAlign; |
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      Align EltAlign;
      getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1622 | if (EltAlign > MaxAlign) |
1623 | MaxAlign = EltAlign; |
1624 | if (MaxAlign == MaxMaxAlign) |
1625 | break; |
1626 | } |
1627 | } |
1628 | } |
1629 | |
1630 | /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate |
1631 | /// function arguments in the caller parameter area. |
1632 | uint64_t PPCTargetLowering::getByValTypeAlignment(Type *Ty, |
1633 | const DataLayout &DL) const { |
  // 16-byte and wider vectors are passed on a 16-byte boundary.
  // Everything else is passed on an 8-byte boundary on PPC64 and a 4-byte
  // boundary on PPC32.
1636 | Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4); |
1637 | if (Subtarget.hasAltivec()) |
    getMaxByValAlign(Ty, Alignment, Align(16));
1639 | return Alignment.value(); |
1640 | } |
1641 | |
1642 | bool PPCTargetLowering::useSoftFloat() const { |
1643 | return Subtarget.useSoftFloat(); |
1644 | } |
1645 | |
1646 | bool PPCTargetLowering::hasSPE() const { |
1647 | return Subtarget.hasSPE(); |
1648 | } |
1649 | |
1650 | bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const { |
1651 | return VT.isScalarInteger(); |
1652 | } |
1653 | |
bool PPCTargetLowering::shallExtractConstSplatVectorElementToStore(
    Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
  if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
    return false;

  if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
    if (VTy->getScalarType()->isIntegerTy()) {
      // Elements of 8/16 bits fit in the instruction's immediate field, so
      // they do not need to be handled here.
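      // The indices chosen below are presumably the register lanes that the
      // direct-move instructions (mfvsrwz for words, mfvsrd for doublewords)
      // read, so the element can reach a GPR without a preceding shuffle.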
1662 | if (ElemSizeInBits == 32) { |
1663 | Index = Subtarget.isLittleEndian() ? 2 : 1; |
1664 | return true; |
1665 | } |
1666 | if (ElemSizeInBits == 64) { |
1667 | Index = Subtarget.isLittleEndian() ? 1 : 0; |
1668 | return true; |
1669 | } |
1670 | } |
1671 | } |
1672 | return false; |
1673 | } |
1674 | |
1675 | const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { |
1676 | switch ((PPCISD::NodeType)Opcode) { |
1677 | case PPCISD::FIRST_NUMBER: break; |
1678 | case PPCISD::FSEL: return "PPCISD::FSEL" ; |
1679 | case PPCISD::XSMAXC: return "PPCISD::XSMAXC" ; |
1680 | case PPCISD::XSMINC: return "PPCISD::XSMINC" ; |
1681 | case PPCISD::FCFID: return "PPCISD::FCFID" ; |
1682 | case PPCISD::FCFIDU: return "PPCISD::FCFIDU" ; |
1683 | case PPCISD::FCFIDS: return "PPCISD::FCFIDS" ; |
1684 | case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS" ; |
1685 | case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ" ; |
1686 | case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ" ; |
1687 | case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ" ; |
1688 | case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ" ; |
1689 | case PPCISD::FRE: return "PPCISD::FRE" ; |
1690 | case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE" ; |
1691 | case PPCISD::FTSQRT: |
1692 | return "PPCISD::FTSQRT" ; |
1693 | case PPCISD::FSQRT: |
1694 | return "PPCISD::FSQRT" ; |
1695 | case PPCISD::STFIWX: return "PPCISD::STFIWX" ; |
1696 | case PPCISD::VPERM: return "PPCISD::VPERM" ; |
1697 | case PPCISD::XXSPLT: return "PPCISD::XXSPLT" ; |
1698 | case PPCISD::XXSPLTI_SP_TO_DP: |
1699 | return "PPCISD::XXSPLTI_SP_TO_DP" ; |
1700 | case PPCISD::XXSPLTI32DX: |
1701 | return "PPCISD::XXSPLTI32DX" ; |
1702 | case PPCISD::VECINSERT: return "PPCISD::VECINSERT" ; |
1703 | case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI" ; |
1704 | case PPCISD::XXPERM: |
1705 | return "PPCISD::XXPERM" ; |
1706 | case PPCISD::VECSHL: return "PPCISD::VECSHL" ; |
1707 | case PPCISD::CMPB: return "PPCISD::CMPB" ; |
1708 | case PPCISD::Hi: return "PPCISD::Hi" ; |
1709 | case PPCISD::Lo: return "PPCISD::Lo" ; |
1710 | case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY" ; |
1711 | case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8" ; |
1712 | case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16" ; |
1713 | case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC" ; |
1714 | case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET" ; |
1715 | case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA" ; |
1716 | case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg" ; |
1717 | case PPCISD::SRL: return "PPCISD::SRL" ; |
1718 | case PPCISD::SRA: return "PPCISD::SRA" ; |
1719 | case PPCISD::SHL: return "PPCISD::SHL" ; |
1720 | case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE" ; |
1721 | case PPCISD::CALL: return "PPCISD::CALL" ; |
1722 | case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP" ; |
1723 | case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC" ; |
1724 | case PPCISD::CALL_RM: |
1725 | return "PPCISD::CALL_RM" ; |
1726 | case PPCISD::CALL_NOP_RM: |
1727 | return "PPCISD::CALL_NOP_RM" ; |
1728 | case PPCISD::CALL_NOTOC_RM: |
1729 | return "PPCISD::CALL_NOTOC_RM" ; |
1730 | case PPCISD::MTCTR: return "PPCISD::MTCTR" ; |
1731 | case PPCISD::BCTRL: return "PPCISD::BCTRL" ; |
1732 | case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC" ; |
1733 | case PPCISD::BCTRL_RM: |
1734 | return "PPCISD::BCTRL_RM" ; |
1735 | case PPCISD::BCTRL_LOAD_TOC_RM: |
1736 | return "PPCISD::BCTRL_LOAD_TOC_RM" ; |
1737 | case PPCISD::RET_GLUE: return "PPCISD::RET_GLUE" ; |
1738 | case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE" ; |
1739 | case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP" ; |
1740 | case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP" ; |
1741 | case PPCISD::MFOCRF: return "PPCISD::MFOCRF" ; |
1742 | case PPCISD::MFVSR: return "PPCISD::MFVSR" ; |
1743 | case PPCISD::MTVSRA: return "PPCISD::MTVSRA" ; |
1744 | case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ" ; |
1745 | case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP" ; |
1746 | case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP" ; |
1747 | case PPCISD::SCALAR_TO_VECTOR_PERMUTED: |
1748 | return "PPCISD::SCALAR_TO_VECTOR_PERMUTED" ; |
1749 | case PPCISD::ANDI_rec_1_EQ_BIT: |
1750 | return "PPCISD::ANDI_rec_1_EQ_BIT" ; |
1751 | case PPCISD::ANDI_rec_1_GT_BIT: |
1752 | return "PPCISD::ANDI_rec_1_GT_BIT" ; |
1753 | case PPCISD::VCMP: return "PPCISD::VCMP" ; |
1754 | case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec" ; |
1755 | case PPCISD::LBRX: return "PPCISD::LBRX" ; |
1756 | case PPCISD::STBRX: return "PPCISD::STBRX" ; |
1757 | case PPCISD::LFIWAX: return "PPCISD::LFIWAX" ; |
1758 | case PPCISD::LFIWZX: return "PPCISD::LFIWZX" ; |
1759 | case PPCISD::LXSIZX: return "PPCISD::LXSIZX" ; |
1760 | case PPCISD::STXSIX: return "PPCISD::STXSIX" ; |
1761 | case PPCISD::VEXTS: return "PPCISD::VEXTS" ; |
1762 | case PPCISD::LXVD2X: return "PPCISD::LXVD2X" ; |
1763 | case PPCISD::STXVD2X: return "PPCISD::STXVD2X" ; |
1764 | case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE" ; |
1765 | case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE" ; |
1766 | case PPCISD::ST_VSR_SCAL_INT: |
1767 | return "PPCISD::ST_VSR_SCAL_INT" ; |
1768 | case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH" ; |
1769 | case PPCISD::BDNZ: return "PPCISD::BDNZ" ; |
1770 | case PPCISD::BDZ: return "PPCISD::BDZ" ; |
1771 | case PPCISD::MFFS: return "PPCISD::MFFS" ; |
1772 | case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ" ; |
1773 | case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN" ; |
1774 | case PPCISD::CR6SET: return "PPCISD::CR6SET" ; |
1775 | case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET" ; |
1776 | case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT" ; |
1777 | case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT" ; |
1778 | case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA" ; |
1779 | case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L" ; |
1780 | case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS" ; |
1781 | case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA" ; |
1782 | case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L" ; |
1783 | case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR" ; |
1784 | case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX" ; |
1785 | case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER" ; |
1786 | case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR" ; |
1787 | case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX" ; |
1788 | case PPCISD::TLSLD_AIX: return "PPCISD::TLSLD_AIX" ; |
1789 | case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA" ; |
1790 | case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L" ; |
1791 | case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR" ; |
1792 | case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR" ; |
1793 | case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA" ; |
1794 | case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L" ; |
1795 | case PPCISD::PADDI_DTPREL: |
1796 | return "PPCISD::PADDI_DTPREL" ; |
1797 | case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT" ; |
1798 | case PPCISD::SC: return "PPCISD::SC" ; |
1799 | case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB" ; |
1800 | case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE" ; |
1801 | case PPCISD::RFEBB: return "PPCISD::RFEBB" ; |
1802 | case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD" ; |
1803 | case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN" ; |
1804 | case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128" ; |
1805 | case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64" ; |
1806 | case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE" ; |
1807 | case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI" ; |
1808 | case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH" ; |
1809 | case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF" ; |
1810 | case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR" ; |
1811 | case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR: |
1812 | return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR" ; |
1813 | case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR: |
1814 | return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR" ; |
1815 | case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD" ; |
1816 | case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD" ; |
1817 | case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG" ; |
1818 | case PPCISD::XXMFACC: return "PPCISD::XXMFACC" ; |
1819 | case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT" ; |
1820 | case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT" ; |
1821 | case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT" ; |
1822 | case PPCISD::FNMSUB: return "PPCISD::FNMSUB" ; |
1823 | case PPCISD::STRICT_FADDRTZ: |
1824 | return "PPCISD::STRICT_FADDRTZ" ; |
1825 | case PPCISD::STRICT_FCTIDZ: |
1826 | return "PPCISD::STRICT_FCTIDZ" ; |
1827 | case PPCISD::STRICT_FCTIWZ: |
1828 | return "PPCISD::STRICT_FCTIWZ" ; |
1829 | case PPCISD::STRICT_FCTIDUZ: |
1830 | return "PPCISD::STRICT_FCTIDUZ" ; |
1831 | case PPCISD::STRICT_FCTIWUZ: |
1832 | return "PPCISD::STRICT_FCTIWUZ" ; |
1833 | case PPCISD::STRICT_FCFID: |
1834 | return "PPCISD::STRICT_FCFID" ; |
1835 | case PPCISD::STRICT_FCFIDU: |
1836 | return "PPCISD::STRICT_FCFIDU" ; |
1837 | case PPCISD::STRICT_FCFIDS: |
1838 | return "PPCISD::STRICT_FCFIDS" ; |
1839 | case PPCISD::STRICT_FCFIDUS: |
1840 | return "PPCISD::STRICT_FCFIDUS" ; |
1841 | case PPCISD::LXVRZX: return "PPCISD::LXVRZX" ; |
1842 | case PPCISD::STORE_COND: |
1843 | return "PPCISD::STORE_COND" ; |
1844 | } |
1845 | return nullptr; |
1846 | } |
1847 | |
1848 | EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C, |
1849 | EVT VT) const { |
1850 | if (!VT.isVector()) |
1851 | return Subtarget.useCRBits() ? MVT::i1 : MVT::i32; |
1852 | |
1853 | return VT.changeVectorElementTypeToInteger(); |
1854 | } |
1855 | |
1856 | bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const { |
1857 | assert(VT.isFloatingPoint() && "Non-floating-point FMA?" ); |
1858 | return true; |
1859 | } |
1860 | |
1861 | //===----------------------------------------------------------------------===// |
1862 | // Node matching predicates, for use by the tblgen matching code. |
1863 | //===----------------------------------------------------------------------===// |
1864 | |
1865 | /// isFloatingPointZero - Return true if this is 0.0 or -0.0. |
1866 | static bool isFloatingPointZero(SDValue Op) { |
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1873 | return CFP->getValueAPF().isZero(); |
1874 | } |
1875 | return false; |
1876 | } |
1877 | |
1878 | /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return |
1879 | /// true if Op is undef or if it matches the specified value. |
1880 | static bool isConstantOrUndef(int Op, int Val) { |
1881 | return Op < 0 || Op == Val; |
1882 | } |
1883 | |
1884 | /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a |
1885 | /// VPKUHUM instruction. |
1886 | /// The ShuffleKind distinguishes between big-endian operations with |
1887 | /// two different inputs (0), either-endian operations with two identical |
1888 | /// inputs (1), and little-endian operations with two different inputs (2). |
1889 | /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). |
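/// For example, with two different inputs on a big-endian target
/// (ShuffleKind 0), vpkuhum keeps the odd byte of each halfword, which
/// corresponds to the v16i8 shuffle mask {1, 3, 5, ..., 29, 31}.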
1890 | bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, |
1891 | SelectionDAG &DAG) { |
1892 | bool IsLE = DAG.getDataLayout().isLittleEndian(); |
1893 | if (ShuffleKind == 0) { |
1894 | if (IsLE) |
1895 | return false; |
1896 | for (unsigned i = 0; i != 16; ++i) |
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1898 | return false; |
1899 | } else if (ShuffleKind == 2) { |
1900 | if (!IsLE) |
1901 | return false; |
1902 | for (unsigned i = 0; i != 16; ++i) |
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1904 | return false; |
1905 | } else if (ShuffleKind == 1) { |
1906 | unsigned j = IsLE ? 0 : 1; |
1907 | for (unsigned i = 0; i != 8; ++i) |
      if (!isConstantOrUndef(N->getMaskElt(i),   i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1910 | return false; |
1911 | } |
1912 | return true; |
1913 | } |
1914 | |
1915 | /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a |
1916 | /// VPKUWUM instruction. |
1917 | /// The ShuffleKind distinguishes between big-endian operations with |
1918 | /// two different inputs (0), either-endian operations with two identical |
1919 | /// inputs (1), and little-endian operations with two different inputs (2). |
1920 | /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). |
1921 | bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, |
1922 | SelectionDAG &DAG) { |
1923 | bool IsLE = DAG.getDataLayout().isLittleEndian(); |
1924 | if (ShuffleKind == 0) { |
1925 | if (IsLE) |
1926 | return false; |
1927 | for (unsigned i = 0; i != 16; i += 2) |
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1930 | return false; |
1931 | } else if (ShuffleKind == 2) { |
1932 | if (!IsLE) |
1933 | return false; |
1934 | for (unsigned i = 0; i != 16; i += 2) |
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1937 | return false; |
1938 | } else if (ShuffleKind == 1) { |
1939 | unsigned j = IsLE ? 0 : 2; |
1940 | for (unsigned i = 0; i != 8; i += 2) |
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1945 | return false; |
1946 | } |
1947 | return true; |
1948 | } |
1949 | |
1950 | /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a |
1951 | /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the |
1952 | /// current subtarget. |
1953 | /// |
1954 | /// The ShuffleKind distinguishes between big-endian operations with |
1955 | /// two different inputs (0), either-endian operations with two identical |
1956 | /// inputs (1), and little-endian operations with two different inputs (2). |
1957 | /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). |
1958 | bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, |
1959 | SelectionDAG &DAG) { |
1960 | const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>(); |
1961 | if (!Subtarget.hasP8Vector()) |
1962 | return false; |
1963 | |
1964 | bool IsLE = DAG.getDataLayout().isLittleEndian(); |
1965 | if (ShuffleKind == 0) { |
1966 | if (IsLE) |
1967 | return false; |
1968 | for (unsigned i = 0; i != 16; i += 4) |
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+4) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1973 | return false; |
1974 | } else if (ShuffleKind == 2) { |
1975 | if (!IsLE) |
1976 | return false; |
1977 | for (unsigned i = 0; i != 16; i += 4) |
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1982 | return false; |
1983 | } else if (ShuffleKind == 1) { |
1984 | unsigned j = IsLE ? 0 : 4; |
1985 | for (unsigned i = 0; i != 8; i += 4) |
      if (!isConstantOrUndef(N->getMaskElt(i   ), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1 ), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2 ), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3 ), i*2+j+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8 ), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9 ), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1994 | return false; |
1995 | } |
1996 | return true; |
1997 | } |
1998 | |
1999 | /// isVMerge - Common function, used to match vmrg* shuffles. |
2000 | /// |
2001 | static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, |
2002 | unsigned LHSStart, unsigned RHSStart) { |
2003 | if (N->getValueType(0) != MVT::v16i8) |
2004 | return false; |
2005 | assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && |
2006 | "Unsupported merge size!" ); |
2007 | |
2008 | for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units |
2009 | for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit |
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
2014 | return false; |
2015 | } |
2016 | return true; |
2017 | } |
2018 | |
2019 | /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for |
2020 | /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes). |
2021 | /// The ShuffleKind distinguishes between big-endian merges with two |
2022 | /// different inputs (0), either-endian merges with two identical inputs (1), |
2023 | /// and little-endian merges with two different inputs (2). For the latter, |
2024 | /// the input operands are swapped (see PPCInstrAltivec.td). |
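/// For example, a big-endian vmrglb of two different inputs (ShuffleKind 0)
/// uses LHSStart 8 and RHSStart 24 below, i.e. the v16i8 mask
/// {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}.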
2025 | bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, |
2026 | unsigned ShuffleKind, SelectionDAG &DAG) { |
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  }
2042 | } |
2043 | |
2044 | /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for |
2045 | /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes). |
2046 | /// The ShuffleKind distinguishes between big-endian merges with two |
2047 | /// different inputs (0), either-endian merges with two identical inputs (1), |
2048 | /// and little-endian merges with two different inputs (2). For the latter, |
2049 | /// the input operands are swapped (see PPCInstrAltivec.td). |
2050 | bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, |
2051 | unsigned ShuffleKind, SelectionDAG &DAG) { |
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  }
2067 | } |
2068 | |
2069 | /** |
2070 | * Common function used to match vmrgew and vmrgow shuffles |
2071 | * |
2072 | * The indexOffset determines whether to look for even or odd words in |
 * the shuffle mask. This is based on the endianness of the target
2074 | * machine. |
2075 | * - Little Endian: |
2076 | * - Use offset of 0 to check for odd elements |
2077 | * - Use offset of 4 to check for even elements |
2078 | * - Big Endian: |
2079 | * - Use offset of 0 to check for even elements |
2080 | * - Use offset of 4 to check for odd elements |
2081 | * A detailed description of the vector element ordering for little endian and |
2082 | * big endian can be found at |
2083 | * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html |
2084 | * Targeting your applications - what little endian and big endian IBM XL C/C++ |
2085 | * compiler differences mean to you |
2086 | * |
2087 | * The mask to the shuffle vector instruction specifies the indices of the |
2088 | * elements from the two input vectors to place in the result. The elements are |
2089 | * numbered in array-access order, starting with the first vector. These vectors |
 * are always of type v16i8, thus each vector will contain 16 elements of
 * size 8 bits. More info on the shuffle vector can be found in the
2092 | * http://llvm.org/docs/LangRef.html#shufflevector-instruction |
2093 | * Language Reference. |
2094 | * |
2095 | * The RHSStartValue indicates whether the same input vectors are used (unary) |
2096 | * or two different input vectors are used, based on the following: |
2097 | * - If the instruction uses the same vector for both inputs, the range of the |
2098 | * indices will be 0 to 15. In this case, the RHSStart value passed should |
2099 | * be 0. |
2100 | * - If the instruction has two different vectors then the range of the |
2101 | * indices will be 0 to 31. In this case, the RHSStart value passed should |
2102 | * be 16 (indices 0-15 specify elements in the first vector while indices 16 |
2103 | * to 31 specify elements in the second vector). |
2104 | * |
2105 | * \param[in] N The shuffle vector SD Node to analyze |
2106 | * \param[in] IndexOffset Specifies whether to look for even or odd elements |
2107 | * \param[in] RHSStartValue Specifies the starting index for the righthand input |
2108 | * vector to the shuffle_vector instruction |
2109 | * \return true iff this shuffle vector represents an even or odd word merge |
2110 | */ |
2111 | static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset, |
2112 | unsigned RHSStartValue) { |
2113 | if (N->getValueType(0) != MVT::v16i8) |
2114 | return false; |
2115 | |
2116 | for (unsigned i = 0; i < 2; ++i) |
2117 | for (unsigned j = 0; j < 4; ++j) |
      if (!isConstantOrUndef(N->getMaskElt(i*4+j),
                             i*RHSStartValue+j+IndexOffset) ||
          !isConstantOrUndef(N->getMaskElt(i*4+j+8),
                             i*RHSStartValue+j+IndexOffset+8))
2122 | return false; |
2123 | return true; |
2124 | } |
2125 | |
2126 | /** |
2127 | * Determine if the specified shuffle mask is suitable for the vmrgew or |
2128 | * vmrgow instructions. |
2129 | * |
2130 | * \param[in] N The shuffle vector SD Node to analyze |
2131 | * \param[in] CheckEven Check for an even merge (true) or an odd merge (false) |
2132 | * \param[in] ShuffleKind Identify the type of merge: |
2133 | * - 0 = big-endian merge with two different inputs; |
2134 | * - 1 = either-endian merge with two identical inputs; |
2135 | * - 2 = little-endian merge with two different inputs (inputs are swapped for |
2136 | * little-endian merges). |
2137 | * \param[in] DAG The current SelectionDAG |
 * \return true iff this shuffle mask is suitable for the vmrgew (CheckEven
 *         is true) or vmrgow (CheckEven is false) instruction
2139 | */ |
2140 | bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, |
2141 | unsigned ShuffleKind, SelectionDAG &DAG) { |
  if (DAG.getDataLayout().isLittleEndian()) {
    unsigned indexOffset = CheckEven ? 4 : 0;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  } else {
    unsigned indexOffset = CheckEven ? 0 : 4;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 0) // Normal
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  }
2160 | return false; |
2161 | } |
2162 | |
2163 | /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift |
2164 | /// amount, otherwise return -1. |
2165 | /// The ShuffleKind distinguishes between big-endian operations with two |
2166 | /// different inputs (0), either-endian operations with two identical inputs |
2167 | /// (1), and little-endian operations with two different inputs (2). For the |
2168 | /// latter, the input operands are swapped (see PPCInstrAltivec.td). |
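/// For example, on a big-endian target with two different inputs
/// (ShuffleKind 0), the mask {4, 5, 6, ..., 18, 19} shifts left by 4 bytes,
/// so this function returns 4.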
2169 | int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, |
2170 | SelectionDAG &DAG) { |
2171 | if (N->getValueType(0) != MVT::v16i8) |
2172 | return -1; |
2173 | |
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2175 | |
2176 | // Find the first non-undef value in the shuffle mask. |
2177 | unsigned i; |
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2179 | /*search*/; |
2180 | |
2181 | if (i == 16) return -1; // all undef. |
2182 | |
2183 | // Otherwise, check to see if the rest of the elements are consecutively |
2184 | // numbered from this value. |
  unsigned ShiftAmt = SVOp->getMaskElt(i);
2186 | if (ShiftAmt < i) return -1; |
2187 | |
2188 | ShiftAmt -= i; |
2189 | bool isLE = DAG.getDataLayout().isLittleEndian(); |
2190 | |
2191 | if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) { |
2192 | // Check the rest of the elements to see if they are consecutive. |
2193 | for (++i; i != 16; ++i) |
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2195 | return -1; |
2196 | } else if (ShuffleKind == 1) { |
2197 | // Check the rest of the elements to see if they are consecutive. |
2198 | for (++i; i != 16; ++i) |
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2200 | return -1; |
2201 | } else |
2202 | return -1; |
2203 | |
2204 | if (isLE) |
2205 | ShiftAmt = 16 - ShiftAmt; |
2206 | |
2207 | return ShiftAmt; |
2208 | } |
2209 | |
2210 | /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand |
2211 | /// specifies a splat of a single element that is suitable for input to |
2212 | /// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.). |
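/// For example, splatting byte 3 of the first input is the v16i8 mask
/// {3, 3, ..., 3} (EltSize 1), and splatting halfword 1 is the mask
/// {2, 3, 2, 3, ..., 2, 3} (EltSize 2).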
2213 | bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { |
  EVT VT = N->getValueType(0);
  if (VT == MVT::v2i64 || VT == MVT::v2f64)
    return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2217 | |
2218 | assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) && |
2219 | EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes" ); |
2220 | |
2221 | // The consecutive indices need to specify an element, not part of two |
2222 | // different elements. So abandon ship early if this isn't the case. |
  if (N->getMaskElt(0) % EltSize != 0)
2224 | return false; |
2225 | |
2226 | // This is a splat operation if each element of the permute is the same, and |
2227 | // if the value doesn't reference the second vector. |
  unsigned ElementBase = N->getMaskElt(0);
2229 | |
2230 | // FIXME: Handle UNDEF elements too! |
2231 | if (ElementBase >= 16) |
2232 | return false; |
2233 | |
2234 | // Check that the indices are consecutive, in the case of a multi-byte element |
2235 | // splatted with a v16i8 mask. |
2236 | for (unsigned i = 1; i != EltSize; ++i) |
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2238 | return false; |
2239 | |
2240 | for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { |
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
2244 | return false; |
2245 | } |
2246 | return true; |
2247 | } |
2248 | |
2249 | /// Check that the mask is shuffling N byte elements. Within each N byte |
2250 | /// element of the mask, the indices could be either in increasing or |
2251 | /// decreasing order as long as they are consecutive. |
2252 | /// \param[in] N the shuffle vector SD Node to analyze |
2253 | /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/ |
2254 | /// Word/DoubleWord/QuadWord). |
/// \param[in] StepLen the step between consecutive indices within an N-byte
/// element: 1 if the indices are increasing, -1 if they are decreasing.
2257 | /// \return true iff the mask is shuffling N byte elements. |
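/// For example, with Width == 4 and StepLen == 1 the mask
/// {4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11} qualifies, and with
/// StepLen == -1 a mask such as {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}
/// qualifies as well.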
2258 | static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width, |
2259 | int StepLen) { |
2260 | assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) && |
2261 | "Unexpected element width." ); |
  assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
2263 | |
2264 | unsigned NumOfElem = 16 / Width; |
2265 | unsigned MaskVal[16]; // Width is never greater than 16 |
2266 | for (unsigned i = 0; i < NumOfElem; ++i) { |
    MaskVal[0] = N->getMaskElt(i * Width);
2268 | if ((StepLen == 1) && (MaskVal[0] % Width)) { |
2269 | return false; |
2270 | } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) { |
2271 | return false; |
2272 | } |
2273 | |
2274 | for (unsigned int j = 1; j < Width; ++j) { |
      MaskVal[j] = N->getMaskElt(i * Width + j);
2276 | if (MaskVal[j] != MaskVal[j-1] + StepLen) { |
2277 | return false; |
2278 | } |
2279 | } |
2280 | } |
2281 | |
2282 | return true; |
2283 | } |
2284 | |
2285 | bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, |
2286 | unsigned &InsertAtByte, bool &Swap, bool IsLE) { |
  if (!isNByteElemShuffleMask(N, 4, 1))
2288 | return false; |
2289 | |
2290 | // Now we look at mask elements 0,4,8,12 |
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;
2295 | unsigned LittleEndianShifts[] = { 2, 1, 0, 3 }; |
2296 | unsigned BigEndianShifts[] = { 3, 0, 1, 2 }; |
2297 | |
2298 | // Below, let H and L be arbitrary elements of the shuffle mask |
2299 | // where H is in the range [4,7] and L is in the range [0,3]. |
2300 | // H, 1, 2, 3 or L, 5, 6, 7 |
2301 | if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) || |
2302 | (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) { |
2303 | ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3]; |
2304 | InsertAtByte = IsLE ? 12 : 0; |
2305 | Swap = M0 < 4; |
2306 | return true; |
2307 | } |
2308 | // 0, H, 2, 3 or 4, L, 6, 7 |
2309 | if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) || |
2310 | (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) { |
2311 | ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3]; |
2312 | InsertAtByte = IsLE ? 8 : 4; |
2313 | Swap = M1 < 4; |
2314 | return true; |
2315 | } |
2316 | // 0, 1, H, 3 or 4, 5, L, 7 |
2317 | if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) || |
2318 | (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) { |
2319 | ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3]; |
2320 | InsertAtByte = IsLE ? 4 : 8; |
2321 | Swap = M2 < 4; |
2322 | return true; |
2323 | } |
2324 | // 0, 1, 2, H or 4, 5, 6, L |
2325 | if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) || |
2326 | (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) { |
2327 | ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3]; |
2328 | InsertAtByte = IsLE ? 0 : 12; |
2329 | Swap = M3 < 4; |
2330 | return true; |
2331 | } |
2332 | |
2333 | // If both vector operands for the shuffle are the same vector, the mask will |
2334 | // contain only elements from the first one and the second one will be undef. |
2335 | if (N->getOperand(Num: 1).isUndef()) { |
2336 | ShiftElts = 0; |
2337 | Swap = true; |
2338 | unsigned XXINSERTWSrcElem = IsLE ? 2 : 1; |
2339 | if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) { |
2340 | InsertAtByte = IsLE ? 12 : 0; |
2341 | return true; |
2342 | } |
2343 | if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) { |
2344 | InsertAtByte = IsLE ? 8 : 4; |
2345 | return true; |
2346 | } |
2347 | if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) { |
2348 | InsertAtByte = IsLE ? 4 : 8; |
2349 | return true; |
2350 | } |
2351 | if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) { |
2352 | InsertAtByte = IsLE ? 0 : 12; |
2353 | return true; |
2354 | } |
2355 | } |
2356 | |
2357 | return false; |
2358 | } |
2359 | |
2360 | bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, |
2361 | bool &Swap, bool IsLE) { |
2362 | assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8" ); |
2363 | // Ensure each byte index of the word is consecutive. |
2364 | if (!isNByteElemShuffleMask(N, Width: 4, StepLen: 1)) |
2365 | return false; |
2366 | |
2367 | // Now we look at mask elements 0,4,8,12, which are the beginning of words. |
2368 | unsigned M0 = N->getMaskElt(Idx: 0) / 4; |
2369 | unsigned M1 = N->getMaskElt(Idx: 4) / 4; |
2370 | unsigned M2 = N->getMaskElt(Idx: 8) / 4; |
2371 | unsigned M3 = N->getMaskElt(Idx: 12) / 4; |
2372 | |
2373 | // If both vector operands for the shuffle are the same vector, the mask will |
2374 | // contain only elements from the first one and the second one will be undef. |
2375 | if (N->getOperand(Num: 1).isUndef()) { |
2376 | assert(M0 < 4 && "Indexing into an undef vector?" ); |
2377 | if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4) |
2378 | return false; |
2379 | |
2380 | ShiftElts = IsLE ? (4 - M0) % 4 : M0; |
2381 | Swap = false; |
2382 | return true; |
2383 | } |
2384 | |
2385 | // Ensure each word index of the ShuffleVector Mask is consecutive. |
2386 | if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8) |
2387 | return false; |
2388 | |
2389 | if (IsLE) { |
2390 | if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) { |
2391 | // Input vectors don't need to be swapped if the leading element |
2392 | // of the result is one of the 3 left elements of the second vector |
2393 | // (or if there is no shift to be done at all). |
2394 | Swap = false; |
2395 | ShiftElts = (8 - M0) % 8; |
2396 | } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) { |
2397 | // Input vectors need to be swapped if the leading element |
2398 | // of the result is one of the 3 left elements of the first vector |
2399 | // (or if we're shifting by 4 - thereby simply swapping the vectors). |
2400 | Swap = true; |
2401 | ShiftElts = (4 - M0) % 4; |
2402 | } |
2403 | |
2404 | return true; |
2405 | } else { // BE |
2406 | if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) { |
2407 | // Input vectors don't need to be swapped if the leading element |
2408 | // of the result is one of the 4 elements of the first vector. |
2409 | Swap = false; |
2410 | ShiftElts = M0; |
2411 | } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) { |
2412 | // Input vectors need to be swapped if the leading element |
2413 | // of the result is one of the 4 elements of the right vector. |
2414 | Swap = true; |
2415 | ShiftElts = M0 - 4; |
2416 | } |
2417 | |
2418 | return true; |
2419 | } |
2420 | } |
2421 | |
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2423 | assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8" ); |
2424 | |
2425 | if (!isNByteElemShuffleMask(N, Width, StepLen: -1)) |
2426 | return false; |
2427 | |
2428 | for (int i = 0; i < 16; i += Width) |
2429 | if (N->getMaskElt(Idx: i) != i + Width - 1) |
2430 | return false; |
2431 | |
2432 | return true; |
2433 | } |
2434 | |
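// The following predicates match the shuffle masks corresponding to the
// XXBRH/XXBRW/XXBRD/XXBRQ byte-reversal instructions. For example,
// isXXBRWShuffleMask accepts the byte-reversed-word mask
// <3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12>.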
2435 | bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) { |
2436 | return isXXBRShuffleMaskHelper(N, Width: 2); |
2437 | } |
2438 | |
2439 | bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) { |
2440 | return isXXBRShuffleMaskHelper(N, Width: 4); |
2441 | } |
2442 | |
2443 | bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) { |
2444 | return isXXBRShuffleMaskHelper(N, Width: 8); |
2445 | } |
2446 | |
2447 | bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) { |
2448 | return isXXBRShuffleMaskHelper(N, Width: 16); |
2449 | } |
2450 | |
2451 | /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap |
2452 | /// if the inputs to the instruction should be swapped and set \p DM to the |
2453 | /// value for the immediate. |
2454 | /// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI |
2455 | /// AND element 0 of the result comes from the first input (LE) or second input |
2456 | /// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered. |
/// \return true iff the given mask of shuffle node \p N is an XXPERMDI shuffle
2458 | /// mask. |
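/// For example, the v16i8 mask <0,1,...,7, 24,25,...,31> selects doublewords
/// <0,3>; on big-endian this yields DM = 1 with Swap = false.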
2459 | bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM, |
2460 | bool &Swap, bool IsLE) { |
2461 | assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8" ); |
2462 | |
2463 | // Ensure each byte index of the double word is consecutive. |
2464 | if (!isNByteElemShuffleMask(N, Width: 8, StepLen: 1)) |
2465 | return false; |
2466 | |
2467 | unsigned M0 = N->getMaskElt(Idx: 0) / 8; |
2468 | unsigned M1 = N->getMaskElt(Idx: 8) / 8; |
2469 | assert(((M0 | M1) < 4) && "A mask element out of bounds?" ); |
2470 | |
2471 | // If both vector operands for the shuffle are the same vector, the mask will |
2472 | // contain only elements from the first one and the second one will be undef. |
2473 | if (N->getOperand(Num: 1).isUndef()) { |
2474 | if ((M0 | M1) < 2) { |
2475 | DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1); |
2476 | Swap = false; |
2477 | return true; |
2478 | } else |
2479 | return false; |
2480 | } |
2481 | |
2482 | if (IsLE) { |
2483 | if (M0 > 1 && M1 < 2) { |
2484 | Swap = false; |
2485 | } else if (M0 < 2 && M1 > 1) { |
2486 | M0 = (M0 + 2) % 4; |
2487 | M1 = (M1 + 2) % 4; |
2488 | Swap = true; |
2489 | } else |
2490 | return false; |
2491 | |
    // Note: if control flow reaches this point, Swap has already been set
    // above.
2493 | DM = (((~M1) & 1) << 1) + ((~M0) & 1); |
2494 | return true; |
2495 | } else { // BE |
2496 | if (M0 < 2 && M1 > 1) { |
2497 | Swap = false; |
2498 | } else if (M0 > 1 && M1 < 2) { |
2499 | M0 = (M0 + 2) % 4; |
2500 | M1 = (M1 + 2) % 4; |
2501 | Swap = true; |
2502 | } else |
2503 | return false; |
2504 | |
    // Note: if control flow reaches this point, Swap has already been set
    // above.
2506 | DM = (M0 << 1) + (M1 & 1); |
2507 | return true; |
2508 | } |
2509 | } |
2510 | |
2511 | |
2512 | /// getSplatIdxForPPCMnemonics - Return the splat index as a value that is |
2513 | /// appropriate for PPC mnemonics (which have a big endian bias - namely |
2514 | /// elements are counted from the left of the vector register). |
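/// For example, with EltSize == 4 and mask element 0 equal to 12, this
/// returns 12/4 == 3 on big-endian and (16/4) - 1 - (12/4) == 0 on
/// little-endian.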
2515 | unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, |
2516 | SelectionDAG &DAG) { |
2517 | ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Val: N); |
2518 | assert(isSplatShuffleMask(SVOp, EltSize)); |
2519 | EVT VT = SVOp->getValueType(ResNo: 0); |
2520 | |
2521 | if (VT == MVT::v2i64 || VT == MVT::v2f64) |
2522 | return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(Idx: 0) |
2523 | : SVOp->getMaskElt(Idx: 0); |
2524 | |
2525 | if (DAG.getDataLayout().isLittleEndian()) |
2526 | return (16 / EltSize) - 1 - (SVOp->getMaskElt(Idx: 0) / EltSize); |
2527 | else |
2528 | return SVOp->getMaskElt(Idx: 0) / EltSize; |
2529 | } |
2530 | |
2531 | /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed |
2532 | /// by using a vspltis[bhw] instruction of the specified element size, return |
2533 | /// the constant being splatted. The ByteSize field indicates the number of |
2534 | /// bytes of each element [124] -> [bhw]. |
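/// For example, a v16i8 build_vector where every element is the constant 5
/// matches with ByteSize == 1 and returns a target constant of 5, suitable
/// for "vspltisb 5".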
2535 | SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { |
2536 | SDValue OpVal; |
2537 | |
  // If the ByteSize of the splat is bigger than the element size of the
  // build_vector, then we are checking for a splat where multiple elements
  // of the buildvector are folded together into a single logical element of
  // the splat (e.g. "vspltish 1" to splat {0,1}*8).
2542 | unsigned EltSize = 16/N->getNumOperands(); |
2543 | if (EltSize < ByteSize) { |
2544 | unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval. |
2545 | SDValue UniquedVals[4]; |
2546 | assert(Multiple > 1 && Multiple <= 4 && "How can this happen?" ); |
2547 | |
    // See if the corresponding elements of the buildvector agree across each
    // chunk.
2549 | for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { |
2550 | if (N->getOperand(Num: i).isUndef()) continue; |
      // If the element isn't a constant, bail out entirely.
2552 | if (!isa<ConstantSDNode>(Val: N->getOperand(Num: i))) return SDValue(); |
2553 | |
2554 | if (!UniquedVals[i&(Multiple-1)].getNode()) |
2555 | UniquedVals[i&(Multiple-1)] = N->getOperand(Num: i); |
2556 | else if (UniquedVals[i&(Multiple-1)] != N->getOperand(Num: i)) |
2557 | return SDValue(); // no match. |
2558 | } |
2559 | |
2560 | // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains |
2561 | // either constant or undef values that are identical for each chunk. See |
2562 | // if these chunks can form into a larger vspltis*. |
2563 | |
2564 | // Check to see if all of the leading entries are either 0 or -1. If |
2565 | // neither, then this won't fit into the immediate field. |
2566 | bool LeadingZero = true; |
2567 | bool LeadingOnes = true; |
2568 | for (unsigned i = 0; i != Multiple-1; ++i) { |
2569 | if (!UniquedVals[i].getNode()) continue; // Must have been undefs. |
2570 | |
2571 | LeadingZero &= isNullConstant(V: UniquedVals[i]); |
2572 | LeadingOnes &= isAllOnesConstant(V: UniquedVals[i]); |
2573 | } |
2574 | // Finally, check the least significant entry. |
2575 | if (LeadingZero) { |
2576 | if (!UniquedVals[Multiple-1].getNode()) |
2577 | return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef |
2578 | int Val = UniquedVals[Multiple - 1]->getAsZExtVal(); |
2579 | if (Val < 16) // 0,0,0,4 -> vspltisw(4) |
2580 | return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); |
2581 | } |
2582 | if (LeadingOnes) { |
2583 | if (!UniquedVals[Multiple-1].getNode()) |
2584 | return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef |
      int Val = cast<ConstantSDNode>(Val&: UniquedVals[Multiple-1])->getSExtValue();
2586 | if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) |
2587 | return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); |
2588 | } |
2589 | |
2590 | return SDValue(); |
2591 | } |
2592 | |
2593 | // Check to see if this buildvec has a single non-undef value in its elements. |
2594 | for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { |
2595 | if (N->getOperand(Num: i).isUndef()) continue; |
2596 | if (!OpVal.getNode()) |
2597 | OpVal = N->getOperand(Num: i); |
2598 | else if (OpVal != N->getOperand(Num: i)) |
2599 | return SDValue(); |
2600 | } |
2601 | |
2602 | if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def. |
2603 | |
2604 | unsigned ValSizeInBytes = EltSize; |
2605 | uint64_t Value = 0; |
2606 | if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: OpVal)) { |
2607 | Value = CN->getZExtValue(); |
2608 | } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Val&: OpVal)) { |
2609 | assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!" ); |
2610 | Value = llvm::bit_cast<uint32_t>(from: CN->getValueAPF().convertToFloat()); |
2611 | } |
2612 | |
  // If the splat value is larger than the element value, then we can never do
  // this splat. The only case where the replicated bits could fit into our
  // immediate field is zero, and we prefer to use vxor for that.
2616 | if (ValSizeInBytes < ByteSize) return SDValue(); |
2617 | |
2618 | // If the element value is larger than the splat value, check if it consists |
2619 | // of a repeated bit pattern of size ByteSize. |
2620 | if (!APInt(ValSizeInBytes * 8, Value).isSplat(SplatSizeInBits: ByteSize * 8)) |
2621 | return SDValue(); |
2622 | |
2623 | // Properly sign extend the value. |
2624 | int MaskVal = SignExtend32(X: Value, B: ByteSize * 8); |
2625 | |
  // If this is zero, don't match; zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5-bit sext field, return it.
2630 | if (SignExtend32<5>(MaskVal) == MaskVal) |
2631 | return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32); |
2632 | return SDValue(); |
2633 | } |
2634 | |
2635 | //===----------------------------------------------------------------------===// |
2636 | // Addressing Mode Selection |
2637 | //===----------------------------------------------------------------------===// |
2638 | |
2639 | /// isIntS16Immediate - This method tests to see if the node is either a 32-bit |
2640 | /// or 64-bit immediate, and if the value can be accurately represented as a |
2641 | /// sign extension from a 16-bit value. If so, this returns true and the |
2642 | /// immediate. |
2643 | bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) { |
2644 | if (!isa<ConstantSDNode>(Val: N)) |
2645 | return false; |
2646 | |
2647 | Imm = (int16_t)N->getAsZExtVal(); |
2648 | if (N->getValueType(0) == MVT::i32) |
2649 | return Imm == (int32_t)N->getAsZExtVal(); |
2650 | else |
2651 | return Imm == (int64_t)N->getAsZExtVal(); |
2652 | } |
2653 | bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) { |
2654 | return isIntS16Immediate(N: Op.getNode(), Imm); |
2655 | } |
2656 | |
2657 | /// Used when computing address flags for selecting loads and stores. |
2658 | /// If we have an OR, check if the LHS and RHS are provably disjoint. |
2659 | /// An OR of two provably disjoint values is equivalent to an ADD. |
2660 | /// Most PPC load/store instructions compute the effective address as a sum, |
2661 | /// so doing this conversion is useful. |
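/// For example, in (or (shl X, 8), (and Y, 0xff)) the low 8 bits of the LHS
/// and all other bits of the RHS are known zero, so every bit is known zero
/// on at least one side and the OR computes the same value as an ADD.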
2662 | static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) { |
2663 | if (N.getOpcode() != ISD::OR) |
2664 | return false; |
2665 | KnownBits LHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 0)); |
2666 | if (!LHSKnown.Zero.getBoolValue()) |
2667 | return false; |
2668 | KnownBits RHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 1)); |
2669 | return (~(LHSKnown.Zero | RHSKnown.Zero) == 0); |
2670 | } |
2671 | |
2672 | /// SelectAddressEVXRegReg - Given the specified address, check to see if it can |
2673 | /// be represented as an indexed [r+r] operation. |
2674 | bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base, |
2675 | SDValue &Index, |
2676 | SelectionDAG &DAG) const { |
2677 | for (SDNode *U : N->uses()) { |
2678 | if (MemSDNode *Memop = dyn_cast<MemSDNode>(Val: U)) { |
2679 | if (Memop->getMemoryVT() == MVT::f64) { |
2680 | Base = N.getOperand(i: 0); |
2681 | Index = N.getOperand(i: 1); |
2682 | return true; |
2683 | } |
2684 | } |
2685 | } |
2686 | return false; |
2687 | } |
2688 | |
/// isIntS34Immediate - This method tests whether the value of the given node
/// can be accurately represented as a sign extension from a 34-bit value. If
/// so, this returns true and the immediate.
2692 | bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) { |
2693 | if (!isa<ConstantSDNode>(Val: N)) |
2694 | return false; |
2695 | |
2696 | Imm = (int64_t)N->getAsZExtVal(); |
2697 | return isInt<34>(x: Imm); |
2698 | } |
2699 | bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) { |
2700 | return isIntS34Immediate(N: Op.getNode(), Imm); |
2701 | } |
2702 | |
/// SelectAddressRegReg - Given the specified address, check to see if it
2704 | /// can be represented as an indexed [r+r] operation. Returns false if it |
2705 | /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is |
2706 | /// non-zero and N can be represented by a base register plus a signed 16-bit |
2707 | /// displacement, make a more precise judgement by checking (displacement % \p |
2708 | /// EncodingAlignment). |
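/// For example, (add X, Y) yields Base = X and Index = Y, while (add X, 8)
/// is normally rejected here in favour of the [r+imm] form.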
2709 | bool PPCTargetLowering::SelectAddressRegReg( |
2710 | SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, |
2711 | MaybeAlign EncodingAlignment) const { |
2712 | // If we have a PC Relative target flag don't select as [reg+reg]. It will be |
2713 | // a [pc+imm]. |
2714 | if (SelectAddressPCRel(N, Base)) |
2715 | return false; |
2716 | |
2717 | int16_t Imm = 0; |
2718 | if (N.getOpcode() == ISD::ADD) { |
    // SPE f64 loads/stores cannot handle a 16-bit offset; they only support
    // 8-bit offsets, so prefer the EVX [r+r] form for them.
2721 | if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG)) |
2722 | return true; |
2723 | if (isIntS16Immediate(Op: N.getOperand(i: 1), Imm) && |
2724 | (!EncodingAlignment || isAligned(Lhs: *EncodingAlignment, SizeInBytes: Imm))) |
2725 | return false; // r+i |
2726 | if (N.getOperand(i: 1).getOpcode() == PPCISD::Lo) |
2727 | return false; // r+i |
2728 | |
2729 | Base = N.getOperand(i: 0); |
2730 | Index = N.getOperand(i: 1); |
2731 | return true; |
2732 | } else if (N.getOpcode() == ISD::OR) { |
2733 | if (isIntS16Immediate(Op: N.getOperand(i: 1), Imm) && |
2734 | (!EncodingAlignment || isAligned(Lhs: *EncodingAlignment, SizeInBytes: Imm))) |
      return false; // Prefer r+i if we can fold the immediate.
2736 | |
2737 | // If this is an or of disjoint bitfields, we can codegen this as an add |
2738 | // (for better address arithmetic) if the LHS and RHS of the OR are provably |
2739 | // disjoint. |
2740 | KnownBits LHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 0)); |
2741 | |
2742 | if (LHSKnown.Zero.getBoolValue()) { |
2743 | KnownBits RHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 1)); |
2744 | // If all of the bits are known zero on the LHS or RHS, the add won't |
2745 | // carry. |
2746 | if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) { |
2747 | Base = N.getOperand(i: 0); |
2748 | Index = N.getOperand(i: 1); |
2749 | return true; |
2750 | } |
2751 | } |
2752 | } |
2753 | |
2754 | return false; |
2755 | } |
2756 | |
2757 | // If we happen to be doing an i64 load or store into a stack slot that has |
2758 | // less than a 4-byte alignment, then the frame-index elimination may need to |
2759 | // use an indexed load or store instruction (because the offset may not be a |
2760 | // multiple of 4). The extra register needed to hold the offset comes from the |
2761 | // register scavenger, and it is possible that the scavenger will need to use |
2762 | // an emergency spill slot. As a result, we need to make sure that a spill slot |
2763 | // is allocated when doing an i64 load/store into a less-than-4-byte-aligned |
2764 | // stack slot. |
2765 | static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) { |
2766 | // FIXME: This does not handle the LWA case. |
2767 | if (VT != MVT::i64) |
2768 | return; |
2769 | |
2770 | // NOTE: We'll exclude negative FIs here, which come from argument |
2771 | // lowering, because there are no known test cases triggering this problem |
2772 | // using packed structures (or similar). We can remove this exclusion if |
2773 | // we find such a test case. The reason why this is so test-case driven is |
2774 | // because this entire 'fixup' is only to prevent crashes (from the |
2775 | // register scavenger) on not-really-valid inputs. For example, if we have: |
2776 | // %a = alloca i1 |
2777 | // %b = bitcast i1* %a to i64* |
  //   store i64 0, i64* %b
2779 | // then the store should really be marked as 'align 1', but is not. If it |
2780 | // were marked as 'align 1' then the indexed form would have been |
2781 | // instruction-selected initially, and the problem this 'fixup' is preventing |
2782 | // won't happen regardless. |
2783 | if (FrameIdx < 0) |
2784 | return; |
2785 | |
2786 | MachineFunction &MF = DAG.getMachineFunction(); |
2787 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
2788 | |
2789 | if (MFI.getObjectAlign(ObjectIdx: FrameIdx) >= Align(4)) |
2790 | return; |
2791 | |
2792 | PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); |
2793 | FuncInfo->setHasNonRISpills(); |
2794 | } |
2795 | |
2796 | /// Returns true if the address N can be represented by a base register plus |
2797 | /// a signed 16-bit displacement [r+imm], and if it is not better |
2798 | /// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept |
2799 | /// displacements that are multiples of that value. |
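/// For example, (add X, 100) yields Disp = 100 and Base = X, while a plain
/// pointer X yields Disp = 0 and Base = X (the [r+0] form).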
2800 | bool PPCTargetLowering::SelectAddressRegImm( |
2801 | SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, |
2802 | MaybeAlign EncodingAlignment) const { |
2803 | // FIXME dl should come from parent load or store, not from address |
2804 | SDLoc dl(N); |
2805 | |
2806 | // If we have a PC Relative target flag don't select as [reg+imm]. It will be |
2807 | // a [pc+imm]. |
2808 | if (SelectAddressPCRel(N, Base)) |
2809 | return false; |
2810 | |
2811 | // If this can be more profitably realized as r+r, fail. |
2812 | if (SelectAddressRegReg(N, Base&: Disp, Index&: Base, DAG, EncodingAlignment)) |
2813 | return false; |
2814 | |
2815 | if (N.getOpcode() == ISD::ADD) { |
2816 | int16_t imm = 0; |
2817 | if (isIntS16Immediate(Op: N.getOperand(i: 1), Imm&: imm) && |
2818 | (!EncodingAlignment || isAligned(Lhs: *EncodingAlignment, SizeInBytes: imm))) { |
2819 | Disp = DAG.getTargetConstant(Val: imm, DL: dl, VT: N.getValueType()); |
2820 | if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val: N.getOperand(i: 0))) { |
2821 | Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType()); |
2822 | fixupFuncForFI(DAG, FrameIdx: FI->getIndex(), VT: N.getValueType()); |
2823 | } else { |
2824 | Base = N.getOperand(i: 0); |
2825 | } |
2826 | return true; // [r+i] |
2827 | } else if (N.getOperand(i: 1).getOpcode() == PPCISD::Lo) { |
2828 | // Match LOAD (ADD (X, Lo(G))). |
2829 | assert(!N.getOperand(1).getConstantOperandVal(1) && |
2830 | "Cannot handle constant offsets yet!" ); |
2831 | Disp = N.getOperand(i: 1).getOperand(i: 0); // The global address. |
2832 | assert(Disp.getOpcode() == ISD::TargetGlobalAddress || |
2833 | Disp.getOpcode() == ISD::TargetGlobalTLSAddress || |
2834 | Disp.getOpcode() == ISD::TargetConstantPool || |
2835 | Disp.getOpcode() == ISD::TargetJumpTable); |
2836 | Base = N.getOperand(i: 0); |
2837 | return true; // [&g+r] |
2838 | } |
2839 | } else if (N.getOpcode() == ISD::OR) { |
2840 | int16_t imm = 0; |
2841 | if (isIntS16Immediate(Op: N.getOperand(i: 1), Imm&: imm) && |
2842 | (!EncodingAlignment || isAligned(Lhs: *EncodingAlignment, SizeInBytes: imm))) { |
2843 | // If this is an or of disjoint bitfields, we can codegen this as an add |
2844 | // (for better address arithmetic) if the LHS and RHS of the OR are |
2845 | // provably disjoint. |
2846 | KnownBits LHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 0)); |
2847 | |
2848 | if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { |
2849 | // If all of the bits are known zero on the LHS or RHS, the add won't |
2850 | // carry. |
2851 | if (FrameIndexSDNode *FI = |
2852 | dyn_cast<FrameIndexSDNode>(Val: N.getOperand(i: 0))) { |
2853 | Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType()); |
2854 | fixupFuncForFI(DAG, FrameIdx: FI->getIndex(), VT: N.getValueType()); |
2855 | } else { |
2856 | Base = N.getOperand(i: 0); |
2857 | } |
2858 | Disp = DAG.getTargetConstant(Val: imm, DL: dl, VT: N.getValueType()); |
2859 | return true; |
2860 | } |
2861 | } |
2862 | } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: N)) { |
2863 | // Loading from a constant address. |
2864 | |
2865 | // If this address fits entirely in a 16-bit sext immediate field, codegen |
2866 | // this as "d, 0" |
2867 | int16_t Imm; |
2868 | if (isIntS16Immediate(N: CN, Imm) && |
2869 | (!EncodingAlignment || isAligned(Lhs: *EncodingAlignment, SizeInBytes: Imm))) { |
2870 | Disp = DAG.getTargetConstant(Val: Imm, DL: dl, VT: CN->getValueType(ResNo: 0)); |
2871 | Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, |
2872 | CN->getValueType(0)); |
2873 | return true; |
2874 | } |
2875 | |
2876 | // Handle 32-bit sext immediates with LIS + addr mode. |
2877 | if ((CN->getValueType(0) == MVT::i32 || |
2878 | (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) && |
2879 | (!EncodingAlignment || |
2880 | isAligned(*EncodingAlignment, CN->getZExtValue()))) { |
2881 | int Addr = (int)CN->getZExtValue(); |
2882 | |
    // Break this down into an LIS + disp.
2884 | Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32); |
2885 | |
2886 | Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl, |
2887 | MVT::i32); |
2888 | unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8; |
2889 | Base = SDValue(DAG.getMachineNode(Opcode: Opc, dl, VT: CN->getValueType(ResNo: 0), Op1: Base), 0); |
2890 | return true; |
2891 | } |
2892 | } |
2893 | |
2894 | Disp = DAG.getTargetConstant(Val: 0, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())); |
2895 | if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: N)) { |
2896 | Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType()); |
2897 | fixupFuncForFI(DAG, FrameIdx: FI->getIndex(), VT: N.getValueType()); |
2898 | } else |
2899 | Base = N; |
2900 | return true; // [r+0] |
2901 | } |
2902 | |
2903 | /// Similar to the 16-bit case but for instructions that take a 34-bit |
2904 | /// displacement field (prefixed loads/stores). |
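/// For example, (add X, 123456) yields Disp = 123456 and Base = X, since the
/// displacement fits the 34-bit field of prefixed instructions such as pld.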
2905 | bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp, |
2906 | SDValue &Base, |
2907 | SelectionDAG &DAG) const { |
2908 | // Only on 64-bit targets. |
2909 | if (N.getValueType() != MVT::i64) |
2910 | return false; |
2911 | |
2912 | SDLoc dl(N); |
2913 | int64_t Imm = 0; |
2914 | |
2915 | if (N.getOpcode() == ISD::ADD) { |
2916 | if (!isIntS34Immediate(Op: N.getOperand(i: 1), Imm)) |
2917 | return false; |
2918 | Disp = DAG.getTargetConstant(Val: Imm, DL: dl, VT: N.getValueType()); |
2919 | if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val: N.getOperand(i: 0))) |
2920 | Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType()); |
2921 | else |
2922 | Base = N.getOperand(i: 0); |
2923 | return true; |
2924 | } |
2925 | |
2926 | if (N.getOpcode() == ISD::OR) { |
2927 | if (!isIntS34Immediate(Op: N.getOperand(i: 1), Imm)) |
2928 | return false; |
2929 | // If this is an or of disjoint bitfields, we can codegen this as an add |
2930 | // (for better address arithmetic) if the LHS and RHS of the OR are |
2931 | // provably disjoint. |
2932 | KnownBits LHSKnown = DAG.computeKnownBits(Op: N.getOperand(i: 0)); |
2933 | if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL) |
2934 | return false; |
2935 | if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val: N.getOperand(i: 0))) |
2936 | Base = DAG.getTargetFrameIndex(FI: FI->getIndex(), VT: N.getValueType()); |
2937 | else |
2938 | Base = N.getOperand(i: 0); |
2939 | Disp = DAG.getTargetConstant(Val: Imm, DL: dl, VT: N.getValueType()); |
2940 | return true; |
2941 | } |
2942 | |
2943 | if (isIntS34Immediate(Op: N, Imm)) { // If the address is a 34-bit const. |
2944 | Disp = DAG.getTargetConstant(Val: Imm, DL: dl, VT: N.getValueType()); |
2945 | Base = DAG.getRegister(PPC::ZERO8, N.getValueType()); |
2946 | return true; |
2947 | } |
2948 | |
2949 | return false; |
2950 | } |
2951 | |
/// SelectAddressRegRegOnly - Given the specified address, force it to be
2953 | /// represented as an indexed [r+r] operation. |
2954 | bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, |
2955 | SDValue &Index, |
2956 | SelectionDAG &DAG) const { |
2957 | // Check to see if we can easily represent this as an [r+r] address. This |
2958 | // will fail if it thinks that the address is more profitably represented as |
2959 | // reg+imm, e.g. where imm = 0. |
2960 | if (SelectAddressRegReg(N, Base, Index, DAG)) |
2961 | return true; |
2962 | |
2963 | // If the address is the result of an add, we will utilize the fact that the |
2964 | // address calculation includes an implicit add. However, we can reduce |
2965 | // register pressure if we do not materialize a constant just for use as the |
  // index register. We only fold the add into the [r+r] form if it is not an
  // add of a value and a 16-bit signed constant where both have a single use.
2968 | int16_t imm = 0; |
2969 | if (N.getOpcode() == ISD::ADD && |
2970 | (!isIntS16Immediate(Op: N.getOperand(i: 1), Imm&: imm) || |
2971 | !N.getOperand(i: 1).hasOneUse() || !N.getOperand(i: 0).hasOneUse())) { |
2972 | Base = N.getOperand(i: 0); |
2973 | Index = N.getOperand(i: 1); |
2974 | return true; |
2975 | } |
2976 | |
2977 | // Otherwise, do it the hard way, using R0 as the base register. |
2978 | Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, |
2979 | N.getValueType()); |
2980 | Index = N; |
2981 | return true; |
2982 | } |
2983 | |
2984 | template <typename Ty> static bool isValidPCRelNode(SDValue N) { |
2985 | Ty *PCRelCand = dyn_cast<Ty>(N); |
2986 | return PCRelCand && (PPCInstrInfo::hasPCRelFlag(TF: PCRelCand->getTargetFlags())); |
2987 | } |
2988 | |
2989 | /// Returns true if this address is a PC Relative address. |
2990 | /// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG |
2991 | /// or if the node opcode is PPCISD::MAT_PCREL_ADDR. |
2992 | bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const { |
2993 | // This is a materialize PC Relative node. Always select this as PC Relative. |
2994 | Base = N; |
2995 | if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR) |
2996 | return true; |
2997 | if (isValidPCRelNode<ConstantPoolSDNode>(N) || |
2998 | isValidPCRelNode<GlobalAddressSDNode>(N) || |
2999 | isValidPCRelNode<JumpTableSDNode>(N) || |
3000 | isValidPCRelNode<BlockAddressSDNode>(N)) |
3001 | return true; |
3002 | return false; |
3003 | } |
3004 | |
3005 | /// Returns true if we should use a direct load into vector instruction |
3006 | /// (such as lxsd or lfd), instead of a load into gpr + direct move sequence. |
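/// For example, a single-use i64 load feeding a scalar_to_vector can be
/// selected as one lxsd instead of an integer load followed by an mtvsrd
/// direct move.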
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST) {
3009 | // If there are any other uses other than scalar to vector, then we should |
3010 | // keep it as a scalar load -> direct move pattern to prevent multiple |
3011 | // loads. |
3012 | LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N); |
3013 | if (!LD) |
3014 | return false; |
3015 | |
3016 | EVT MemVT = LD->getMemoryVT(); |
3017 | if (!MemVT.isSimple()) |
3018 | return false; |
3019 | switch(MemVT.getSimpleVT().SimpleTy) { |
3020 | case MVT::i64: |
3021 | break; |
3022 | case MVT::i32: |
3023 | if (!ST.hasP8Vector()) |
3024 | return false; |
3025 | break; |
3026 | case MVT::i16: |
3027 | case MVT::i8: |
3028 | if (!ST.hasP9Vector()) |
3029 | return false; |
3030 | break; |
3031 | default: |
3032 | return false; |
3033 | } |
3034 | |
3035 | SDValue LoadedVal(N, 0); |
3036 | if (!LoadedVal.hasOneUse()) |
3037 | return false; |
3038 | |
3039 | for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end(); |
3040 | UI != UE; ++UI) |
3041 | if (UI.getUse().get().getResNo() == 0 && |
3042 | UI->getOpcode() != ISD::SCALAR_TO_VECTOR && |
3043 | UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED) |
3044 | return false; |
3045 | |
3046 | return true; |
3047 | } |
3048 | |
/// getPreIndexedAddressParts - Returns true by value, and the base pointer,
/// offset pointer, and addressing mode by reference, if the node's address
/// can be legally represented as a pre-indexed load / store address.
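/// For example, a load from (add X, 16) can be selected as an update-form
/// lwzu/ldu, with Base = X, Offset = 16 and AM = ISD::PRE_INC.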
3052 | bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, |
3053 | SDValue &Offset, |
3054 | ISD::MemIndexedMode &AM, |
3055 | SelectionDAG &DAG) const { |
3056 | if (DisablePPCPreinc) return false; |
3057 | |
3058 | bool isLoad = true; |
3059 | SDValue Ptr; |
3060 | EVT VT; |
3061 | Align Alignment; |
3062 | if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) { |
3063 | Ptr = LD->getBasePtr(); |
3064 | VT = LD->getMemoryVT(); |
3065 | Alignment = LD->getAlign(); |
3066 | } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Val: N)) { |
3067 | Ptr = ST->getBasePtr(); |
3068 | VT = ST->getMemoryVT(); |
3069 | Alignment = ST->getAlign(); |
3070 | isLoad = false; |
3071 | } else |
3072 | return false; |
3073 | |
  // Do not generate pre-inc forms for specific loads that feed
  // scalar_to_vector instructions, because we can instead fold these into a
  // more efficient instruction (such as LXSD).
3077 | if (isLoad && usePartialVectorLoads(N, ST: Subtarget)) { |
3078 | return false; |
3079 | } |
3080 | |
3081 | // PowerPC doesn't have preinc load/store instructions for vectors |
3082 | if (VT.isVector()) |
3083 | return false; |
3084 | |
3085 | if (SelectAddressRegReg(N: Ptr, Base, Index&: Offset, DAG)) { |
3086 | // Common code will reject creating a pre-inc form if the base pointer |
3087 | // is a frame index, or if N is a store and the base pointer is either |
3088 | // the same as or a predecessor of the value being stored. Check for |
3089 | // those situations here, and try with swapped Base/Offset instead. |
3090 | bool Swap = false; |
3091 | |
3092 | if (isa<FrameIndexSDNode>(Val: Base) || isa<RegisterSDNode>(Val: Base)) |
3093 | Swap = true; |
3094 | else if (!isLoad) { |
3095 | SDValue Val = cast<StoreSDNode>(Val: N)->getValue(); |
3096 | if (Val == Base || Base.getNode()->isPredecessorOf(N: Val.getNode())) |
3097 | Swap = true; |
3098 | } |
3099 | |
3100 | if (Swap) |
3101 | std::swap(a&: Base, b&: Offset); |
3102 | |
3103 | AM = ISD::PRE_INC; |
3104 | return true; |
3105 | } |
3106 | |
3107 | // LDU/STU can only handle immediates that are a multiple of 4. |
3108 | if (VT != MVT::i64) { |
3109 | if (!SelectAddressRegImm(N: Ptr, Disp&: Offset, Base, DAG, EncodingAlignment: std::nullopt)) |
3110 | return false; |
3111 | } else { |
3112 | // LDU/STU need an address with at least 4-byte alignment. |
3113 | if (Alignment < Align(4)) |
3114 | return false; |
3115 | |
3116 | if (!SelectAddressRegImm(N: Ptr, Disp&: Offset, Base, DAG, EncodingAlignment: Align(4))) |
3117 | return false; |
3118 | } |
3119 | |
3120 | if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) { |
3121 | // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of |
3122 | // sext i32 to i64 when addr mode is r+i. |
3123 | if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 && |
3124 | LD->getExtensionType() == ISD::SEXTLOAD && |
3125 | isa<ConstantSDNode>(Offset)) |
3126 | return false; |
3127 | } |
3128 | |
3129 | AM = ISD::PRE_INC; |
3130 | return true; |
3131 | } |
3132 | |
3133 | //===----------------------------------------------------------------------===// |
3134 | // LowerOperation implementation |
3135 | //===----------------------------------------------------------------------===// |
3136 | |
/// Compute the HiOpFlags and LoOpFlags target MO flags for a label reference,
/// selecting the PIC variants when generating position-independent code.
3139 | static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, |
3140 | unsigned &HiOpFlags, unsigned &LoOpFlags, |
3141 | const GlobalValue *GV = nullptr) { |
3142 | HiOpFlags = PPCII::MO_HA; |
3143 | LoOpFlags = PPCII::MO_LO; |
3144 | |
3145 | // Don't use the pic base if not in PIC relocation model. |
3146 | if (IsPIC) { |
3147 | HiOpFlags = PPCII::MO_PIC_HA_FLAG; |
3148 | LoOpFlags = PPCII::MO_PIC_LO_FLAG; |
3149 | } |
3150 | } |
3151 | |
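// Combine the high and low parts of a label address. In the simple non-PIC
// case this materializes as, e.g.:
//   lis  r3, sym@ha
//   addi r3, r3, sym@l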
3152 | static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, |
3153 | SelectionDAG &DAG) { |
3154 | SDLoc DL(HiPart); |
3155 | EVT PtrVT = HiPart.getValueType(); |
3156 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT: PtrVT); |
3157 | |
3158 | SDValue Hi = DAG.getNode(Opcode: PPCISD::Hi, DL, VT: PtrVT, N1: HiPart, N2: Zero); |
3159 | SDValue Lo = DAG.getNode(Opcode: PPCISD::Lo, DL, VT: PtrVT, N1: LoPart, N2: Zero); |
3160 | |
3161 | // With PIC, the first instruction is actually "GR+hi(&G)". |
3162 | if (isPIC) |
3163 | Hi = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, |
3164 | N1: DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL, VT: PtrVT), N2: Hi); |
3165 | |
3166 | // Generate non-pic code that has direct accesses to the constant pool. |
3167 | // The address of the global is just (hi(&g)+lo(&g)). |
3168 | return DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Hi, N2: Lo); |
3169 | } |
3170 | |
3171 | static void setUsesTOCBasePtr(MachineFunction &MF) { |
3172 | PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); |
3173 | FuncInfo->setUsesTOCBasePtr(); |
3174 | } |
3175 | |
3176 | static void setUsesTOCBasePtr(SelectionDAG &DAG) { |
3177 | setUsesTOCBasePtr(DAG.getMachineFunction()); |
3178 | } |
3179 | |
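// Produce a load of the given TOC entry. On 64-bit targets this typically
// selects to a TOC-relative load such as:
//   ld reg, sym@toc(r2)
// where r2 (X2) holds the TOC base pointer.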
3180 | SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, |
3181 | SDValue GA) const { |
3182 | const bool Is64Bit = Subtarget.isPPC64(); |
3183 | EVT VT = Is64Bit ? MVT::i64 : MVT::i32; |
3184 | SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) |
3185 | : Subtarget.isAIXABI() |
3186 | ? DAG.getRegister(PPC::R2, VT) |
3187 | : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT); |
3188 | SDValue Ops[] = { GA, Reg }; |
3189 | return DAG.getMemIntrinsicNode( |
3190 | PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT, |
3191 | MachinePointerInfo::getGOT(DAG.getMachineFunction()), std::nullopt, |
3192 | MachineMemOperand::MOLoad); |
3193 | } |
3194 | |
3195 | SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, |
3196 | SelectionDAG &DAG) const { |
3197 | EVT PtrVT = Op.getValueType(); |
3198 | ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Val&: Op); |
3199 | const Constant *C = CP->getConstVal(); |
3200 | |
3201 | // 64-bit SVR4 ABI and AIX ABI code are always position-independent. |
  // The actual address of the constant pool entry is stored in the TOC.
3203 | if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) { |
3204 | if (Subtarget.isUsingPCRelativeCalls()) { |
3205 | SDLoc DL(CP); |
3206 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
3207 | SDValue ConstPool = DAG.getTargetConstantPool( |
3208 | C, VT: Ty, Align: CP->getAlign(), Offset: CP->getOffset(), TargetFlags: PPCII::MO_PCREL_FLAG); |
3209 | return DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: Ty, Operand: ConstPool); |
3210 | } |
3211 | setUsesTOCBasePtr(DAG); |
3212 | SDValue GA = DAG.getTargetConstantPool(C, VT: PtrVT, Align: CP->getAlign(), Offset: 0); |
3213 | return getTOCEntry(DAG, dl: SDLoc(CP), GA); |
3214 | } |
3215 | |
3216 | unsigned MOHiFlag, MOLoFlag; |
3217 | bool IsPIC = isPositionIndependent(); |
3218 | getLabelAccessInfo(IsPIC, Subtarget, HiOpFlags&: MOHiFlag, LoOpFlags&: MOLoFlag); |
3219 | |
3220 | if (IsPIC && Subtarget.isSVR4ABI()) { |
3221 | SDValue GA = |
3222 | DAG.getTargetConstantPool(C, VT: PtrVT, Align: CP->getAlign(), Offset: PPCII::MO_PIC_FLAG); |
3223 | return getTOCEntry(DAG, dl: SDLoc(CP), GA); |
3224 | } |
3225 | |
3226 | SDValue CPIHi = |
3227 | DAG.getTargetConstantPool(C, VT: PtrVT, Align: CP->getAlign(), Offset: 0, TargetFlags: MOHiFlag); |
3228 | SDValue CPILo = |
3229 | DAG.getTargetConstantPool(C, VT: PtrVT, Align: CP->getAlign(), Offset: 0, TargetFlags: MOLoFlag); |
3230 | return LowerLabelRef(HiPart: CPIHi, LoPart: CPILo, isPIC: IsPIC, DAG); |
3231 | } |
3232 | |
3233 | // For 64-bit PowerPC, prefer the more compact relative encodings. |
3234 | // This trades 32 bits per jump table entry for one or two instructions |
// at the jump site.
3236 | unsigned PPCTargetLowering::getJumpTableEncoding() const { |
3237 | if (isJumpTableRelative()) |
3238 | return MachineJumpTableInfo::EK_LabelDifference32; |
3239 | |
3240 | return TargetLowering::getJumpTableEncoding(); |
3241 | } |
3242 | |
3243 | bool PPCTargetLowering::isJumpTableRelative() const { |
3244 | if (UseAbsoluteJumpTables) |
3245 | return false; |
3246 | if (Subtarget.isPPC64() || Subtarget.isAIXABI()) |
3247 | return true; |
3248 | return TargetLowering::isJumpTableRelative(); |
3249 | } |
3250 | |
3251 | SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table, |
3252 | SelectionDAG &DAG) const { |
3253 | if (!Subtarget.isPPC64() || Subtarget.isAIXABI()) |
3254 | return TargetLowering::getPICJumpTableRelocBase(Table, DAG); |
3255 | |
3256 | switch (getTargetMachine().getCodeModel()) { |
3257 | case CodeModel::Small: |
3258 | case CodeModel::Medium: |
3259 | return TargetLowering::getPICJumpTableRelocBase(Table, DAG); |
3260 | default: |
3261 | return DAG.getNode(Opcode: PPCISD::GlobalBaseReg, DL: SDLoc(), |
3262 | VT: getPointerTy(DL: DAG.getDataLayout())); |
3263 | } |
3264 | } |
3265 | |
3266 | const MCExpr * |
3267 | PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF, |
3268 | unsigned JTI, |
3269 | MCContext &Ctx) const { |
3270 | if (!Subtarget.isPPC64() || Subtarget.isAIXABI()) |
3271 | return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); |
3272 | |
3273 | switch (getTargetMachine().getCodeModel()) { |
3274 | case CodeModel::Small: |
3275 | case CodeModel::Medium: |
3276 | return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); |
3277 | default: |
3278 | return MCSymbolRefExpr::create(Symbol: MF->getPICBaseSymbol(), Ctx); |
3279 | } |
3280 | } |
3281 | |
3282 | SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { |
3283 | EVT PtrVT = Op.getValueType(); |
3284 | JumpTableSDNode *JT = cast<JumpTableSDNode>(Val&: Op); |
3285 | |
3286 | // isUsingPCRelativeCalls() returns true when PCRelative is enabled |
3287 | if (Subtarget.isUsingPCRelativeCalls()) { |
3288 | SDLoc DL(JT); |
3289 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
3290 | SDValue GA = |
3291 | DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: Ty, TargetFlags: PPCII::MO_PCREL_FLAG); |
3292 | SDValue MatAddr = DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: Ty, Operand: GA); |
3293 | return MatAddr; |
3294 | } |
3295 | |
3296 | // 64-bit SVR4 ABI and AIX ABI code are always position-independent. |
  // The actual address of the jump table is stored in the TOC.
3298 | if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) { |
3299 | setUsesTOCBasePtr(DAG); |
3300 | SDValue GA = DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: PtrVT); |
3301 | return getTOCEntry(DAG, dl: SDLoc(JT), GA); |
3302 | } |
3303 | |
3304 | unsigned MOHiFlag, MOLoFlag; |
3305 | bool IsPIC = isPositionIndependent(); |
3306 | getLabelAccessInfo(IsPIC, Subtarget, HiOpFlags&: MOHiFlag, LoOpFlags&: MOLoFlag); |
3307 | |
3308 | if (IsPIC && Subtarget.isSVR4ABI()) { |
3309 | SDValue GA = DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: PtrVT, |
3310 | TargetFlags: PPCII::MO_PIC_FLAG); |
3311 | return getTOCEntry(DAG, dl: SDLoc(GA), GA); |
3312 | } |
3313 | |
3314 | SDValue JTIHi = DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: PtrVT, TargetFlags: MOHiFlag); |
3315 | SDValue JTILo = DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: PtrVT, TargetFlags: MOLoFlag); |
3316 | return LowerLabelRef(HiPart: JTIHi, LoPart: JTILo, isPIC: IsPIC, DAG); |
3317 | } |
3318 | |
3319 | SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, |
3320 | SelectionDAG &DAG) const { |
3321 | EVT PtrVT = Op.getValueType(); |
3322 | BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Val&: Op); |
3323 | const BlockAddress *BA = BASDN->getBlockAddress(); |
3324 | |
3325 | // isUsingPCRelativeCalls() returns true when PCRelative is enabled |
3326 | if (Subtarget.isUsingPCRelativeCalls()) { |
3327 | SDLoc DL(BASDN); |
3328 | EVT Ty = getPointerTy(DL: DAG.getDataLayout()); |
3329 | SDValue GA = DAG.getTargetBlockAddress(BA, VT: Ty, Offset: BASDN->getOffset(), |
3330 | TargetFlags: PPCII::MO_PCREL_FLAG); |
3331 | SDValue MatAddr = DAG.getNode(Opcode: PPCISD::MAT_PCREL_ADDR, DL, VT: Ty, Operand: GA); |
3332 | return MatAddr; |
3333 | } |
3334 | |
3335 | // 64-bit SVR4 ABI and AIX ABI code are always position-independent. |
3336 | // The actual BlockAddress is stored in the TOC. |
3337 | if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) { |
3338 | setUsesTOCBasePtr(DAG); |
3339 | SDValue GA = DAG.getTargetBlockAddress(BA, VT: PtrVT, Offset: BASDN->getOffset()); |
3340 | return getTOCEntry(DAG, dl: SDLoc(BASDN), GA); |
3341 | } |
3342 | |
3343 | // 32-bit position-independent ELF stores the BlockAddress in the .got. |
3344 | if (Subtarget.is32BitELFABI() && isPositionIndependent()) |
3345 | return getTOCEntry( |
3346 | DAG, dl: SDLoc(BASDN), |
3347 | GA: DAG.getTargetBlockAddress(BA, VT: PtrVT, Offset: BASDN->getOffset())); |
3348 | |
3349 | unsigned MOHiFlag, MOLoFlag; |
3350 | bool IsPIC = isPositionIndependent(); |
3351 | getLabelAccessInfo(IsPIC, Subtarget, HiOpFlags&: MOHiFlag, LoOpFlags&: MOLoFlag); |
3352 | SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, VT: PtrVT, Offset: 0, TargetFlags: MOHiFlag); |
3353 | SDValue TgtBALo = DAG.getTargetBlockAddress(BA, VT: PtrVT, Offset: 0, TargetFlags: MOLoFlag); |
3354 | return LowerLabelRef(HiPart: TgtBAHi, LoPart: TgtBALo, isPIC: IsPIC, DAG); |
3355 | } |
3356 | |
3357 | SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, |
3358 | SelectionDAG &DAG) const { |
3359 | if (Subtarget.isAIXABI()) |
3360 | return LowerGlobalTLSAddressAIX(Op, DAG); |
3361 | |
3362 | return LowerGlobalTLSAddressLinux(Op, DAG); |
3363 | } |
3364 | |
3365 | SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op, |
3366 | SelectionDAG &DAG) const { |
3367 | GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Val&: Op); |
3368 | |
3369 | if (DAG.getTarget().useEmulatedTLS()) |
3370 | report_fatal_error(reason: "Emulated TLS is not yet supported on AIX" ); |
3371 | |
3372 | SDLoc dl(GA); |
3373 | const GlobalValue *GV = GA->getGlobal(); |
3374 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
3375 | bool Is64Bit = Subtarget.isPPC64(); |
3376 | TLSModel::Model Model = getTargetMachine().getTLSModel(GV); |
3377 | bool IsTLSLocalExecModel = Model == TLSModel::LocalExec; |
3378 | |
3379 | if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) { |
3380 | bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS(); |
3381 | bool HasAIXSmallTLSGlobalAttr = false; |
3382 | SDValue VariableOffsetTGA = |
3383 | DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: PPCII::MO_TPREL_FLAG); |
3384 | SDValue VariableOffset = getTOCEntry(DAG, dl, GA: VariableOffsetTGA); |
3385 | SDValue TLSReg; |
3386 | |
3387 | if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(Val: GV)) |
3388 | if (GVar->hasAttribute(Kind: "aix-small-tls" )) |
3389 | HasAIXSmallTLSGlobalAttr = true; |
3390 | |
3391 | if (Is64Bit) { |
3392 | // For local-exec and initial-exec on AIX (64-bit), the sequence generated |
3393 | // involves a load of the variable offset (from the TOC), followed by an |
3394 | // add of the loaded variable offset to R13 (the thread pointer). |
3395 | // This code sequence looks like: |
3396 | // ld reg1,var[TC](2) |
3397 | // add reg2, reg1, r13 // r13 contains the thread pointer |
3398 | TLSReg = DAG.getRegister(PPC::X13, MVT::i64); |
3399 | |
3400 | // With the -maix-small-local-exec-tls option, or with the "aix-small-tls" |
3401 | // global variable attribute, produce a faster access sequence for |
3402 | // local-exec TLS variables where the offset from the TLS base is encoded |
3403 | // as an immediate operand. |
3404 | // |
3405 | // We only utilize the faster local-exec access sequence when the TLS |
3406 | // variable has a size within the policy limit. We treat types that are |
3407 | // not sized or are empty as being over the policy size limit. |
3408 | if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) && |
3409 | IsTLSLocalExecModel) { |
3410 | Type *GVType = GV->getValueType(); |
3411 | if (GVType->isSized() && !GVType->isEmptyTy() && |
3412 | GV->getParent()->getDataLayout().getTypeAllocSize(Ty: GVType) <= |
3413 | AIXSmallTlsPolicySizeLimit) |
3414 | return DAG.getNode(Opcode: PPCISD::Lo, DL: dl, VT: PtrVT, N1: VariableOffsetTGA, N2: TLSReg); |
3415 | } |
3416 | } else { |
3417 | // For local-exec and initial-exec on AIX (32-bit), the sequence generated |
3418 | // involves loading the variable offset from the TOC, generating a call to |
3419 | // .__get_tpointer to get the thread pointer (which will be in R3), and |
3420 | // adding the two together: |
3421 | // lwz reg1,var[TC](2) |
3422 | // bla .__get_tpointer |
3423 | // add reg2, reg1, r3 |
3424 | TLSReg = DAG.getNode(Opcode: PPCISD::GET_TPOINTER, DL: dl, VT: PtrVT); |
3425 | |
3426 | // We do not implement the 32-bit version of the faster access sequence |
3427 | // for local-exec that is controlled by the -maix-small-local-exec-tls |
3428 | // option, or the "aix-small-tls" global variable attribute. |
3429 | if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) |
3430 | report_fatal_error(reason: "The small-local-exec TLS access sequence is " |
3431 | "currently only supported on AIX (64-bit mode)." ); |
3432 | } |
3433 | return DAG.getNode(Opcode: PPCISD::ADD_TLS, DL: dl, VT: PtrVT, N1: TLSReg, N2: VariableOffset); |
3434 | } |
3435 | |
3436 | if (Model == TLSModel::LocalDynamic) { |
3437 | bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS(); |
3438 | |
3439 | // We do not implement the 32-bit version of the faster access sequence |
3440 | // for local-dynamic that is controlled by -maix-small-local-dynamic-tls. |
3441 | if (!Is64Bit && HasAIXSmallLocalDynamicTLS) |
3442 | report_fatal_error(reason: "The small-local-dynamic TLS access sequence is " |
3443 | "currently only supported on AIX (64-bit mode)." ); |
3444 | |
3445 | // For local-dynamic on AIX, we need to generate one TOC entry for each |
3446 | // variable offset, and a single module-handle TOC entry for the entire |
3447 | // file. |
3448 | |
3449 | SDValue VariableOffsetTGA = |
3450 | DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: PPCII::MO_TLSLD_FLAG); |
3451 | SDValue VariableOffset = getTOCEntry(DAG, dl, GA: VariableOffsetTGA); |
3452 | |
3453 | Module *M = DAG.getMachineFunction().getFunction().getParent(); |
3454 | GlobalVariable *TLSGV = |
3455 | dyn_cast_or_null<GlobalVariable>(Val: M->getOrInsertGlobal( |
3456 | Name: StringRef("_$TLSML" ), Ty: PointerType::getUnqual(C&: *DAG.getContext()))); |
    assert(TLSGV && "Not able to create GV for _$TLSML.");
    TLSGV->setThreadLocalMode(GlobalVariable::LocalDynamicTLSModel);
3459 | SDValue ModuleHandleTGA = |
3460 | DAG.getTargetGlobalAddress(GV: TLSGV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: PPCII::MO_TLSLDM_FLAG); |
3461 | SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, GA: ModuleHandleTGA); |
3462 | SDValue ModuleHandle = |
3463 | DAG.getNode(Opcode: PPCISD::TLSLD_AIX, DL: dl, VT: PtrVT, Operand: ModuleHandleTOC); |
3464 | |
3465 | // With the -maix-small-local-dynamic-tls option, produce a faster access |
3466 | // sequence for local-dynamic TLS variables where the offset from the |
3467 | // module-handle is encoded as an immediate operand. |
3468 | // |
3469 | // We only utilize the faster local-dynamic access sequence when the TLS |
3470 | // variable has a size within the policy limit. We treat types that are |
3471 | // not sized or are empty as being over the policy size limit. |
3472 | if (HasAIXSmallLocalDynamicTLS) { |
3473 | Type *GVType = GV->getValueType(); |
3474 | if (GVType->isSized() && !GVType->isEmptyTy() && |
3475 | GV->getParent()->getDataLayout().getTypeAllocSize(Ty: GVType) <= |
3476 | AIXSmallTlsPolicySizeLimit) |
3477 | return DAG.getNode(Opcode: PPCISD::Lo, DL: dl, VT: PtrVT, N1: VariableOffsetTGA, |
3478 | N2: ModuleHandle); |
3479 | } |
3480 | |
3481 | return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: ModuleHandle, N2: VariableOffset); |
3482 | } |
3483 | |
3484 | // If Local- or Initial-exec or Local-dynamic is not possible or specified, |
3485 | // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We |
3486 | // need to generate two TOC entries, one for the variable offset, one for the |
3487 | // region handle. The global address for the TOC entry of the region handle is |
3488 | // created with the MO_TLSGDM_FLAG flag and the global address for the TOC |
3489 | // entry of the variable offset is created with MO_TLSGD_FLAG. |
3490 | SDValue VariableOffsetTGA = |
3491 | DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: PPCII::MO_TLSGD_FLAG); |
3492 | SDValue RegionHandleTGA = |
3493 | DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, TargetFlags: PPCII::MO_TLSGDM_FLAG); |
3494 | SDValue VariableOffset = getTOCEntry(DAG, dl, GA: VariableOffsetTGA); |
3495 | SDValue RegionHandle = getTOCEntry(DAG, dl, GA: RegionHandleTGA); |
3496 | return DAG.getNode(Opcode: PPCISD::TLSGD_AIX, DL: dl, VT: PtrVT, N1: VariableOffset, |
3497 | N2: RegionHandle); |
3498 | } |
3499 | |
3500 | SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op, |
3501 | SelectionDAG &DAG) const { |
3502 | // FIXME: TLS addresses currently use medium model code sequences, |
3503 | // which is the most useful form. Eventually support for small and |
3504 | // large models could be added if users need it, at the cost of |
3505 | // additional complexity. |
3506 | GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Val&: Op); |
3507 | if (DAG.getTarget().useEmulatedTLS()) |
3508 | return LowerToTLSEmulatedModel(GA, DAG); |
3509 | |
3510 | SDLoc dl(GA); |
3511 | const GlobalValue *GV = GA->getGlobal(); |
3512 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
3513 | bool is64bit = Subtarget.isPPC64(); |
3514 | const Module *M = DAG.getMachineFunction().getFunction().getParent(); |
3515 | PICLevel::Level picLevel = M->getPICLevel(); |
3516 | |
3517 | const TargetMachine &TM = getTargetMachine(); |
3518 | TLSModel::Model Model = TM.getTLSModel(GV); |
3519 | |
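  // For local-exec the offset from the thread pointer (r13 on 64-bit) is
  // fixed at link time, so without PC-relative addressing the sequence is
  // simply, e.g.:
  //   addis reg, r13, sym@tprel@ha
  //   addi  reg, reg, sym@tprel@l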
3520 | if (Model == TLSModel::LocalExec) { |
3521 | if (Subtarget.isUsingPCRelativeCalls()) { |
3522 | SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64); |
3523 | SDValue TGA = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, |
3524 | TargetFlags: PPCII::MO_TPREL_PCREL_FLAG); |
3525 | SDValue MatAddr = |
3526 | DAG.getNode(Opcode: PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, DL: dl, VT: PtrVT, Operand: TGA); |
3527 | return DAG.getNode(Opcode: PPCISD::ADD_TLS, DL: dl, VT: PtrVT, N1: TLSReg, N2: MatAddr); |
3528 | } |
3529 | |
3530 | SDValue TGAHi = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, |
3531 | TargetFlags: PPCII::MO_TPREL_HA); |
3532 | SDValue TGALo = DAG.getTargetGlobalAddress(GV, DL: dl, VT: PtrVT, offset: 0, |
3533 | TargetFlags: PPCII::MO_TPREL_LO); |
3534 | SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64) |
3535 | : DAG.getRegister(PPC::R2, MVT::i32); |
3536 | |
3537 | SDValue Hi = DAG.getNode(Opcode: PPCISD::Hi, DL: dl, VT: PtrVT, N1: TGAHi, N2: TLSReg); |
3538 | return DAG.getNode(Opcode: PPCISD::Lo, DL: dl, VT: PtrVT, N1: TGALo, N2: Hi); |
3539 | } |
3540 | |
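  // For initial-exec the thread-pointer offset is loaded from the GOT and
  // added to the thread pointer, e.g. on 64-bit:
  //   addis reg, r2, sym@got@tprel@ha
  //   ld    reg, sym@got@tprel@l(reg)
  //   add   reg, reg, sym@tls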
  if (Model == TLSModel::InitialExec) {
    bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
    SDValue TGA = DAG.getTargetGlobalAddress(
        GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
    SDValue TGATLS = DAG.getTargetGlobalAddress(
        GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
    SDValue TPOffset;
    if (IsPCRel) {
      SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
      TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
                             MachinePointerInfo());
    } else {
      SDValue GOTPtr;
      if (is64bit) {
        setUsesTOCBasePtr(DAG);
        SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
        GOTPtr =
            DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
      } else {
        if (!TM.isPositionIndependent())
          GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
        else if (picLevel == PICLevel::SmallPIC)
          GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
        else
          GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
      }
      TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
    }
    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
  }

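  // General-dynamic: the address must be computed at run time via
  // __tls_get_addr; the call is implicit in the ADDI_TLSGD_L_ADDR (or
  // PC-relative) pseudo and materialized after instruction selection.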
  if (Model == TLSModel::GeneralDynamic) {
    if (Subtarget.isUsingPCRelativeCalls()) {
      SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_GOT_TLSGD_PCREL_FLAG);
      return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
    }

    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
                           GOTReg, TGA);
    } else {
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
                       GOTPtr, TGA, TGA);
  }

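  // Local-dynamic: a single __tls_get_addr call produces the module's TLS
  // block address; individual variables are then reached through DTPREL
  // offsets from that base.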
  if (Model == TLSModel::LocalDynamic) {
    if (Subtarget.isUsingPCRelativeCalls()) {
      SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_GOT_TLSLD_PCREL_FLAG);
      SDValue MatPCRel =
          DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
      return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
    }

    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
                           GOTReg, TGA);
    } else {
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
                                  PtrVT, GOTPtr, TGA, TGA);
    SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
                                      PtrVT, TLSAddr, TGA);
    return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
  }

  llvm_unreachable("Unknown TLS model!");
}

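// Lower a non-TLS global address. On 64-bit ELF and AIX the address is
// loaded from a TOC entry (or materialized directly when PC-relative
// addressing is available); on 32-bit SVR4 it is built from high/low
// address halves, going through the GOT when compiling PIC.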
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  SDLoc DL(GSDN);
  const GlobalValue *GV = GSDN->getGlobal();

  // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    if (Subtarget.isUsingPCRelativeCalls()) {
      EVT Ty = getPointerTy(DAG.getDataLayout());
      if (isAccessedAsGotIndirect(Op)) {
        SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
                                                PPCII::MO_GOT_PCREL_FLAG);
        SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
        SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
                                   MachinePointerInfo());
        return Load;
      } else {
        SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
                                                PPCII::MO_PCREL_FLAG);
        return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
      }
    }
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
    return getTOCEntry(DAG, DL, GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);

  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
                                            GSDN->getOffset(),
                                            PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, DL, GA);
  }

  SDValue GAHi =
      DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
  SDValue GALo =
      DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);

  return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
}

SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  bool IsStrict = Op->isStrictFPOpcode();
  ISD::CondCode CC =
      cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
  SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
  SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
  EVT LHSVT = LHS.getValueType();
  SDLoc dl(Op);

  // Soften the setcc with libcall if it is fp128.
  if (LHSVT == MVT::f128) {
    assert(!Subtarget.hasP9Vector() &&
           "SETCC for f128 is already legal under Power9!");
    softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
                        Op->getOpcode() == ISD::STRICT_FSETCCS);
    if (RHS.getNode())
      LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
                        DAG.getCondCode(CC));
    if (IsStrict)
      return DAG.getMergeValues({LHS, Chain}, dl);
    return LHS;
  }

  assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");

  if (Op.getValueType() == MVT::v2i64) {
    // When the operands themselves are v2i64 values, we need to do something
    // special because VSX has no underlying comparison operations for these.
    if (LHS.getValueType() == MVT::v2i64) {
      // Equality can be handled by casting to the legal type for Altivec
      // comparisons, everything else needs to be expanded.
      if (CC != ISD::SETEQ && CC != ISD::SETNE)
        return SDValue();
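      // Compare as v4i32 and then merge each pair of 32-bit results: for
      // SETEQ both words of a doubleword must match (AND with the
      // word-swapped result), while for SETNE a mismatch in either word
      // suffices (OR).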
      SDValue SetCC32 = DAG.getSetCC(
          dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
          DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
      int ShuffV[] = {1, 0, 3, 2};
      SDValue Shuff =
          DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
      return DAG.getBitcast(MVT::v2i64,
                            DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
                                        dl, MVT::v4i32, Shuff, SetCC32));
    }

    // We handle most of these in the usual way.
    return Op;
  }

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
  if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
    return V;

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized.  FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (C->isAllOnes() || C->isZero())
      return SDValue();
  }

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by xor'ing the rhs with the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit.  The
  // normal approach here uses sub to do this instead of xor.  Using xor exposes
  // the result to other bit-twiddling opportunities.
  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    EVT VT = Op.getValueType();
    SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
    return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
  }
  return SDValue();
}

SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  SDLoc dl(Node);

  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");

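  // The 32-bit SVR4 va_list layout walked below is: byte 0 holds the gpr
  // index, byte 1 the fpr index, bytes 4-7 the overflow_arg_area pointer,
  // and bytes 8-11 the reg_save_area pointer (see LowerVASTART).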
  // gpr_index
  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    VAListPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = GprIndex.getValue(1);

  if (VT == MVT::i64) {
    // Check if GprIndex is even
    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
                                 DAG.getConstant(1, dl, MVT::i32));
    SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
                                DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
    SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
                                          DAG.getConstant(1, dl, MVT::i32));
    // Align GprIndex to be even if it isn't
    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
                           GprIndex);
  }

  // fpr index is 1 byte after gpr
  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                               DAG.getConstant(1, dl, MVT::i32));

  // fpr
  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    FprPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = FprIndex.getValue(1);

  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                       DAG.getConstant(8, dl, MVT::i32));

  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                        DAG.getConstant(4, dl, MVT::i32));

  // areas
  SDValue OverflowArea =
      DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
  InChain = OverflowArea.getValue(1);

  SDValue RegSaveArea =
      DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
  InChain = RegSaveArea.getValue(1);

  // Select overflow_area if the register index is >= 8, i.e. all eight
  // argument registers of this class have already been consumed.
  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
                            DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);

  // adjustment constant gpr_index * 4/8
  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
                                    VT.isInteger() ? GprIndex : FprIndex,
                                    DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
                                                    MVT::i32));

  // OurReg = RegSaveArea + RegConstant
  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
                               RegConstant);

  // Floating types are 32 bytes into RegSaveArea
  if (VT.isFloatingPoint())
    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
                         DAG.getConstant(32, dl, MVT::i32));

  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                   VT.isInteger() ? GprIndex : FprIndex,
                                   DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
                                                   MVT::i32));

  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
                              VT.isInteger() ? VAListPtr : FprPtr,
                              MachinePointerInfo(SV), MVT::i8);

  // determine if we should load from reg_save_area or overflow_area
  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);

  // increase overflow_area by 4/8 if gpr/fpr index is >= 8
  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
                                          DAG.getConstant(VT.isInteger() ? 4 : 8,
                                                          dl, MVT::i32));

  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
                             OverflowAreaPlusN);

  InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
                              MachinePointerInfo(), MVT::i32);

  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
}

SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");

  // We have to copy the entire va_list struct:
  // 2 * sizeof(char) + 2 bytes of padding + 2 * sizeof(char *) = 12 bytes.
  return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
                       DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
                       false, true, false, MachinePointerInfo(),
                       MachinePointerInfo());
}

SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  if (Subtarget.isAIXABI())
    report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");

  return Op.getOperand(0);
}

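// Scan an inline-asm node for definitions or clobbers of the link register;
// if one is found, mark the function as requiring an LR save so that the
// prologue/epilogue preserve it.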
SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();

  assert((Op.getOpcode() == ISD::INLINEASM ||
          Op.getOpcode() == ISD::INLINEASM_BR) &&
         "Expecting Inline ASM node.");

  // If an LR store is already known to be required then there is no point in
  // checking this ASM as well.
  if (MFI.isLRStoreRequired())
    return Op;

  // Inline ASM nodes have an optional last operand that is an incoming Flag of
  // type MVT::Glue. We want to ignore this last operand if that is the case.
  unsigned NumOps = Op.getNumOperands();
  if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
    --NumOps;

  // Check all operands that may contain the LR.
  for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
    const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
    unsigned NumVals = Flags.getNumOperandRegisters();
    ++i; // Skip the ID value.

    switch (Flags.getKind()) {
    default:
      llvm_unreachable("Bad flags!");
    case InlineAsm::Kind::RegUse:
    case InlineAsm::Kind::Imm:
    case InlineAsm::Kind::Mem:
      i += NumVals;
      break;
    case InlineAsm::Kind::Clobber:
    case InlineAsm::Kind::RegDef:
    case InlineAsm::Kind::RegDefEarlyClobber: {
      for (; NumVals; --NumVals, ++i) {
        Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
        if (Reg != PPC::LR && Reg != PPC::LR8)
          continue;
        MFI.setLRStoreRequired();
        return Op;
      }
      break;
    }
    }
  }

  return Op;
}

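// On ELF targets a trampoline is initialized by calling the
// __trampoline_setup libcall, which copies the trampoline code into place
// and records the nested function pointer and static chain value.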
SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
                                                SelectionDAG &DAG) const {
  if (Subtarget.isAIXABI())
    report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");

  SDValue Chain = Op.getOperand(0);
  SDValue Trmp = Op.getOperand(1); // trampoline
  SDValue FPtr = Op.getOperand(2); // nested function
  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
  SDLoc dl(Op);

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool isPPC64 = (PtrVT == MVT::i64);
  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());

  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;

  Entry.Ty = IntPtrTy;
  Entry.Node = Trmp; Args.push_back(Entry);

  // TrampSize == (isPPC64 ? 48 : 40);
  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
                               isPPC64 ? MVT::i64 : MVT::i32);
  Args.push_back(Entry);

  Entry.Node = FPtr; Args.push_back(Entry);
  Entry.Node = Nest; Args.push_back(Entry);

  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
      CallingConv::C, Type::getVoidTy(*DAG.getContext()),
      DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));

  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  return CallResult.second;
}

SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDLoc dl(Op);

  if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
                        MachinePointerInfo(SV));
  }

  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
  // We suppose the given va_list is already allocated.
  //
  // typedef struct {
  //  char gpr;     /* index into the array of 8 GPRs
  //                 * stored in the register save area
  //                 * gpr=0 corresponds to r3,
  //                 * gpr=1 to r4, etc.
  //                 */
  //  char fpr;     /* index into the array of 8 FPRs
  //                 * stored in the register save area
  //                 * fpr=0 corresponds to f1,
  //                 * fpr=1 to f2, etc.
  //                 */
  //  char *overflow_arg_area;
  //                /* location on stack that holds
  //                 * the next overflow argument
  //                 */
  //  char *reg_save_area;
  //                /* where r3:r10 and f1:f8 (if saved)
  //                 * are stored
  //                 */
  // } va_list[1];

  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
                                            PtrVT);
  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 PtrVT);

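  // The constants below are deltas between consecutive va_list fields, not
  // absolute offsets: starting from the fpr byte at offset 1, ConstStackOffset
  // (3) advances to overflow_arg_area at offset 4, and ConstFrameOffset (4)
  // advances to reg_save_area at offset 8.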
  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);

  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
  SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);

  uint64_t FPROffset = 1;
  SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  // Store first byte : number of int regs
  SDValue firstStore =
      DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
                        MachinePointerInfo(SV), MVT::i8);
  uint64_t nextOffset = FPROffset;
  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
                                ConstFPROffset);

  // Store second byte : number of float regs
  SDValue secondStore =
      DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
                        MachinePointerInfo(SV, nextOffset), MVT::i8);
  nextOffset += StackOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);

  // Store second word : arguments given on stack
  SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
                                    MachinePointerInfo(SV, nextOffset));
  nextOffset += FrameOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);

  // Store third word : arguments given in registers
  return DAG.getStore(thirdStore, dl, FR, nextPtr,
                      MachinePointerInfo(SV, nextOffset));
}

/// FPR - The set of FP registers that should be allocated for arguments
/// on Darwin and AIX.
static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
                                PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
                                PPC::F11, PPC::F12, PPC::F13};

/// CalculateStackSlotSize - Calculates the size reserved for this argument on
/// the stack.
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
                                       unsigned PtrByteSize) {
  unsigned ArgSize = ArgVT.getStoreSize();
  if (Flags.isByVal())
    ArgSize = Flags.getByValSize();

  // Round up to multiples of the pointer size, except for array members,
  // which are always packed.
  if (!Flags.isInConsecutiveRegs())
    ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

  return ArgSize;
}

/// CalculateStackSlotAlignment - Calculates the alignment of this argument
/// on the stack.
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
                                         ISD::ArgFlagsTy Flags,
                                         unsigned PtrByteSize) {
  Align Alignment(PtrByteSize);

  // Altivec parameters are padded to a 16 byte boundary.
  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
      ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
      ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
      ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
    Alignment = Align(16);

  // ByVal parameters are aligned as requested.
  if (Flags.isByVal()) {
    auto BVAlign = Flags.getNonZeroByValAlign();
    if (BVAlign > PtrByteSize) {
      if (BVAlign.value() % PtrByteSize != 0)
        llvm_unreachable(
            "ByVal alignment is not a multiple of the pointer size");

      Alignment = BVAlign;
    }
  }

  // Array members are always packed to their original alignment.
  if (Flags.isInConsecutiveRegs()) {
    // If the array member was split into multiple registers, the first
    // needs to be aligned to the size of the full type.  (Except for
    // ppcf128, which is only aligned as its f64 components.)
    if (Flags.isSplit() && OrigVT != MVT::ppcf128)
      Alignment = Align(OrigVT.getStoreSize());
    else
      Alignment = Align(ArgVT.getStoreSize());
  }

  return Alignment;
}

/// CalculateStackSlotUsed - Return whether this argument will use its
/// stack slot (instead of being passed in registers).  ArgOffset,
/// AvailableFPRs, and AvailableVRs must hold the current argument
/// position, and will be updated to account for this argument.
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
                                   unsigned PtrByteSize, unsigned LinkageSize,
                                   unsigned ParamAreaSize, unsigned &ArgOffset,
                                   unsigned &AvailableFPRs,
                                   unsigned &AvailableVRs) {
  bool UseMemory = false;

  // Respect alignment of argument on the stack.
  Align Alignment =
      CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
  ArgOffset = alignTo(ArgOffset, Alignment);
  // If there's no space left in the argument save area, we must
  // use memory (this check also catches zero-sized arguments).
  if (ArgOffset >= LinkageSize + ParamAreaSize)
    UseMemory = true;

  // Allocate argument on the stack.
  ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
  if (Flags.isInConsecutiveRegsLast())
    ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
  // If we overran the argument save area, we must use memory
  // (this check catches arguments passed partially in memory)
  if (ArgOffset > LinkageSize + ParamAreaSize)
    UseMemory = true;

  // However, if the argument is actually passed in an FPR or a VR,
  // we don't use memory after all.
  if (!Flags.isByVal()) {
    if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
      if (AvailableFPRs > 0) {
        --AvailableFPRs;
        return false;
      }
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
        ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
      if (AvailableVRs > 0) {
        --AvailableVRs;
        return false;
      }
  }

  return UseMemory;
}

/// EnsureStackAlignment - Round stack frame size up from NumBytes to
/// ensure minimum alignment required for target.
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
                                     unsigned NumBytes) {
  return alignTo(NumBytes, Lowering->getStackAlign());
}

SDValue PPCTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  if (Subtarget.isAIXABI())
    return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                    InVals);
  if (Subtarget.is64BitELFABI())
    return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                       InVals);
  assert(Subtarget.is32BitELFABI());
  return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                     InVals);
}

SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  // 32-bit SVR4 ABI Stack Frame Layout:
  //              +-----------------------------------+
  //        +-->  |            Back chain             |
  //        |     +-----------------------------------+
  //        |     | Floating-point register save area |
  //        |     +-----------------------------------+
  //        |     |    General register save area     |
  //        |     +-----------------------------------+
  //        |     |          CR save word             |
  //        |     +-----------------------------------+
  //        |     |         VRSAVE save word          |
  //        |     +-----------------------------------+
  //        |     |         Alignment padding         |
  //        |     +-----------------------------------+
  //        |     |     Vector register save area     |
  //        |     +-----------------------------------+
  //        |     |       Local variable space        |
  //        |     +-----------------------------------+
  //        |     |        Parameter list area        |
  //        |     +-----------------------------------+
  //        |     |           LR save word            |
  //        |     +-----------------------------------+
  // SP-->  +---  |            Back chain             |
  //              +-----------------------------------+
  //
  // Specifications:
  //   System V Application Binary Interface PowerPC Processor Supplement
  //   AltiVec Technology Programming Interface Manual

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  const Align PtrAlign(4);

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                    *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(LinkageSize, PtrAlign);
  if (useSoftFloat())
    CCInfo.PreAnalyzeFormalArguments(Ins);

  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
  CCInfo.clearWasPPCF128();

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];

    // Arguments stored in registers.
    if (VA.isRegLoc()) {
      const TargetRegisterClass *RC;
      EVT ValVT = VA.getValVT();

      switch (ValVT.getSimpleVT().SimpleTy) {
      default:
        llvm_unreachable("ValVT not supported by formal arguments Lowering");
      case MVT::i1:
      case MVT::i32:
        RC = &PPC::GPRCRegClass;
        break;
      case MVT::f32:
        if (Subtarget.hasP8Vector())
          RC = &PPC::VSSRCRegClass;
        else if (Subtarget.hasSPE())
          RC = &PPC::GPRCRegClass;
        else
          RC = &PPC::F4RCRegClass;
        break;
      case MVT::f64:
        if (Subtarget.hasVSX())
          RC = &PPC::VSFRCRegClass;
        else if (Subtarget.hasSPE())
          // SPE passes doubles in GPR pairs.
          RC = &PPC::GPRCRegClass;
        else
          RC = &PPC::F8RCRegClass;
        break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
        RC = &PPC::VRRCRegClass;
        break;
      case MVT::v4f32:
        RC = &PPC::VRRCRegClass;
        break;
      case MVT::v2f64:
      case MVT::v2i64:
        RC = &PPC::VRRCRegClass;
        break;
      }

      SDValue ArgValue;
      // Transform the arguments stored in physical registers into
      // virtual ones.
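      // With SPE, an f64 argument arrives split across two consecutive
      // 32-bit GPRs; reassemble the halves (swapped as needed for
      // endianness) with the BUILD_SPE64 node.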
      if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
        assert(i + 1 < e && "No second half of double precision argument");
        Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
        Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
        SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
        SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
        if (!Subtarget.isLittleEndian())
          std::swap(ArgValueLo, ArgValueHi);
        ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
                               ArgValueHi);
      } else {
        Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
                                      ValVT == MVT::i1 ? MVT::i32 : ValVT);
        if (ValVT == MVT::i1)
          ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
      }

      InVals.push_back(ArgValue);
    } else {
      // Argument stored in memory.
      assert(VA.isMemLoc());

      // Get the extended size of the argument type in stack
      unsigned ArgSize = VA.getLocVT().getStoreSize();
      // Get the actual size of the argument type
      unsigned ObjSize = VA.getValVT().getStoreSize();
      unsigned ArgOffset = VA.getLocMemOffset();
      // Stack objects in PPC32 are right justified.
      ArgOffset += ArgSize - ObjSize;
      int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);

      // Create load nodes to retrieve arguments from the stack.
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(
          DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
    }
  }

  // Assign locations to all of the incoming aggregate by value arguments.
  // Aggregates passed by value are stored in the local variable space of the
  // caller's stack frame, right above the parameter list area.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                      ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);

  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea = CCByValInfo.getStackSize();
  MinReservedArea = std::max(MinReservedArea, LinkageSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized function's reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  SmallVector<SDValue, 8> MemOps;

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    static const MCPhysReg GPArgRegs[] = {
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
    };
    const unsigned NumGPArgRegs = std::size(GPArgRegs);

    static const MCPhysReg FPArgRegs[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8
    };
    unsigned NumFPArgRegs = std::size(FPArgRegs);

    if (useSoftFloat() || hasSPE())
      NumFPArgRegs = 0;

    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
    FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));

    // Make room for NumGPArgRegs and NumFPArgRegs.
    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
                NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;

    FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
        PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));

    FuncInfo->setVarArgsFrameIndex(
        MFI.CreateStackObject(Depth, Align(8), false));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // The fixed integer arguments of a variadic function are stored to the
    // VarArgsFrameIndex on the stack so that they may be loaded by
    // dereferencing the result of va_next.
    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }

    // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
    // is set.
    // The double arguments are stored to the VarArgsFrameIndex
    // on the stack.
    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by eight for the next argument to store
      SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
                                       PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}

// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
                                             EVT ObjectVT, SelectionDAG &DAG,
                                             SDValue ArgVal,
                                             const SDLoc &dl) const {
  if (Flags.isSExt())
    ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
                         DAG.getValueType(ObjectVT));
  else if (Flags.isZExt())
    ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
                         DAG.getValueType(ObjectVT));

  return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
}

SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
         "fastcc not supported on varargs functions");

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 8;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = std::size(GPR);
  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
  const unsigned Num_VR_Regs = std::size(VR);

  // Do a first pass over the arguments to determine whether the ABI
  // guarantees that our caller has allocated the parameter save area
  // on its stack frame.  In the ELFv1 ABI, this is always the case;
  // in the ELFv2 ABI, it is true if this is a vararg function or if
  // any parameter is located in a stack slot.

  bool HasParameterArea = !isELFv2ABI || isVarArg;
  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = Num_FPR_Regs;
  unsigned AvailableVRs = Num_VR_Regs;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
    if (Ins[i].Flags.isNest())
      continue;

    if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
                               PtrByteSize, LinkageSize, ParamAreaSize,
                               NumBytes, AvailableFPRs, AvailableVRs))
      HasParameterArea = true;
  }

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  SmallVector<SDValue, 8> MemOps;
  Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    EVT OrigVT = Ins[ArgNo].ArgVT;
    unsigned ObjSize = ObjectVT.getStoreSize();
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    if (Ins[ArgNo].isOrigArg()) {
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    // We re-align the argument offset for each argument, except when using the
    // fast calling convention, when we need to make sure we do that only when
    // we'll actually use a stack slot.
    unsigned CurArgOffset;
    Align Alignment;
    auto ComputeArgOffset = [&]() {
      /* Respect alignment of argument on the stack.  */
      Alignment =
          CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
      ArgOffset = alignTo(ArgOffset, Alignment);
      CurArgOffset = ArgOffset;
    };

    if (CallConv != CallingConv::Fast) {
      ComputeArgOffset();

      /* Compute GPR index associated with argument offset.  */
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
    }

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      if (CallConv == CallingConv::Fast)
        ComputeArgOffset();

      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Empty aggregate parameters do not take up registers.  Examples:
      //   struct { } a;
      //   union  { } b;
      //   int c[0];
      // etc.  However, we have to provide a place-holder in InVals, so
      // pretend we have an 8-byte item at the current address for that
      // purpose.
      if (!ObjSize) {
        int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
        SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
        InVals.push_back(FIN);
        continue;
      }

      // Create a stack object covering all stack doublewords occupied
      // by the argument.  If the argument is (fully or partially) on
      // the stack, or if the argument is fully in registers but the
      // caller has allocated the parameter save anyway, we can refer
      // directly to the caller's stack frame.  Otherwise, create a
      // local copy in our own frame.
      int FI;
      if (HasParameterArea ||
          ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
        FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
      else
        FI = MFI.CreateStackObject(ArgSize, Alignment, false);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

      // Handle aggregates smaller than 8 bytes.
      if (ObjSize < PtrByteSize) {
        // The value of the object is its address, which differs from the
        // address of the enclosing doubleword on big-endian systems.
        SDValue Arg = FIN;
        if (!isLittleEndian) {
          SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
          Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
        }
        InVals.push_back(Arg);

        if (GPR_idx != Num_GPR_Regs) {
          Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
          FuncInfo->addLiveInAttr(VReg, Flags);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
          SDValue Store =
              DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
                                MachinePointerInfo(&*FuncArg), ObjType);
          MemOps.push_back(Store);
        }
        // Whether we copied from a register or not, advance the offset
        // into the parameter save area by a full doubleword.
        ArgOffset += PtrByteSize;
        continue;
      }

      // The value of the object is its address, which is the address of
      // its first stack doubleword.
      InVals.push_back(FIN);

      // Store whatever pieces of the object are in registers to memory.
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        if (GPR_idx == Num_GPR_Regs)
          break;

        Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
        SDValue Addr = FIN;
        if (j) {
          SDValue Off = DAG.getConstant(j, dl, PtrVT);
          Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
        }
        unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
        EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
        SDValue Store =
            DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
                              MachinePointerInfo(&*FuncArg, j), ObjType);
        MemOps.push_back(Store);
        ++GPR_idx;
      }
      ArgOffset += ArgSize;
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (Flags.isNest()) {
        // The 'nest' parameter, if any, is passed in R11.
        Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        break;
      }

      // These can be scalar arguments or elements of an integer array type
      // passed directly.  Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != Num_GPR_Regs) {
        Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogeneous
      // float aggregates.
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx],
                              Subtarget.hasP8Vector()
                                  ? &PPC::VSSRCRegClass
                                  : &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
                                                ? &PPC::VSFRCRegClass
                                                : &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
        // once we support fp <-> gpr moves.

        // This can only ever happen in the presence of f32 array types,
        // since otherwise we never run out of FPRs before running out
        // of GPRs.
        Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

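        // The f32 payload occupies only one half of the i64 GPR; select the
        // correct half based on the argument's offset and the target's
        // endianness before truncating and bitcasting back to f32.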
4707 | if (ObjectVT == MVT::f32) { |
4708 | if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0)) |
4709 | ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal, |
4710 | DAG.getConstant(32, dl, MVT::i32)); |
4711 | ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); |
4712 | } |
4713 | |
4714 | ArgVal = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: ObjectVT, Operand: ArgVal); |
4715 | } else { |
4716 | if (CallConv == CallingConv::Fast) |
4717 | ComputeArgOffset(); |
4718 | |
4719 | needsLoad = true; |
4720 | } |
4721 | |
4722 | // When passing an array of floats, the array occupies consecutive |
4723 | // space in the argument area; only round up to the next doubleword |
4724 | // at the end of the array. Otherwise, each float takes 8 bytes. |
4725 | if (CallConv != CallingConv::Fast || needsLoad) { |
4726 | ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize; |
4727 | ArgOffset += ArgSize; |
4728 | if (Flags.isInConsecutiveRegsLast()) |
4729 | ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; |
4730 | } |
4731 | break; |
4732 | case MVT::v4f32: |
4733 | case MVT::v4i32: |
4734 | case MVT::v8i16: |
4735 | case MVT::v16i8: |
4736 | case MVT::v2f64: |
4737 | case MVT::v2i64: |
4738 | case MVT::v1i128: |
4739 | case MVT::f128: |
4740 | // These can be scalar arguments or elements of a vector array type |
4741 | // passed directly. The latter are used to implement ELFv2 homogenous |
4742 | // vector aggregates. |
4743 | if (VR_idx != Num_VR_Regs) { |
4744 | Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); |
4745 | ArgVal = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: ObjectVT); |
4746 | ++VR_idx; |
4747 | } else { |
4748 | if (CallConv == CallingConv::Fast) |
4749 | ComputeArgOffset(); |
4750 | needsLoad = true; |
4751 | } |
4752 | if (CallConv != CallingConv::Fast || needsLoad) |
4753 | ArgOffset += 16; |
4754 | break; |
4755 | } |
4756 | |
4757 | // We need to load the argument to a virtual register if we determined |
4758 | // above that we ran out of physical registers of the appropriate type. |
4759 | if (needsLoad) { |
4760 | if (ObjSize < ArgSize && !isLittleEndian) |
4761 | CurArgOffset += ArgSize - ObjSize; |
4762 | int FI = MFI.CreateFixedObject(Size: ObjSize, SPOffset: CurArgOffset, IsImmutable: isImmutable); |
4763 | SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT); |
4764 | ArgVal = DAG.getLoad(VT: ObjectVT, dl, Chain, Ptr: FIN, PtrInfo: MachinePointerInfo()); |
4765 | } |
4766 | |
4767 | InVals.push_back(Elt: ArgVal); |
4768 | } |
4769 | |
4770 | // Area that is at least reserved in the caller of this function. |
4771 | unsigned MinReservedArea; |
4772 | if (HasParameterArea) |
4773 | MinReservedArea = std::max(a: ArgOffset, b: LinkageSize + 8 * PtrByteSize); |
4774 | else |
4775 | MinReservedArea = LinkageSize; |
4776 | |
4777 | // Set the size that is at least reserved in caller of this function. Tail |
4778 | // call optimized functions' reserved stack space needs to be aligned so that |
4779 | // taking the difference between two stack areas will result in an aligned |
4780 | // stack. |
4781 | MinReservedArea = |
4782 | EnsureStackAlignment(Lowering: Subtarget.getFrameLowering(), NumBytes: MinReservedArea); |
4783 | FuncInfo->setMinReservedArea(MinReservedArea); |
4784 | |
4785 | // If the function takes variable number of arguments, make a frame index for |
4786 | // the start of the first vararg value... for expansion of llvm.va_start. |
4787 | // On ELFv2ABI spec, it writes: |
4788 | // C programs that are intended to be *portable* across different compilers |
4789 | // and architectures must use the header file <stdarg.h> to deal with variable |
4790 | // argument lists. |
4791 | if (isVarArg && MFI.hasVAStart()) { |
4792 | int Depth = ArgOffset; |
4793 | |
4794 | FuncInfo->setVarArgsFrameIndex( |
4795 | MFI.CreateFixedObject(Size: PtrByteSize, SPOffset: Depth, IsImmutable: true)); |
4796 | SDValue FIN = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), VT: PtrVT); |
4797 | |
4798 | // If this function is vararg, store any remaining integer argument regs |
4799 | // to their spots on the stack so that they may be loaded by dereferencing |
4800 | // the result of va_next. |
4801 | for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; |
4802 | GPR_idx < Num_GPR_Regs; ++GPR_idx) { |
4803 | Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); |
4804 | SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: PtrVT); |
4805 | SDValue Store = |
4806 | DAG.getStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: FIN, PtrInfo: MachinePointerInfo()); |
4807 | MemOps.push_back(Elt: Store); |
4808 | // Increment the address by four for the next argument to store |
4809 | SDValue PtrOff = DAG.getConstant(Val: PtrByteSize, DL: dl, VT: PtrVT); |
4810 | FIN = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrOff.getValueType(), N1: FIN, N2: PtrOff); |
4811 | } |
4812 | } |
4813 | |
4814 | if (!MemOps.empty()) |
4815 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); |
4816 | |
4817 | return Chain; |
4818 | } |
4819 | |
/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tail call.
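/// For example, if the caller's minimum reserved area is 112 bytes but the
/// tail call needs a 144-byte parameter area, SPDiff is -32 and the stack
/// must be grown by 32 bytes before branching to the callee.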
4822 | static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, |
4823 | unsigned ParamSize) { |
4824 | |
4825 | if (!isTailCall) return 0; |
4826 | |
4827 | PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>(); |
4828 | unsigned CallerMinReservedArea = FI->getMinReservedArea(); |
4829 | int SPDiff = (int)CallerMinReservedArea - (int)ParamSize; |
4830 | // Remember only if the new adjustment is bigger. |
4831 | if (SPDiff < FI->getTailCallSPDelta()) |
4832 | FI->setTailCallSPDelta(SPDiff); |
4833 | |
4834 | return SPDiff; |
4835 | } |
4836 | |
4837 | static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV); |
4838 | |
4839 | static bool callsShareTOCBase(const Function *Caller, |
4840 | const GlobalValue *CalleeGV, |
4841 | const TargetMachine &TM) { |
4842 | // It does not make sense to call callsShareTOCBase() with a caller that |
4843 | // is PC Relative since PC Relative callers do not have a TOC. |
4844 | #ifndef NDEBUG |
4845 | const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(F: *Caller); |
4846 | assert(!STICaller->isUsingPCRelativeCalls() && |
4847 | "PC Relative callers do not have a TOC and cannot share a TOC Base" ); |
4848 | #endif |
4849 | |
4850 | // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols |
4851 | // don't have enough information to determine if the caller and callee share |
4852 | // the same TOC base, so we have to pessimistically assume they don't for |
4853 | // correctness. |
4854 | if (!CalleeGV) |
4855 | return false; |
4856 | |
4857 | // If the callee is preemptable, then the static linker will use a plt-stub |
4858 | // which saves the toc to the stack, and needs a nop after the call |
4859 | // instruction to convert to a toc-restore. |
4860 | if (!TM.shouldAssumeDSOLocal(GV: CalleeGV)) |
4861 | return false; |
4862 | |
4863 | // Functions with PC Relative enabled may clobber the TOC in the same DSO. |
4864 | // We may need a TOC restore in the situation where the caller requires a |
4865 | // valid TOC but the callee is PC Relative and does not. |
4866 | const Function *F = dyn_cast<Function>(Val: CalleeGV); |
4867 | const GlobalAlias *Alias = dyn_cast<GlobalAlias>(Val: CalleeGV); |
4868 | |
4869 | // If we have an Alias we can try to get the function from there. |
4870 | if (Alias) { |
4871 | const GlobalObject *GlobalObj = Alias->getAliaseeObject(); |
4872 | F = dyn_cast<Function>(Val: GlobalObj); |
4873 | } |
4874 | |
4875 | // If we still have no valid function pointer we do not have enough |
4876 | // information to determine if the callee uses PC Relative calls so we must |
4877 | // assume that it does. |
4878 | if (!F) |
4879 | return false; |
4880 | |
4881 | // If the callee uses PC Relative we cannot guarantee that the callee won't |
4882 | // clobber the TOC of the caller and so we must assume that the two |
4883 | // functions do not share a TOC base. |
4884 | const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(F: *F); |
4885 | if (STICallee->isUsingPCRelativeCalls()) |
4886 | return false; |
4887 | |
4888 | // If the GV is not a strong definition then we need to assume it can be |
4889 | // replaced by another function at link time. The function that replaces |
4890 | // it may not share the same TOC as the caller since the callee may be |
4891 | // replaced by a PC Relative version of the same function. |
4892 | if (!CalleeGV->isStrongDefinitionForLinker()) |
4893 | return false; |
4894 | |
4895 | // The medium and large code models are expected to provide a sufficiently |
4896 | // large TOC to provide all data addressing needs of a module with a |
4897 | // single TOC. |
4898 | if (CodeModel::Medium == TM.getCodeModel() || |
4899 | CodeModel::Large == TM.getCodeModel()) |
4900 | return true; |
4901 | |
4902 | // Any explicitly-specified sections and section prefixes must also match. |
4903 | // Also, if we're using -ffunction-sections, then each function is always in |
4904 | // a different section (the same is true for COMDAT functions). |
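// For example, two functions only remain candidates here if neither names
// an explicit section (or both name the same one), e.g. via
// __attribute__((section("mytext"))).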
4905 | if (TM.getFunctionSections() || CalleeGV->hasComdat() || |
4906 | Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection()) |
4907 | return false; |
4908 | if (const auto *F = dyn_cast<Function>(Val: CalleeGV)) { |
4909 | if (F->getSectionPrefix() != Caller->getSectionPrefix()) |
4910 | return false; |
4911 | } |
4912 | |
4913 | return true; |
4914 | } |
4915 | |
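// Returns true if any of the outgoing arguments would have to be passed in a
// stack slot rather than entirely in registers. For example, with only
// X3-X10 available below, the ninth integer argument of a call spills to the
// parameter save area and forces a stack slot.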
4916 | static bool |
4917 | needStackSlotPassParameters(const PPCSubtarget &Subtarget, |
4918 | const SmallVectorImpl<ISD::OutputArg> &Outs) { |
4919 | assert(Subtarget.is64BitELFABI()); |
4920 | |
4921 | const unsigned PtrByteSize = 8; |
4922 | const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); |
4923 | |
4924 | static const MCPhysReg GPR[] = { |
4925 | PPC::X3, PPC::X4, PPC::X5, PPC::X6, |
4926 | PPC::X7, PPC::X8, PPC::X9, PPC::X10, |
4927 | }; |
4928 | static const MCPhysReg VR[] = { |
4929 | PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, |
4930 | PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 |
4931 | }; |
4932 | |
4933 | const unsigned NumGPRs = std::size(GPR); |
4934 | const unsigned NumFPRs = 13; |
4935 | const unsigned NumVRs = std::size(VR); |
4936 | const unsigned ParamAreaSize = NumGPRs * PtrByteSize; |
4937 | |
4938 | unsigned NumBytes = LinkageSize; |
4939 | unsigned AvailableFPRs = NumFPRs; |
4940 | unsigned AvailableVRs = NumVRs; |
4941 | |
4942 | for (const ISD::OutputArg& Param : Outs) { |
4943 | if (Param.Flags.isNest()) continue; |
4944 | |
4945 | if (CalculateStackSlotUsed(ArgVT: Param.VT, OrigVT: Param.ArgVT, Flags: Param.Flags, PtrByteSize, |
4946 | LinkageSize, ParamAreaSize, ArgOffset&: NumBytes, |
4947 | AvailableFPRs, AvailableVRs)) |
4948 | return true; |
4949 | } |
4950 | return false; |
4951 | } |
4952 | |
4953 | static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) { |
4954 | if (CB.arg_size() != CallerFn->arg_size()) |
4955 | return false; |
4956 | |
4957 | auto CalleeArgIter = CB.arg_begin(); |
4958 | auto CalleeArgEnd = CB.arg_end(); |
4959 | Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin(); |
4960 | |
4961 | for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) { |
4962 | const Value* CalleeArg = *CalleeArgIter; |
4963 | const Value* CallerArg = &(*CallerArgIter); |
4964 | if (CalleeArg == CallerArg) |
4965 | continue; |
4966 | |
4967 | // e.g. @caller([4 x i64] %a, [4 x i64] %b) { |
4968 | // tail call @callee([4 x i64] undef, [4 x i64] %b) |
4969 | // } |
// 1st argument of callee is undef and has the same type as the caller's.
4971 | if (CalleeArg->getType() == CallerArg->getType() && |
4972 | isa<UndefValue>(Val: CalleeArg)) |
4973 | continue; |
4974 | |
4975 | return false; |
4976 | } |
4977 | |
4978 | return true; |
4979 | } |
4980 | |
// Returns true if TCO is possible between the caller's and callee's
// calling conventions.
4983 | static bool |
4984 | areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, |
4985 | CallingConv::ID CalleeCC) { |
4986 | // Tail calls are possible with fastcc and ccc. |
4987 | auto isTailCallableCC = [] (CallingConv::ID CC){ |
4988 | return CC == CallingConv::C || CC == CallingConv::Fast; |
4989 | }; |
4990 | if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC)) |
4991 | return false; |
4992 | |
4993 | // We can safely tail call both fastcc and ccc callees from a c calling |
4994 | // convention caller. If the caller is fastcc, we may have less stack space |
4995 | // than a non-fastcc caller with the same signature so disable tail-calls in |
4996 | // that case. |
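// For example, a ccc caller may tail call either a ccc or a fastcc callee,
// but a fastcc caller may only tail call another fastcc callee.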
4997 | return CallerCC == CallingConv::C || CallerCC == CalleeCC; |
4998 | } |
4999 | |
5000 | bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4( |
5001 | const GlobalValue *CalleeGV, CallingConv::ID CalleeCC, |
5002 | CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg, |
5003 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
5004 | const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc, |
5005 | bool isCalleeExternalSymbol) const { |
5006 | bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt; |
5007 | |
5008 | if (DisableSCO && !TailCallOpt) return false; |
5009 | |
5010 | // Variadic argument functions are not supported. |
5011 | if (isVarArg) return false; |
5012 | |
// Check that the calling conventions are compatible for TCO.
5014 | if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC)) |
5015 | return false; |
5016 | |
// A caller with any byval parameter is not supported.
5018 | if (any_of(Range: Ins, P: [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); })) |
5019 | return false; |
5020 | |
// Likewise, a callee with any byval parameter is not supported.
// Note: This is a quick workaround, because in some cases, e.g. when the
// caller's stack size > the callee's stack size, we are still able to apply
// sibling call optimization. For example, gcc is able to do SCO for caller1
// in the following example, but not for caller2.
5026 | // struct test { |
5027 | // long int a; |
5028 | // char ary[56]; |
5029 | // } gTest; |
5030 | // __attribute__((noinline)) int callee(struct test v, struct test *b) { |
5031 | // b->a = v.a; |
5032 | // return 0; |
5033 | // } |
5034 | // void caller1(struct test a, struct test c, struct test *b) { |
5035 | // callee(gTest, b); } |
5036 | // void caller2(struct test *b) { callee(gTest, b); } |
5037 | if (any_of(Range: Outs, P: [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); })) |
5038 | return false; |
5039 | |
5040 | // If callee and caller use different calling conventions, we cannot pass |
5041 | // parameters on stack since offsets for the parameter area may be different. |
5042 | if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs)) |
5043 | return false; |
5044 | |
5045 | // All variants of 64-bit ELF ABIs without PC-Relative addressing require that |
5046 | // the caller and callee share the same TOC for TCO/SCO. If the caller and |
5047 | // callee potentially have different TOC bases then we cannot tail call since |
5048 | // we need to restore the TOC pointer after the call. |
5049 | // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977 |
5050 | // We cannot guarantee this for indirect calls or calls to external functions. |
5051 | // When PC-Relative addressing is used, the concept of the TOC is no longer |
5052 | // applicable so this check is not required. |
5053 | // Check first for indirect calls. |
5054 | if (!Subtarget.isUsingPCRelativeCalls() && |
5055 | !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol) |
5056 | return false; |
5057 | |
5058 | // Check if we share the TOC base. |
5059 | if (!Subtarget.isUsingPCRelativeCalls() && |
5060 | !callsShareTOCBase(Caller: CallerFunc, CalleeGV, TM: getTargetMachine())) |
5061 | return false; |
5062 | |
5063 | // TCO allows altering callee ABI, so we don't have to check further. |
5064 | if (CalleeCC == CallingConv::Fast && TailCallOpt) |
5065 | return true; |
5066 | |
5067 | if (DisableSCO) return false; |
5068 | |
// If the callee uses the same argument list as the caller, we can apply SCO
// in this case. If not, we need to check whether the callee needs stack
// slots for passing arguments.
// PC Relative tail calls may not have a CallBase.
// If there is no CallBase we cannot verify if we have the same argument
// list, so assume that we don't.
5075 | if (CB && !hasSameArgumentList(CallerFn: CallerFunc, CB: *CB) && |
5076 | needStackSlotPassParameters(Subtarget, Outs)) |
5077 | return false; |
5078 | else if (!CB && needStackSlotPassParameters(Subtarget, Outs)) |
5079 | return false; |
5080 | |
5081 | return true; |
5082 | } |
5083 | |
5084 | /// IsEligibleForTailCallOptimization - Check whether the call is eligible |
5085 | /// for tail call optimization. Targets which want to do tail call |
5086 | /// optimization should implement this function. |
5087 | bool PPCTargetLowering::IsEligibleForTailCallOptimization( |
5088 | const GlobalValue *CalleeGV, CallingConv::ID CalleeCC, |
5089 | CallingConv::ID CallerCC, bool isVarArg, |
5090 | const SmallVectorImpl<ISD::InputArg> &Ins) const { |
5091 | if (!getTargetMachine().Options.GuaranteedTailCallOpt) |
5092 | return false; |
5093 | |
5094 | // Variable argument functions are not supported. |
5095 | if (isVarArg) |
5096 | return false; |
5097 | |
5098 | if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) { |
5099 | // Functions containing by val parameters are not supported. |
5100 | if (any_of(Range: Ins, P: [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); })) |
5101 | return false; |
5102 | |
5103 | // Non-PIC/GOT tail calls are supported. |
5104 | if (getTargetMachine().getRelocationModel() != Reloc::PIC_) |
5105 | return true; |
5106 | |
5107 | // At the moment we can only do local tail calls (in same module, hidden |
5108 | // or protected) if we are generating PIC. |
5109 | if (CalleeGV) |
5110 | return CalleeGV->hasHiddenVisibility() || |
5111 | CalleeGV->hasProtectedVisibility(); |
5112 | } |
5113 | |
5114 | return false; |
5115 | } |
5116 | |
/// isBLACompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BLA instruction.
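/// For example, 0x01FFFFFC qualifies (word-aligned and within the
/// sign-extended 26-bit range), while 0x02000000 or any address that is not
/// a multiple of 4 does not.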
5119 | static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { |
5120 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: Op); |
5121 | if (!C) return nullptr; |
5122 | |
5123 | int Addr = C->getZExtValue(); |
5124 | if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. |
5125 | SignExtend32<26>(X: Addr) != Addr) |
5126 | return nullptr; // Top 6 bits have to be sext of immediate. |
5127 | |
5128 | return DAG |
5129 | .getConstant( |
5130 | Val: (int)C->getZExtValue() >> 2, DL: SDLoc(Op), |
5131 | VT: DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout())) |
5132 | .getNode(); |
5133 | } |
5134 | |
5135 | namespace { |
5136 | |
5137 | struct TailCallArgumentInfo { |
5138 | SDValue Arg; |
5139 | SDValue FrameIdxOp; |
5140 | int FrameIdx = 0; |
5141 | |
5142 | TailCallArgumentInfo() = default; |
5143 | }; |
5144 | |
5145 | } // end anonymous namespace |
5146 | |
5147 | /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. |
5148 | static void StoreTailCallArgumentsToStackSlot( |
5149 | SelectionDAG &DAG, SDValue Chain, |
5150 | const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs, |
5151 | SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) { |
5152 | for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) { |
5153 | SDValue Arg = TailCallArgs[i].Arg; |
5154 | SDValue FIN = TailCallArgs[i].FrameIdxOp; |
5155 | int FI = TailCallArgs[i].FrameIdx; |
5156 | // Store relative to framepointer. |
5157 | MemOpChains.push_back(Elt: DAG.getStore( |
5158 | Chain, dl, Val: Arg, Ptr: FIN, |
5159 | PtrInfo: MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI))); |
5160 | } |
5161 | } |
5162 | |
5163 | /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to |
5164 | /// the appropriate stack slot for the tail call optimized function call. |
5165 | static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, |
5166 | SDValue OldRetAddr, SDValue OldFP, |
5167 | int SPDiff, const SDLoc &dl) { |
5168 | if (SPDiff) { |
5169 | // Calculate the new stack slot for the return address. |
5170 | MachineFunction &MF = DAG.getMachineFunction(); |
5171 | const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); |
5172 | const PPCFrameLowering *FL = Subtarget.getFrameLowering(); |
5173 | bool isPPC64 = Subtarget.isPPC64(); |
5174 | int SlotSize = isPPC64 ? 8 : 4; |
5175 | int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset(); |
5176 | int NewRetAddr = MF.getFrameInfo().CreateFixedObject(Size: SlotSize, |
5177 | SPOffset: NewRetAddrLoc, IsImmutable: true); |
5178 | EVT VT = isPPC64 ? MVT::i64 : MVT::i32; |
5179 | SDValue NewRetAddrFrIdx = DAG.getFrameIndex(FI: NewRetAddr, VT); |
5180 | Chain = DAG.getStore(Chain, dl, Val: OldRetAddr, Ptr: NewRetAddrFrIdx, |
5181 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: NewRetAddr)); |
5182 | } |
5183 | return Chain; |
5184 | } |
5185 | |
/// CalculateTailCallArgDest - Remember the argument for later processing.
/// Calculate the position of the argument.
5188 | static void |
5189 | CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, |
5190 | SDValue Arg, int SPDiff, unsigned ArgOffset, |
5191 | SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) { |
5192 | int Offset = ArgOffset + SPDiff; |
5193 | uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8; |
5194 | int FI = MF.getFrameInfo().CreateFixedObject(Size: OpSize, SPOffset: Offset, IsImmutable: true); |
5195 | EVT VT = isPPC64 ? MVT::i64 : MVT::i32; |
5196 | SDValue FIN = DAG.getFrameIndex(FI, VT); |
5197 | TailCallArgumentInfo Info; |
5198 | Info.Arg = Arg; |
5199 | Info.FrameIdxOp = FIN; |
5200 | Info.FrameIdx = FI; |
5201 | TailCallArguments.push_back(Elt: Info); |
5202 | } |
5203 | |
/// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and
/// return address stack slots. Returns the chain as result and the loaded
/// values in LROpOut/FPOpOut. Used when tail calling.
5207 | SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr( |
5208 | SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut, |
5209 | SDValue &FPOpOut, const SDLoc &dl) const { |
5210 | if (SPDiff) { |
5211 | // Load the LR and FP stack slot for later adjusting. |
5212 | EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32; |
5213 | LROpOut = getReturnAddrFrameIndex(DAG); |
5214 | LROpOut = DAG.getLoad(VT, dl, Chain, Ptr: LROpOut, PtrInfo: MachinePointerInfo()); |
5215 | Chain = SDValue(LROpOut.getNode(), 1); |
5216 | } |
5217 | return Chain; |
5218 | } |
5219 | |
/// CreateCopyOfByValArgument - Make a copy of an aggregate at the address
/// specified by "Src" to the address "Dst" of size "Size". Alignment
/// information is specified by the specific parameter attribute. The copy
/// will be passed as a byval function parameter.
5224 | /// Sometimes what we are copying is the end of a larger object, the part that |
5225 | /// does not fit in registers. |
5226 | static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, |
5227 | SDValue Chain, ISD::ArgFlagsTy Flags, |
5228 | SelectionDAG &DAG, const SDLoc &dl) { |
5229 | SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); |
5230 | return DAG.getMemcpy(Chain, dl, Dst, Src, Size: SizeNode, |
5231 | Alignment: Flags.getNonZeroByValAlign(), isVol: false, AlwaysInline: false, isTailCall: false, |
5232 | DstPtrInfo: MachinePointerInfo(), SrcPtrInfo: MachinePointerInfo()); |
5233 | } |
5234 | |
5235 | /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of |
5236 | /// tail calls. |
5237 | static void LowerMemOpCallTo( |
5238 | SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, |
5239 | SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, |
5240 | bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains, |
5241 | SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) { |
5242 | EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout()); |
5243 | if (!isTailCall) { |
5244 | if (isVector) { |
5245 | SDValue StackPtr; |
5246 | if (isPPC64) |
5247 | StackPtr = DAG.getRegister(PPC::X1, MVT::i64); |
5248 | else |
5249 | StackPtr = DAG.getRegister(PPC::R1, MVT::i32); |
5250 | PtrOff = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: StackPtr, |
5251 | N2: DAG.getConstant(Val: ArgOffset, DL: dl, VT: PtrVT)); |
5252 | } |
MemOpChains.push_back(
Elt: DAG.getStore(Chain, dl, Val: Arg, Ptr: PtrOff, PtrInfo: MachinePointerInfo()));
} else {
// Calculate and remember argument location.
CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
TailCallArguments);
}
}
5259 | |
5260 | static void |
5261 | PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, |
5262 | const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, |
5263 | SDValue FPOp, |
5264 | SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) { |
5265 | // Emit a sequence of copyto/copyfrom virtual registers for arguments that |
5266 | // might overwrite each other in case of tail call optimization. |
5267 | SmallVector<SDValue, 8> MemOpChains2; |
5268 | // Do not flag preceding copytoreg stuff together with the following stuff. |
5269 | InGlue = SDValue(); |
5270 | StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArgs: TailCallArguments, |
5271 | MemOpChains&: MemOpChains2, dl); |
5272 | if (!MemOpChains2.empty()) |
5273 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2); |
5274 | |
5275 | // Store the return address to the appropriate stack slot. |
5276 | Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, OldRetAddr: LROp, OldFP: FPOp, SPDiff, dl); |
5277 | |
5278 | // Emit callseq_end just before tailcall node. |
5279 | Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: 0, Glue: InGlue, DL: dl); |
5280 | InGlue = Chain.getValue(R: 1); |
5281 | } |
5282 | |
5283 | // Is this global address that of a function that can be called by name? (as |
5284 | // opposed to something that must hold a descriptor for an indirect call). |
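// For example, a reference to a defined function qualifies, while a
// thread-local value or a data symbol holding a function pointer does not.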
5285 | static bool isFunctionGlobalAddress(const GlobalValue *GV) { |
5286 | if (GV) { |
5287 | if (GV->isThreadLocal()) |
5288 | return false; |
5289 | |
5290 | return GV->getValueType()->isFunctionTy(); |
5291 | } |
5292 | |
5293 | return false; |
5294 | } |
5295 | |
5296 | SDValue PPCTargetLowering::LowerCallResult( |
5297 | SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg, |
5298 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
5299 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { |
5300 | SmallVector<CCValAssign, 16> RVLocs; |
5301 | CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, |
5302 | *DAG.getContext()); |
5303 | |
5304 | CCRetInfo.AnalyzeCallResult( |
5305 | Ins, Fn: (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold) |
5306 | ? RetCC_PPC_Cold |
5307 | : RetCC_PPC); |
5308 | |
5309 | // Copy all of the result registers out of their specified physreg. |
5310 | for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { |
5311 | CCValAssign &VA = RVLocs[i]; |
5312 | assert(VA.isRegLoc() && "Can only return in registers!" ); |
5313 | |
5314 | SDValue Val; |
5315 | |
5316 | if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) { |
5317 | SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, |
5318 | InGlue); |
5319 | Chain = Lo.getValue(R: 1); |
5320 | InGlue = Lo.getValue(R: 2); |
5321 | VA = RVLocs[++i]; // skip ahead to next loc |
5322 | SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, |
5323 | InGlue); |
5324 | Chain = Hi.getValue(R: 1); |
5325 | InGlue = Hi.getValue(R: 2); |
5326 | if (!Subtarget.isLittleEndian()) |
5327 | std::swap (a&: Lo, b&: Hi); |
5328 | Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi); |
5329 | } else { |
5330 | Val = DAG.getCopyFromReg(Chain, dl, |
5331 | Reg: VA.getLocReg(), VT: VA.getLocVT(), Glue: InGlue); |
5332 | Chain = Val.getValue(R: 1); |
5333 | InGlue = Val.getValue(R: 2); |
5334 | } |
5335 | |
5336 | switch (VA.getLocInfo()) { |
5337 | default: llvm_unreachable("Unknown loc info!" ); |
5338 | case CCValAssign::Full: break; |
5339 | case CCValAssign::AExt: |
5340 | Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: Val); |
5341 | break; |
5342 | case CCValAssign::ZExt: |
5343 | Val = DAG.getNode(Opcode: ISD::AssertZext, DL: dl, VT: VA.getLocVT(), N1: Val, |
5344 | N2: DAG.getValueType(VA.getValVT())); |
5345 | Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: Val); |
5346 | break; |
5347 | case CCValAssign::SExt: |
5348 | Val = DAG.getNode(Opcode: ISD::AssertSext, DL: dl, VT: VA.getLocVT(), N1: Val, |
5349 | N2: DAG.getValueType(VA.getValVT())); |
5350 | Val = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: VA.getValVT(), Operand: Val); |
5351 | break; |
5352 | } |
5353 | |
5354 | InVals.push_back(Elt: Val); |
5355 | } |
5356 | |
5357 | return Chain; |
5358 | } |
5359 | |
5360 | static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG, |
5361 | const PPCSubtarget &Subtarget, bool isPatchPoint) { |
5362 | auto *G = dyn_cast<GlobalAddressSDNode>(Val: Callee); |
5363 | const GlobalValue *GV = G ? G->getGlobal() : nullptr; |
5364 | |
5365 | // PatchPoint calls are not indirect. |
5366 | if (isPatchPoint) |
5367 | return false; |
5368 | |
5369 | if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Val: Callee)) |
5370 | return false; |
5371 | |
// Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot,
// because the immediate function pointer points to a descriptor instead of
// a function entry point. The ELFv2 ABI cannot use a BLA because the function
// pointer immediate points to the global entry point, while the BLA would
// need to jump to the local entry point (see rL211174).
5377 | if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() && |
5378 | isBLACompatibleAddress(Op: Callee, DAG)) |
5379 | return false; |
5380 | |
5381 | return true; |
5382 | } |
5383 | |
5384 | // AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls. |
5385 | static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) { |
5386 | return Subtarget.isAIXABI() || |
5387 | (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()); |
5388 | } |
5389 | |
5390 | static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags, |
5391 | const Function &Caller, const SDValue &Callee, |
5392 | const PPCSubtarget &Subtarget, |
5393 | const TargetMachine &TM, |
5394 | bool IsStrictFPCall = false) { |
5395 | if (CFlags.IsTailCall) |
5396 | return PPCISD::TC_RETURN; |
5397 | |
5398 | unsigned RetOpc = 0; |
5399 | // This is a call through a function pointer. |
5400 | if (CFlags.IsIndirect) { |
// AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
// indirect calls. The save of the caller's TOC pointer to the stack will be
// inserted into the DAG as part of call lowering. The restore of the TOC
// pointer is modeled by using a pseudo instruction for the call opcode that
// represents the 2-instruction sequence of an indirect branch and link,
// immediately followed by a load of the TOC pointer from the stack save
// slot into gpr2. For the 64-bit ELFv2 ABI with PCRel, do not restore the
// TOC as it is not saved or used.
5409 | RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC |
5410 | : PPCISD::BCTRL; |
5411 | } else if (Subtarget.isUsingPCRelativeCalls()) { |
5412 | assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI." ); |
5413 | RetOpc = PPCISD::CALL_NOTOC; |
5414 | } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) { |
// The ABIs that maintain a TOC pointer across calls need to have a nop
// immediately following the call instruction if the caller and callee may
// have different TOC bases. At link time, if the linker determines that the
// calls may not share a TOC base, the call is redirected to a trampoline
// inserted by the linker. The trampoline will (among other things) save the
// caller's TOC pointer at an ABI-designated offset in the linkage area and
// the linker will rewrite the nop to be a load of the TOC pointer from the
// linkage area into gpr2.
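// For example, on 64-bit ELFv2 a potentially cross-TOC call is emitted as:
//   bl callee
//   nop        # may be rewritten by the linker to: ld 2, 24(1)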
5423 | auto *G = dyn_cast<GlobalAddressSDNode>(Val: Callee); |
5424 | const GlobalValue *GV = G ? G->getGlobal() : nullptr; |
5425 | RetOpc = |
5426 | callsShareTOCBase(Caller: &Caller, CalleeGV: GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP; |
5427 | } else |
5428 | RetOpc = PPCISD::CALL; |
5429 | if (IsStrictFPCall) { |
5430 | switch (RetOpc) { |
5431 | default: |
5432 | llvm_unreachable("Unknown call opcode" ); |
5433 | case PPCISD::BCTRL_LOAD_TOC: |
5434 | RetOpc = PPCISD::BCTRL_LOAD_TOC_RM; |
5435 | break; |
5436 | case PPCISD::BCTRL: |
5437 | RetOpc = PPCISD::BCTRL_RM; |
5438 | break; |
5439 | case PPCISD::CALL_NOTOC: |
5440 | RetOpc = PPCISD::CALL_NOTOC_RM; |
5441 | break; |
5442 | case PPCISD::CALL: |
5443 | RetOpc = PPCISD::CALL_RM; |
5444 | break; |
5445 | case PPCISD::CALL_NOP: |
5446 | RetOpc = PPCISD::CALL_NOP_RM; |
5447 | break; |
5448 | } |
5449 | } |
5450 | return RetOpc; |
5451 | } |
5452 | |
5453 | static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, |
5454 | const SDLoc &dl, const PPCSubtarget &Subtarget) { |
5455 | if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI()) |
5456 | if (SDNode *Dest = isBLACompatibleAddress(Op: Callee, DAG)) |
5457 | return SDValue(Dest, 0); |
5458 | |
5459 | // Returns true if the callee is local, and false otherwise. |
5460 | auto isLocalCallee = [&]() { |
5461 | const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Val: Callee); |
5462 | const GlobalValue *GV = G ? G->getGlobal() : nullptr; |
5463 | |
5464 | return DAG.getTarget().shouldAssumeDSOLocal(GV) && |
5465 | !isa_and_nonnull<GlobalIFunc>(Val: GV); |
5466 | }; |
5467 | |
5468 | // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in |
5469 | // a static relocation model causes some versions of GNU LD (2.17.50, at |
5470 | // least) to force BSS-PLT, instead of secure-PLT, even if all objects are |
5471 | // built with secure-PLT. |
5472 | bool UsePlt = |
5473 | Subtarget.is32BitELFABI() && !isLocalCallee() && |
5474 | Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_; |
5475 | |
5476 | const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) { |
5477 | const TargetMachine &TM = Subtarget.getTargetMachine(); |
5478 | const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering(); |
5479 | MCSymbolXCOFF *S = |
5480 | cast<MCSymbolXCOFF>(Val: TLOF->getFunctionEntryPointSymbol(Func: GV, TM)); |
5481 | |
5482 | MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout()); |
5483 | return DAG.getMCSymbol(Sym: S, VT: PtrVT); |
5484 | }; |
5485 | |
5486 | auto *G = dyn_cast<GlobalAddressSDNode>(Val: Callee); |
5487 | const GlobalValue *GV = G ? G->getGlobal() : nullptr; |
5488 | if (isFunctionGlobalAddress(GV)) { |
5489 | const GlobalValue *GV = cast<GlobalAddressSDNode>(Val: Callee)->getGlobal(); |
5490 | |
5491 | if (Subtarget.isAIXABI()) { |
5492 | assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX." ); |
5493 | return getAIXFuncEntryPointSymbolSDNode(GV); |
5494 | } |
5495 | return DAG.getTargetGlobalAddress(GV, DL: dl, VT: Callee.getValueType(), offset: 0, |
5496 | TargetFlags: UsePlt ? PPCII::MO_PLT : 0); |
5497 | } |
5498 | |
5499 | if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Val: Callee)) { |
5500 | const char *SymName = S->getSymbol(); |
5501 | if (Subtarget.isAIXABI()) { |
5502 | // If there exists a user-declared function whose name is the same as the |
5503 | // ExternalSymbol's, then we pick up the user-declared version. |
5504 | const Module *Mod = DAG.getMachineFunction().getFunction().getParent(); |
5505 | if (const Function *F = |
5506 | dyn_cast_or_null<Function>(Val: Mod->getNamedValue(Name: SymName))) |
5507 | return getAIXFuncEntryPointSymbolSDNode(F); |
5508 | |
5509 | // On AIX, direct function calls reference the symbol for the function's |
5510 | // entry point, which is named by prepending a "." before the function's |
5511 | // C-linkage name. A Qualname is returned here because an external |
5512 | // function entry point is a csect with XTY_ER property. |
5513 | const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) { |
5514 | auto &Context = DAG.getMachineFunction().getMMI().getContext(); |
5515 | MCSectionXCOFF *Sec = Context.getXCOFFSection( |
5516 | Section: (Twine("." ) + Twine(SymName)).str(), K: SectionKind::getMetadata(), |
5517 | CsectProp: XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER)); |
5518 | return Sec->getQualNameSymbol(); |
5519 | }; |
5520 | |
5521 | SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data(); |
5522 | } |
5523 | return DAG.getTargetExternalSymbol(Sym: SymName, VT: Callee.getValueType(), |
5524 | TargetFlags: UsePlt ? PPCII::MO_PLT : 0); |
5525 | } |
5526 | |
5527 | // No transformation needed. |
5528 | assert(Callee.getNode() && "What no callee?" ); |
5529 | return Callee; |
5530 | } |
5531 | |
5532 | static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) { |
5533 | assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START && |
5534 | "Expected a CALLSEQ_STARTSDNode." ); |
5535 | |
// The last value produced is the chain, except when the node has glue. If
// the node has glue, then the last value is the glue, and the chain is the
// second-to-last value.
5539 | SDValue LastValue = CallSeqStart.getValue(R: CallSeqStart->getNumValues() - 1); |
5540 | if (LastValue.getValueType() != MVT::Glue) |
5541 | return LastValue; |
5542 | |
5543 | return CallSeqStart.getValue(R: CallSeqStart->getNumValues() - 2); |
5544 | } |
5545 | |
// Creates the node that moves a function's address into the count register
// to prepare for an indirect call instruction.
5548 | static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, |
5549 | SDValue &Glue, SDValue &Chain, |
5550 | const SDLoc &dl) { |
5551 | SDValue MTCTROps[] = {Chain, Callee, Glue}; |
5552 | EVT ReturnTypes[] = {MVT::Other, MVT::Glue}; |
5553 | Chain = DAG.getNode(Opcode: PPCISD::MTCTR, DL: dl, ResultTys: ArrayRef(ReturnTypes, 2), |
5554 | Ops: ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2)); |
5555 | // The glue is the second value produced. |
5556 | Glue = Chain.getValue(R: 1); |
5557 | } |
5558 | |
5559 | static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, |
5560 | SDValue &Glue, SDValue &Chain, |
5561 | SDValue CallSeqStart, |
5562 | const CallBase *CB, const SDLoc &dl, |
5563 | bool hasNest, |
5564 | const PPCSubtarget &Subtarget) { |
5565 | // Function pointers in the 64-bit SVR4 ABI do not point to the function |
5566 | // entry point, but to the function descriptor (the function entry point |
5567 | // address is part of the function descriptor though). |
5568 | // The function descriptor is a three doubleword structure with the |
5569 | // following fields: function entry point, TOC base address and |
5570 | // environment pointer. |
5571 | // Thus for a call through a function pointer, the following actions need |
5572 | // to be performed: |
// 1. Save the TOC of the caller in the TOC save area of its stack
// frame (this is done in LowerCall_64SVR4() or LowerCall_AIX()).
5575 | // 2. Load the address of the function entry point from the function |
5576 | // descriptor. |
5577 | // 3. Load the TOC of the callee from the function descriptor into r2. |
5578 | // 4. Load the environment pointer from the function descriptor into |
5579 | // r11. |
5580 | // 5. Branch to the function entry point address. |
5581 | // 6. On return of the callee, the TOC of the caller needs to be |
5582 | // restored (this is done in FinishCall()). |
5583 | // |
5584 | // The loads are scheduled at the beginning of the call sequence, and the |
5585 | // register copies are flagged together to ensure that no other |
5586 | // operations can be scheduled in between. E.g. without flagging the |
5587 | // copies together, a TOC access in the caller could be scheduled between |
5588 | // the assignment of the callee TOC and the branch to the callee, which leads |
5589 | // to incorrect code. |
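//
// For illustration, a 64-bit ELFv1 function descriptor for foo is laid out
// roughly as:
//   .quad .L.foo          # function entry point
//   .quad .TOC.@tocbase   # TOC base address
//   .quad 0               # environment pointer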
5590 | |
5591 | // Start by loading the function address from the descriptor. |
5592 | SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart); |
5593 | auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors() |
5594 | ? (MachineMemOperand::MODereferenceable | |
5595 | MachineMemOperand::MOInvariant) |
5596 | : MachineMemOperand::MONone; |
5597 | |
5598 | MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr); |
5599 | |
5600 | // Registers used in building the DAG. |
5601 | const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister(); |
5602 | const MCRegister TOCReg = Subtarget.getTOCPointerRegister(); |
5603 | |
5604 | // Offsets of descriptor members. |
5605 | const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset(); |
5606 | const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset(); |
5607 | |
5608 | const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32; |
5609 | const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4); |
5610 | |
// One load for the function's entry point address.
5612 | SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI, |
5613 | Alignment, MMOFlags); |
5614 | |
5615 | // One for loading the TOC anchor for the module that contains the called |
5616 | // function. |
5617 | SDValue TOCOff = DAG.getIntPtrConstant(Val: TOCAnchorOffset, DL: dl); |
5618 | SDValue AddTOC = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: RegVT, N1: Callee, N2: TOCOff); |
5619 | SDValue TOCPtr = |
5620 | DAG.getLoad(RegVT, dl, LDChain, AddTOC, |
5621 | MPI.getWithOffset(O: TOCAnchorOffset), Alignment, MMOFlags); |
5622 | |
5623 | // One for loading the environment pointer. |
5624 | SDValue PtrOff = DAG.getIntPtrConstant(Val: EnvPtrOffset, DL: dl); |
5625 | SDValue AddPtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: RegVT, N1: Callee, N2: PtrOff); |
5626 | SDValue LoadEnvPtr = |
5627 | DAG.getLoad(RegVT, dl, LDChain, AddPtr, |
5628 | MPI.getWithOffset(O: EnvPtrOffset), Alignment, MMOFlags); |
5631 | // Then copy the newly loaded TOC anchor to the TOC pointer. |
5632 | SDValue TOCVal = DAG.getCopyToReg(Chain, dl, Reg: TOCReg, N: TOCPtr, Glue); |
5633 | Chain = TOCVal.getValue(R: 0); |
5634 | Glue = TOCVal.getValue(R: 1); |
5635 | |
5636 | // If the function call has an explicit 'nest' parameter, it takes the |
5637 | // place of the environment pointer. |
5638 | assert((!hasNest || !Subtarget.isAIXABI()) && |
5639 | "Nest parameter is not supported on AIX." ); |
5640 | if (!hasNest) { |
5641 | SDValue EnvVal = DAG.getCopyToReg(Chain, dl, Reg: EnvPtrReg, N: LoadEnvPtr, Glue); |
5642 | Chain = EnvVal.getValue(R: 0); |
5643 | Glue = EnvVal.getValue(R: 1); |
5644 | } |
5645 | |
5646 | // The rest of the indirect call sequence is the same as the non-descriptor |
5647 | // DAG. |
5648 | prepareIndirectCall(DAG, Callee&: LoadFuncPtr, Glue, Chain, dl); |
5649 | } |
5650 | |
5651 | static void |
5652 | buildCallOperands(SmallVectorImpl<SDValue> &Ops, |
5653 | PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, |
5654 | SelectionDAG &DAG, |
5655 | SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, |
5656 | SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, |
5657 | const PPCSubtarget &Subtarget) { |
5658 | const bool IsPPC64 = Subtarget.isPPC64(); |
5659 | // MVT for a general purpose register. |
5660 | const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32; |
5661 | |
5662 | // First operand is always the chain. |
5663 | Ops.push_back(Elt: Chain); |
5664 | |
// If it's a direct call, pass the callee as the second operand.
5666 | if (!CFlags.IsIndirect) |
5667 | Ops.push_back(Elt: Callee); |
5668 | else { |
5669 | assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect." ); |
5670 | |
5671 | // For the TOC based ABIs, we have saved the TOC pointer to the linkage area |
5672 | // on the stack (this would have been done in `LowerCall_64SVR4` or |
5673 | // `LowerCall_AIX`). The call instruction is a pseudo instruction that |
5674 | // represents both the indirect branch and a load that restores the TOC |
5675 | // pointer from the linkage area. The operand for the TOC restore is an add |
5676 | // of the TOC save offset to the stack pointer. This must be the second |
5677 | // operand: after the chain input but before any other variadic arguments. |
5678 | // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not |
5679 | // saved or used. |
5680 | if (isTOCSaveRestoreRequired(Subtarget)) { |
5681 | const MCRegister StackPtrReg = Subtarget.getStackPointerRegister(); |
5682 | |
5683 | SDValue StackPtr = DAG.getRegister(Reg: StackPtrReg, VT: RegVT); |
5684 | unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); |
5685 | SDValue TOCOff = DAG.getIntPtrConstant(Val: TOCSaveOffset, DL: dl); |
5686 | SDValue AddTOC = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: RegVT, N1: StackPtr, N2: TOCOff); |
5687 | Ops.push_back(Elt: AddTOC); |
5688 | } |
5689 | |
5690 | // Add the register used for the environment pointer. |
5691 | if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest) |
5692 | Ops.push_back(Elt: DAG.getRegister(Reg: Subtarget.getEnvironmentPointerRegister(), |
5693 | VT: RegVT)); |
5696 | // Add CTR register as callee so a bctr can be emitted later. |
5697 | if (CFlags.IsTailCall) |
5698 | Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT)); |
5699 | } |
5700 | |
5701 | // If this is a tail call add stack pointer delta. |
5702 | if (CFlags.IsTailCall) |
5703 | Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32)); |
5704 | |
5705 | // Add argument registers to the end of the list so that they are known live |
5706 | // into the call. |
5707 | for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) |
5708 | Ops.push_back(Elt: DAG.getRegister(Reg: RegsToPass[i].first, |
5709 | VT: RegsToPass[i].second.getValueType())); |
5710 | |
5711 | // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is |
5712 | // no way to mark dependencies as implicit here. |
5713 | // We will add the R2/X2 dependency in EmitInstrWithCustomInserter. |
5714 | if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) && |
5715 | !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls()) |
5716 | Ops.push_back(Elt: DAG.getRegister(Reg: Subtarget.getTOCPointerRegister(), VT: RegVT)); |
5717 | |
5718 | // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls |
5719 | if (CFlags.IsVarArg && Subtarget.is32BitELFABI()) |
5720 | Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); |
5721 | |
5722 | // Add a register mask operand representing the call-preserved registers. |
5723 | const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
5724 | const uint32_t *Mask = |
5725 | TRI->getCallPreservedMask(MF: DAG.getMachineFunction(), CFlags.CallConv); |
5726 | assert(Mask && "Missing call preserved mask for calling convention" ); |
5727 | Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask)); |
5728 | |
5729 | // If the glue is valid, it is the last operand. |
5730 | if (Glue.getNode()) |
5731 | Ops.push_back(Elt: Glue); |
5732 | } |
5733 | |
5734 | SDValue PPCTargetLowering::FinishCall( |
5735 | CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, |
5736 | SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue, |
5737 | SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff, |
5738 | unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins, |
5739 | SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const { |
5740 | |
5741 | if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) || |
5742 | Subtarget.isAIXABI()) |
5743 | setUsesTOCBasePtr(DAG); |
5744 | |
5745 | unsigned CallOpc = |
5746 | getCallOpcode(CFlags, Caller: DAG.getMachineFunction().getFunction(), Callee, |
5747 | Subtarget, TM: DAG.getTarget(), IsStrictFPCall: CB ? CB->isStrictFP() : false); |
5748 | |
5749 | if (!CFlags.IsIndirect) |
5750 | Callee = transformCallee(Callee, DAG, dl, Subtarget); |
5751 | else if (Subtarget.usesFunctionDescriptors()) |
5752 | prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB, |
5753 | dl, hasNest: CFlags.HasNest, Subtarget); |
5754 | else |
5755 | prepareIndirectCall(DAG, Callee, Glue, Chain, dl); |
5756 | |
5757 | // Build the operand list for the call instruction. |
5758 | SmallVector<SDValue, 8> Ops; |
5759 | buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee, |
5760 | SPDiff, Subtarget); |
5761 | |
5762 | // Emit tail call. |
5763 | if (CFlags.IsTailCall) { |
// Indirect tail calls when using PC Relative calls do not have the same
// constraints.
5766 | assert(((Callee.getOpcode() == ISD::Register && |
5767 | cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) || |
5768 | Callee.getOpcode() == ISD::TargetExternalSymbol || |
5769 | Callee.getOpcode() == ISD::TargetGlobalAddress || |
5770 | isa<ConstantSDNode>(Callee) || |
5771 | (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) && |
5772 | "Expecting a global address, external symbol, absolute value, " |
5773 | "register or an indirect tail call when PC Relative calls are " |
5774 | "used." ); |
5775 | // PC Relative calls also use TC_RETURN as the way to mark tail calls. |
5776 | assert(CallOpc == PPCISD::TC_RETURN && |
5777 | "Unexpected call opcode for a tail call." ); |
5778 | DAG.getMachineFunction().getFrameInfo().setHasTailCall(); |
5779 | SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops); |
5780 | DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CFlags.NoMerge); |
5781 | return Ret; |
5782 | } |
5783 | |
5784 | std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}}; |
5785 | Chain = DAG.getNode(Opcode: CallOpc, DL: dl, ResultTys: ReturnTypes, Ops); |
5786 | DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CFlags.NoMerge); |
5787 | Glue = Chain.getValue(R: 1); |
5788 | |
5789 | // When performing tail call optimization the callee pops its arguments off |
5790 | // the stack. Account for this here so these bytes can be pushed back on in |
5791 | // PPCFrameLowering::eliminateCallFramePseudoInstr. |
5792 | int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast && |
5793 | getTargetMachine().Options.GuaranteedTailCallOpt) |
5794 | ? NumBytes |
5795 | : 0; |
5796 | |
5797 | Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: BytesCalleePops, Glue, DL: dl); |
5798 | Glue = Chain.getValue(R: 1); |
5799 | |
5800 | return LowerCallResult(Chain, InGlue: Glue, CallConv: CFlags.CallConv, isVarArg: CFlags.IsVarArg, Ins, dl, |
5801 | DAG, InVals); |
5802 | } |
5803 | |
5804 | bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const { |
5805 | CallingConv::ID CalleeCC = CB->getCallingConv(); |
5806 | const Function *CallerFunc = CB->getCaller(); |
5807 | CallingConv::ID CallerCC = CallerFunc->getCallingConv(); |
5808 | const Function *CalleeFunc = CB->getCalledFunction(); |
5809 | if (!CalleeFunc) |
5810 | return false; |
5811 | const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(Val: CalleeFunc); |
5812 | |
5813 | SmallVector<ISD::OutputArg, 2> Outs; |
5814 | SmallVector<ISD::InputArg, 2> Ins; |
5815 | |
5816 | GetReturnInfo(CC: CalleeCC, ReturnType: CalleeFunc->getReturnType(), |
5817 | attr: CalleeFunc->getAttributes(), Outs, TLI: *this, |
5818 | DL: CalleeFunc->getParent()->getDataLayout()); |
5819 | |
5820 | return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB, |
5821 | isVarArg: CalleeFunc->isVarArg(), Outs, Ins, CallerFunc, |
5822 | isCalleeExternalSymbol: false /*isCalleeExternalSymbol*/); |
5823 | } |
5824 | |
5825 | bool PPCTargetLowering::isEligibleForTCO( |
5826 | const GlobalValue *CalleeGV, CallingConv::ID CalleeCC, |
5827 | CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg, |
5828 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
5829 | const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc, |
5830 | bool isCalleeExternalSymbol) const { |
5831 | if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall())) |
5832 | return false; |
5833 | |
5834 | if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) |
5835 | return IsEligibleForTailCallOptimization_64SVR4( |
5836 | CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc, |
5837 | isCalleeExternalSymbol); |
5838 | else |
5839 | return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC, |
5840 | isVarArg, Ins); |
5841 | } |
5842 | |
5843 | SDValue |
5844 | PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, |
5845 | SmallVectorImpl<SDValue> &InVals) const { |
5846 | SelectionDAG &DAG = CLI.DAG; |
5847 | SDLoc &dl = CLI.DL; |
5848 | SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; |
5849 | SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; |
5850 | SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; |
5851 | SDValue Chain = CLI.Chain; |
5852 | SDValue Callee = CLI.Callee; |
5853 | bool &isTailCall = CLI.IsTailCall; |
5854 | CallingConv::ID CallConv = CLI.CallConv; |
5855 | bool isVarArg = CLI.IsVarArg; |
5856 | bool isPatchPoint = CLI.IsPatchPoint; |
5857 | const CallBase *CB = CLI.CB; |
5858 | |
5859 | if (isTailCall) { |
5860 | MachineFunction &MF = DAG.getMachineFunction(); |
5861 | CallingConv::ID CallerCC = MF.getFunction().getCallingConv(); |
5862 | auto *G = dyn_cast<GlobalAddressSDNode>(Val&: Callee); |
5863 | const GlobalValue *GV = G ? G->getGlobal() : nullptr; |
5864 | bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Val: Callee); |
5865 | |
5866 | isTailCall = |
5867 | isEligibleForTCO(CalleeGV: GV, CalleeCC: CallConv, CallerCC, CB, isVarArg, Outs, Ins, |
5868 | CallerFunc: &(MF.getFunction()), isCalleeExternalSymbol: IsCalleeExternalSymbol); |
5869 | if (isTailCall) { |
5870 | ++NumTailCalls; |
5871 | if (!getTargetMachine().Options.GuaranteedTailCallOpt) |
5872 | ++NumSiblingCalls; |
5873 | |
// PC Relative calls no longer guarantee that the callee is a Global
// Address Node. The callee could be an indirect tail call in which
// case the SDValue for the callee could be a load (to load the address
// of a function pointer) or it may be a register copy (to move the
// address of the callee from a function parameter into a virtual
// register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5880 | assert((Subtarget.isUsingPCRelativeCalls() || |
5881 | isa<GlobalAddressSDNode>(Callee)) && |
5882 | "Callee should be an llvm::Function object." ); |
5883 | |
5884 | LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName() |
5885 | << "\nTCO callee: " ); |
5886 | LLVM_DEBUG(Callee.dump()); |
5887 | } |
5888 | } |
5889 | |
5890 | if (!isTailCall && CB && CB->isMustTailCall()) |
5891 | report_fatal_error(reason: "failed to perform tail call elimination on a call " |
5892 | "site marked musttail" ); |
5893 | |
5894 | // When long calls (i.e. indirect calls) are always used, calls are always |
5895 | // made via function pointer. If we have a function name, first translate it |
5896 | // into a pointer. |
5897 | if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Val: Callee) && |
5898 | !isTailCall) |
5899 | Callee = LowerGlobalAddress(Op: Callee, DAG); |
5900 | |
5901 | CallFlags CFlags( |
5902 | CallConv, isTailCall, isVarArg, isPatchPoint, |
5903 | isIndirectCall(Callee, DAG, Subtarget, isPatchPoint), |
5904 | // hasNest |
5905 | Subtarget.is64BitELFABI() && |
5906 | any_of(Range&: Outs, P: [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }), |
5907 | CLI.NoMerge); |
5908 | |
5909 | if (Subtarget.isAIXABI()) |
5910 | return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG, |
5911 | InVals, CB); |
5912 | |
5913 | assert(Subtarget.isSVR4ABI()); |
5914 | if (Subtarget.isPPC64()) |
5915 | return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG, |
5916 | InVals, CB); |
5917 | return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG, |
5918 | InVals, CB); |
5919 | } |
5920 | |
5921 | SDValue PPCTargetLowering::LowerCall_32SVR4( |
5922 | SDValue Chain, SDValue Callee, CallFlags CFlags, |
5923 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
5924 | const SmallVectorImpl<SDValue> &OutVals, |
5925 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
5926 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, |
5927 | const CallBase *CB) const { |
5928 | // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description |
5929 | // of the 32-bit SVR4 ABI stack frame layout. |
5930 | |
5931 | const CallingConv::ID CallConv = CFlags.CallConv; |
5932 | const bool IsVarArg = CFlags.IsVarArg; |
5933 | const bool IsTailCall = CFlags.IsTailCall; |
5934 | |
5935 | assert((CallConv == CallingConv::C || |
5936 | CallConv == CallingConv::Cold || |
5937 | CallConv == CallingConv::Fast) && "Unknown calling convention!" ); |
5938 | |
5939 | const Align PtrAlign(4); |
5940 | |
5941 | MachineFunction &MF = DAG.getMachineFunction(); |
5942 | |
// Mark this function as potentially containing a function that contains a
// tail call. As a consequence the frame pointer will be used for dynamic
// stack allocation and for restoring the caller's stack pointer in this
// function's epilogue. This is done because, by tail calling, the called
// function might overwrite the value in this function's (MF) stack pointer
// stack slot 0(SP).
5948 | if (getTargetMachine().Options.GuaranteedTailCallOpt && |
5949 | CallConv == CallingConv::Fast) |
5950 | MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); |
5951 | |
5952 | // Count how many bytes are to be pushed on the stack, including the linkage |
5953 | // area, parameter list area and the part of the local variable space which |
5954 | // contains copies of aggregates which are passed by value. |
5955 | |
5956 | // Assign locations to all of the outgoing arguments. |
5957 | SmallVector<CCValAssign, 16> ArgLocs; |
5958 | PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); |
5959 | |
5960 | // Reserve space for the linkage area on the stack. |
5961 | CCInfo.AllocateStack(Size: Subtarget.getFrameLowering()->getLinkageSize(), |
5962 | Alignment: PtrAlign); |
5963 | if (useSoftFloat()) |
5964 | CCInfo.PreAnalyzeCallOperands(Outs); |
5965 | |
5966 | if (IsVarArg) { |
5967 | // Handle fixed and variable vector arguments differently. |
5968 | // Fixed vector arguments go into registers as long as registers are |
5969 | // available. Variable vector arguments always go into memory. |
5970 | unsigned NumArgs = Outs.size(); |
5971 | |
5972 | for (unsigned i = 0; i != NumArgs; ++i) { |
5973 | MVT ArgVT = Outs[i].VT; |
5974 | ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; |
5975 | bool Result; |
5976 | |
5977 | if (Outs[i].IsFixed) { |
5978 | Result = CC_PPC32_SVR4(ValNo: i, ValVT: ArgVT, LocVT: ArgVT, LocInfo: CCValAssign::Full, ArgFlags, |
5979 | State&: CCInfo); |
5980 | } else { |
5981 | Result = CC_PPC32_SVR4_VarArg(ValNo: i, ValVT: ArgVT, LocVT: ArgVT, LocInfo: CCValAssign::Full, |
5982 | ArgFlags, State&: CCInfo); |
5983 | } |
5984 | |
5985 | if (Result) { |
5986 | #ifndef NDEBUG |
5987 | errs() << "Call operand #" << i << " has unhandled type " |
5988 | << ArgVT << "\n" ; |
5989 | #endif |
5990 | llvm_unreachable(nullptr); |
5991 | } |
5992 | } |
5993 | } else { |
5994 | // All arguments are treated the same. |
5995 | CCInfo.AnalyzeCallOperands(Outs, Fn: CC_PPC32_SVR4); |
5996 | } |
5997 | CCInfo.clearWasPPCF128(); |
5998 | |
5999 | // Assign locations to all of the outgoing aggregate by value arguments. |
6000 | SmallVector<CCValAssign, 16> ByValArgLocs; |
6001 | CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext()); |
6002 | |
6003 | // Reserve stack space for the allocations in CCInfo. |
6004 | CCByValInfo.AllocateStack(Size: CCInfo.getStackSize(), Alignment: PtrAlign); |
6005 | |
6006 | CCByValInfo.AnalyzeCallOperands(Outs, Fn: CC_PPC32_SVR4_ByVal); |
6007 | |
// Size of the linkage area, parameter list area, and the part of the local
// variable space where copies of aggregates which are passed by value are
// stored.
6011 | unsigned NumBytes = CCByValInfo.getStackSize(); |
6012 | |
6013 | // Calculate by how many bytes the stack has to be adjusted in case of tail |
6014 | // call optimization. |
6015 | int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall: IsTailCall, ParamSize: NumBytes); |
6016 | |
6017 | // Adjust the stack pointer for the new arguments... |
6018 | // These operations are automatically eliminated by the prolog/epilog pass |
6019 | Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: dl); |
6020 | SDValue CallSeqStart = Chain; |
6021 | |
// Load the return address and frame pointer so they can be moved somewhere
// else later.
6024 | SDValue LROp, FPOp; |
Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6026 | |
6027 | // Set up a copy of the stack pointer for use loading and storing any |
6028 | // arguments that may not fit in the registers available for argument |
6029 | // passing. |
6030 | SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32); |
6031 | |
6032 | SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; |
6033 | SmallVector<TailCallArgumentInfo, 8> TailCallArguments; |
6034 | SmallVector<SDValue, 8> MemOpChains; |
6035 | |
6036 | bool seenFloatArg = false; |
6037 | // Walk the register/memloc assignments, inserting copies/loads. |
6038 | // i - Tracks the index into the list of registers allocated for the call |
6039 | // RealArgIdx - Tracks the index into the list of actual function arguments |
6040 | // j - Tracks the index into the list of byval arguments |
6041 | for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size(); |
6042 | i != e; |
6043 | ++i, ++RealArgIdx) { |
6044 | CCValAssign &VA = ArgLocs[i]; |
6045 | SDValue Arg = OutVals[RealArgIdx]; |
6046 | ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags; |
6047 | |
6048 | if (Flags.isByVal()) { |
6049 | // Argument is an aggregate which is passed by value, thus we need to |
6050 | // create a copy of it in the local variable space of the current stack |
6051 | // frame (which is the stack frame of the caller) and pass the address of |
6052 | // this copy to the callee. |
assert((j < ByValArgLocs.size()) && "Index out of bounds!");
CCValAssign &ByValVA = ByValArgLocs[j++];
assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
6056 | |
// Memory reserved in the local variable space of the caller's stack
// frame.
6058 | unsigned LocMemOffset = ByValVA.getLocMemOffset(); |
6059 | |
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
StackPtr, PtrOff);
6063 | |
6064 | // Create a copy of the argument in the local area of the current |
6065 | // stack frame. |
6066 | SDValue MemcpyCall = |
CreateCopyOfByValArgument(Arg, PtrOff,
CallSeqStart.getNode()->getOperand(0),
Flags, DAG, dl);

// This must go outside the CALLSEQ_START..END.
SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
SDLoc(MemcpyCall));
DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
NewCallSeqStart.getNode());
6076 | Chain = CallSeqStart = NewCallSeqStart; |
6077 | |
6078 | // Pass the address of the aggregate copy on the stack either in a |
6079 | // physical register or in the parameter list area of the current stack |
6080 | // frame to the callee. |
6081 | Arg = PtrOff; |
6082 | } |
6083 | |
6084 | // When useCRBits() is true, there can be i1 arguments. |
6085 | // It is because getRegisterType(MVT::i1) => MVT::i1, |
6086 | // and for other integer types getRegisterType() => MVT::i32. |
6087 | // Extend i1 and ensure callee will get i32. |
6088 | if (Arg.getValueType() == MVT::i1) |
6089 | Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, |
6090 | dl, MVT::i32, Arg); |
6091 | |
6092 | if (VA.isRegLoc()) { |
6093 | seenFloatArg |= VA.getLocVT().isFloatingPoint(); |
6094 | // Put argument in a physical register. |
6095 | if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) { |
6096 | bool IsLE = Subtarget.isLittleEndian(); |
6097 | SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg, |
6098 | DAG.getIntPtrConstant(IsLE ? 0 : 1, dl)); |
RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
SVal.getValue(0)));
} else
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6106 | } else { |
6107 | // Put argument in the parameter list area of the current stack frame. |
6108 | assert(VA.isMemLoc()); |
6109 | unsigned LocMemOffset = VA.getLocMemOffset(); |
6110 | |
6111 | if (!IsTailCall) { |
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
StackPtr, PtrOff);

MemOpChains.push_back(
DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6118 | } else { |
6119 | // Calculate and remember argument location. |
CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
6121 | TailCallArguments); |
6122 | } |
6123 | } |
6124 | } |
6125 | |
6126 | if (!MemOpChains.empty()) |
6127 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); |
6128 | |
6129 | // Build a sequence of copy-to-reg nodes chained together with token chain |
6130 | // and flag operands which copy the outgoing args into the appropriate regs. |
6131 | SDValue InGlue; |
6132 | for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { |
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InGlue);
InGlue = Chain.getValue(1);
6136 | } |
6137 | |
6138 | // Set CR bit 6 to true if this is a vararg call with floating args passed in |
6139 | // registers. |
6140 | if (IsVarArg) { |
6141 | SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); |
6142 | SDValue Ops[] = { Chain, InGlue }; |
6143 | |
Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));

InGlue = Chain.getValue(1);
6148 | } |
6149 | |
6150 | if (IsTailCall) |
6151 | PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp, |
6152 | TailCallArguments); |
6153 | |
return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6155 | Callee, SPDiff, NumBytes, Ins, InVals, CB); |
6156 | } |
6157 | |
6158 | // Copy an argument into memory, being careful to do this outside the |
6159 | // call sequence for the call to which the argument belongs. |
6160 | SDValue PPCTargetLowering::createMemcpyOutsideCallSeq( |
6161 | SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags, |
6162 | SelectionDAG &DAG, const SDLoc &dl) const { |
SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
CallSeqStart.getNode()->getOperand(0),
Flags, DAG, dl);
// The MEMCPY must go outside the CALLSEQ_START..END.
int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
SDLoc(MemcpyCall));
DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
NewCallSeqStart.getNode());
6172 | return NewCallSeqStart; |
6173 | } |
6174 | |
6175 | SDValue PPCTargetLowering::LowerCall_64SVR4( |
6176 | SDValue Chain, SDValue Callee, CallFlags CFlags, |
6177 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
6178 | const SmallVectorImpl<SDValue> &OutVals, |
6179 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
6180 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, |
6181 | const CallBase *CB) const { |
6182 | bool isELFv2ABI = Subtarget.isELFv2ABI(); |
6183 | bool isLittleEndian = Subtarget.isLittleEndian(); |
6184 | unsigned NumOps = Outs.size(); |
6185 | bool IsSibCall = false; |
6186 | bool IsFastCall = CFlags.CallConv == CallingConv::Fast; |
6187 | |
EVT PtrVT = getPointerTy(DAG.getDataLayout());
6189 | unsigned PtrByteSize = 8; |
6190 | |
6191 | MachineFunction &MF = DAG.getMachineFunction(); |
6192 | |
6193 | if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt) |
6194 | IsSibCall = true; |
6195 | |
// Mark this function as potentially containing a function that contains a
// tail call. As a consequence, the frame pointer will be used for dynamic
// allocations and for restoring the caller's stack pointer in this
// function's epilog. This is done because a tail call might overwrite the
// value in this function's (MF) stack pointer stack slot 0(SP).
6201 | if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall) |
6202 | MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); |
6203 | |
assert(!(IsFastCall && CFlags.IsVarArg) &&
"fastcc not supported on varargs functions");
6206 | |
6207 | // Count how many bytes are to be pushed on the stack, including the linkage |
6208 | // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes |
6209 | // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage |
6210 | // area is 32 bytes reserved space for [SP][CR][LR][TOC]. |
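// As a worked example of those layouts: in ELFv1 the back chain is at
// 0(SP), the CR save word at 8(SP), the LR save doubleword at 16(SP), two
// reserved doublewords follow, and the TOC save sits at 40(SP); ELFv2
// drops the reserved doublewords, so its TOC save sits at 24(SP).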
6211 | unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); |
6212 | unsigned NumBytes = LinkageSize; |
6213 | unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; |
6214 | |
6215 | static const MCPhysReg GPR[] = { |
6216 | PPC::X3, PPC::X4, PPC::X5, PPC::X6, |
6217 | PPC::X7, PPC::X8, PPC::X9, PPC::X10, |
6218 | }; |
6219 | static const MCPhysReg VR[] = { |
6220 | PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, |
6221 | PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 |
6222 | }; |
6223 | |
6224 | const unsigned NumGPRs = std::size(GPR); |
6225 | const unsigned NumFPRs = useSoftFloat() ? 0 : 13; |
6226 | const unsigned NumVRs = std::size(VR); |
6227 | |
6228 | // On ELFv2, we can avoid allocating the parameter area if all the arguments |
6229 | // can be passed to the callee in registers. |
6230 | // For the fast calling convention, there is another check below. |
6231 | // Note: We should keep consistent with LowerFormalArguments_64SVR4() |
6232 | bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall; |
6233 | if (!HasParameterArea) { |
6234 | unsigned ParamAreaSize = NumGPRs * PtrByteSize; |
6235 | unsigned AvailableFPRs = NumFPRs; |
6236 | unsigned AvailableVRs = NumVRs; |
6237 | unsigned NumBytesTmp = NumBytes; |
6238 | for (unsigned i = 0; i != NumOps; ++i) { |
6239 | if (Outs[i].Flags.isNest()) continue; |
if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
PtrByteSize, LinkageSize, ParamAreaSize,
NumBytesTmp, AvailableFPRs, AvailableVRs))
6243 | HasParameterArea = true; |
6244 | } |
6245 | } |
6246 | |
6247 | // When using the fast calling convention, we don't provide backing for |
6248 | // arguments that will be in registers. |
6249 | unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0; |
6250 | |
6251 | // Avoid allocating parameter area for fastcc functions if all the arguments |
6252 | // can be passed in the registers. |
6253 | if (IsFastCall) |
6254 | HasParameterArea = false; |
6255 | |
6256 | // Add up all the space actually used. |
6257 | for (unsigned i = 0; i != NumOps; ++i) { |
6258 | ISD::ArgFlagsTy Flags = Outs[i].Flags; |
6259 | EVT ArgVT = Outs[i].VT; |
6260 | EVT OrigVT = Outs[i].ArgVT; |
6261 | |
6262 | if (Flags.isNest()) |
6263 | continue; |
6264 | |
6265 | if (IsFastCall) { |
6266 | if (Flags.isByVal()) { |
6267 | NumGPRsUsed += (Flags.getByValSize()+7)/8; |
6268 | if (NumGPRsUsed > NumGPRs) |
6269 | HasParameterArea = true; |
6270 | } else { |
6271 | switch (ArgVT.getSimpleVT().SimpleTy) { |
default: llvm_unreachable("Unexpected ValueType for argument!");
6273 | case MVT::i1: |
6274 | case MVT::i32: |
6275 | case MVT::i64: |
6276 | if (++NumGPRsUsed <= NumGPRs) |
6277 | continue; |
6278 | break; |
6279 | case MVT::v4i32: |
6280 | case MVT::v8i16: |
6281 | case MVT::v16i8: |
6282 | case MVT::v2f64: |
6283 | case MVT::v2i64: |
6284 | case MVT::v1i128: |
6285 | case MVT::f128: |
6286 | if (++NumVRsUsed <= NumVRs) |
6287 | continue; |
6288 | break; |
6289 | case MVT::v4f32: |
6290 | if (++NumVRsUsed <= NumVRs) |
6291 | continue; |
6292 | break; |
6293 | case MVT::f32: |
6294 | case MVT::f64: |
6295 | if (++NumFPRsUsed <= NumFPRs) |
6296 | continue; |
6297 | break; |
6298 | } |
6299 | HasParameterArea = true; |
6300 | } |
6301 | } |
6302 | |
6303 | /* Respect alignment of argument on the stack. */ |
auto Alignment =
CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
NumBytes = alignTo(NumBytes, Alignment);
6307 | |
6308 | NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); |
6309 | if (Flags.isInConsecutiveRegsLast()) |
6310 | NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; |
6311 | } |
6312 | |
6313 | unsigned NumBytesActuallyUsed = NumBytes; |
6314 | |
// In the old ELFv1 ABI, the prolog code of the callee may store up to 8
// GPR argument registers to the stack, allowing va_start to index over
// them in memory if it's varargs.
6318 | // Because we cannot tell if this is needed on the caller side, we have to |
6319 | // conservatively assume that it is needed. As such, make sure we have at |
6320 | // least enough stack space for the caller to store the 8 GPRs. |
6321 | // In the ELFv2 ABI, we allocate the parameter area iff a callee |
6322 | // really requires memory operands, e.g. a vararg function. |
6323 | if (HasParameterArea) |
NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6325 | else |
6326 | NumBytes = LinkageSize; |
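// For example, on ELFv1 (LinkageSize == 48) any call that needs a
// parameter area reserves at least 48 + 8 * 8 == 112 bytes, even when the
// call passes fewer than eight doublewords of arguments.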
6327 | |
6328 | // Tail call needs the stack to be aligned. |
6329 | if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall) |
NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6331 | |
6332 | int SPDiff = 0; |
6333 | |
6334 | // Calculate by how many bytes the stack has to be adjusted in case of tail |
6335 | // call optimization. |
6336 | if (!IsSibCall) |
SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6338 | |
6339 | // To protect arguments on the stack from being clobbered in a tail call, |
6340 | // force all the loads to happen before doing any other lowering. |
6341 | if (CFlags.IsTailCall) |
6342 | Chain = DAG.getStackArgumentTokenFactor(Chain); |
6343 | |
6344 | // Adjust the stack pointer for the new arguments... |
6345 | // These operations are automatically eliminated by the prolog/epilog pass |
6346 | if (!IsSibCall) |
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6348 | SDValue CallSeqStart = Chain; |
6349 | |
// Load the return address and frame pointer so they can be moved somewhere
// else later.
6352 | SDValue LROp, FPOp; |
Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6354 | |
6355 | // Set up a copy of the stack pointer for use loading and storing any |
6356 | // arguments that may not fit in the registers available for argument |
6357 | // passing. |
6358 | SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64); |
6359 | |
6360 | // Figure out which arguments are going to go in registers, and which in |
6361 | // memory. Also, if this is a vararg function, floating point operations |
6362 | // must be stored to our stack, and loaded into integer regs as well, if |
6363 | // any integer regs are available for argument passing. |
6364 | unsigned ArgOffset = LinkageSize; |
6365 | |
6366 | SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; |
6367 | SmallVector<TailCallArgumentInfo, 8> TailCallArguments; |
6368 | |
6369 | SmallVector<SDValue, 8> MemOpChains; |
6370 | for (unsigned i = 0; i != NumOps; ++i) { |
6371 | SDValue Arg = OutVals[i]; |
6372 | ISD::ArgFlagsTy Flags = Outs[i].Flags; |
6373 | EVT ArgVT = Outs[i].VT; |
6374 | EVT OrigVT = Outs[i].ArgVT; |
6375 | |
6376 | // PtrOff will be used to store the current argument to the stack if a |
6377 | // register cannot be found for it. |
6378 | SDValue PtrOff; |
6379 | |
6380 | // We re-align the argument offset for each argument, except when using the |
6381 | // fast calling convention, when we need to make sure we do that only when |
6382 | // we'll actually use a stack slot. |
6383 | auto ComputePtrOff = [&]() { |
6384 | /* Respect alignment of argument on the stack. */ |
6385 | auto Alignment = |
6386 | CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); |
ArgOffset = alignTo(ArgOffset, Alignment);

PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());

PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6392 | }; |
6393 | |
6394 | if (!IsFastCall) { |
6395 | ComputePtrOff(); |
6396 | |
6397 | /* Compute GPR index associated with argument offset. */ |
6398 | GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; |
GPR_idx = std::min(GPR_idx, NumGPRs);
6400 | } |
6401 | |
6402 | // Promote integers to 64-bit values. |
6403 | if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) { |
6404 | // FIXME: Should this use ANY_EXTEND if neither sext nor zext? |
6405 | unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; |
6406 | Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); |
6407 | } |
6408 | |
6409 | // FIXME memcpy is used way more than necessary. Correctness first. |
6410 | // Note: "by value" is code for passing a structure by value, not |
6411 | // basic types. |
6412 | if (Flags.isByVal()) { |
6413 | // Note: Size includes alignment padding, so |
6414 | // struct x { short a; char b; } |
6415 | // will have Size = 4. With #pragma pack(1), it will have Size = 3. |
6416 | // These are the proper values we need for right-justifying the |
6417 | // aggregate in a parameter register. |
6418 | unsigned Size = Flags.getByValSize(); |
6419 | |
6420 | // An empty aggregate parameter takes up no storage and no |
6421 | // registers. |
6422 | if (Size == 0) |
6423 | continue; |
6424 | |
6425 | if (IsFastCall) |
6426 | ComputePtrOff(); |
6427 | |
6428 | // All aggregates smaller than 8 bytes must be passed right-justified. |
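// For example, a 4-byte aggregate is loaded below with an extending load,
// so it lands in the low-order (rightmost) bytes of the 8-byte GPR, which
// is the right-justified position on a big-endian target.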
6429 | if (Size==1 || Size==2 || Size==4) { |
6430 | EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32); |
6431 | if (GPR_idx != NumGPRs) { |
SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
MachinePointerInfo(), VT);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6436 | |
6437 | ArgOffset += PtrByteSize; |
6438 | continue; |
6439 | } |
6440 | } |
6441 | |
6442 | if (GPR_idx == NumGPRs && Size < 8) { |
6443 | SDValue AddPtr = PtrOff; |
6444 | if (!isLittleEndian) { |
SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
PtrOff.getValueType());
AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6448 | } |
Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6450 | CallSeqStart, |
6451 | Flags, DAG, dl); |
6452 | ArgOffset += PtrByteSize; |
6453 | continue; |
6454 | } |
// Copy the object to the parameter save area if it cannot be entirely
// passed by registers.
6457 | // FIXME: we only need to copy the parts which need to be passed in |
6458 | // parameter save area. For the parts passed by registers, we don't need |
6459 | // to copy them to the stack although we need to allocate space for them |
6460 | // in parameter save area. |
6461 | if ((NumGPRs - GPR_idx) * PtrByteSize < Size) |
6462 | Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, |
6463 | CallSeqStart, |
6464 | Flags, DAG, dl); |
6465 | |
6466 | // When a register is available, pass a small aggregate right-justified. |
6467 | if (Size < 8 && GPR_idx != NumGPRs) { |
6468 | // The easiest way to get this right-justified in a register |
6469 | // is to copy the structure into the rightmost portion of a |
6470 | // local variable slot, then load the whole slot into the |
6471 | // register. |
6472 | // FIXME: The memcpy seems to produce pretty awful code for |
6473 | // small aggregates, particularly for packed ones. |
6474 | // FIXME: It would be preferable to use the slot in the |
6475 | // parameter save area instead of a new local variable. |
6476 | SDValue AddPtr = PtrOff; |
6477 | if (!isLittleEndian) { |
SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6480 | } |
Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6482 | CallSeqStart, |
6483 | Flags, DAG, dl); |
6484 | |
6485 | // Load the slot into the register. |
SDValue Load =
DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6490 | |
6491 | // Done with this argument. |
6492 | ArgOffset += PtrByteSize; |
6493 | continue; |
6494 | } |
6495 | |
6496 | // For aggregates larger than PtrByteSize, copy the pieces of the |
6497 | // object that fit into registers from the parameter save area. |
6498 | for (unsigned j=0; j<Size; j+=PtrByteSize) { |
SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
if (GPR_idx != NumGPRs) {
unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
MachinePointerInfo(), ObjType);

MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6509 | ArgOffset += PtrByteSize; |
6510 | } else { |
6511 | ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; |
6512 | break; |
6513 | } |
6514 | } |
6515 | continue; |
6516 | } |
6517 | |
6518 | switch (Arg.getSimpleValueType().SimpleTy) { |
default: llvm_unreachable("Unexpected ValueType for argument!");
6520 | case MVT::i1: |
6521 | case MVT::i32: |
6522 | case MVT::i64: |
6523 | if (Flags.isNest()) { |
6524 | // The 'nest' parameter, if any, is passed in R11. |
6525 | RegsToPass.push_back(std::make_pair(PPC::X11, Arg)); |
6526 | break; |
6527 | } |
6528 | |
6529 | // These can be scalar arguments or elements of an integer array type |
6530 | // passed directly. Clang may use those instead of "byval" aggregate |
6531 | // types to avoid forcing arguments to memory unnecessarily. |
6532 | if (GPR_idx != NumGPRs) { |
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6534 | } else { |
6535 | if (IsFastCall) |
6536 | ComputePtrOff(); |
6537 | |
assert(HasParameterArea &&
"Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, CFlags.IsTailCall, false, MemOpChains,
TailCallArguments, dl);
6543 | if (IsFastCall) |
6544 | ArgOffset += PtrByteSize; |
6545 | } |
6546 | if (!IsFastCall) |
6547 | ArgOffset += PtrByteSize; |
6548 | break; |
6549 | case MVT::f32: |
6550 | case MVT::f64: { |
6551 | // These can be scalar arguments or elements of a float array type |
// passed directly. The latter are used to implement ELFv2 homogeneous
6553 | // float aggregates. |
6554 | |
6555 | // Named arguments go into FPRs first, and once they overflow, the |
6556 | // remaining arguments go into GPRs and then the parameter save area. |
6557 | // Unnamed arguments for vararg functions always go to GPRs and |
6558 | // then the parameter save area. For now, put all arguments to vararg |
6559 | // routines always in both locations (FPR *and* GPR or stack slot). |
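// For example, the first double passed to a varargs callee travels both in
// FPR1 and in GPR X3 (or its stack slot); the callee is free to pick it up
// from either location.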
6560 | bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs; |
6561 | bool NeededLoad = false; |
6562 | |
6563 | // First load the argument into the next available FPR. |
6564 | if (FPR_idx != NumFPRs) |
RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6566 | |
6567 | // Next, load the argument into GPR or stack slot if needed. |
6568 | if (!NeedGPROrStack) |
6569 | ; |
6570 | else if (GPR_idx != NumGPRs && !IsFastCall) { |
6571 | // FIXME: We may want to re-enable this for CallingConv::Fast on the P8 |
6572 | // once we support fp <-> gpr moves. |
6573 | |
6574 | // In the non-vararg case, this can only ever happen in the |
6575 | // presence of f32 array types, since otherwise we never run |
6576 | // out of FPRs before running out of GPRs. |
6577 | SDValue ArgVal; |
6578 | |
6579 | // Double values are always passed in a single GPR. |
6580 | if (Arg.getValueType() != MVT::f32) { |
6581 | ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); |
6582 | |
6583 | // Non-array float values are extended and passed in a GPR. |
6584 | } else if (!Flags.isInConsecutiveRegs()) { |
6585 | ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); |
6586 | ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); |
6587 | |
6588 | // If we have an array of floats, we collect every odd element |
6589 | // together with its predecessor into one GPR. |
6590 | } else if (ArgOffset % PtrByteSize != 0) { |
6591 | SDValue Lo, Hi; |
6592 | Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]); |
6593 | Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); |
6594 | if (!isLittleEndian) |
std::swap(Lo, Hi);
6596 | ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); |
6597 | |
6598 | // The final element, if even, goes into the first half of a GPR. |
6599 | } else if (Flags.isInConsecutiveRegsLast()) { |
6600 | ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); |
6601 | ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); |
6602 | if (!isLittleEndian) |
6603 | ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal, |
6604 | DAG.getConstant(32, dl, MVT::i32)); |
6605 | |
// Non-final even elements are skipped; they will be handled
// together with the subsequent argument on the next go-around.
6608 | } else |
6609 | ArgVal = SDValue(); |
6610 | |
6611 | if (ArgVal.getNode()) |
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6613 | } else { |
6614 | if (IsFastCall) |
6615 | ComputePtrOff(); |
6616 | |
6617 | // Single-precision floating-point values are mapped to the |
6618 | // second (rightmost) word of the stack doubleword. |
6619 | if (Arg.getValueType() == MVT::f32 && |
6620 | !isLittleEndian && !Flags.isInConsecutiveRegs()) { |
SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6623 | } |
6624 | |
assert(HasParameterArea &&
"Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, CFlags.IsTailCall, false, MemOpChains,
TailCallArguments, dl);
6630 | |
6631 | NeededLoad = true; |
6632 | } |
6633 | // When passing an array of floats, the array occupies consecutive |
6634 | // space in the argument area; only round up to the next doubleword |
6635 | // at the end of the array. Otherwise, each float takes 8 bytes. |
6636 | if (!IsFastCall || NeededLoad) { |
6637 | ArgOffset += (Arg.getValueType() == MVT::f32 && |
6638 | Flags.isInConsecutiveRegs()) ? 4 : 8; |
6639 | if (Flags.isInConsecutiveRegsLast()) |
6640 | ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; |
6641 | } |
6642 | break; |
6643 | } |
6644 | case MVT::v4f32: |
6645 | case MVT::v4i32: |
6646 | case MVT::v8i16: |
6647 | case MVT::v16i8: |
6648 | case MVT::v2f64: |
6649 | case MVT::v2i64: |
6650 | case MVT::v1i128: |
6651 | case MVT::f128: |
6652 | // These can be scalar arguments or elements of a vector array type |
// passed directly. The latter are used to implement ELFv2 homogeneous
6654 | // vector aggregates. |
6655 | |
6656 | // For a varargs call, named arguments go into VRs or on the stack as |
6657 | // usual; unnamed arguments always go to the stack or the corresponding |
6658 | // GPRs when within range. For now, we always put the value in both |
6659 | // locations (or even all three). |
6660 | if (CFlags.IsVarArg) { |
assert(HasParameterArea &&
"Parameter area must exist if we have a varargs call.");
6663 | // We could elide this store in the case where the object fits |
6664 | // entirely in R registers. Maybe later. |
SDValue Store =
DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
MemOpChains.push_back(Store);
6668 | if (VR_idx != NumVRs) { |
6669 | SDValue Load = |
6670 | DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo()); |
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6673 | } |
6674 | ArgOffset += 16; |
6675 | for (unsigned i=0; i<16; i+=PtrByteSize) { |
6676 | if (GPR_idx == NumGPRs) |
6677 | break; |
SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
DAG.getConstant(i, dl, PtrVT));
SDValue Load =
DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6684 | } |
6685 | break; |
6686 | } |
6687 | |
6688 | // Non-varargs Altivec params go into VRs or on the stack. |
6689 | if (VR_idx != NumVRs) { |
RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6691 | } else { |
6692 | if (IsFastCall) |
6693 | ComputePtrOff(); |
6694 | |
assert(HasParameterArea &&
"Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, CFlags.IsTailCall, true, MemOpChains,
TailCallArguments, dl);
6700 | if (IsFastCall) |
6701 | ArgOffset += 16; |
6702 | } |
6703 | |
6704 | if (!IsFastCall) |
6705 | ArgOffset += 16; |
6706 | break; |
6707 | } |
6708 | } |
6709 | |
assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
"mismatch in size of parameter area");
6712 | (void)NumBytesActuallyUsed; |
6713 | |
6714 | if (!MemOpChains.empty()) |
6715 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); |
6716 | |
6717 | // Check if this is an indirect call (MTCTR/BCTRL). |
6718 | // See prepareDescriptorIndirectCall and buildCallOperands for more |
6719 | // information about calls through function pointers in the 64-bit SVR4 ABI. |
6720 | if (CFlags.IsIndirect) { |
6721 | // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the |
6722 | // caller in the TOC save area. |
6723 | if (isTOCSaveRestoreRequired(Subtarget)) { |
assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6725 | // Load r2 into a virtual register and store it to the TOC save area. |
6726 | setUsesTOCBasePtr(DAG); |
6727 | SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); |
6728 | // TOC save area offset. |
6729 | unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); |
SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
MachinePointerInfo::getStack(
DAG.getMachineFunction(), TOCSaveOffset));
6735 | } |
6736 | // In the ELFv2 ABI, R12 must contain the address of an indirect callee. |
6737 | // This does not mean the MTCTR instruction must use R12; it's easier |
6738 | // to model this as an extra parameter, so do that. |
6739 | if (isELFv2ABI && !CFlags.IsPatchPoint) |
6740 | RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); |
6741 | } |
6742 | |
6743 | // Build a sequence of copy-to-reg nodes chained together with token chain |
6744 | // and flag operands which copy the outgoing args into the appropriate regs. |
6745 | SDValue InGlue; |
6746 | for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { |
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InGlue);
InGlue = Chain.getValue(1);
6750 | } |
6751 | |
6752 | if (CFlags.IsTailCall && !IsSibCall) |
6753 | PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp, |
6754 | TailCallArguments); |
6755 | |
return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6757 | Callee, SPDiff, NumBytes, Ins, InVals, CB); |
6758 | } |
6759 | |
6760 | // Returns true when the shadow of a general purpose argument register |
6761 | // in the parameter save area is aligned to at least 'RequiredAlign'. |
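// For example, on 64-bit AIX the PSA starts right after the 48-byte
// linkage area, so X3's shadow sits at offset 48 (16-byte aligned) while
// X4's sits at offset 56 (only 8-byte aligned). With the 24-byte 32-bit
// linkage area, R5 (offset 32) and R9 (offset 48) are the 16-byte-aligned
// shadows.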
6762 | static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) { |
assert(RequiredAlign.value() <= 16 &&
"Required alignment greater than stack alignment.");
6765 | switch (Reg) { |
6766 | default: |
report_fatal_error("called on invalid register.");
6768 | case PPC::R5: |
6769 | case PPC::R9: |
6770 | case PPC::X3: |
6771 | case PPC::X5: |
6772 | case PPC::X7: |
6773 | case PPC::X9: |
// These registers are 16-byte aligned, which is the strictest alignment
// we can support.
6776 | return true; |
6777 | case PPC::R3: |
6778 | case PPC::R7: |
6779 | case PPC::X4: |
6780 | case PPC::X6: |
6781 | case PPC::X8: |
6782 | case PPC::X10: |
// The shadow of these registers in the PSA is 8-byte aligned.
6784 | return RequiredAlign <= 8; |
6785 | case PPC::R4: |
6786 | case PPC::R6: |
6787 | case PPC::R8: |
6788 | case PPC::R10: |
6789 | return RequiredAlign <= 4; |
6790 | } |
6791 | } |
6792 | |
6793 | static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, |
6794 | CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, |
6795 | CCState &S) { |
6796 | AIXCCState &State = static_cast<AIXCCState &>(S); |
6797 | const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>( |
6798 | State.getMachineFunction().getSubtarget()); |
6799 | const bool IsPPC64 = Subtarget.isPPC64(); |
6800 | const Align PtrAlign = IsPPC64 ? Align(8) : Align(4); |
6801 | const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32; |
6802 | |
6803 | if (ValVT == MVT::f128) |
report_fatal_error("f128 is unimplemented on AIX.");
6805 | |
6806 | if (ArgFlags.isNest()) |
report_fatal_error("Nest arguments are unimplemented.");
6808 | |
6809 | static const MCPhysReg GPR_32[] = {// 32-bit registers. |
6810 | PPC::R3, PPC::R4, PPC::R5, PPC::R6, |
6811 | PPC::R7, PPC::R8, PPC::R9, PPC::R10}; |
6812 | static const MCPhysReg GPR_64[] = {// 64-bit registers. |
6813 | PPC::X3, PPC::X4, PPC::X5, PPC::X6, |
6814 | PPC::X7, PPC::X8, PPC::X9, PPC::X10}; |
6815 | |
6816 | static const MCPhysReg VR[] = {// Vector registers. |
6817 | PPC::V2, PPC::V3, PPC::V4, PPC::V5, |
6818 | PPC::V6, PPC::V7, PPC::V8, PPC::V9, |
6819 | PPC::V10, PPC::V11, PPC::V12, PPC::V13}; |
6820 | |
6821 | if (ArgFlags.isByVal()) { |
6822 | if (ArgFlags.getNonZeroByValAlign() > PtrAlign) |
report_fatal_error("Pass-by-value arguments with alignment greater than "
"register width are not supported.");
6825 | |
6826 | const unsigned ByValSize = ArgFlags.getByValSize(); |
6827 | |
6828 | // An empty aggregate parameter takes up no storage and no registers, |
6829 | // but needs a MemLoc for a stack slot for the formal arguments side. |
6830 | if (ByValSize == 0) { |
State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
State.getStackSize(), RegVT, LocInfo));
6833 | return false; |
6834 | } |
6835 | |
const unsigned StackSize = alignTo(ByValSize, PtrAlign);
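// For example, a 10-byte byval on 64-bit AIX rounds up to a 16-byte
// StackSize and consumes two GPRs (or their stack shadows) in the loop
// below.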
unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
for (const unsigned E = Offset + StackSize; Offset < E;
Offset += PtrAlign.value()) {
if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
else {
State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
LocInfo));
break;
}
}
6849 | return false; |
6850 | } |
6851 | |
6852 | // Arguments always reserve parameter save area. |
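// For example, even an i32 passed in R3 on 64-bit AIX burns a full 8-byte
// slot of the PSA; the AllocateStack calls below model exactly that.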
6853 | switch (ValVT.SimpleTy) { |
6854 | default: |
report_fatal_error("Unhandled value type for argument.");
6856 | case MVT::i64: |
6857 | // i64 arguments should have been split to i32 for PPC32. |
assert(IsPPC64 && "PPC32 should have split i64 values.");
6859 | [[fallthrough]]; |
6860 | case MVT::i1: |
6861 | case MVT::i32: { |
const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
6863 | // AIX integer arguments are always passed in register width. |
6864 | if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits()) |
6865 | LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt |
6866 | : CCValAssign::LocInfo::ZExt; |
6867 | if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) |
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
else
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6871 | |
6872 | return false; |
6873 | } |
6874 | case MVT::f32: |
6875 | case MVT::f64: { |
6876 | // Parameter save area (PSA) is reserved even if the float passes in fpr. |
6877 | const unsigned StoreSize = LocVT.getStoreSize(); |
6878 | // Floats are always 4-byte aligned in the PSA on AIX. |
6879 | // This includes f64 in 64-bit mode for ABI compatibility. |
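// For example, an f32 on 64-bit AIX still reserves a full 8-byte PSA slot
// even when the value itself travels in an FPR.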
const unsigned Offset =
State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
unsigned FReg = State.AllocateReg(FPR);
if (FReg)
State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6885 | |
6886 | // Reserve and initialize GPRs or initialize the PSA as required. |
6887 | for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) { |
6888 | if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) { |
assert(FReg && "An FPR should be available when a GPR is reserved.");
6890 | if (State.isVarArg()) { |
6891 | // Successfully reserved GPRs are only initialized for vararg calls. |
6892 | // Custom handling is required for: |
6893 | // f64 in PPC32 needs to be split into 2 GPRs. |
6894 | // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR. |
State.addLoc(
CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6897 | } |
6898 | } else { |
6899 | // If there are insufficient GPRs, the PSA needs to be initialized. |
6900 | // Initialization occurs even if an FPR was initialized for |
6901 | // compatibility with the AIX XL compiler. The full memory for the |
6902 | // argument will be initialized even if a prior word is saved in GPR. |
6903 | // A custom memLoc is used when the argument also passes in FPR so |
6904 | // that the callee handling can skip over it easily. |
State.addLoc(
FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
LocInfo)
: CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6909 | break; |
6910 | } |
6911 | } |
6912 | |
6913 | return false; |
6914 | } |
6915 | case MVT::v4f32: |
6916 | case MVT::v4i32: |
6917 | case MVT::v8i16: |
6918 | case MVT::v16i8: |
6919 | case MVT::v2i64: |
6920 | case MVT::v2f64: |
6921 | case MVT::v1i128: { |
6922 | const unsigned VecSize = 16; |
6923 | const Align VecAlign(VecSize); |
6924 | |
6925 | if (!State.isVarArg()) { |
6926 | // If there are vector registers remaining we don't consume any stack |
6927 | // space. |
6928 | if (unsigned VReg = State.AllocateReg(VR)) { |
State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6930 | return false; |
6931 | } |
6932 | // Vectors passed on the stack do not shadow GPRs or FPRs even though they |
6933 | // might be allocated in the portion of the PSA that is shadowed by the |
6934 | // GPRs. |
const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6937 | return false; |
6938 | } |
6939 | |
6940 | const unsigned PtrSize = IsPPC64 ? 8 : 4; |
6941 | ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32; |
6942 | |
unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
6944 | // Burn any underaligned registers and their shadowed stack space until |
6945 | // we reach the required alignment. |
6946 | while (NextRegIndex != GPRs.size() && |
!isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
// Shadow allocate register and its stack shadow.
unsigned Reg = State.AllocateReg(GPRs);
State.AllocateStack(PtrSize, PtrAlign);
assert(Reg && "Allocating register unexpectedly failed.");
(void)Reg;
NextRegIndex = State.getFirstUnallocated(GPRs);
6954 | } |
6955 | |
6956 | // Vectors that are passed as fixed arguments are handled differently. |
6957 | // They are passed in VRs if any are available (unlike arguments passed |
6958 | // through ellipses) and shadow GPRs (unlike arguments to non-vaarg |
// functions).
6960 | if (State.isFixed(ValNo)) { |
6961 | if (unsigned VReg = State.AllocateReg(VR)) { |
State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
// Shadow allocate GPRs and stack space even though we pass in a VR.
for (unsigned I = 0; I != VecSize; I += PtrSize)
State.AllocateReg(GPRs);
State.AllocateStack(VecSize, VecAlign);
6967 | return false; |
6968 | } |
6969 | // No vector registers remain so pass on the stack. |
const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6972 | return false; |
6973 | } |
6974 | |
// If all GPRs are consumed then we pass the argument fully on the stack.
if (NextRegIndex == GPRs.size()) {
const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6979 | return false; |
6980 | } |
6981 | |
6982 | // Corner case for 32-bit codegen. We have 2 registers to pass the first |
6983 | // half of the argument, and then need to pass the remaining half on the |
6984 | // stack. |
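// For example, a 16-byte vector vararg that reaches R9 passes its first 8
// bytes in R9/R10, while the full value also gets a 16-byte stack slot.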
6985 | if (GPRs[NextRegIndex] == PPC::R9) { |
const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
State.addLoc(
CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));

const unsigned FirstReg = State.AllocateReg(PPC::R9);
const unsigned SecondReg = State.AllocateReg(PPC::R10);
assert(FirstReg && SecondReg &&
"Allocating R9 or R10 unexpectedly failed.");
State.addLoc(
CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
State.addLoc(
CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
6998 | return false; |
6999 | } |
7000 | |
7001 | // We have enough GPRs to fully pass the vector argument, and we have |
7002 | // already consumed any underaligned registers. Start with the custom |
7003 | // MemLoc and then the custom RegLocs. |
const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
State.addLoc(
CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
for (unsigned I = 0; I != VecSize; I += PtrSize) {
const unsigned Reg = State.AllocateReg(GPRs);
assert(Reg && "Failed to allocate register for vararg vector argument");
State.addLoc(
CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7012 | } |
7013 | return false; |
7014 | } |
7015 | } |
7016 | return true; |
7017 | } |
7018 | |
7019 | // So far, this function is only used by LowerFormalArguments_AIX() |
7020 | static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT, |
7021 | bool IsPPC64, |
7022 | bool HasP8Vector, |
7023 | bool HasVSX) { |
assert((IsPPC64 || SVT != MVT::i64) &&
"i64 should have been split for 32-bit codegen.");
7026 | |
7027 | switch (SVT) { |
7028 | default: |
report_fatal_error("Unexpected value type for formal argument");
7030 | case MVT::i1: |
7031 | case MVT::i32: |
7032 | case MVT::i64: |
7033 | return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; |
7034 | case MVT::f32: |
7035 | return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass; |
7036 | case MVT::f64: |
7037 | return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass; |
7038 | case MVT::v4f32: |
7039 | case MVT::v4i32: |
7040 | case MVT::v8i16: |
7041 | case MVT::v16i8: |
7042 | case MVT::v2i64: |
7043 | case MVT::v2f64: |
7044 | case MVT::v1i128: |
7045 | return &PPC::VRRCRegClass; |
7046 | } |
7047 | } |
7048 | |
7049 | static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, |
7050 | SelectionDAG &DAG, SDValue ArgValue, |
7051 | MVT LocVT, const SDLoc &dl) { |
7052 | assert(ValVT.isScalarInteger() && LocVT.isScalarInteger()); |
7053 | assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits()); |
7054 | |
7055 | if (Flags.isSExt()) |
ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
DAG.getValueType(ValVT));
else if (Flags.isZExt())
ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
DAG.getValueType(ValVT));

return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7063 | } |
7064 | |
7065 | static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) { |
7066 | const unsigned LASize = FL->getLinkageSize(); |
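// For example, on 64-bit AIX (LASize == 48) X4 maps to 48 + 8 * 1 == 56.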
7067 | |
7068 | if (PPC::GPRCRegClass.contains(Reg)) { |
assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
"Reg must be a valid argument register!");
7071 | return LASize + 4 * (Reg - PPC::R3); |
7072 | } |
7073 | |
7074 | if (PPC::G8RCRegClass.contains(Reg)) { |
assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
"Reg must be a valid argument register!");
7077 | return LASize + 8 * (Reg - PPC::X3); |
7078 | } |
7079 | |
llvm_unreachable("Only general purpose registers expected.");
7081 | } |
7082 | |
7083 | // AIX ABI Stack Frame Layout: |
7084 | // |
7085 | // Low Memory +--------------------------------------------+ |
7086 | // SP +---> | Back chain | ---+ |
7087 | // | +--------------------------------------------+ | |
7088 | // | | Saved Condition Register | | |
7089 | // | +--------------------------------------------+ | |
7090 | // | | Saved Linkage Register | | |
7091 | // | +--------------------------------------------+ | Linkage Area |
7092 | // | | Reserved for compilers | | |
7093 | // | +--------------------------------------------+ | |
7094 | // | | Reserved for binders | | |
7095 | // | +--------------------------------------------+ | |
7096 | // | | Saved TOC pointer | ---+ |
7097 | // | +--------------------------------------------+ |
7098 | // | | Parameter save area | |
7099 | // | +--------------------------------------------+ |
7100 | // | | Alloca space | |
7101 | // | +--------------------------------------------+ |
7102 | // | | Local variable space | |
7103 | // | +--------------------------------------------+ |
7104 | // | | Float/int conversion temporary | |
7105 | // | +--------------------------------------------+ |
7106 | // | | Save area for AltiVec registers | |
7107 | // | +--------------------------------------------+ |
7108 | // | | AltiVec alignment padding | |
7109 | // | +--------------------------------------------+ |
7110 | // | | Save area for VRSAVE register | |
7111 | // | +--------------------------------------------+ |
7112 | // | | Save area for General Purpose registers | |
7113 | // | +--------------------------------------------+ |
7114 | // | | Save area for Floating Point registers | |
7115 | // | +--------------------------------------------+ |
7116 | // +---- | Back chain | |
7117 | // High Memory +--------------------------------------------+ |
7118 | // |
7119 | // Specifications: |
7120 | // AIX 7.2 Assembler Language Reference |
7121 | // Subroutine linkage convention |
7122 | |
7123 | SDValue PPCTargetLowering::LowerFormalArguments_AIX( |
7124 | SDValue Chain, CallingConv::ID CallConv, bool isVarArg, |
7125 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
7126 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { |
7127 | |
assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
CallConv == CallingConv::Fast) &&
"Unexpected calling convention!");
7131 | |
7132 | if (getTargetMachine().Options.GuaranteedTailCallOpt) |
report_fatal_error("Tail call support is unimplemented on AIX.");
7134 | |
7135 | if (useSoftFloat()) |
report_fatal_error("Soft float support is unimplemented on AIX.");
7137 | |
7138 | const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>(); |
7139 | |
7140 | const bool IsPPC64 = Subtarget.isPPC64(); |
7141 | const unsigned PtrByteSize = IsPPC64 ? 8 : 4; |
7142 | |
7143 | // Assign locations to all of the incoming arguments. |
7144 | SmallVector<CCValAssign, 16> ArgLocs; |
7145 | MachineFunction &MF = DAG.getMachineFunction(); |
7146 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
7147 | PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); |
7148 | AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); |
7149 | |
const EVT PtrVT = getPointerTy(MF.getDataLayout());
7151 | // Reserve space for the linkage area on the stack. |
7152 | const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); |
CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7155 | |
7156 | SmallVector<SDValue, 8> MemOps; |
7157 | |
7158 | for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) { |
7159 | CCValAssign &VA = ArgLocs[I++]; |
7160 | MVT LocVT = VA.getLocVT(); |
7161 | MVT ValVT = VA.getValVT(); |
7162 | ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags; |
7163 | // For compatibility with the AIX XL compiler, the float args in the |
7164 | // parameter save area are initialized even if the argument is available |
7165 | // in register. The caller is required to initialize both the register |
7166 | // and memory, however, the callee can choose to expect it in either. |
7167 | // The memloc is dismissed here because the argument is retrieved from |
7168 | // the register. |
7169 | if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint()) |
7170 | continue; |
7171 | |
7172 | auto HandleMemLoc = [&]() { |
7173 | const unsigned LocSize = LocVT.getStoreSize(); |
7174 | const unsigned ValSize = ValVT.getStoreSize(); |
assert((ValSize <= LocSize) &&
"Object size is larger than size of MemLoc");
7177 | int CurArgOffset = VA.getLocMemOffset(); |
7178 | // Objects are right-justified because AIX is big-endian. |
7179 | if (LocSize > ValSize) |
7180 | CurArgOffset += LocSize - ValSize; |
7181 | // Potential tail calls could cause overwriting of argument stack slots. |
7182 | const bool IsImmutable = |
7183 | !(getTargetMachine().Options.GuaranteedTailCallOpt && |
7184 | (CallConv == CallingConv::Fast)); |
int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue ArgValue =
DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
InVals.push_back(ArgValue);
7190 | }; |
7191 | |
7192 | // Vector arguments to VaArg functions are passed both on the stack, and |
7193 | // in any available GPRs. Load the value from the stack and add the GPRs |
7194 | // as live ins. |
7195 | if (VA.isMemLoc() && VA.needsCustom()) { |
assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
assert(isVarArg && "Only use custom memloc for vararg.");
// Remember the ValNo of the custom MemLoc, so we can compare it to the
// ValNo of the matching custom RegLocs.
7200 | const unsigned OriginalValNo = VA.getValNo(); |
7201 | (void)OriginalValNo; |
7202 | |
7203 | auto HandleCustomVecRegLoc = [&]() { |
assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
"Missing custom RegLoc.");
7206 | VA = ArgLocs[I++]; |
assert(VA.getValVT().isVector() &&
"Unexpected Val type for custom RegLoc.");
assert(VA.getValNo() == OriginalValNo &&
"ValNo mismatch between custom MemLoc and RegLoc.");
MVT::SimpleValueType SVT = VA.getLocVT().SimpleTy;
MF.addLiveIn(VA.getLocReg(),
getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
Subtarget.hasVSX()));
7215 | }; |
7216 | |
7217 | HandleMemLoc(); |
// In 64-bit there will be exactly 2 custom RegLocs that follow, and in
// 32-bit there will be 2 custom RegLocs if we are passing in R9 and
// R10.
7221 | HandleCustomVecRegLoc(); |
7222 | HandleCustomVecRegLoc(); |
7223 | |
7224 | // If we are targeting 32-bit, there might be 2 extra custom RegLocs if |
7225 | // we passed the vector in R5, R6, R7 and R8. |
7226 | if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) { |
assert(!IsPPC64 &&
"Only 2 custom RegLocs expected for 64-bit codegen.");
7229 | HandleCustomVecRegLoc(); |
7230 | HandleCustomVecRegLoc(); |
7231 | } |
7232 | |
7233 | continue; |
7234 | } |
7235 | |
7236 | if (VA.isRegLoc()) { |
7237 | if (VA.getValVT().isScalarInteger()) |
FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7239 | else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) { |
7240 | switch (VA.getValVT().SimpleTy) { |
7241 | default: |
report_fatal_error("Unhandled value type for argument.");
case MVT::f32:
FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
break;
case MVT::f64:
FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
7248 | break; |
7249 | } |
7250 | } else if (VA.getValVT().isVector()) { |
7251 | switch (VA.getValVT().SimpleTy) { |
7252 | default: |
report_fatal_error("Unhandled value type for argument.");
case MVT::v16i8:
FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
break;
case MVT::v8i16:
FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
break;
case MVT::v4i32:
case MVT::v2i64:
case MVT::v1i128:
FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
break;
case MVT::v4f32:
case MVT::v2f64:
FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
7268 | break; |
7269 | } |
7270 | } |
7271 | } |
7272 | |
7273 | if (Flags.isByVal() && VA.isMemLoc()) { |
7274 | const unsigned Size = |
7275 | alignTo(Value: Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize, |
7276 | Align: PtrByteSize); |
7277 | const int FI = MF.getFrameInfo().CreateFixedObject( |
7278 | Size, SPOffset: VA.getLocMemOffset(), /* IsImmutable */ false, |
7279 | /* IsAliased */ isAliased: true); |
7280 | SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT); |
7281 | InVals.push_back(Elt: FIN); |
7282 | |
7283 | continue; |
7284 | } |
7285 | |
7286 | if (Flags.isByVal()) { |
7287 | assert(VA.isRegLoc() && "MemLocs should already be handled." ); |
7288 | |
7289 | const MCPhysReg ArgReg = VA.getLocReg(); |
7290 | const PPCFrameLowering *FL = Subtarget.getFrameLowering(); |
7291 | |
7292 | if (Flags.getNonZeroByValAlign() > PtrByteSize) |
7293 | report_fatal_error(reason: "Over aligned byvals not supported yet." ); |
7294 | |
7295 | const unsigned StackSize = alignTo(Value: Flags.getByValSize(), Align: PtrByteSize); |
7296 | const int FI = MF.getFrameInfo().CreateFixedObject( |
7297 | Size: StackSize, SPOffset: mapArgRegToOffsetAIX(Reg: ArgReg, FL), /* IsImmutable */ false, |
7298 | /* IsAliased */ isAliased: true); |
7299 | SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT); |
7300 | InVals.push_back(Elt: FIN); |
7301 | |
7302 | // Add live ins for all the RegLocs for the same ByVal. |
7303 | const TargetRegisterClass *RegClass = |
7304 | IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; |
7305 | |
7306 | auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg, |
7307 | unsigned Offset) { |
7308 | const Register VReg = MF.addLiveIn(PReg: PhysReg, RC: RegClass); |
7309 | // Since the callers side has left justified the aggregate in the |
7310 | // register, we can simply store the entire register into the stack |
7311 | // slot. |
7312 | SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: LocVT); |
7313 | // The store to the fixedstack object is needed becuase accessing a |
7314 | // field of the ByVal will use a gep and load. Ideally we will optimize |
7315 | // to extracting the value from the register directly, and elide the |
7316 | // stores when the arguments address is not taken, but that will need to |
7317 | // be future work. |
7318 | SDValue Store = DAG.getStore( |
7319 | Chain: CopyFrom.getValue(R: 1), dl, Val: CopyFrom, |
7320 | Ptr: DAG.getObjectPtrOffset(SL: dl, Ptr: FIN, Offset: TypeSize::getFixed(ExactSize: Offset)), |
7321 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI, Offset)); |
7322 | |
7323 | MemOps.push_back(Elt: Store); |
7324 | }; |
7325 | |
7326 | unsigned Offset = 0; |
7327 | HandleRegLoc(VA.getLocReg(), Offset); |
7328 | Offset += PtrByteSize; |
7329 | for (; Offset != StackSize && ArgLocs[I].isRegLoc(); |
7330 | Offset += PtrByteSize) { |
7331 | assert(ArgLocs[I].getValNo() == VA.getValNo() && |
7332 | "RegLocs should be for ByVal argument." ); |
7333 | |
7334 | const CCValAssign RL = ArgLocs[I++]; |
7335 | HandleRegLoc(RL.getLocReg(), Offset); |
7336 | FuncInfo->appendParameterType(Type: PPCFunctionInfo::FixedType); |
7337 | } |
7338 | |
7339 | if (Offset != StackSize) { |
7340 | assert(ArgLocs[I].getValNo() == VA.getValNo() && |
7341 | "Expected MemLoc for remaining bytes." ); |
7342 | assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes." ); |
7343 | // Consume the MemLoc.The InVal has already been emitted, so nothing |
7344 | // more needs to be done. |
7345 | ++I; |
7346 | } |
7347 | |
7348 | continue; |
7349 | } |
7350 | |
7351 | if (VA.isRegLoc() && !VA.needsCustom()) { |
7352 | MVT::SimpleValueType SVT = ValVT.SimpleTy; |
7353 | Register VReg = |
7354 | MF.addLiveIn(PReg: VA.getLocReg(), |
7355 | RC: getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(), |
7356 | Subtarget.hasVSX())); |
7357 | SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: LocVT); |
7358 | if (ValVT.isScalarInteger() && |
7359 | (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) { |
7360 | ArgValue = |
7361 | truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl); |
7362 | } |
7363 | InVals.push_back(Elt: ArgValue); |
7364 | continue; |
7365 | } |
7366 | if (VA.isMemLoc()) { |
7367 | HandleMemLoc(); |
7368 | continue; |
7369 | } |
7370 | } |
7371 | |
7372 | // On AIX a minimum of 8 words is saved to the parameter save area. |
7373 | const unsigned MinParameterSaveArea = 8 * PtrByteSize; |
7374 | // Area that is at least reserved in the caller of this function. |
7375 | unsigned CallerReservedArea = std::max<unsigned>( |
7376 | a: CCInfo.getStackSize(), b: LinkageSize + MinParameterSaveArea); |
7377 | |
7378 | // Set the size that is at least reserved in caller of this function. Tail |
7379 | // call optimized function's reserved stack space needs to be aligned so |
7380 | // that taking the difference between two stack areas will result in an |
7381 | // aligned stack. |
7382 | CallerReservedArea = |
7383 | EnsureStackAlignment(Lowering: Subtarget.getFrameLowering(), NumBytes: CallerReservedArea); |
7384 | FuncInfo->setMinReservedArea(CallerReservedArea); |
7385 | |
7386 | if (isVarArg) { |
7387 | FuncInfo->setVarArgsFrameIndex( |
7388 | MFI.CreateFixedObject(Size: PtrByteSize, SPOffset: CCInfo.getStackSize(), IsImmutable: true)); |
7389 | SDValue FIN = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), VT: PtrVT); |
7390 | |
7391 | static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6, |
7392 | PPC::R7, PPC::R8, PPC::R9, PPC::R10}; |
7393 | |
7394 | static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6, |
7395 | PPC::X7, PPC::X8, PPC::X9, PPC::X10}; |
7396 | const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32); |
7397 | |
7398 | // The fixed integer arguments of a variadic function are stored to the |
7399 | // VarArgsFrameIndex on the stack so that they may be loaded by |
7400 | // dereferencing the result of va_next. |
7401 | for (unsigned GPRIndex = |
7402 | (CCInfo.getStackSize() - LinkageSize) / PtrByteSize; |
7403 | GPRIndex < NumGPArgRegs; ++GPRIndex) { |
7404 | |
7405 | const Register VReg = |
7406 | IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass) |
7407 | : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass); |
7408 | |
7409 | SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg: VReg, VT: PtrVT); |
7410 | SDValue Store = |
7411 | DAG.getStore(Chain: Val.getValue(R: 1), dl, Val, Ptr: FIN, PtrInfo: MachinePointerInfo()); |
7412 | MemOps.push_back(Elt: Store); |
7413 | // Increment the address for the next argument to store. |
7414 | SDValue PtrOff = DAG.getConstant(Val: PtrByteSize, DL: dl, VT: PtrVT); |
7415 | FIN = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrOff.getValueType(), N1: FIN, N2: PtrOff); |
7416 | } |
7417 | } |
7418 | |
7419 | if (!MemOps.empty()) |
7420 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); |
7421 | |
7422 | return Chain; |
7423 | } |
7424 | |
7425 | SDValue PPCTargetLowering::LowerCall_AIX( |
7426 | SDValue Chain, SDValue Callee, CallFlags CFlags, |
7427 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
7428 | const SmallVectorImpl<SDValue> &OutVals, |
7429 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
7430 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, |
7431 | const CallBase *CB) const { |
7432 | // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the |
7433 | // AIX ABI stack frame layout. |
7434 | |
7435 | assert((CFlags.CallConv == CallingConv::C || |
7436 | CFlags.CallConv == CallingConv::Cold || |
7437 | CFlags.CallConv == CallingConv::Fast) && |
7438 | "Unexpected calling convention!" ); |
7439 | |
7440 | if (CFlags.IsPatchPoint) |
7441 | report_fatal_error(reason: "This call type is unimplemented on AIX." ); |
7442 | |
7443 | const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>(); |
7444 | |
7445 | MachineFunction &MF = DAG.getMachineFunction(); |
7446 | SmallVector<CCValAssign, 16> ArgLocs; |
7447 | AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs, |
7448 | *DAG.getContext()); |
7449 | |
7450 | // Reserve space for the linkage save area (LSA) on the stack. |
7451 | // In both PPC32 and PPC64 there are 6 reserved slots in the LSA: |
7452 | // [SP][CR][LR][2 x reserved][TOC]. |
7453 | // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64. |
7454 | const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); |
7455 | const bool IsPPC64 = Subtarget.isPPC64(); |
7456 | const EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
7457 | const unsigned PtrByteSize = IsPPC64 ? 8 : 4; |
7458 | CCInfo.AllocateStack(Size: LinkageSize, Alignment: Align(PtrByteSize)); |
7459 | CCInfo.AnalyzeCallOperands(Outs, Fn: CC_AIX); |
7460 | |
7461 | // The prolog code of the callee may store up to 8 GPR argument registers to |
7462 | // the stack, allowing va_start to index over them in memory if the callee |
7463 | // is variadic. |
7464 | // Because we cannot tell if this is needed on the caller side, we have to |
7465 | // conservatively assume that it is needed. As such, make sure we have at |
7466 | // least enough stack space for the caller to store the 8 GPRs. |
7467 | const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize; |
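  // That reserves 8 x 4 == 32 bytes on PPC32 and 8 x 8 == 64 bytes on PPC64.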
  const unsigned NumBytes = std::max<unsigned>(
      LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass.
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  // Set up a copy of the stack pointer for loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
                                   : DAG.getRegister(PPC::R1, MVT::i32);

  for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
    const unsigned ValNo = ArgLocs[I].getValNo();
    SDValue Arg = OutVals[ValNo];
    ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;

    if (Flags.isByVal()) {
      const unsigned ByValSize = Flags.getByValSize();

      // Nothing to do for zero-sized ByVals on the caller side.
      if (!ByValSize) {
        ++I;
        continue;
      }

      auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
        return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
                              (LoadOffset != 0)
                                  ? DAG.getObjectPtrOffset(
                                        dl, Arg, TypeSize::getFixed(LoadOffset))
                                  : Arg,
                              MachinePointerInfo(), VT);
      };

      unsigned LoadOffset = 0;

      // Initialize registers, which are fully occupied by the by-val argument.
      while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
        SDValue Load = GetLoad(PtrVT, LoadOffset);
        MemOpChains.push_back(Load.getValue(1));
        LoadOffset += PtrByteSize;
        const CCValAssign &ByValVA = ArgLocs[I++];
        assert(ByValVA.getValNo() == ValNo &&
               "Unexpected location for pass-by-value argument.");
        RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
      }

      if (LoadOffset == ByValSize)
        continue;

      // There must be one more loc to handle the remainder.
      assert(ArgLocs[I].getValNo() == ValNo &&
             "Expected additional location for by-value argument.");

      if (ArgLocs[I].isMemLoc()) {
        assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
        const CCValAssign &ByValVA = ArgLocs[I++];
        ISD::ArgFlagsTy MemcpyFlags = Flags;
        // Only memcpy the bytes that don't pass in registers.
        MemcpyFlags.setByValSize(ByValSize - LoadOffset);
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(
            (LoadOffset != 0) ? DAG.getObjectPtrOffset(
                                    dl, Arg, TypeSize::getFixed(LoadOffset))
                              : Arg,
            DAG.getObjectPtrOffset(
                dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
            CallSeqStart, MemcpyFlags, DAG, dl);
        continue;
      }

      // Initialize the final register residue.
      // Any residue that occupies the final by-val arg register must be
      // left-justified on AIX. Loads must be a power-of-2 size and cannot be
      // larger than the ByValSize. For example: a 7-byte by-val arg requires
      // 4-, 2- and 1-byte loads.
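      // Each sub-register-sized load below is shifted into its left-justified
      // position; e.g. in 64-bit mode a 1-byte residue ends up shifted left by
      // 64 - 8 == 56 bits so it occupies the top byte of the GPR.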
      const unsigned ResidueBytes = ByValSize % PtrByteSize;
      assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
             "Unexpected register residue for by-value argument.");
      SDValue ResidueVal;
      for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
        const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
        const MVT VT =
            N == 1 ? MVT::i8
                   : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
        SDValue Load = GetLoad(VT, LoadOffset);
        MemOpChains.push_back(Load.getValue(1));
        LoadOffset += N;
        Bytes += N;

        // By-val arguments are passed left-justified in register.
        // Every load here needs to be shifted, otherwise a full register load
        // should have been used.
        assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
               "Unexpected load emitted during handling of pass-by-value "
               "argument.");
        unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
        EVT ShiftAmountTy =
            getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
        SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
        SDValue ShiftedLoad =
            DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
        ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
                                              ShiftedLoad)
                                : ShiftedLoad;
      }

      const CCValAssign &ByValVA = ArgLocs[I++];
      RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
      continue;
    }

    CCValAssign &VA = ArgLocs[I++];
    const MVT LocVT = VA.getLocVT();
    const MVT ValVT = VA.getValVT();

    switch (VA.getLocInfo()) {
    default:
      report_fatal_error("Unexpected argument extension type.");
    case CCValAssign::Full:
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc() && !VA.needsCustom()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      continue;
    }

    // Vector arguments passed to VarArg functions need custom handling when
    // they are passed (at least partially) in GPRs.
    if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
      assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
      // Store the value to its stack slot.
      SDValue PtrOff =
          DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
      SDValue Store =
          DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
      MemOpChains.push_back(Store);
      const unsigned OriginalValNo = VA.getValNo();
      // Then load the GPRs from the stack.
      unsigned LoadOffset = 0;
      auto HandleCustomVecRegLoc = [&]() {
        assert(I != E && "Unexpected end of CCValAssigns.");
        assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
               "Expected custom RegLoc.");
        CCValAssign RegVA = ArgLocs[I++];
        assert(RegVA.getValNo() == OriginalValNo &&
               "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
        SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                  DAG.getConstant(LoadOffset, dl, PtrVT));
        SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
        MemOpChains.push_back(Load.getValue(1));
        RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
        LoadOffset += PtrByteSize;
      };

      // In 64-bit there will be exactly 2 custom RegLocs that follow, and
      // in 32-bit there will be 2 custom RegLocs if we are passing in R9
      // and R10.
      HandleCustomVecRegLoc();
      HandleCustomVecRegLoc();

      if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
          ArgLocs[I].getValNo() == OriginalValNo) {
        assert(!IsPPC64 &&
               "Only 2 custom RegLocs expected for 64-bit codegen.");
        HandleCustomVecRegLoc();
        HandleCustomVecRegLoc();
      }

      continue;
    }

    if (VA.isMemLoc()) {
      SDValue PtrOff =
          DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
      MemOpChains.push_back(
          DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));

      continue;
    }

    if (!ValVT.isFloatingPoint())
      report_fatal_error(
          "Unexpected register handling for calling convention.");

    // Custom handling is used for GPR initializations for vararg float
    // arguments.
    assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
           LocVT.isInteger() &&
           "Custom register handling only expected for VarArg.");

    SDValue ArgAsInt =
        DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);

    if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
      // f32 in 32-bit GPR
      // f64 in 64-bit GPR
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
    else if (Arg.getValueType().getFixedSizeInBits() <
             LocVT.getFixedSizeInBits())
      // f32 in 64-bit GPR.
      RegsToPass.push_back(std::make_pair(
          VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
    else {
      // f64 in two 32-bit GPRs
      // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
      assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
             "Unexpected custom register for argument!");
      CCValAssign &GPR1 = VA;
      SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
                                     DAG.getConstant(32, dl, MVT::i8));
      RegsToPass.push_back(std::make_pair(
          GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));

      if (I != E) {
        // If only 1 GPR was available, there will only be one custom GPR and
        // the argument will also pass in memory.
        CCValAssign &PeekArg = ArgLocs[I];
        if (PeekArg.isRegLoc() && PeekArg.getValNo() == VA.getValNo()) {
          assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
          CCValAssign &GPR2 = ArgLocs[I++];
          RegsToPass.push_back(std::make_pair(
              GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
        }
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // For indirect calls, we need to save the TOC base to the stack for
  // restoration after the call.
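  // The TOC save slot is one of the six linkage-area slots described in the
  // comment above; getTOCSaveOffset() is expected to be 20 bytes into the
  // frame on 32-bit AIX and 40 bytes on 64-bit.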
  if (CFlags.IsIndirect) {
    assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
    const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
    const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
    const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
    const unsigned TOCSaveOffset =
        Subtarget.getFrameLowering()->getTOCSaveOffset();

    setUsesTOCBasePtr(DAG);
    SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
    SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
    SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
    Chain = DAG.getStore(
        Val.getValue(1), dl, Val, AddPtr,
        MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InGlue;
  for (auto Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
    InGlue = Chain.getValue(1);
  }

  const int SPDiff = 0;
  return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
                    Callee, SPDiff, NumBytes, Ins, InVals, CB);
}

bool
PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
                                  MachineFunction &MF, bool isVarArg,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(
      Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
                ? RetCC_PPC_Cold
                : RetCC_PPC);
}

SDValue
PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs,
                       (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
                           ? RetCC_PPC_Cold
                           : RetCC_PPC);

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[RealResIdx];

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }
    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
      bool isLittleEndian = Subtarget.isLittleEndian();
      // Legalize ret f64 -> ret 2 x i32.
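      // Under SPE an f64 occupies a single 64-bit GPR, but the return
      // convention uses a pair of 32-bit GPRs; each EXTRACT_SPE below pulls
      // out one 32-bit half in the endian-appropriate order.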
      SDValue SVal =
          DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                      DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
      SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                         DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
      Glue = Chain.getValue(1);
      VA = RVLocs[++i]; // skip ahead to next loc
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
    } else
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue if we have it.
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
}

SDValue
PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc dl(Op);

  // Get the correct type for integers.
  EVT IntVT = Op.getValueType();

  // Get the inputs.
  SDValue Chain = Op.getOperand(0);
  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
  // Build a DYNAREAOFFSET node.
  SDValue Ops[2] = {Chain, FPSIdx};
  SDVTList VTs = DAG.getVTList(IntVT);
  return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
}

SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
                                             SelectionDAG &DAG) const {
  // When we pop the dynamic allocation we need to restore the SP link.
  SDLoc dl(Op);

  // Get the correct type for pointers.
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // Construct the stack pointer operand.
  bool isPPC64 = Subtarget.isPPC64();
  unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
  SDValue StackPtr = DAG.getRegister(SP, PtrVT);

  // Get the operands for the STACKRESTORE.
  SDValue Chain = Op.getOperand(0);
  SDValue SaveSP = Op.getOperand(1);

  // Load the old link SP.
  SDValue LoadLinkSP =
      DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());

  // Restore the stack pointer.
  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);

  // Store the old link SP.
  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
}

SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Get the current return address save index.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int RASI = FI->getReturnAddrSaveIndex();

  // If the return address save index hasn't been defined yet, create it.
  if (!RASI) {
    // Find out the fixed offset of the return address save area.
    int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
    // Allocate the frame index for the return address save area.
    RASI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, LROffset,
                                               /*IsImmutable*/ false);
    // Save the result.
    FI->setReturnAddrSaveIndex(RASI);
  }
  return DAG.getFrameIndex(RASI, PtrVT);
}

SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Get the current frame pointer save index. The users of this index will be
  // primarily DYNALLOC instructions.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int FPSI = FI->getFramePointerSaveIndex();

  // If the frame pointer save index hasn't been defined yet, create it.
  if (!FPSI) {
    // Find out the fixed offset of the frame pointer save area.
    int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
    // Allocate the frame index for the frame pointer save area.
    FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, FPOffset,
                                               /*IsImmutable*/ true);
    // Save the result.
    FI->setFramePointerSaveIndex(FPSI);
  }
  return DAG.getFrameIndex(FPSI, PtrVT);
}

SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                   SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  // Get the inputs.
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  SDLoc dl(Op);

  // Get the correct type for pointers.
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  // Negate the size.
  SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
                                DAG.getConstant(0, dl, PtrVT), Size);
  // Construct a node for the frame pointer save index.
  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
  if (hasInlineStackProbe(MF))
    return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
}

SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
                                             SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();

  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0,
                                               /*IsImmutable*/ false);
  return DAG.getFrameIndex(FI, PtrVT);
}

SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
                     DAG.getVTList(MVT::i32, MVT::Other),
                     Op.getOperand(0), Op.getOperand(1));
}

SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
                     Op.getOperand(0), Op.getOperand(1));
}

SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getValueType().isVector())
    return LowerVectorLoad(Op, DAG);

  assert(Op.getValueType() == MVT::i1 &&
         "Custom lowering only for i1 loads");

  // First, load the 8 bits into a pointer-sized integer, then truncate to
  // 1 bit.

  SDLoc dl(Op);
  LoadSDNode *LD = cast<LoadSDNode>(Op);

  SDValue Chain = LD->getChain();
  SDValue BasePtr = LD->getBasePtr();
  MachineMemOperand *MMO = LD->getMemOperand();

  SDValue NewLD =
      DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()),
                     Chain, BasePtr, MVT::i8, MMO);
  SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);

  SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
  return DAG.getMergeValues(Ops, dl);
}

SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getOperand(1).getValueType().isVector())
    return LowerVectorStore(Op, DAG);

  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
         "Custom lowering only for i1 stores");

  // First, zero extend to a pointer-sized integer, then use a truncating
  // store to 8 bits.

  SDLoc dl(Op);
  StoreSDNode *ST = cast<StoreSDNode>(Op);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  MachineMemOperand *MMO = ST->getMemOperand();

  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
                      Value);
  return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
}

// FIXME: Remove this once the ANDI glue bug is fixed:
SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::i1 &&
         "Custom lowering only for i1 results");

  SDLoc DL(Op);
  return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
}

SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
                                               SelectionDAG &DAG) const {

  // Implements a vector truncate that fits in a vector register as a shuffle.
  // We want to legalize vector truncates down to where the source fits in
  // a vector register (and target is therefore smaller than vector register
  // size). At that point legalization will try to custom lower the sub-legal
  // result and get here - where we can contain the truncate as a single target
  // operation.

  // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
  //   <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
  //
  // We will implement it for big-endian ordering as this (where u denotes
  // undefined):
  //   <MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
  //   <LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
  //
  // The same operation in little-endian ordering will be:
  //   <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
  //   <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>

  EVT TrgVT = Op.getValueType();
  assert(TrgVT.isVector() && "Vector type expected.");
  unsigned TrgNumElts = TrgVT.getVectorNumElements();
  EVT EltVT = TrgVT.getVectorElementType();
  if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
      TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
      !llvm::has_single_bit<uint32_t>(EltVT.getSizeInBits()))
    return SDValue();

  SDValue N1 = Op.getOperand(0);
  EVT SrcVT = N1.getValueType();
  unsigned SrcSize = SrcVT.getSizeInBits();
  if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
      !llvm::has_single_bit<uint32_t>(
          SrcVT.getVectorElementType().getSizeInBits()))
    return SDValue();
  if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
    return SDValue();

  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);

  SDLoc DL(Op);
  SDValue Op1, Op2;
  if (SrcSize == 256) {
    EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
    EVT SplitVT =
        N1.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
    unsigned SplitNumElts = SplitVT.getVectorNumElements();
    Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
                      DAG.getConstant(0, DL, VecIdxTy));
    Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
                      DAG.getConstant(SplitNumElts, DL, VecIdxTy));
  } else {
    Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
    Op2 = DAG.getUNDEF(WideVT);
  }

  // First list the elements we want to keep.
  unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
  SmallVector<int, 16> ShuffV;
  if (Subtarget.isLittleEndian())
    for (unsigned i = 0; i < TrgNumElts; ++i)
      ShuffV.push_back(i * SizeMult);
  else
    for (unsigned i = 1; i <= TrgNumElts; ++i)
      ShuffV.push_back(i * SizeMult - 1);
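  // For example, truncating v8i16 to v8i8 gives SizeMult == 2: little-endian
  // keeps elements {0, 2, 4, ...} of the wide source, while big-endian keeps
  // {1, 3, 5, ...}.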

  // Populate the remaining elements with undefs.
  for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
    // ShuffV.push_back(i + WideNumElts);
    ShuffV.push_back(WideNumElts + 1);

  Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
  Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
  return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
}

/// LowerSELECT_CC - Lower floating point select_cc's into the fsel
/// instruction when possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  EVT ResVT = Op.getValueType();
  EVT CmpVT = Op.getOperand(0).getValueType();
  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
  SDLoc dl(Op);

  // Without power9-vector, we don't have a native instruction for f128
  // comparison. The following transformation to a libcall is needed for setcc:
  //   select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
  if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
    SDValue Z = DAG.getSetCC(
        dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
        LHS, RHS, CC);
    SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
    return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
  }

  // Not FP, or using SPE? Not a fsel.
  if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
      Subtarget.hasSPE())
    return Op;

  SDNodeFlags Flags = Op.getNode()->getFlags();

  // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
  // presence of infinities.
  if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
    switch (CC) {
    default:
      break;
    case ISD::SETOGT:
    case ISD::SETGT:
      return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
    case ISD::SETOLT:
    case ISD::SETLT:
      return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
    }
  }

  // We might be able to do better than this under some circumstances, but in
  // general, fsel-based lowering of select is a finite-math-only optimization.
  // For more information, see section F.3 of the 2.06 ISA specification.
  // With ISA 3.0
  if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
      (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
      ResVT == MVT::f128)
    return Op;

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
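  // Recall the fsel semantics: fsel FRT, FRA, FRC, FRB sets
  //   FRT = (FRA >= 0.0) ? FRC : FRB,
  // so every condition below is rewritten as a sign test on LHS or on the
  // difference LHS - RHS.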
  SDValue Sel1;
  if (isFloatingPointZero(RHS))
    switch (CC) {
    default: break; // SETUO etc aren't handled by fsel.
    case ISD::SETNE:
      std::swap(TV, FV);
      [[fallthrough]];
    case ISD::SETEQ:
      if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
      if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
        Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
    case ISD::SETULT:
    case ISD::SETLT:
      std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
      [[fallthrough]];
    case ISD::SETOGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
    case ISD::SETUGT:
    case ISD::SETGT:
      std::swap(TV, FV); // fsel is natively setge, swap operands for setgt
      [[fallthrough]];
    case ISD::SETOLE:
    case ISD::SETLE:
      if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
    }

  SDValue Cmp;
  switch (CC) {
  default: break; // SETUO etc aren't handled by fsel.
  case ISD::SETNE:
    std::swap(TV, FV);
    [[fallthrough]];
  case ISD::SETEQ:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
    if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                       DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
  case ISD::SETULT:
  case ISD::SETLT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOLE:
  case ISD::SETLE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  }
  return Op;
}

static unsigned getPPCStrictOpcode(unsigned Opc) {
  switch (Opc) {
  default:
    llvm_unreachable("No strict version of this opcode!");
  case PPCISD::FCTIDZ:
    return PPCISD::STRICT_FCTIDZ;
  case PPCISD::FCTIWZ:
    return PPCISD::STRICT_FCTIWZ;
  case PPCISD::FCTIDUZ:
    return PPCISD::STRICT_FCTIDUZ;
  case PPCISD::FCTIWUZ:
    return PPCISD::STRICT_FCTIWUZ;
  case PPCISD::FCFID:
    return PPCISD::STRICT_FCFID;
  case PPCISD::FCFIDU:
    return PPCISD::STRICT_FCFIDU;
  case PPCISD::FCFIDS:
    return PPCISD::STRICT_FCFIDS;
  case PPCISD::FCFIDUS:
    return PPCISD::STRICT_FCFIDUS;
  }
}

static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
                              const PPCSubtarget &Subtarget) {
  SDLoc dl(Op);
  bool IsStrict = Op->isStrictFPOpcode();
  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  // For strict nodes, the source is the second operand.
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
  MVT DestTy = Op.getSimpleValueType();
  assert(Src.getValueType().isFloatingPoint() &&
         (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
          DestTy == MVT::i64) &&
         "Invalid FP_TO_INT types");
  if (Src.getValueType() == MVT::f32) {
    if (IsStrict) {
      Src =
          DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
                      DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
      Chain = Src.getValue(1);
    } else
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
  }
  if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
    DestTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
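  // The FCTI* nodes chosen below correspond to the fctiwz/fctiwuz/fctidz/
  // fctiduz instructions: FP-to-integer conversions that round toward zero,
  // matching C's truncating conversion semantics.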
  unsigned Opc = ISD::DELETED_NODE;
  switch (DestTy.SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    Opc = IsSigned ? PPCISD::FCTIWZ
                   : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
    break;
  case MVT::i64:
    assert((IsSigned || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
  }
  EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
  SDValue Conv;
  if (IsStrict) {
    Opc = getPPCStrictOpcode(Opc);
    Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other),
                       {Chain, Src}, Flags);
  } else {
    Conv = DAG.getNode(Opc, dl, ConvTy, Src);
  }
  return Conv;
}

void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                               SelectionDAG &DAG,
                                               const SDLoc &dl) const {
  SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
  bool IsStrict = Op->isStrictFPOpcode();

  // Convert the FP value to an int value through memory.
  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
                  (IsSigned || Subtarget.hasFPCVT());
  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
  MachinePointerInfo MPI =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);

  // Emit a store to the stack slot.
  SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
  Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
  if (i32Stack) {
    MachineFunction &MF = DAG.getMachineFunction();
    Alignment = Align(4);
    MachineMemOperand *MMO =
        MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
    SDValue Ops[] = { Chain, Tmp, FIPtr };
    Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
                                    DAG.getVTList(MVT::Other), Ops, MVT::i32,
                                    MMO);
  } else
    Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);

  // Result is a load from the stack slot. If loading 4 bytes, make sure to
  // add in a bias on big endian.
  if (Op.getValueType() == MVT::i32 && !i32Stack) {
    FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
                        DAG.getConstant(4, dl, FIPtr.getValueType()));
    MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
  }

  RLI.Chain = Chain;
  RLI.Ptr = FIPtr;
  RLI.MPI = MPI;
  RLI.Alignment = Alignment;
}

/// Custom lowers floating point to integer conversions to use
/// the direct move instructions available in ISA 2.07 to avoid the
/// need for load/store combinations.
SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
                                                    SelectionDAG &DAG,
                                                    const SDLoc &dl) const {
  SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
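  // MFVSR stands for the ISA 2.07 direct-move instructions (mfvsrd/mfvsrwz),
  // which copy the converted integer from a VSR to a GPR and so avoid the
  // store/reload round trip used by LowerFP_TO_INTForReuse.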
8360 | SDValue Mov = DAG.getNode(Opcode: PPCISD::MFVSR, DL: dl, VT: Op.getValueType(), Operand: Conv); |
8361 | if (Op->isStrictFPOpcode()) |
8362 | return DAG.getMergeValues(Ops: {Mov, Conv.getValue(R: 1)}, dl); |
8363 | else |
8364 | return Mov; |
8365 | } |
8366 | |
8367 | SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, |
8368 | const SDLoc &dl) const { |
8369 | bool IsStrict = Op->isStrictFPOpcode(); |
8370 | bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT || |
8371 | Op.getOpcode() == ISD::STRICT_FP_TO_SINT; |
8372 | SDValue Src = Op.getOperand(i: IsStrict ? 1 : 0); |
8373 | EVT SrcVT = Src.getValueType(); |
8374 | EVT DstVT = Op.getValueType(); |
8375 | |
8376 | // FP to INT conversions are legal for f128. |
8377 | if (SrcVT == MVT::f128) |
8378 | return Subtarget.hasP9Vector() ? Op : SDValue(); |
8379 | |
8380 | // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on |
8381 | // PPC (the libcall is not available). |
8382 | if (SrcVT == MVT::ppcf128) { |
8383 | if (DstVT == MVT::i32) { |
8384 | // TODO: Conservatively pass only nofpexcept flag here. Need to check and |
8385 | // set other fast-math flags to FP operations in both strict and |
8386 | // non-strict cases. (FP_TO_SINT, FSUB) |
8387 | SDNodeFlags Flags; |
8388 | Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept()); |
8389 | |
8390 | if (IsSigned) { |
8391 | SDValue Lo, Hi; |
8392 | std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64); |
8393 | |
8394 | // Add the two halves of the long double in round-to-zero mode, and use |
8395 | // a smaller FP_TO_SINT. |
8396 | if (IsStrict) { |
8397 | SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl, |
8398 | DAG.getVTList(MVT::f64, MVT::Other), |
8399 | {Op.getOperand(0), Lo, Hi}, Flags); |
8400 | return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, |
8401 | DAG.getVTList(MVT::i32, MVT::Other), |
8402 | {Res.getValue(1), Res}, Flags); |
8403 | } else { |
8404 | SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); |
8405 | return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res); |
8406 | } |
8407 | } else { |
8408 | const uint64_t TwoE31[] = {0x41e0000000000000LL, 0}; |
8409 | APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31)); |
8410 | SDValue Cst = DAG.getConstantFP(Val: APF, DL: dl, VT: SrcVT); |
8411 | SDValue SignMask = DAG.getConstant(Val: 0x80000000, DL: dl, VT: DstVT); |
8412 | if (IsStrict) { |
8413 | // Sel = Src < 0x80000000 |
8414 | // FltOfs = select Sel, 0.0, 0x80000000 |
8415 | // IntOfs = select Sel, 0, 0x80000000 |
8416 | // Result = fp_to_sint(Src - FltOfs) ^ IntOfs |
8417 | SDValue Chain = Op.getOperand(i: 0); |
8418 | EVT SetCCVT = |
8419 | getSetCCResultType(DL: DAG.getDataLayout(), C&: *DAG.getContext(), VT: SrcVT); |
8420 | EVT DstSetCCVT = |
8421 | getSetCCResultType(DL: DAG.getDataLayout(), C&: *DAG.getContext(), VT: DstVT); |
8422 | SDValue Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT, |
8423 | Chain, IsSignaling: true); |
8424 | Chain = Sel.getValue(R: 1); |
8425 | |
8426 | SDValue FltOfs = DAG.getSelect( |
8427 | DL: dl, VT: SrcVT, Cond: Sel, LHS: DAG.getConstantFP(Val: 0.0, DL: dl, VT: SrcVT), RHS: Cst); |
8428 | Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT); |
8429 | |
          SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
                                    DAG.getVTList(SrcVT, MVT::Other),
                                    {Chain, Src, FltOfs}, Flags);
          Chain = Val.getValue(1);
          SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
                                     DAG.getVTList(DstVT, MVT::Other),
                                     {Chain, Val}, Flags);
          Chain = SInt.getValue(1);
          SDValue IntOfs = DAG.getSelect(
              dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
          SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
          return DAG.getMergeValues({Result, Chain}, dl);
        } else {
          // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
          // FIXME: generated code sucks.
          SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
          True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
          True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
          SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
          return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
        }
      }
    }

    return SDValue();
  }

  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
    return LowerFP_TO_INTDirectMove(Op, DAG, dl);

  ReuseLoadInfo RLI;
  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                     RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
}

// We're trying to insert a regular store, S, and then a load, L. If the
// incoming value, O, is a load, we might just be able to have our load use the
// address used by O. However, we don't know if anything else will store to
// that address before we can load from it. To prevent this situation, we need
// to insert our load, L, into the chain as a peer of O. To do this, we give L
// the same chain operand as O, we create a token factor from the chain results
// of O and L, and we replace all uses of O's chain result with that token
// factor (see spliceIntoChain below for this last part).
bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
                                            ReuseLoadInfo &RLI,
                                            SelectionDAG &DAG,
                                            ISD::LoadExtType ET) const {
  // Conservatively skip reusing for constrained FP nodes.
  if (Op->isStrictFPOpcode())
    return false;

  SDLoc dl(Op);
  bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
                       (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
  if (ET == ISD::NON_EXTLOAD &&
      (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
      isOperationLegalOrCustom(Op.getOpcode(),
                               Op.getOperand(0).getValueType())) {

    LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
    return true;
  }

  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
      LD->isNonTemporal())
    return false;
  if (LD->getMemoryVT() != MemVT)
    return false;

  // If the result of the load is an illegal type, then we can't build a
  // valid chain for reuse, since the legalised loads and the token factor
  // node that ties them together use a different output chain than the
  // illegal load.
  if (!isTypeLegal(LD->getValueType(0)))
    return false;

  RLI.Ptr = LD->getBasePtr();
  if (LD->isIndexed() && !LD->getOffset().isUndef()) {
    assert(LD->getAddressingMode() == ISD::PRE_INC &&
           "Non-pre-inc AM on PPC?");
    RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
                          LD->getOffset());
  }

  RLI.Chain = LD->getChain();
  RLI.MPI = LD->getPointerInfo();
  RLI.IsDereferenceable = LD->isDereferenceable();
  RLI.IsInvariant = LD->isInvariant();
  RLI.Alignment = LD->getAlign();
  RLI.AAInfo = LD->getAAInfo();
  RLI.Ranges = LD->getRanges();

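  // The chain result of a load is value #1 for a normal load and value #2 for
  // a pre-inc load (value #1 is the incremented pointer in that case).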
  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
  return true;
}

// Given the head of the old chain, ResChain, insert a token factor containing
// it and NewResChain, and make users of ResChain now be users of that token
// factor.
// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
                                        SDValue NewResChain,
                                        SelectionDAG &DAG) const {
  if (!ResChain)
    return;

  SDLoc dl(NewResChain);

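  // Build the token factor with a placeholder undef operand first and only
  // patch ResChain in after the RAUW below; if the token factor already used
  // ResChain, replacing all uses of ResChain would rewrite the token factor's
  // own operand and create a cycle.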
  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                           NewResChain, DAG.getUNDEF(MVT::Other));
  assert(TF.getNode() != NewResChain.getNode() &&
         "A new TF really is required here");

  DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
  DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
}

/// Analyze the profitability of a direct move: prefer a float load over an
/// integer load plus a direct move when the loaded value has no integer uses.
bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
  SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
  if (Origin->getOpcode() != ISD::LOAD)
    return true;

  // If there is no LXSIBZX/LXSIHZX, as on Power8,
  // prefer direct move if the memory size is 1 or 2 bytes.
  MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
  if (!Subtarget.hasP9Vector() &&
      (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
    return true;

  for (SDNode::use_iterator UI = Origin->use_begin(),
                            UE = Origin->use_end();
       UI != UE; ++UI) {

    // Only look at the users of the loaded value.
    if (UI.getUse().get().getResNo() != 0)
      continue;

    if (UI->getOpcode() != ISD::SINT_TO_FP &&
        UI->getOpcode() != ISD::UINT_TO_FP &&
        UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
        UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
      return true;
  }

  return false;
}

static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
                              const PPCSubtarget &Subtarget,
                              SDValue Chain = SDValue()) {
  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
                  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
  SDLoc dl(Op);

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
  unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
                              : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
  EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
  if (Op->isStrictFPOpcode()) {
    if (!Chain)
      Chain = Op.getOperand(0);
    return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
                       DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
  } else
    return DAG.getNode(ConvOpc, dl, ConvTy, Src);
}

/// Custom lowers integer to floating point conversions to use
/// the direct move instructions available in ISA 2.07 to avoid the
/// need for load/store combinations.
SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
                                                    SelectionDAG &DAG,
                                                    const SDLoc &dl) const {
  assert((Op.getValueType() == MVT::f32 ||
          Op.getValueType() == MVT::f64) &&
         "Invalid floating point type as target of conversion");
  assert(Subtarget.hasFPCVT() &&
         "Int to FP conversions with direct moves require FPCVT");
  SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
  bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
  bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
                Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
  unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
  SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
  return convertIntToFP(Op, Mov, DAG, Subtarget);
}

static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {

  EVT VecVT = Vec.getValueType();
  assert(VecVT.isVector() && "Expected a vector type.");
  assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");

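  // Widen to a full 128-bit vector by concatenating Vec with however many
  // undef copies of its own type are needed to fill the remaining lanes.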
  EVT EltVT = VecVT.getVectorElementType();
  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);

  unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
  SmallVector<SDValue, 16> Ops(NumConcat);
  Ops[0] = Vec;
  SDValue UndefVec = DAG.getUNDEF(VecVT);
  for (unsigned i = 1; i < NumConcat; ++i)
    Ops[i] = UndefVec;

  return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
}

SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
                                                const SDLoc &dl) const {
  bool IsStrict = Op->isStrictFPOpcode();
  unsigned Opc = Op.getOpcode();
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
          Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
         "Unexpected conversion type");
  assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
         "Supports conversions to v2f64/v4f32 only.");

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
  bool FourEltRes = Op.getValueType() == MVT::v4f32;

  SDValue Wide = widenVec(DAG, Src, dl);
  EVT WideVT = Wide.getValueType();
  unsigned WideNumElts = WideVT.getVectorNumElements();
  MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;

  SmallVector<int, 16> ShuffV;
  for (unsigned i = 0; i < WideNumElts; ++i)
    ShuffV.push_back(i + WideNumElts);

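  // Every lane starts out taken from the second shuffle operand (zero for
  // unsigned conversions, undef for signed ones); the loops below then drop
  // the real source elements into one lane per group of Stride lanes: the low
  // lane on little-endian targets and the high lane on big-endian ones,
  // matching the byte layout of the wider integer elements.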
  int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
  int SaveElts = FourEltRes ? 4 : 2;
  if (Subtarget.isLittleEndian())
    for (int i = 0; i < SaveElts; i++)
      ShuffV[i * Stride] = i;
  else
    for (int i = 1; i <= SaveElts; i++)
      ShuffV[i * Stride - 1] = i - 1;

  SDValue ShuffleSrc2 =
      SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
  SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);

  SDValue Extend;
  if (SignedConv) {
    Arrange = DAG.getBitcast(IntermediateVT, Arrange);
    EVT ExtVT = Src.getValueType();
    if (Subtarget.hasP9Altivec())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               WideVT.getVectorElementType(),
                               IntermediateVT.getVectorNumElements());

    Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
                         DAG.getValueType(ExtVT));
  } else
    Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);

  if (IsStrict)
    return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
                       {Op.getOperand(0), Extend}, Flags);

  return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
}

SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
                  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();

  // TODO: Any other flags to propagate?
  SDNodeFlags Flags;
  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());

  EVT InVT = Src.getValueType();
  EVT OutVT = Op.getValueType();
  if (OutVT.isVector() && OutVT.isFloatingPoint() &&
      isOperationCustom(Op.getOpcode(), InVT))
    return LowerINT_TO_FPVector(Op, DAG, dl);

  // Conversions to f128 are legal.
  if (Op.getValueType() == MVT::f128)
    return Subtarget.hasP9Vector() ? Op : SDValue();

  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();

  if (Src.getValueType() == MVT::i1) {
    SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
                              DAG.getConstantFP(1.0, dl, Op.getValueType()),
                              DAG.getConstantFP(0.0, dl, Op.getValueType()));
    if (IsStrict)
      return DAG.getMergeValues({Sel, Chain}, dl);
    else
      return Sel;
  }

  // If we have direct moves, we can do the whole conversion and skip the
  // store/load; however, without FPCVT we can't do most conversions.
  if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
      Subtarget.isPPC64() && Subtarget.hasFPCVT())
    return LowerINT_TO_FPDirectMove(Op, DAG, dl);

  assert((IsSigned || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  if (Src.getValueType() == MVT::i64) {
    SDValue SINT = Src;
    // When converting to single-precision, we actually need to convert
    // to double-precision first and then round to single-precision.
    // To avoid double-rounding effects during that operation, we have
    // to prepare the input operand. Bits that might be truncated when
    // converting to double-precision are replaced by a bit that won't
    // be lost at this stage, but is below the single-precision rounding
    // position.
    //
    // However, if -enable-unsafe-fp-math is in effect, accept double
    // rounding to avoid the extra overhead.
    if (Op.getValueType() == MVT::f32 &&
        !Subtarget.hasFPCVT() &&
        !DAG.getTarget().Options.UnsafeFPMath) {

      // Twiddle input to make sure the low 11 bits are zero. (If this
      // is the case, we are guaranteed the value will fit into the 53 bit
      // mantissa of an IEEE double-precision value without rounding.)
      // If any of those low 11 bits were not zero originally, make sure
      // bit 12 (value 2048) is set instead, so that the final rounding
      // to single-precision gets the correct result.
      SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                                  SINT, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
                          Round, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
      Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                          Round, DAG.getConstant(-2048, dl, MVT::i64));

      // However, we cannot use that value unconditionally: if the magnitude
      // of the input value is small, the bit-twiddling we did above might
      // end up visibly changing the output. Fortunately, in that case, we
      // don't need to twiddle bits since the original input will convert
      // exactly to double-precision floating-point already. Therefore,
      // construct a conditional to use the original value if the top 11
      // bits are all sign-bit copies, and use the rounded value computed
      // above otherwise.
      SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
                                 SINT, DAG.getConstant(53, dl, MVT::i32));
      Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
                         Cond, DAG.getConstant(1, dl, MVT::i64));
      Cond = DAG.getSetCC(
          dl,
          getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
          Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);

      SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
    }

    ReuseLoadInfo RLI;
    SDValue Bits;

    MachineFunction &MF = DAG.getMachineFunction();
    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
      Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                         RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasLFIWAX() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
      MachineMemOperand *MMO =
          MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasFPCVT() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
      MachineMemOperand *MMO =
          MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (((Subtarget.hasLFIWAX() &&
                 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
                (Subtarget.hasFPCVT() &&
                 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
               SINT.getOperand(0).getValueType() == MVT::i32) {
      MachineFrameInfo &MFI = MF.getFrameInfo();
      EVT PtrVT = getPointerTy(DAG.getDataLayout());

      int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
                                   MachinePointerInfo::getFixedStack(
                                       DAG.getMachineFunction(), FrameIdx));
      Chain = Store;

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Chain;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
                                            FrameIdx);
      RLI.Alignment = Align(4);

      MachineMemOperand *MMO =
          MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
                                     PPCISD::LFIWZX : PPCISD::LFIWAX,
                                     dl, DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      Chain = Bits.getValue(1);
    } else
      Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);

    SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
    if (IsStrict)
      Chain = FP.getValue(1);

    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      if (IsStrict)
        FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
                         DAG.getVTList(MVT::f32, MVT::Other),
                         {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
      else
        FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                         DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
    }
    return FP;
  }

  assert(Src.getValueType() == MVT::i32 &&
         "Unhandled INT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers. In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack,
  // then lfd it and fcfid it.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDValue Ld;
  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
    ReuseLoadInfo RLI;
    bool ReusingLoad;
    if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
      int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
                                   MachinePointerInfo::getFixedStack(
                                       DAG.getMachineFunction(), FrameIdx));
      Chain = Store;

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Chain;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
                                            FrameIdx);
      RLI.Alignment = Align(4);
    }

    MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
    SDValue Ops[] = { RLI.Chain, RLI.Ptr };
    Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX,
                                 dl, DAG.getVTList(MVT::f64, MVT::Other), Ops,
                                 MVT::i32, MMO);
    Chain = Ld.getValue(1);
    if (ReusingLoad)
      spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
  } else {
    assert(Subtarget.isPPC64() &&
           "i32->FP without LFIWAX supported only on PPC64");

    int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);

    // STD the extended value into the stack slot.
    SDValue Store = DAG.getStore(
        Chain, dl, Ext64, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
    Chain = Store;

    // Load the value as a double.
    Ld = DAG.getLoad(
        MVT::f64, dl, Chain, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
    Chain = Ld.getValue(1);
  }

  // FCFID it and return it.
  SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
  if (IsStrict)
    Chain = FP.getValue(1);
  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
    if (IsStrict)
      FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
                       DAG.getVTList(MVT::f32, MVT::Other),
                       {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
    else
      FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                       DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
  }
  return FP;
}

SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding mode is in bits 30:31 of FPSCR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

  GET_ROUNDING, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */

  MachineFunction &MF = DAG.getMachineFunction();
  EVT VT = Op.getValueType();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Save FP Control Word to register
  SDValue Chain = Op.getOperand(0);
  SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
  Chain = MFFS.getValue(1);

  SDValue CWD;
  if (isTypeLegal(MVT::i64)) {
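    // The FPSCR image occupies the low 32 bits of the f64 MFFS result, and
    // the RN field (bits 30:31 in IBM numbering) is its two least significant
    // bits, so a bitcast plus truncate is all that's needed here.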
    CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
                      DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
  } else {
    // Save FP register to stack slot
    int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
    SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
    Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());

    // Load FP Control Word from low 32 bits of stack slot.
    assert(hasBigEndianPartOrdering(MVT::i64, MF.getDataLayout()) &&
           "Stack slot adjustment is valid only on big endian subtargets!");
    SDValue Four = DAG.getConstant(4, dl, PtrVT);
    SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
    CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
    Chain = CWD.getValue(1);
  }

  // Transform as necessary
  SDValue CWD1 =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                CWD, DAG.getConstant(3, dl, MVT::i32));
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, dl, MVT::i32,
                DAG.getNode(ISD::AND, dl, MVT::i32,
                            DAG.getNode(ISD::XOR, dl, MVT::i32,
                                        CWD, DAG.getConstant(3, dl, MVT::i32)),
                            DAG.getConstant(3, dl, MVT::i32)),
                DAG.getConstant(1, dl, MVT::i32));

  SDValue RetVal =
    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);

  RetVal =
      DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
                  dl, VT, RetVal);

  return DAG.getMergeValues({RetVal, Chain}, dl);
}

SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  SDLoc dl(Op);
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SHL!");

  // Expand into a bunch of logical ops. Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

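  // OutHi = (Hi << Amt) | (Lo >> (BW - Amt)) | (Lo << (Amt - BW)).
  // PPCISD::SHL/SRL model the slw/srw family, which produce zero once the
  // (unsigned) shift amount reaches BitWidth, so for any Amt in [0, 2*BW) the
  // terms that don't apply simply vanish rather than being undefined.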
  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}

SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRL!");

  // Expand into a bunch of logical ops. Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}

SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRA!");

  // Expand into a bunch of logical ops, followed by a select_cc.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
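  // Tmp5 = Amt - BitWidth. For Amt <= BitWidth the ORed pair Tmp4 is already
  // the low result (at Amt == BitWidth it degenerates to Hi); for larger Amt
  // the low word must be Hi >> (Amt - BW) with sign fill, which an OR of
  // zero-filling shifts cannot produce, hence the select_cc below.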
  SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
  SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
                                  Tmp4, Tmp6, ISD::SETLE);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}

SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();

  bool IsFSHL = Op.getOpcode() == ISD::FSHL;
  SDValue X = Op.getOperand(0);
  SDValue Y = Op.getOperand(1);
  SDValue Z = Op.getOperand(2);
  EVT AmtVT = Z.getValueType();

  // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
  // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
  // This is simpler than TargetLowering::expandFunnelShift because we can rely
  // on PowerPC shift by BW being well defined.
  Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
                  DAG.getConstant(BitWidth - 1, dl, AmtVT));
  SDValue SubZ =
      DAG.getNode(ISD::SUB, dl, AmtVT,
                  DAG.getConstant(BitWidth, dl, AmtVT), Z);
  X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
  Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
  return DAG.getNode(ISD::OR, dl, VT, X, Y);
}

//===----------------------------------------------------------------------===//
// Vector related lowering.
//

/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
/// element size of SplatSize. Cast the result to VT.
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
                                      SelectionDAG &DAG, const SDLoc &dl) {
  static const MVT VTys[] = { // canonical VT to use for each size.
    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
  };

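  // SplatSize is 1, 2, or 4 bytes; VTys is indexed by SplatSize - 1, with the
  // MVT::Other placeholder at index 2 since a 3-byte splat is impossible.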
  EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];

  // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
  if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
    SplatSize = 1;
    Val = 0xFF;
  }

  EVT CanonicalVT = VTys[SplatSize-1];

  // Build a canonical splat for this value.
  return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
}

/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
                                const SDLoc &dl, EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), Op);
}

/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
                                SelectionDAG &DAG, const SDLoc &dl,
                                EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
}

/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
                                SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
                                EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
}

/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
/// amount. The result has the specified value type.
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
                           SelectionDAG &DAG, const SDLoc &dl) {
  // Force LHS/RHS to be the right type.
  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);

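  // vsldoi takes the high 16 bytes of the 32-byte concatenation LHS||RHS
  // after shifting it left by Amt bytes, so result byte i is byte i + Amt of
  // the concatenation; that is exactly the mask <Amt, Amt+1, ..., Amt+15>.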
  int Ops[16];
  for (unsigned i = 0; i != 16; ++i)
    Ops[i] = i + Amt;
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
}

/// Do we have an efficient pattern in a .td file for this node?
///
/// \param V - pointer to the BuildVectorSDNode being matched
/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
///
/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
/// the opposite is true (expansion is beneficial) are:
/// - The node builds a vector out of integers that are not 32 or 64-bits
/// - The node builds a vector out of constants
/// - The node is a "load-and-splat"
/// In all other cases, we will choose to keep the BUILD_VECTOR.
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
                                            bool HasDirectMove,
                                            bool HasP8Vector) {
  EVT VecVT = V->getValueType(0);
  bool RightType = VecVT == MVT::v2f64 ||
    (HasP8Vector && VecVT == MVT::v4f32) ||
    (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
  if (!RightType)
    return false;

  bool IsSplat = true;
  bool IsLoad = false;
  SDValue Op0 = V->getOperand(0);

  // This function is called in a block that confirms the node is not a
  // constant splat. So a constant BUILD_VECTOR here means the vector is
  // built out of different constants.
  if (V->isConstant())
    return false;
  for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
    if (V->getOperand(i).isUndef())
      return false;
    // We want to expand nodes that represent load-and-splat even if the
    // loaded value is a floating point truncation or conversion to int.
    if (V->getOperand(i).getOpcode() == ISD::LOAD ||
        (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
        (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
        (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
      IsLoad = true;
    // If the operands are different or the input is not a load and has more
    // uses than just this BV node, then it isn't a splat.
    if (V->getOperand(i) != Op0 ||
        (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
      IsSplat = false;
  }
  return !(IsSplat && IsLoad);
}

// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {

  SDLoc dl(Op);
  SDValue Op0 = Op->getOperand(0);

  if ((Op.getValueType() != MVT::f128) ||
      (Op0.getOpcode() != ISD::BUILD_PAIR) ||
      (Op0.getOperand(0).getValueType() != MVT::i64) ||
      (Op0.getOperand(1).getValueType() != MVT::i64))
    return SDValue();

  return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
                     Op0.getOperand(1));
}

static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
  const SDValue *InputLoad = &Op;
  while (InputLoad->getOpcode() == ISD::BITCAST)
    InputLoad = &InputLoad->getOperand(0);
  if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
      InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
    IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
    InputLoad = &InputLoad->getOperand(0);
  }
  if (InputLoad->getOpcode() != ISD::LOAD)
    return nullptr;
  LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
  return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
}

// Convert the argument APFloat to a single precision APFloat if there is no
// loss in information during the conversion to single precision APFloat and
// the resulting number is not a denormal number. Return true if successful.
bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
  APFloat APFloatToConvert = ArgAPFloat;
  bool LosesInfo = true;
  APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
                           &LosesInfo);
  bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
  if (Success)
    ArgAPFloat = APFloatToConvert;
  return Success;
}

// Bitcast the argument APInt to a double and convert it to a single precision
// APFloat, bitcast the APFloat to an APInt and assign it to the original
// argument if there is no loss in information during the conversion from
// double to single precision APFloat and the resulting number is not a
// denormal number. Return true if successful.
bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
  double DpValue = ArgAPInt.bitsToDouble();
  APFloat APFloatDp(DpValue);
  bool Success = convertToNonDenormSingle(APFloatDp);
  if (Success)
    ArgAPInt = APFloatDp.bitcastToAPInt();
  return Success;
}

// Nondestructive check for convertToNonDenormSingle.
bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
  // Only convert if it loses info, since XXSPLTIDP should
  // handle the other case.
  APFloat APFloatToConvert = ArgAPFloat;
  bool LosesInfo = true;
  APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
                           &LosesInfo);

  return (!LosesInfo && !APFloatToConvert.isDenormal());
}

static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
                             unsigned &Opcode) {
  LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
  if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
    return false;

  EVT Ty = Op->getValueType(0);
  // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
  // as we cannot handle extending loads for these types.
  if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
      ISD::isNON_EXTLoad(InputNode))
    return true;

  EVT MemVT = InputNode->getMemoryVT();
  // For v8i16 and v16i8 types, extending loads can be handled as long as the
  // memory VT is the same as the vector element VT.
  // The loads feeding into the v8i16 and v16i8 types will be extending because
  // scalar i8/i16 are not legal types.
  if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
      (MemVT == Ty.getVectorElementType()))
    return true;

  if (Ty == MVT::v2i64) {
    // Check the extend type, when the input type is i32, and the output vector
    // type is v2i64.
    if (MemVT == MVT::i32) {
      if (ISD::isZEXTLoad(InputNode))
        Opcode = PPCISD::ZEXT_LD_SPLAT;
      if (ISD::isSEXTLoad(InputNode))
        Opcode = PPCISD::SEXT_LD_SPLAT;
    }
    return true;
  }
  return false;
}

// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc dl(Op);
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");

  // Check if this is a splat of a constant value.
  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  bool BVNIsConstantSplat =
      BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                           HasAnyUndefs, 0, !Subtarget.isLittleEndian());

  // If it is a splat of a double, check if we can shrink it to a 32-bit
  // non-denormal float which when converted back to double gives us the same
  // double. This is to exploit the XXSPLTIDP instruction.
  // If we lose precision, we use XXSPLTI32DX.
  if (BVNIsConstantSplat && (SplatBitSize == 64) &&
      Subtarget.hasPrefixInstrs()) {
    // Check the type first to short-circuit so we don't modify APSplatBits if
    // this block isn't executed.
    if ((Op->getValueType(0) == MVT::v2f64) &&
        convertToNonDenormSingle(APSplatBits)) {
      SDValue SplatNode = DAG.getNode(
          PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
          DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
      return DAG.getBitcast(Op.getValueType(), SplatNode);
    } else {
      // We may lose precision, so we have to use XXSPLTI32DX.

      uint32_t Hi =
          (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
      uint32_t Lo =
          (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
      SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);

      if (!Hi || !Lo)
        // If either half is 0, then we should generate XXLXOR to set it to 0.
        SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);

      if (Hi)
        SplatNode = DAG.getNode(
            PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
            DAG.getTargetConstant(0, dl, MVT::i32),
            DAG.getTargetConstant(Hi, dl, MVT::i32));

      if (Lo)
        SplatNode =
            DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
                        DAG.getTargetConstant(1, dl, MVT::i32),
                        DAG.getTargetConstant(Lo, dl, MVT::i32));

      return DAG.getBitcast(Op.getValueType(), SplatNode);
    }
  }

  if (!BVNIsConstantSplat || SplatBitSize > 32) {
    unsigned NewOpcode = PPCISD::LD_SPLAT;

    // Handle load-and-splat patterns as we have instructions that will do this
    // in one go.
    if (DAG.isSplatValue(Op, true) &&
        isValidSplatLoad(Subtarget, Op, NewOpcode)) {
      const SDValue *InputLoad = &Op.getOperand(0);
      LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);

      // If the input load is an extending load, it will be an i32 -> i64
      // extending load and isValidSplatLoad() will update NewOpcode.
      unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
      unsigned ElementSize =
          MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);

      assert(((ElementSize == 2 * MemorySize)
                  ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
                     NewOpcode == PPCISD::SEXT_LD_SPLAT)
                  : (NewOpcode == PPCISD::LD_SPLAT)) &&
             "Unmatched element size and opcode!\n");

      // Checking for a single use of this load, we have to check for vector
      // width (128 bits) / ElementSize uses (since each operand of the
      // BUILD_VECTOR is a separate use of the value).
      unsigned NumUsesOfInputLD = 128 / ElementSize;
      for (SDValue BVInOp : Op->ops())
        if (BVInOp.isUndef())
          NumUsesOfInputLD--;

      // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
      // The cases below should also apply to "lfiwzx/lfiwax + LE target +
      // index 1", "lxvrhx + BE target + index 7" and "lxvrbx + BE target +
      // index 15", but isValidSplatLoad() currently only returns true when
      // the data is at index 0, so we will not get into trouble for these
      // cases.
      //
      // case 1 - lfiwzx/lfiwax
      // 1.1: load result is i32 and is sign/zero extended to i64;
      // 1.2: build a v2i64 vector type with above loaded value;
      // 1.3: the vector has only one value at index 0, others are all undef;
      // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
      if (NumUsesOfInputLD == 1 &&
          (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
           !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
           Subtarget.hasLFIWAX()))
        return SDValue();

      // case 2 - lxvr[hb]x
      // 2.1: load result is at most i16;
      // 2.2: build a vector with above loaded value;
      // 2.3: the vector has only one value at index 0, others are all undef;
      // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
      if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
          Subtarget.isISA3_1() && ElementSize <= 16)
        return SDValue();

      assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
      if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
          Subtarget.hasVSX()) {
        SDValue Ops[] = {
          LD->getChain(),    // Chain
          LD->getBasePtr(),  // Ptr
          DAG.getValueType(Op.getValueType())  // VT
        };
        SDValue LdSplt = DAG.getMemIntrinsicNode(
            NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
            LD->getMemoryVT(), LD->getMemOperand());
        // Replace all uses of the output chain of the original load with the
        // output chain of the new load.
        DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
                                      LdSplt.getValue(1));
        return LdSplt;
      }
    }

    // In 64-bit mode, BUILD_VECTOR nodes that are not constant splats of up
    // to 32 bits can be lowered to VSX instructions under certain conditions.
    // Without VSX, there is no pattern more efficient than expanding the node.
    if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
        haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
                                        Subtarget.hasP8Vector()))
      return Op;
    return SDValue();
  }

  uint64_t SplatBits = APSplatBits.getZExtValue();
  uint64_t SplatUndef = APSplatUndef.getZExtValue();
  unsigned SplatSize = SplatBitSize / 8;

  // First, handle single instruction cases.

  // All zeros?
  if (SplatBits == 0) {
    // Canonicalize all zero vectors to be v4i32.
    if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
      SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
      Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
    }
    return Op;
  }

  // We have XXSPLTIW for constant splats four bytes wide.
  // Given vector length is a multiple of 4, 2-byte splats can be replaced
  // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
  // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
  // turned into a 4-byte splat of 0xABABABAB.
  if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
    return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
                                  Op.getValueType(), DAG, dl);

  if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
    return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
                                  dl);

  // We have XXSPLTIB for constant splats one byte wide.
  if (Subtarget.hasP9Vector() && SplatSize == 1)
    return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
                                  dl);

  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
  int32_t SextVal = (int32_t(SplatBits << (32-SplatBitSize)) >>
                     (32-SplatBitSize));
  if (SextVal >= -16 && SextVal <= 15)
    return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
                                  dl);

  // Two instruction sequences.

  // If this value is in the range [-32,30] and is even, use:
  //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
  // If this value is in the range [17,31] and is odd, use:
  //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
  // If this value is in the range [-31,-17] and is odd, use:
  //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
  // Note the last two are three-instruction sequences.
  if (SextVal >= -32 && SextVal <= 31) {
    // To avoid having these optimizations undone by constant folding,
    // we convert to a pseudo that will be expanded later into one of
    // the above forms.
    SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
    EVT VT = (SplatSize == 1 ? MVT::v16i8 :
              (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
    SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
    SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
    if (VT == Op.getValueType())
      return RetVal;
    else
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
  }

  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
  // for fneg/fabs.
  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
    // Make -1 and vspltisw -1:
    SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);

    // Make the VSLW intrinsic, computing 0x8000_0000.
    SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
                                   OnesV, DAG, dl);

    // xor by OnesV to invert it.
    Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
  }

  // Check to see if this is a wide variety of vsplti*, binop self cases.
  static const signed char SplatCsts[] = {
    -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
    -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
  };

  for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
    // cases which are ambiguous (e.g. formation of 0x8000_0000).
    int i = SplatCsts[idx];

    // Figure out what shift amount will be used by altivec if shifted by i in
    // this splat size.
    unsigned TypeShiftAmt = i & (SplatBitSize-1);

    // vsplti + shl self.
    if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
      SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
        Intrinsic::ppc_altivec_vslw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + srl self.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
        Intrinsic::ppc_altivec_vsrw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + rol self.
    if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
                         ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
      SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
        Intrinsic::ppc_altivec_vrlw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // t = vsplti c, result = vsldoi t, t, 1
    if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
      SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 2
    if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
      SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 3
    if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
      SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
  }

  return SDValue();
}

/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
                                      SDValue RHS, SelectionDAG &DAG,
                                      const SDLoc &dl) {
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
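  // A PFEntry packs the recipe as: bits [29:26] = operation, [25:13] = LHS
  // table index, [12:0] = RHS table index. Each index encodes four mask
  // elements as base-9 digits (0-7 pick a lane of the two inputs, 8 means
  // undef); the OP_COPY checks below test for <0,1,2,3> and <4,5,6,7>.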
9690 | |
9691 | enum { |
9692 | OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> |
9693 | OP_VMRGHW, |
9694 | OP_VMRGLW, |
9695 | OP_VSPLTISW0, |
9696 | OP_VSPLTISW1, |
9697 | OP_VSPLTISW2, |
9698 | OP_VSPLTISW3, |
9699 | OP_VSLDOI4, |
9700 | OP_VSLDOI8, |
9701 | OP_VSLDOI12 |
9702 | }; |
9703 | |
9704 | if (OpNum == OP_COPY) { |
9705 | if (LHSID == (1*9+2)*9+3) return LHS; |
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9707 | return RHS; |
9708 | } |
9709 | |
9710 | SDValue OpLHS, OpRHS; |
  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9713 | |
9714 | int ShufIdxs[16]; |
9715 | switch (OpNum) { |
  default: llvm_unreachable("Unknown i32 permute!");
9717 | case OP_VMRGHW: |
9718 | ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3; |
9719 | ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19; |
9720 | ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7; |
9721 | ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23; |
9722 | break; |
9723 | case OP_VMRGLW: |
9724 | ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11; |
9725 | ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27; |
9726 | ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15; |
9727 | ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31; |
9728 | break; |
9729 | case OP_VSPLTISW0: |
9730 | for (unsigned i = 0; i != 16; ++i) |
9731 | ShufIdxs[i] = (i&3)+0; |
9732 | break; |
9733 | case OP_VSPLTISW1: |
9734 | for (unsigned i = 0; i != 16; ++i) |
9735 | ShufIdxs[i] = (i&3)+4; |
9736 | break; |
9737 | case OP_VSPLTISW2: |
9738 | for (unsigned i = 0; i != 16; ++i) |
9739 | ShufIdxs[i] = (i&3)+8; |
9740 | break; |
9741 | case OP_VSPLTISW3: |
9742 | for (unsigned i = 0; i != 16; ++i) |
9743 | ShufIdxs[i] = (i&3)+12; |
9744 | break; |
9745 | case OP_VSLDOI4: |
    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI8:
    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI12:
    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9751 | } |
9752 | EVT VT = OpLHS.getValueType(); |
9753 | OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS); |
9754 | OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS); |
9755 | SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs); |
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
9757 | } |
9758 | |
9759 | /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled |
9760 | /// by the VINSERTB instruction introduced in ISA 3.0, else just return default |
9761 | /// SDValue. |
9762 | SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N, |
9763 | SelectionDAG &DAG) const { |
9764 | const unsigned BytesInVector = 16; |
9765 | bool IsLE = Subtarget.isLittleEndian(); |
9766 | SDLoc dl(N); |
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
9769 | unsigned ShiftElts = 0, InsertAtByte = 0; |
9770 | bool Swap = false; |
9771 | |
9772 | // Shifts required to get the byte we want at element 7. |
9773 | unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1, |
9774 | 0, 15, 14, 13, 12, 11, 10, 9}; |
9775 | unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0, |
9776 | 1, 2, 3, 4, 5, 6, 7, 8}; |
9777 | |
9778 | ArrayRef<int> Mask = N->getMask(); |
9779 | int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; |
9780 | |
9781 | // For each mask element, find out if we're just inserting something |
9782 | // from V2 into V1 or vice versa. |
9783 | // Possible permutations inserting an element from V2 into V1: |
9784 | // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 |
9785 | // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 |
9786 | // ... |
9787 | // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X |
9788 | // Inserting from V1 into V2 will be similar, except mask range will be |
9789 | // [16,31]. |
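  // e.g. the mask <0,1,2,3,4,21,6,7,8,9,10,11,12,13,14,15> moves element 21
  // (byte 5 of V2) into position 5 while every other byte keeps its original
  // order.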
9790 | |
9791 | bool FoundCandidate = false; |
9792 | // If both vector operands for the shuffle are the same vector, the mask |
9793 | // will contain only elements from the first one and the second one will be |
9794 | // undef. |
9795 | unsigned VINSERTBSrcElem = IsLE ? 8 : 7; |
  // Go through the mask of bytes to find an element that's being moved from
  // one vector to the other.
9798 | for (unsigned i = 0; i < BytesInVector; ++i) { |
9799 | unsigned CurrentElement = Mask[i]; |
    // If the 2nd operand is undefined, we should only look for the source
    // element (7 for BE, 8 for LE) in the Mask.
9802 | if (V2.isUndef() && CurrentElement != VINSERTBSrcElem) |
9803 | continue; |
9804 | |
9805 | bool OtherElementsInOrder = true; |
9806 | // Examine the other elements in the Mask to see if they're in original |
9807 | // order. |
9808 | for (unsigned j = 0; j < BytesInVector; ++j) { |
9809 | if (j == i) |
9810 | continue; |
      // If CurrentElement is from V1 [0,15], then we expect the rest of the
      // Mask to be from V2 [16,31] and vice versa, unless the 2nd operand is
      // undefined, in which case we always pick from the 1st operand.
9814 | int MaskOffset = |
9815 | (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0; |
9816 | if (Mask[j] != OriginalOrder[j] + MaskOffset) { |
9817 | OtherElementsInOrder = false; |
9818 | break; |
9819 | } |
9820 | } |
9821 | // If other elements are in original order, we record the number of shifts |
9822 | // we need to get the element we want into element 7. Also record which byte |
9823 | // in the vector we should insert into. |
9824 | if (OtherElementsInOrder) { |
9825 | // If 2nd operand is undefined, we assume no shifts and no swapping. |
9826 | if (V2.isUndef()) { |
9827 | ShiftElts = 0; |
9828 | Swap = false; |
9829 | } else { |
        // Only need the last 4 bits for shifts because operands will be
        // swapped if CurrentElement is >= 2^4.
9831 | ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF] |
9832 | : BigEndianShifts[CurrentElement & 0xF]; |
9833 | Swap = CurrentElement < BytesInVector; |
9834 | } |
9835 | InsertAtByte = IsLE ? BytesInVector - (i + 1) : i; |
9836 | FoundCandidate = true; |
9837 | break; |
9838 | } |
9839 | } |
9840 | |
9841 | if (!FoundCandidate) |
9842 | return SDValue(); |
9843 | |
9844 | // Candidate found, construct the proper SDAG sequence with VINSERTB, |
9845 | // optionally with VECSHL if shift is required. |
9846 | if (Swap) |
    std::swap(V1, V2);
9848 | if (V2.isUndef()) |
9849 | V2 = V1; |
9850 | if (ShiftElts) { |
9851 | SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2, |
9852 | DAG.getConstant(ShiftElts, dl, MVT::i32)); |
9853 | return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl, |
9854 | DAG.getConstant(InsertAtByte, dl, MVT::i32)); |
9855 | } |
9856 | return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2, |
9857 | DAG.getConstant(InsertAtByte, dl, MVT::i32)); |
9858 | } |
9859 | |
9860 | /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled |
9861 | /// by the VINSERTH instruction introduced in ISA 3.0, else just return default |
9862 | /// SDValue. |
9863 | SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N, |
9864 | SelectionDAG &DAG) const { |
9865 | const unsigned NumHalfWords = 8; |
9866 | const unsigned BytesInVector = NumHalfWords * 2; |
9867 | // Check that the shuffle is on half-words. |
  if (!isNByteElemShuffleMask(N, 2, 1))
9869 | return SDValue(); |
9870 | |
9871 | bool IsLE = Subtarget.isLittleEndian(); |
9872 | SDLoc dl(N); |
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
9875 | unsigned ShiftElts = 0, InsertAtByte = 0; |
9876 | bool Swap = false; |
9877 | |
9878 | // Shifts required to get the half-word we want at element 3. |
9879 | unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5}; |
9880 | unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4}; |
9881 | |
9882 | uint32_t Mask = 0; |
9883 | uint32_t OriginalOrderLow = 0x1234567; |
9884 | uint32_t OriginalOrderHigh = 0x89ABCDEF; |
  // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
  // 32-bit space, since only a 4-bit nibble is needed per element.
9887 | for (unsigned i = 0; i < NumHalfWords; ++i) { |
9888 | unsigned MaskShift = (NumHalfWords - 1 - i) * 4; |
    Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
9890 | } |
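  // e.g. an identity shuffle over half-words (elements <0,1,2,3,4,5,6,7>)
  // packs to 0x01234567, which is exactly OriginalOrderLow.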
9891 | |
9892 | // For each mask element, find out if we're just inserting something |
9893 | // from V2 into V1 or vice versa. Possible permutations inserting an element |
9894 | // from V2 into V1: |
9895 | // X, 1, 2, 3, 4, 5, 6, 7 |
9896 | // 0, X, 2, 3, 4, 5, 6, 7 |
9897 | // 0, 1, X, 3, 4, 5, 6, 7 |
9898 | // 0, 1, 2, X, 4, 5, 6, 7 |
9899 | // 0, 1, 2, 3, X, 5, 6, 7 |
9900 | // 0, 1, 2, 3, 4, X, 6, 7 |
9901 | // 0, 1, 2, 3, 4, 5, X, 7 |
9902 | // 0, 1, 2, 3, 4, 5, 6, X |
9903 | // Inserting from V1 into V2 will be similar, except mask range will be [8,15]. |
9904 | |
9905 | bool FoundCandidate = false; |
9906 | // Go through the mask of half-words to find an element that's being moved |
9907 | // from one vector to the other. |
9908 | for (unsigned i = 0; i < NumHalfWords; ++i) { |
9909 | unsigned MaskShift = (NumHalfWords - 1 - i) * 4; |
9910 | uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF; |
9911 | uint32_t MaskOtherElts = ~(0xF << MaskShift); |
9912 | uint32_t TargetOrder = 0x0; |
9913 | |
9914 | // If both vector operands for the shuffle are the same vector, the mask |
9915 | // will contain only elements from the first one and the second one will be |
9916 | // undef. |
9917 | if (V2.isUndef()) { |
9918 | ShiftElts = 0; |
9919 | unsigned VINSERTHSrcElem = IsLE ? 4 : 3; |
9920 | TargetOrder = OriginalOrderLow; |
9921 | Swap = false; |
      // Skip if this isn't the correct element or the mask of the other
      // elements doesn't match our expected order.
9924 | if (MaskOneElt == VINSERTHSrcElem && |
9925 | (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) { |
9926 | InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2; |
9927 | FoundCandidate = true; |
9928 | break; |
9929 | } |
9930 | } else { // If both operands are defined. |
9931 | // Target order is [8,15] if the current mask is between [0,7]. |
9932 | TargetOrder = |
9933 | (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow; |
      // Skip if the mask of the other elements doesn't match our expected
      // order.
9935 | if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) { |
9936 | // We only need the last 3 bits for the number of shifts. |
9937 | ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7] |
9938 | : BigEndianShifts[MaskOneElt & 0x7]; |
9939 | InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2; |
9940 | Swap = MaskOneElt < NumHalfWords; |
9941 | FoundCandidate = true; |
9942 | break; |
9943 | } |
9944 | } |
9945 | } |
9946 | |
9947 | if (!FoundCandidate) |
9948 | return SDValue(); |
9949 | |
9950 | // Candidate found, construct the proper SDAG sequence with VINSERTH, |
9951 | // optionally with VECSHL if shift is required. |
9952 | if (Swap) |
    std::swap(V1, V2);
9954 | if (V2.isUndef()) |
9955 | V2 = V1; |
9956 | SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1); |
9957 | if (ShiftElts) { |
9958 | // Double ShiftElts because we're left shifting on v16i8 type. |
9959 | SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2, |
9960 | DAG.getConstant(2 * ShiftElts, dl, MVT::i32)); |
9961 | SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl); |
9962 | SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2, |
9963 | DAG.getConstant(InsertAtByte, dl, MVT::i32)); |
9964 | return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); |
9965 | } |
9966 | SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2); |
9967 | SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2, |
9968 | DAG.getConstant(InsertAtByte, dl, MVT::i32)); |
9969 | return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); |
9970 | } |
9971 | |
9972 | /// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be |
9973 | /// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise |
9974 | /// return the default SDValue. |
9975 | SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN, |
9976 | SelectionDAG &DAG) const { |
9977 | // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles |
9978 | // to v16i8. Peek through the bitcasts to get the actual operands. |
  SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
  SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
9981 | |
9982 | auto ShuffleMask = SVN->getMask(); |
9983 | SDValue VecShuffle(SVN, 0); |
9984 | SDLoc DL(SVN); |
9985 | |
9986 | // Check that we have a four byte shuffle. |
  if (!isNByteElemShuffleMask(SVN, 4, 1))
9988 | return SDValue(); |
9989 | |
9990 | // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx. |
9991 | if (RHS->getOpcode() != ISD::BUILD_VECTOR) { |
    std::swap(LHS, RHS);
    VecShuffle = peekThroughBitcasts(DAG.getCommutedVectorShuffle(*SVN));
    ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
9995 | if (!CommutedSV) |
9996 | return SDValue(); |
9997 | ShuffleMask = CommutedSV->getMask(); |
9998 | } |
9999 | |
10000 | // Ensure that the RHS is a vector of constants. |
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10002 | if (!BVN) |
10003 | return SDValue(); |
10004 | |
10005 | // Check if RHS is a splat of 4-bytes (or smaller). |
10006 | APInt APSplatValue, APSplatUndef; |
10007 | unsigned SplatBitSize; |
10008 | bool HasAnyUndefs; |
  if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
                            HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
10011 | SplatBitSize > 32) |
10012 | return SDValue(); |
10013 | |
10014 | // Check that the shuffle mask matches the semantics of XXSPLTI32DX. |
10015 | // The instruction splats a constant C into two words of the source vector |
10016 | // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }. |
10017 | // Thus we check that the shuffle mask is the equivalent of |
10018 | // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively. |
10019 | // Note: the check above of isNByteElemShuffleMask() ensures that the bytes |
10020 | // within each word are consecutive, so we only need to check the first byte. |
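  // e.g. the mask <0,1,2,3, 20,21,22,23, 8,9,10,11, 28,29,30,31> keeps words
  // 0 and 2 of the LHS and takes the splatted constant for words 1 and 3.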
10021 | SDValue Index; |
10022 | bool IsLE = Subtarget.isLittleEndian(); |
10023 | if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) && |
10024 | (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 && |
10025 | ShuffleMask[4] > 15 && ShuffleMask[12] > 15)) |
10026 | Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32); |
10027 | else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) && |
10028 | (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 && |
10029 | ShuffleMask[0] > 15 && ShuffleMask[8] > 15)) |
10030 | Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32); |
10031 | else |
10032 | return SDValue(); |
10033 | |
10034 | // If the splat is narrower than 32-bits, we need to get the 32-bit value |
10035 | // for XXSPLTI32DX. |
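  // e.g. an 8-bit splat value of 0xAB is widened to 0xABAB and then to
  // 0xABABABAB by the doubling loop below.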
10036 | unsigned SplatVal = APSplatValue.getZExtValue(); |
10037 | for (; SplatBitSize < 32; SplatBitSize <<= 1) |
10038 | SplatVal |= (SplatVal << SplatBitSize); |
10039 | |
10040 | SDValue SplatNode = DAG.getNode( |
10041 | PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS), |
10042 | Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32)); |
10043 | return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode); |
10044 | } |
10045 | |
10046 | /// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8). |
10047 | /// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is |
/// a multiple of 8. Otherwise convert it to a scalar rotation (i128),
/// i.e. (or (shl x, C1), (srl x, 128-C1)).
10050 | SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const { |
  assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
  assert(Op.getValueType() == MVT::v1i128 &&
         "Only set v1i128 as custom, other type shouldn't reach here!");
10054 | SDLoc dl(Op); |
  SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
  SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
  unsigned SHLAmt = N1.getConstantOperandVal(0);
10058 | if (SHLAmt % 8 == 0) { |
10059 | std::array<int, 16> Mask; |
    std::iota(Mask.begin(), Mask.end(), 0);
    std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
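    // e.g. for SHLAmt == 8 the mask becomes <1,2,...,14,15,0>, i.e. a
    // one-byte rotate expressed as a v16i8 shuffle.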
10062 | if (SDValue Shuffle = |
10063 | DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0), |
10064 | DAG.getUNDEF(MVT::v16i8), Mask)) |
10065 | return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle); |
10066 | } |
10067 | SDValue ArgVal = DAG.getBitcast(MVT::i128, N0); |
10068 | SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal, |
10069 | DAG.getConstant(SHLAmt, dl, MVT::i32)); |
10070 | SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal, |
10071 | DAG.getConstant(128 - SHLAmt, dl, MVT::i32)); |
10072 | SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp); |
10073 | return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp); |
10074 | } |
10075 | |
10076 | /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this |
10077 | /// is a shuffle we can handle in a single instruction, return it. Otherwise, |
10078 | /// return the code it can be lowered into. Worst case, it can always be |
10079 | /// lowered into a vperm. |
10080 | SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, |
10081 | SelectionDAG &DAG) const { |
10082 | SDLoc dl(Op); |
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10086 | |
10087 | // Any nodes that were combined in the target-independent combiner prior |
10088 | // to vector legalization will not be sent to the target combine. Try to |
10089 | // combine it here. |
  if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
    if (!isa<ShuffleVectorSDNode>(NewShuffle))
      return NewShuffle;
    Op = NewShuffle;
    SVOp = cast<ShuffleVectorSDNode>(Op);
    V1 = Op.getOperand(0);
    V2 = Op.getOperand(1);
10097 | } |
10098 | EVT VT = Op.getValueType(); |
10099 | bool isLittleEndian = Subtarget.isLittleEndian(); |
10100 | |
10101 | unsigned ShiftElts, InsertAtByte; |
10102 | bool Swap = false; |
10103 | |
10104 | // If this is a load-and-splat, we can do that with a single instruction |
10105 | // in some cases. However if the load has multiple uses, we don't want to |
10106 | // combine it because that will just produce multiple loads. |
10107 | bool IsPermutedLoad = false; |
  const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10109 | if (InputLoad && Subtarget.hasVSX() && V2.isUndef() && |
      (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10111 | InputLoad->hasOneUse()) { |
    bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
    int SplatIdx =
        PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10115 | |
10116 | // The splat index for permuted loads will be in the left half of the vector |
10117 | // which is strictly wider than the loaded value by 8 bytes. So we need to |
10118 | // adjust the splat index to point to the correct address in memory. |
10119 | if (IsPermutedLoad) { |
10120 | assert((isLittleEndian || IsFourByte) && |
10121 | "Unexpected size for permuted load on big endian target" ); |
10122 | SplatIdx += IsFourByte ? 2 : 1; |
10123 | assert((SplatIdx < (IsFourByte ? 4 : 2)) && |
10124 | "Splat of a value outside of the loaded memory" ); |
10125 | } |
10126 | |
    LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10128 | // For 4-byte load-and-splat, we need Power9. |
10129 | if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) { |
10130 | uint64_t Offset = 0; |
10131 | if (IsFourByte) |
10132 | Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4; |
10133 | else |
10134 | Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8; |
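      // e.g. a little-endian 4-byte splat of element 1 loads from
      // BasePtr + (3 - 1) * 4 == BasePtr + 8.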
10135 | |
10136 | // If the width of the load is the same as the width of the splat, |
10137 | // loading with an offset would load the wrong memory. |
      if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10139 | Offset = 0; |
10140 | |
10141 | SDValue BasePtr = LD->getBasePtr(); |
10142 | if (Offset != 0) |
        BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
                              BasePtr, DAG.getIntPtrConstant(Offset, dl));
10145 | SDValue Ops[] = { |
10146 | LD->getChain(), // Chain |
10147 | BasePtr, // BasePtr |
10148 | DAG.getValueType(Op.getValueType()) // VT |
10149 | }; |
10150 | SDVTList VTL = |
10151 | DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other); |
10152 | SDValue LdSplt = |
          DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL, Ops,
                                  LD->getMemoryVT(), LD->getMemOperand());
      DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
      if (LdSplt.getValueType() != SVOp->getValueType(0))
        LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10158 | return LdSplt; |
10159 | } |
10160 | } |
10161 | |
10162 | // All v2i64 and v2f64 shuffles are legal |
10163 | if (VT == MVT::v2i64 || VT == MVT::v2f64) |
10164 | return Op; |
10165 | |
10166 | if (Subtarget.hasP9Vector() && |
      PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
                           isLittleEndian)) {
10169 | if (V2.isUndef()) |
10170 | V2 = V1; |
10171 | else if (Swap) |
      std::swap(V1, V2);
10173 | SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); |
10174 | SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2); |
10175 | if (ShiftElts) { |
10176 | SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2, |
10177 | DAG.getConstant(ShiftElts, dl, MVT::i32)); |
10178 | SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl, |
10179 | DAG.getConstant(InsertAtByte, dl, MVT::i32)); |
10180 | return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); |
10181 | } |
10182 | SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2, |
10183 | DAG.getConstant(InsertAtByte, dl, MVT::i32)); |
10184 | return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); |
10185 | } |
10186 | |
10187 | if (Subtarget.hasPrefixInstrs()) { |
10188 | SDValue SplatInsertNode; |
    if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10190 | return SplatInsertNode; |
10191 | } |
10192 | |
10193 | if (Subtarget.hasP9Altivec()) { |
10194 | SDValue NewISDNode; |
    if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10196 | return NewISDNode; |
10197 | |
    if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10199 | return NewISDNode; |
10200 | } |
10201 | |
10202 | if (Subtarget.hasVSX() && |
      PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10204 | if (Swap) |
      std::swap(V1, V2);
10206 | SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); |
10207 | SDValue Conv2 = |
10208 | DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2); |
10209 | |
10210 | SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2, |
10211 | DAG.getConstant(ShiftElts, dl, MVT::i32)); |
10212 | return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl); |
10213 | } |
10214 | |
10215 | if (Subtarget.hasVSX() && |
      PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10217 | if (Swap) |
      std::swap(V1, V2);
10219 | SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1); |
10220 | SDValue Conv2 = |
10221 | DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2); |
10222 | |
10223 | SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2, |
10224 | DAG.getConstant(ShiftElts, dl, MVT::i32)); |
10225 | return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI); |
10226 | } |
10227 | |
10228 | if (Subtarget.hasP9Vector()) { |
    if (PPC::isXXBRHShuffleMask(SVOp)) {
10230 | SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1); |
10231 | SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv); |
10232 | return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord); |
    } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10234 | SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); |
10235 | SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv); |
10236 | return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord); |
    } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10238 | SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1); |
10239 | SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv); |
10240 | return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord); |
    } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10242 | SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1); |
10243 | SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv); |
10244 | return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord); |
10245 | } |
10246 | } |
10247 | |
10248 | if (Subtarget.hasVSX()) { |
    if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
      int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10251 | |
10252 | SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); |
10253 | SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv, |
10254 | DAG.getConstant(SplatIdx, dl, MVT::i32)); |
10255 | return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat); |
10256 | } |
10257 | |
10258 | // Left shifts of 8 bytes are actually swaps. Convert accordingly. |
    if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10260 | SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1); |
10261 | SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv); |
10262 | return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap); |
10263 | } |
10264 | } |
10265 | |
10266 | // Cases that are handled by instructions that take permute immediates |
10267 | // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be |
10268 | // selected by the instruction selector. |
10269 | if (V2.isUndef()) { |
    if (PPC::isSplatShuffleMask(SVOp, 1) ||
        PPC::isSplatShuffleMask(SVOp, 2) ||
        PPC::isSplatShuffleMask(SVOp, 4) ||
        PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
        PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
        (Subtarget.hasP8Altivec() && (
           PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
           PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
           PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10286 | return Op; |
10287 | } |
10288 | } |
10289 | |
10290 | // Altivec has a variety of "shuffle immediates" that take two vector inputs |
10291 | // and produce a fixed permutation. If any of these match, do not lower to |
10292 | // VPERM. |
10293 | unsigned int ShuffleKind = isLittleEndian ? 2 : 0; |
  if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
      PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
      (Subtarget.hasP8Altivec() && (
         PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
         PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
         PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10307 | return Op; |
10308 | |
10309 | // Check to see if this is a shuffle of 4-byte values. If so, we can use our |
10310 | // perfect shuffle table to emit an optimal matching sequence. |
10311 | ArrayRef<int> PermMask = SVOp->getMask(); |
10312 | |
10313 | if (!DisablePerfectShuffle && !isLittleEndian) { |
10314 | unsigned PFIndexes[4]; |
10315 | bool isFourElementShuffle = true; |
10316 | for (unsigned i = 0; i != 4 && isFourElementShuffle; |
10317 | ++i) { // Element number |
10318 | unsigned EltNo = 8; // Start out undef. |
10319 | for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. |
10320 | if (PermMask[i * 4 + j] < 0) |
10321 | continue; // Undef, ignore it. |
10322 | |
10323 | unsigned ByteSource = PermMask[i * 4 + j]; |
10324 | if ((ByteSource & 3) != j) { |
10325 | isFourElementShuffle = false; |
10326 | break; |
10327 | } |
10328 | |
10329 | if (EltNo == 8) { |
10330 | EltNo = ByteSource / 4; |
10331 | } else if (EltNo != ByteSource / 4) { |
10332 | isFourElementShuffle = false; |
10333 | break; |
10334 | } |
10335 | } |
10336 | PFIndexes[i] = EltNo; |
10337 | } |
10338 | |
10339 | // If this shuffle can be expressed as a shuffle of 4-byte elements, use the |
10340 | // perfect shuffle vector to determine if it is cost effective to do this as |
10341 | // discrete instructions, or whether we should use a vperm. |
10342 | // For now, we skip this for little endian until such time as we have a |
10343 | // little-endian perfect shuffle table. |
10344 | if (isFourElementShuffle) { |
10345 | // Compute the index in the perfect shuffle table. |
10346 | unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 + |
10347 | PFIndexes[2] * 9 + PFIndexes[3]; |
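      // e.g. PFIndexes <0,1,2,3> gives 0*729 + 1*81 + 2*9 + 3 == 102.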
10348 | |
10349 | unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; |
10350 | unsigned Cost = (PFEntry >> 30); |
10351 | |
10352 | // Determining when to avoid vperm is tricky. Many things affect the cost |
10353 | // of vperm, particularly how many times the perm mask needs to be |
10354 | // computed. For example, if the perm mask can be hoisted out of a loop or |
10355 | // is already used (perhaps because there are multiple permutes with the |
10356 | // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the |
10357 | // permute mask out of the loop requires an extra register. |
10358 | // |
10359 | // As a compromise, we only emit discrete instructions if the shuffle can |
10360 | // be generated in 3 or fewer operations. When we have loop information |
10361 | // available, if this block is within a loop, we should avoid using vperm |
10362 | // for 3-operation perms and use a constant pool load instead. |
10363 | if (Cost < 3) |
        return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10365 | } |
10366 | } |
10367 | |
10368 | // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant |
10369 | // vector that will get spilled to the constant pool. |
10370 | if (V2.isUndef()) V2 = V1; |
10371 | |
10372 | return LowerVPERM(Op, DAG, PermMask, VT, V1, V2); |
10373 | } |
10374 | |
10375 | SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG, |
10376 | ArrayRef<int> PermMask, EVT VT, |
10377 | SDValue V1, SDValue V2) const { |
10378 | unsigned Opcode = PPCISD::VPERM; |
10379 | EVT ValType = V1.getValueType(); |
10380 | SDLoc dl(Op); |
10381 | bool NeedSwap = false; |
10382 | bool isLittleEndian = Subtarget.isLittleEndian(); |
10383 | bool isPPC64 = Subtarget.isPPC64(); |
10384 | |
10385 | if (Subtarget.hasVSX() && Subtarget.hasP9Vector() && |
10386 | (V1->hasOneUse() || V2->hasOneUse())) { |
    LLVM_DEBUG(dbgs() << "At least one of the two input vectors is dead - "
                         "using XXPERM instead\n");
10389 | Opcode = PPCISD::XXPERM; |
10390 | |
10391 | // The second input to XXPERM is also an output so if the second input has |
10392 | // multiple uses then copying is necessary, as a result we want the |
10393 | // single-use operand to be used as the second input to prevent copying. |
10394 | if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) || |
10395 | (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) { |
      std::swap(V1, V2);
10397 | NeedSwap = !NeedSwap; |
10398 | } |
10399 | } |
10400 | |
10401 | // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except |
10402 | // that it is in input element units, not in bytes. Convert now. |
10403 | |
10404 | // For little endian, the order of the input vectors is reversed, and |
10405 | // the permutation mask is complemented with respect to 31. This is |
10406 | // necessary to produce proper semantics with the big-endian-based vperm |
10407 | // instruction. |
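  // e.g. a big-endian control byte of 0 (byte 0 of the first operand) becomes
  // 31 - 0 == 31 on little endian, with the two inputs swapped further down.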
10408 | EVT EltVT = V1.getValueType().getVectorElementType(); |
10409 | unsigned BytesPerElement = EltVT.getSizeInBits() / 8; |
10410 | |
  bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
  bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10413 | |
10414 | /* |
    Vectors will be appended like so: [ V1 | V2 ]
10416 | XXSWAPD on V1: |
10417 | [ A | B | C | D ] -> [ C | D | A | B ] |
10418 | 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15 |
10419 | i.e. index of A, B += 8, and index of C, D -= 8. |
10420 | XXSWAPD on V2: |
10421 | [ E | F | G | H ] -> [ G | H | E | F ] |
10422 | 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31 |
10423 | i.e. index of E, F += 8, index of G, H -= 8 |
10424 | Swap V1 and V2: |
10425 | [ V1 | V2 ] -> [ V2 | V1 ] |
10426 | 0-15 16-31 0-15 16-31 |
10427 | i.e. index of V1 += 16, index of V2 -= 16 |
10428 | */ |
10429 | |
10430 | SmallVector<SDValue, 16> ResultMask; |
10431 | for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { |
10432 | unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; |
10433 | |
10434 | if (V1HasXXSWAPD) { |
10435 | if (SrcElt < 8) |
10436 | SrcElt += 8; |
10437 | else if (SrcElt < 16) |
10438 | SrcElt -= 8; |
10439 | } |
10440 | if (V2HasXXSWAPD) { |
10441 | if (SrcElt > 23) |
10442 | SrcElt -= 8; |
10443 | else if (SrcElt > 15) |
10444 | SrcElt += 8; |
10445 | } |
10446 | if (NeedSwap) { |
10447 | if (SrcElt < 16) |
10448 | SrcElt += 16; |
10449 | else |
10450 | SrcElt -= 16; |
10451 | } |
10452 | for (unsigned j = 0; j != BytesPerElement; ++j) |
10453 | if (isLittleEndian) |
10454 | ResultMask.push_back( |
10455 | DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32)); |
10456 | else |
10457 | ResultMask.push_back( |
10458 | DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32)); |
10459 | } |
10460 | |
10461 | if (V1HasXXSWAPD) { |
    dl = SDLoc(V1->getOperand(0));
    V1 = V1->getOperand(0)->getOperand(1);
10464 | } |
10465 | if (V2HasXXSWAPD) { |
    dl = SDLoc(V2->getOperand(0));
    V2 = V2->getOperand(0)->getOperand(1);
10468 | } |
10469 | |
10470 | if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) { |
10471 | if (ValType != MVT::v2f64) |
10472 | V1 = DAG.getBitcast(MVT::v2f64, V1); |
10473 | if (V2.getValueType() != MVT::v2f64) |
10474 | V2 = DAG.getBitcast(MVT::v2f64, V2); |
10475 | } |
10476 | |
10477 | ShufflesHandledWithVPERM++; |
10478 | SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask); |
10479 | LLVM_DEBUG({ |
10480 | ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); |
    if (Opcode == PPCISD::XXPERM) {
      dbgs() << "Emitting an XXPERM for the following shuffle:\n";
    } else {
      dbgs() << "Emitting a VPERM for the following shuffle:\n";
    }
    SVOp->dump();
    dbgs() << "With the following permute control vector:\n";
10488 | VPermMask.dump(); |
10489 | }); |
10490 | |
10491 | if (Opcode == PPCISD::XXPERM) |
10492 | VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask); |
10493 | |
  // For little endian we only need to swap the two inputs here; the permute
  // mask above was already computed with the swap in mind.
10496 | if (isLittleEndian) |
    std::swap(V1, V2);
10498 | |
10499 | SDValue VPERMNode = |
      DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10501 | |
  VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10503 | return VPERMNode; |
10504 | } |
10505 | |
10506 | /// getVectorCompareInfo - Given an intrinsic, return false if it is not a |
10507 | /// vector comparison. If it is, return true and fill in Opc/isDot with |
10508 | /// information about the intrinsic. |
10509 | static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, |
10510 | bool &isDot, const PPCSubtarget &Subtarget) { |
  unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
10512 | CompareOpc = -1; |
10513 | isDot = false; |
10514 | switch (IntrinsicID) { |
10515 | default: |
10516 | return false; |
10517 | // Comparison predicates. |
10518 | case Intrinsic::ppc_altivec_vcmpbfp_p: |
10519 | CompareOpc = 966; |
10520 | isDot = true; |
10521 | break; |
10522 | case Intrinsic::ppc_altivec_vcmpeqfp_p: |
10523 | CompareOpc = 198; |
10524 | isDot = true; |
10525 | break; |
10526 | case Intrinsic::ppc_altivec_vcmpequb_p: |
10527 | CompareOpc = 6; |
10528 | isDot = true; |
10529 | break; |
10530 | case Intrinsic::ppc_altivec_vcmpequh_p: |
10531 | CompareOpc = 70; |
10532 | isDot = true; |
10533 | break; |
10534 | case Intrinsic::ppc_altivec_vcmpequw_p: |
10535 | CompareOpc = 134; |
10536 | isDot = true; |
10537 | break; |
10538 | case Intrinsic::ppc_altivec_vcmpequd_p: |
10539 | if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) { |
10540 | CompareOpc = 199; |
10541 | isDot = true; |
10542 | } else |
10543 | return false; |
10544 | break; |
10545 | case Intrinsic::ppc_altivec_vcmpneb_p: |
10546 | case Intrinsic::ppc_altivec_vcmpneh_p: |
10547 | case Intrinsic::ppc_altivec_vcmpnew_p: |
10548 | case Intrinsic::ppc_altivec_vcmpnezb_p: |
10549 | case Intrinsic::ppc_altivec_vcmpnezh_p: |
10550 | case Intrinsic::ppc_altivec_vcmpnezw_p: |
10551 | if (Subtarget.hasP9Altivec()) { |
10552 | switch (IntrinsicID) { |
10553 | default: |
      llvm_unreachable("Unknown comparison intrinsic.");
10555 | case Intrinsic::ppc_altivec_vcmpneb_p: |
10556 | CompareOpc = 7; |
10557 | break; |
10558 | case Intrinsic::ppc_altivec_vcmpneh_p: |
10559 | CompareOpc = 71; |
10560 | break; |
10561 | case Intrinsic::ppc_altivec_vcmpnew_p: |
10562 | CompareOpc = 135; |
10563 | break; |
10564 | case Intrinsic::ppc_altivec_vcmpnezb_p: |
10565 | CompareOpc = 263; |
10566 | break; |
10567 | case Intrinsic::ppc_altivec_vcmpnezh_p: |
10568 | CompareOpc = 327; |
10569 | break; |
10570 | case Intrinsic::ppc_altivec_vcmpnezw_p: |
10571 | CompareOpc = 391; |
10572 | break; |
10573 | } |
10574 | isDot = true; |
10575 | } else |
10576 | return false; |
10577 | break; |
10578 | case Intrinsic::ppc_altivec_vcmpgefp_p: |
10579 | CompareOpc = 454; |
10580 | isDot = true; |
10581 | break; |
10582 | case Intrinsic::ppc_altivec_vcmpgtfp_p: |
10583 | CompareOpc = 710; |
10584 | isDot = true; |
10585 | break; |
10586 | case Intrinsic::ppc_altivec_vcmpgtsb_p: |
10587 | CompareOpc = 774; |
10588 | isDot = true; |
10589 | break; |
10590 | case Intrinsic::ppc_altivec_vcmpgtsh_p: |
10591 | CompareOpc = 838; |
10592 | isDot = true; |
10593 | break; |
10594 | case Intrinsic::ppc_altivec_vcmpgtsw_p: |
10595 | CompareOpc = 902; |
10596 | isDot = true; |
10597 | break; |
10598 | case Intrinsic::ppc_altivec_vcmpgtsd_p: |
10599 | if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) { |
10600 | CompareOpc = 967; |
10601 | isDot = true; |
10602 | } else |
10603 | return false; |
10604 | break; |
10605 | case Intrinsic::ppc_altivec_vcmpgtub_p: |
10606 | CompareOpc = 518; |
10607 | isDot = true; |
10608 | break; |
10609 | case Intrinsic::ppc_altivec_vcmpgtuh_p: |
10610 | CompareOpc = 582; |
10611 | isDot = true; |
10612 | break; |
10613 | case Intrinsic::ppc_altivec_vcmpgtuw_p: |
10614 | CompareOpc = 646; |
10615 | isDot = true; |
10616 | break; |
10617 | case Intrinsic::ppc_altivec_vcmpgtud_p: |
10618 | if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) { |
10619 | CompareOpc = 711; |
10620 | isDot = true; |
10621 | } else |
10622 | return false; |
10623 | break; |
10624 | |
10625 | case Intrinsic::ppc_altivec_vcmpequq: |
10626 | case Intrinsic::ppc_altivec_vcmpgtsq: |
10627 | case Intrinsic::ppc_altivec_vcmpgtuq: |
10628 | if (!Subtarget.isISA3_1()) |
10629 | return false; |
10630 | switch (IntrinsicID) { |
10631 | default: |
      llvm_unreachable("Unknown comparison intrinsic.");
10633 | case Intrinsic::ppc_altivec_vcmpequq: |
10634 | CompareOpc = 455; |
10635 | break; |
10636 | case Intrinsic::ppc_altivec_vcmpgtsq: |
10637 | CompareOpc = 903; |
10638 | break; |
10639 | case Intrinsic::ppc_altivec_vcmpgtuq: |
10640 | CompareOpc = 647; |
10641 | break; |
10642 | } |
10643 | break; |
10644 | |
10645 | // VSX predicate comparisons use the same infrastructure |
10646 | case Intrinsic::ppc_vsx_xvcmpeqdp_p: |
10647 | case Intrinsic::ppc_vsx_xvcmpgedp_p: |
10648 | case Intrinsic::ppc_vsx_xvcmpgtdp_p: |
10649 | case Intrinsic::ppc_vsx_xvcmpeqsp_p: |
10650 | case Intrinsic::ppc_vsx_xvcmpgesp_p: |
10651 | case Intrinsic::ppc_vsx_xvcmpgtsp_p: |
10652 | if (Subtarget.hasVSX()) { |
10653 | switch (IntrinsicID) { |
10654 | case Intrinsic::ppc_vsx_xvcmpeqdp_p: |
10655 | CompareOpc = 99; |
10656 | break; |
10657 | case Intrinsic::ppc_vsx_xvcmpgedp_p: |
10658 | CompareOpc = 115; |
10659 | break; |
10660 | case Intrinsic::ppc_vsx_xvcmpgtdp_p: |
10661 | CompareOpc = 107; |
10662 | break; |
10663 | case Intrinsic::ppc_vsx_xvcmpeqsp_p: |
10664 | CompareOpc = 67; |
10665 | break; |
10666 | case Intrinsic::ppc_vsx_xvcmpgesp_p: |
10667 | CompareOpc = 83; |
10668 | break; |
10669 | case Intrinsic::ppc_vsx_xvcmpgtsp_p: |
10670 | CompareOpc = 75; |
10671 | break; |
10672 | } |
10673 | isDot = true; |
10674 | } else |
10675 | return false; |
10676 | break; |
10677 | |
10678 | // Normal Comparisons. |
10679 | case Intrinsic::ppc_altivec_vcmpbfp: |
10680 | CompareOpc = 966; |
10681 | break; |
10682 | case Intrinsic::ppc_altivec_vcmpeqfp: |
10683 | CompareOpc = 198; |
10684 | break; |
10685 | case Intrinsic::ppc_altivec_vcmpequb: |
10686 | CompareOpc = 6; |
10687 | break; |
10688 | case Intrinsic::ppc_altivec_vcmpequh: |
10689 | CompareOpc = 70; |
10690 | break; |
10691 | case Intrinsic::ppc_altivec_vcmpequw: |
10692 | CompareOpc = 134; |
10693 | break; |
10694 | case Intrinsic::ppc_altivec_vcmpequd: |
10695 | if (Subtarget.hasP8Altivec()) |
10696 | CompareOpc = 199; |
10697 | else |
10698 | return false; |
10699 | break; |
10700 | case Intrinsic::ppc_altivec_vcmpneb: |
10701 | case Intrinsic::ppc_altivec_vcmpneh: |
10702 | case Intrinsic::ppc_altivec_vcmpnew: |
10703 | case Intrinsic::ppc_altivec_vcmpnezb: |
10704 | case Intrinsic::ppc_altivec_vcmpnezh: |
10705 | case Intrinsic::ppc_altivec_vcmpnezw: |
10706 | if (Subtarget.hasP9Altivec()) |
10707 | switch (IntrinsicID) { |
10708 | default: |
        llvm_unreachable("Unknown comparison intrinsic.");
10710 | case Intrinsic::ppc_altivec_vcmpneb: |
10711 | CompareOpc = 7; |
10712 | break; |
10713 | case Intrinsic::ppc_altivec_vcmpneh: |
10714 | CompareOpc = 71; |
10715 | break; |
10716 | case Intrinsic::ppc_altivec_vcmpnew: |
10717 | CompareOpc = 135; |
10718 | break; |
10719 | case Intrinsic::ppc_altivec_vcmpnezb: |
10720 | CompareOpc = 263; |
10721 | break; |
10722 | case Intrinsic::ppc_altivec_vcmpnezh: |
10723 | CompareOpc = 327; |
10724 | break; |
10725 | case Intrinsic::ppc_altivec_vcmpnezw: |
10726 | CompareOpc = 391; |
10727 | break; |
10728 | } |
10729 | else |
10730 | return false; |
10731 | break; |
10732 | case Intrinsic::ppc_altivec_vcmpgefp: |
10733 | CompareOpc = 454; |
10734 | break; |
10735 | case Intrinsic::ppc_altivec_vcmpgtfp: |
10736 | CompareOpc = 710; |
10737 | break; |
10738 | case Intrinsic::ppc_altivec_vcmpgtsb: |
10739 | CompareOpc = 774; |
10740 | break; |
10741 | case Intrinsic::ppc_altivec_vcmpgtsh: |
10742 | CompareOpc = 838; |
10743 | break; |
10744 | case Intrinsic::ppc_altivec_vcmpgtsw: |
10745 | CompareOpc = 902; |
10746 | break; |
10747 | case Intrinsic::ppc_altivec_vcmpgtsd: |
10748 | if (Subtarget.hasP8Altivec()) |
10749 | CompareOpc = 967; |
10750 | else |
10751 | return false; |
10752 | break; |
10753 | case Intrinsic::ppc_altivec_vcmpgtub: |
10754 | CompareOpc = 518; |
10755 | break; |
10756 | case Intrinsic::ppc_altivec_vcmpgtuh: |
10757 | CompareOpc = 582; |
10758 | break; |
10759 | case Intrinsic::ppc_altivec_vcmpgtuw: |
10760 | CompareOpc = 646; |
10761 | break; |
10762 | case Intrinsic::ppc_altivec_vcmpgtud: |
10763 | if (Subtarget.hasP8Altivec()) |
10764 | CompareOpc = 711; |
10765 | else |
10766 | return false; |
10767 | break; |
10768 | case Intrinsic::ppc_altivec_vcmpequq_p: |
10769 | case Intrinsic::ppc_altivec_vcmpgtsq_p: |
10770 | case Intrinsic::ppc_altivec_vcmpgtuq_p: |
10771 | if (!Subtarget.isISA3_1()) |
10772 | return false; |
10773 | switch (IntrinsicID) { |
10774 | default: |
      llvm_unreachable("Unknown comparison intrinsic.");
10776 | case Intrinsic::ppc_altivec_vcmpequq_p: |
10777 | CompareOpc = 455; |
10778 | break; |
10779 | case Intrinsic::ppc_altivec_vcmpgtsq_p: |
10780 | CompareOpc = 903; |
10781 | break; |
10782 | case Intrinsic::ppc_altivec_vcmpgtuq_p: |
10783 | CompareOpc = 647; |
10784 | break; |
10785 | } |
10786 | isDot = true; |
10787 | break; |
10788 | } |
10789 | return true; |
10790 | } |
10791 | |
10792 | /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom |
10793 | /// lower, do it, otherwise return null. |
10794 | SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, |
10795 | SelectionDAG &DAG) const { |
  unsigned IntrinsicID = Op.getConstantOperandVal(0);
10797 | |
10798 | SDLoc dl(Op); |
10799 | |
10800 | switch (IntrinsicID) { |
10801 | case Intrinsic::thread_pointer: |
10802 | // Reads the thread pointer register, used for __builtin_thread_pointer. |
10803 | if (Subtarget.isPPC64()) |
10804 | return DAG.getRegister(PPC::X13, MVT::i64); |
10805 | return DAG.getRegister(PPC::R2, MVT::i32); |
10806 | |
10807 | case Intrinsic::ppc_rldimi: { |
    assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
    SDValue Src = Op.getOperand(1);
    APInt Mask = Op.getConstantOperandAPInt(4);
    if (Mask.isZero())
      return Op.getOperand(2);
10813 | if (Mask.isAllOnes()) |
10814 | return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3)); |
    uint64_t SH = Op.getConstantOperandVal(3);
10816 | unsigned MB = 0, ME = 0; |
    if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
      report_fatal_error("invalid rldimi mask!");
10819 | // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi. |
10820 | if (ME < 63 - SH) { |
10821 | Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, |
10822 | DAG.getConstant(ME + SH + 1, dl, MVT::i32)); |
10823 | } else if (ME > 63 - SH) { |
10824 | Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, |
10825 | DAG.getConstant(ME + SH - 63, dl, MVT::i32)); |
10826 | } |
10827 | return SDValue( |
10828 | DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64, |
10829 | {Op.getOperand(2), Src, |
10830 | DAG.getTargetConstant(63 - ME, dl, MVT::i32), |
10831 | DAG.getTargetConstant(MB, dl, MVT::i32)}), |
10832 | 0); |
10833 | } |
10834 | |
10835 | case Intrinsic::ppc_rlwimi: { |
    APInt Mask = Op.getConstantOperandAPInt(4);
    if (Mask.isZero())
      return Op.getOperand(2);
10839 | if (Mask.isAllOnes()) |
10840 | return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1), |
10841 | Op.getOperand(3)); |
10842 | unsigned MB = 0, ME = 0; |
    if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
      report_fatal_error("invalid rlwimi mask!");
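    // e.g. Mask == 0x00FF0000 is a run of ones yielding MB == 8 and ME == 15
    // in big-endian bit numbering.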
10845 | return SDValue(DAG.getMachineNode( |
10846 | PPC::RLWIMI, dl, MVT::i32, |
10847 | {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3), |
10848 | DAG.getTargetConstant(MB, dl, MVT::i32), |
10849 | DAG.getTargetConstant(ME, dl, MVT::i32)}), |
10850 | 0); |
10851 | } |
10852 | |
10853 | case Intrinsic::ppc_rlwnm: { |
10854 | if (Op.getConstantOperandVal(3) == 0) |
10855 | return DAG.getConstant(0, dl, MVT::i32); |
10856 | unsigned MB = 0, ME = 0; |
    if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
      report_fatal_error("invalid rlwnm mask!");
10859 | return SDValue( |
10860 | DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32, |
10861 | {Op.getOperand(1), Op.getOperand(2), |
10862 | DAG.getTargetConstant(MB, dl, MVT::i32), |
10863 | DAG.getTargetConstant(ME, dl, MVT::i32)}), |
10864 | 0); |
10865 | } |
10866 | |
10867 | case Intrinsic::ppc_mma_disassemble_acc: { |
10868 | if (Subtarget.isISAFuture()) { |
10869 | EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1}; |
10870 | SDValue WideVec = SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, |
10871 | ArrayRef(ReturnTypes, 2), |
10872 | Op.getOperand(1)), |
10873 | 0); |
10874 | SmallVector<SDValue, 4> RetOps; |
10875 | SDValue Value = SDValue(WideVec.getNode(), 0); |
10876 | SDValue Value2 = SDValue(WideVec.getNode(), 1); |
10877 | |
      SDValue Extract;
10879 | Extract = DAG.getNode( |
10880 | PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, |
10881 | Subtarget.isLittleEndian() ? Value2 : Value, |
10882 | DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0, |
10883 | dl, getPointerTy(DAG.getDataLayout()))); |
      RetOps.push_back(Extract);
10885 | Extract = DAG.getNode( |
10886 | PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, |
10887 | Subtarget.isLittleEndian() ? Value2 : Value, |
10888 | DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1, |
10889 | dl, getPointerTy(DAG.getDataLayout()))); |
      RetOps.push_back(Extract);
10891 | Extract = DAG.getNode( |
10892 | PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, |
10893 | Subtarget.isLittleEndian() ? Value : Value2, |
10894 | DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0, |
10895 | dl, getPointerTy(DAG.getDataLayout()))); |
      RetOps.push_back(Extract);
10897 | Extract = DAG.getNode( |
10898 | PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, |
10899 | Subtarget.isLittleEndian() ? Value : Value2, |
10900 | DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1, |
10901 | dl, getPointerTy(DAG.getDataLayout()))); |
      RetOps.push_back(Extract);
      return DAG.getMergeValues(RetOps, dl);
10904 | } |
10905 | [[fallthrough]]; |
10906 | } |
10907 | case Intrinsic::ppc_vsx_disassemble_pair: { |
10908 | int NumVecs = 2; |
    SDValue WideVec = Op.getOperand(1);
10910 | if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) { |
10911 | NumVecs = 4; |
10912 | WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec); |
10913 | } |
10914 | SmallVector<SDValue, 4> RetOps; |
10915 | for (int VecNo = 0; VecNo < NumVecs; VecNo++) { |
      SDValue Extract = DAG.getNode(
10917 | PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec, |
10918 | DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo |
10919 | : VecNo, |
10920 | dl, getPointerTy(DAG.getDataLayout()))); |
      RetOps.push_back(Extract);
10922 | } |
    return DAG.getMergeValues(RetOps, dl);
10924 | } |
10925 | |
10926 | case Intrinsic::ppc_mma_xxmfacc: |
10927 | case Intrinsic::ppc_mma_xxmtacc: { |
10928 | // Allow pre-isa-future subtargets to lower as normal. |
10929 | if (!Subtarget.isISAFuture()) |
10930 | return SDValue(); |
    // The intrinsics for xxmtacc and xxmfacc take one argument of type
    // v512i1. On ISA-future CPUs the corresponding wacc instruction
    // dmxx[inst|extf]dmr512 is always generated for type v512i1, which
    // removes the need to produce the xxm[t|f]acc.
    SDValue WideVec = Op.getOperand(1);
    DAG.ReplaceAllUsesWith(Op, WideVec);
10937 | return SDValue(); |
10938 | } |
10939 | |
10940 | case Intrinsic::ppc_unpack_longdouble: { |
    auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
           "Argument of long double unpack must be 0 or 1!");
10944 | return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1), |
10945 | DAG.getConstant(!!(Idx->getSExtValue()), dl, |
10946 | Idx->getValueType(0))); |
10947 | } |
10948 | |
10949 | case Intrinsic::ppc_compare_exp_lt: |
10950 | case Intrinsic::ppc_compare_exp_gt: |
10951 | case Intrinsic::ppc_compare_exp_eq: |
10952 | case Intrinsic::ppc_compare_exp_uo: { |
10953 | unsigned Pred; |
10954 | switch (IntrinsicID) { |
10955 | case Intrinsic::ppc_compare_exp_lt: |
10956 | Pred = PPC::PRED_LT; |
10957 | break; |
10958 | case Intrinsic::ppc_compare_exp_gt: |
10959 | Pred = PPC::PRED_GT; |
10960 | break; |
10961 | case Intrinsic::ppc_compare_exp_eq: |
10962 | Pred = PPC::PRED_EQ; |
10963 | break; |
10964 | case Intrinsic::ppc_compare_exp_uo: |
10965 | Pred = PPC::PRED_UN; |
10966 | break; |
10967 | } |
10968 | return SDValue( |
10969 | DAG.getMachineNode( |
10970 | PPC::SELECT_CC_I4, dl, MVT::i32, |
10971 | {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32, |
10972 | Op.getOperand(1), Op.getOperand(2)), |
10973 | 0), |
10974 | DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32), |
10975 | DAG.getTargetConstant(Pred, dl, MVT::i32)}), |
10976 | 0); |
10977 | } |
10978 | case Intrinsic::ppc_test_data_class: { |
    EVT OpVT = Op.getOperand(1).getValueType();
10980 | unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP |
10981 | : (OpVT == MVT::f64 ? PPC::XSTSTDCDP |
10982 | : PPC::XSTSTDCSP); |
10983 | return SDValue( |
10984 | DAG.getMachineNode( |
10985 | PPC::SELECT_CC_I4, dl, MVT::i32, |
10986 | {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2), |
10987 | Op.getOperand(1)), |
10988 | 0), |
10989 | DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32), |
10990 | DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}), |
10991 | 0); |
10992 | } |
10993 | case Intrinsic::ppc_fnmsub: { |
    EVT VT = Op.getOperand(1).getValueType();
    if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
      return DAG.getNode(
          ISD::FNEG, dl, VT,
          DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
                      DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
    return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
                       Op.getOperand(2), Op.getOperand(3));
11002 | } |
11003 | case Intrinsic::ppc_convert_f128_to_ppcf128: |
11004 | case Intrinsic::ppc_convert_ppcf128_to_f128: { |
11005 | RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128 |
11006 | ? RTLIB::CONVERT_PPCF128_F128 |
11007 | : RTLIB::CONVERT_F128_PPCF128; |
11008 | MakeLibCallOptions CallOptions; |
11009 | std::pair<SDValue, SDValue> Result = |
        makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
                    dl, SDValue());
11012 | return Result.first; |
11013 | } |
11014 | case Intrinsic::ppc_maxfe: |
11015 | case Intrinsic::ppc_maxfl: |
11016 | case Intrinsic::ppc_maxfs: |
11017 | case Intrinsic::ppc_minfe: |
11018 | case Intrinsic::ppc_minfl: |
11019 | case Intrinsic::ppc_minfs: { |
11020 | EVT VT = Op.getValueType(); |
11021 | assert( |
11022 | all_of(Op->ops().drop_front(4), |
11023 | [VT](const SDUse &Use) { return Use.getValueType() == VT; }) && |
11024 | "ppc_[max|min]f[e|l|s] must have uniform type arguments" ); |
11025 | (void)VT; |
11026 | ISD::CondCode CC = ISD::SETGT; |
11027 | if (IntrinsicID == Intrinsic::ppc_minfe || |
11028 | IntrinsicID == Intrinsic::ppc_minfl || |
11029 | IntrinsicID == Intrinsic::ppc_minfs) |
11030 | CC = ISD::SETLT; |
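    // Reduce the value operands into Res with a chain of selects: Res starts
    // at the second-to-last operand, earlier operands are folded in, and the
    // index wraps past operand 0 (the intrinsic ID) to reach the last one.
    // E.g. for operands (ID, a, b, c): Res = b, then select against a, then c.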
11031 | unsigned I = Op.getNumOperands() - 2, Cnt = I; |
    SDValue Res = Op.getOperand(I);
    for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
      Res =
          DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
11036 | } |
11037 | return Res; |
11038 | } |
11039 | } |
11040 | |
11041 | // If this is a lowered altivec predicate compare, CompareOpc is set to the |
11042 | // opcode number of the comparison. |
11043 | int CompareOpc; |
11044 | bool isDot; |
  if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11046 | return SDValue(); // Don't custom lower most intrinsics. |
11047 | |
11048 | // If this is a non-dot comparison, make the VCMP node and we are done. |
11049 | if (!isDot) { |
11050 | SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(), |
11051 | Op.getOperand(1), Op.getOperand(2), |
11052 | DAG.getConstant(CompareOpc, dl, MVT::i32)); |
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11054 | } |
11055 | |
11056 | // Create the PPCISD altivec 'dot' comparison node. |
11057 | SDValue Ops[] = { |
11058 | Op.getOperand(2), // LHS |
11059 | Op.getOperand(3), // RHS |
11060 | DAG.getConstant(CompareOpc, dl, MVT::i32) |
11061 | }; |
11062 | EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue }; |
11063 | SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops); |
11064 | |
11065 | // Now that we have the comparison, emit a copy from the CR to a GPR. |
11066 | // This is flagged to the above dot comparison. |
11067 | SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32, |
11068 | DAG.getRegister(PPC::CR6, MVT::i32), |
11069 | CompNode.getValue(1)); |
11070 | |
11071 | // Unpack the result based on how the target uses it. |
11072 | unsigned BitNo; // Bit # of CR6. |
11073 | bool InvertBit; // Invert result? |
  switch (Op.getConstantOperandVal(1)) {
11075 | default: // Can't happen, don't crash on invalid number though. |
11076 | case 0: // Return the value of the EQ bit of CR6. |
11077 | BitNo = 0; InvertBit = false; |
11078 | break; |
11079 | case 1: // Return the inverted value of the EQ bit of CR6. |
11080 | BitNo = 0; InvertBit = true; |
11081 | break; |
11082 | case 2: // Return the value of the LT bit of CR6. |
11083 | BitNo = 2; InvertBit = false; |
11084 | break; |
11085 | case 3: // Return the inverted value of the LT bit of CR6. |
11086 | BitNo = 2; InvertBit = true; |
11087 | break; |
11088 | } |
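  // MFOCRF leaves the four CR6 bits at bits 7..4 of the i32 result (LT at
  // bit 7, GT at 6, EQ at 5, SO at 4), so the shift below works out to 5
  // for the EQ bit (BitNo == 0) and 7 for the LT bit (BitNo == 2).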
11089 | |
11090 | // Shift the bit into the low position. |
11091 | Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags, |
11092 | DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32)); |
11093 | // Isolate the bit. |
11094 | Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags, |
11095 | DAG.getConstant(1, dl, MVT::i32)); |
11096 | |
11097 | // If we are supposed to, toggle the bit. |
11098 | if (InvertBit) |
11099 | Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags, |
11100 | DAG.getConstant(1, dl, MVT::i32)); |
11101 | return Flags; |
11102 | } |
11103 | |
11104 | SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op, |
11105 | SelectionDAG &DAG) const { |
11106 | // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to |
11107 | // the beginning of the argument list. |
  int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
  SDLoc DL(Op);
  switch (Op.getConstantOperandVal(ArgStart)) {
11111 | case Intrinsic::ppc_cfence: { |
    assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
    SDValue Val = Op.getOperand(ArgStart + 1);
11114 | EVT Ty = Val.getValueType(); |
11115 | if (Ty == MVT::i128) { |
11116 | // FIXME: Testing one of two paired registers is sufficient to guarantee |
11117 | // ordering? |
11118 | Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val); |
11119 | } |
11120 | unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE; |
11121 | EVT FTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32; |
11122 | return SDValue( |
11123 | DAG.getMachineNode(Opcode, DL, MVT::Other, |
11124 | DAG.getNode(ISD::ANY_EXTEND, DL, FTy, Val), |
11125 | Op.getOperand(0)), |
11126 | 0); |
11127 | } |
11128 | default: |
11129 | break; |
11130 | } |
11131 | return SDValue(); |
11132 | } |
11133 | |
11134 | // Lower scalar BSWAP64 to xxbrd. |
11135 | SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const { |
11136 | SDLoc dl(Op); |
11137 | if (!Subtarget.isPPC64()) |
11138 | return Op; |
11139 | // MTVSRDD |
11140 | Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0), |
11141 | Op.getOperand(0)); |
11142 | // XXBRD |
11143 | Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op); |
11144 | // MFVSRD |
11145 | int VectorIndex = 0; |
11146 | if (Subtarget.isLittleEndian()) |
11147 | VectorIndex = 1; |
11148 | Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op, |
11149 | DAG.getTargetConstant(VectorIndex, dl, MVT::i32)); |
11150 | return Op; |
11151 | } |
11152 | |
11153 | // ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be |
11154 | // compared to a value that is atomically loaded (atomic loads zero-extend). |
11155 | SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op, |
11156 | SelectionDAG &DAG) const { |
11157 | assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP && |
11158 | "Expecting an atomic compare-and-swap here." ); |
11159 | SDLoc dl(Op); |
11160 | auto *AtomicNode = cast<AtomicSDNode>(Val: Op.getNode()); |
11161 | EVT MemVT = AtomicNode->getMemoryVT(); |
11162 | if (MemVT.getSizeInBits() >= 32) |
11163 | return Op; |
11164 | |
  SDValue CmpOp = Op.getOperand(2);
  // If this is already correctly zero-extended, leave it alone.
  auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
  if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11169 | return Op; |
11170 | |
11171 | // Clear the high bits of the compare operand. |
11172 | unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1; |
11173 | SDValue NewCmpOp = |
11174 | DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp, |
11175 | DAG.getConstant(MaskVal, dl, MVT::i32)); |
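  // For example, MemVT == i8 gives MaskVal == (1 << 8) - 1 == 0xFF and i16
  // gives 0xFFFF, matching the zero-extended value the atomic load produces.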
11176 | |
11177 | // Replace the existing compare operand with the properly zero-extended one. |
11178 | SmallVector<SDValue, 4> Ops; |
11179 | for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++) |
    Ops.push_back(AtomicNode->getOperand(i));
11181 | Ops[2] = NewCmpOp; |
11182 | MachineMemOperand *MMO = AtomicNode->getMemOperand(); |
11183 | SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other); |
11184 | auto NodeTy = |
11185 | (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16; |
11186 | return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO); |
11187 | } |
11188 | |
11189 | SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op, |
11190 | SelectionDAG &DAG) const { |
  AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
  EVT MemVT = N->getMemoryVT();
  assert(MemVT.getSimpleVT() == MVT::i128 &&
         "Expect quadword atomic operations");
11195 | SDLoc dl(N); |
11196 | unsigned Opc = N->getOpcode(); |
11197 | switch (Opc) { |
11198 | case ISD::ATOMIC_LOAD: { |
    // Lower a quadword atomic load to int_ppc_atomic_load_i128, which the
    // pattern-matching instruction selector then lowers to PPC instructions.
11201 | SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other); |
11202 | SmallVector<SDValue, 4> Ops{ |
11203 | N->getOperand(0), |
11204 | DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)}; |
11205 | for (int I = 1, E = N->getNumOperands(); I < E; ++I) |
      Ops.push_back(N->getOperand(I));
    SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
                                                Ops, MemVT, N->getMemOperand());
11209 | SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal); |
11210 | SDValue ValHi = |
11211 | DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1)); |
11212 | ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi, |
11213 | DAG.getConstant(64, dl, MVT::i32)); |
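    // Reassemble the quadword as Val = ValLo | (ValHi << 64).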
11214 | SDValue Val = |
11215 | DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi}); |
11216 | return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other}, |
11217 | {Val, LoadedVal.getValue(2)}); |
11218 | } |
11219 | case ISD::ATOMIC_STORE: { |
    // Lower a quadword atomic store to int_ppc_atomic_store_i128, which the
    // pattern-matching instruction selector then lowers to PPC instructions.
11222 | SDVTList Tys = DAG.getVTList(MVT::Other); |
11223 | SmallVector<SDValue, 4> Ops{ |
11224 | N->getOperand(0), |
11225 | DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)}; |
    SDValue Val = N->getOperand(1);
11227 | SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val); |
11228 | SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val, |
11229 | DAG.getConstant(64, dl, MVT::i32)); |
11230 | ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi); |
    Ops.push_back(ValLo);
    Ops.push_back(ValHi);
    Ops.push_back(N->getOperand(2));
    return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
                                   N->getMemOperand());
11236 | } |
11237 | default: |
11238 | llvm_unreachable("Unexpected atomic opcode" ); |
11239 | } |
11240 | } |
11241 | |
11242 | static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, |
11243 | SelectionDAG &DAG, |
11244 | const PPCSubtarget &Subtarget) { |
  assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11246 | |
11247 | enum DataClassMask { |
11248 | DC_NAN = 1 << 6, |
11249 | DC_NEG_INF = 1 << 4, |
11250 | DC_POS_INF = 1 << 5, |
11251 | DC_NEG_ZERO = 1 << 2, |
11252 | DC_POS_ZERO = 1 << 3, |
11253 | DC_NEG_SUBNORM = 1, |
11254 | DC_POS_SUBNORM = 1 << 1, |
11255 | }; |
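  // These values match the DCMX mask encoding of the xststdc[sdq]p test
  // instructions: from the most significant of the seven bits down, they
  // select NaN, +Inf, -Inf, +Zero, -Zero, +Denormal and -Denormal.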
11256 | |
11257 | EVT VT = Op.getValueType(); |
11258 | |
11259 | unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP |
11260 | : VT == MVT::f64 ? PPC::XSTSTDCDP |
11261 | : PPC::XSTSTDCSP; |
11262 | |
11263 | if (Mask == fcAllFlags) |
11264 | return DAG.getBoolConstant(true, Dl, MVT::i1, VT); |
11265 | if (Mask == 0) |
11266 | return DAG.getBoolConstant(false, Dl, MVT::i1, VT); |
11267 | |
  // Test the inverted set of flags when doing so is cheaper or necessary.
  if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
    SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11271 | return DAG.getNOT(Dl, Rev, MVT::i1); |
11272 | } |
11273 | |
  // PowerPC has no direct test for 'normal'. Test every other class first,
  // then check that the value is 'not not-normal' with the expected sign.
11276 | if (Mask & fcNormal) { |
11277 | SDValue Rev(DAG.getMachineNode( |
11278 | TestOp, Dl, MVT::i32, |
11279 | DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF | |
11280 | DC_NEG_ZERO | DC_POS_ZERO | |
11281 | DC_NEG_SUBNORM | DC_POS_SUBNORM, |
11282 | Dl, MVT::i32), |
11283 | Op), |
11284 | 0); |
    // The sign is stored in CR bit 0, the result in CR bit 2.
11286 | SDValue Sign( |
11287 | DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev, |
11288 | DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)), |
11289 | 0); |
11290 | SDValue Normal(DAG.getNOT( |
11291 | Dl, |
11292 | SDValue(DAG.getMachineNode( |
11293 | TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev, |
11294 | DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)), |
11295 | 0), |
11296 | MVT::i1)); |
11297 | if (Mask & fcPosNormal) |
11298 | Sign = DAG.getNOT(Dl, Sign, MVT::i1); |
11299 | SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal); |
11300 | if (Mask == fcPosNormal || Mask == fcNegNormal) |
11301 | return Result; |
11302 | |
11303 | return DAG.getNode( |
11304 | ISD::OR, Dl, MVT::i1, |
11305 | getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result); |
11306 | } |
11307 | |
  // The instruction doesn't differentiate between signaling and quiet NaNs.
  // Test the rest first, then test whether it 'is NaN and signaling/quiet'.
11310 | if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) { |
11311 | bool IsQuiet = Mask & fcQNan; |
    SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11313 | |
    // Quietness is determined by the first bit of the fraction field.
11315 | uint64_t QuietMask = 0; |
11316 | SDValue HighWord; |
11317 | if (VT == MVT::f128) { |
11318 | HighWord = DAG.getNode( |
11319 | ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op), |
11320 | DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl)); |
11321 | QuietMask = 0x8000; |
11322 | } else if (VT == MVT::f64) { |
11323 | if (Subtarget.isPPC64()) { |
11324 | HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, |
11325 | DAG.getBitcast(MVT::i64, Op), |
11326 | DAG.getConstant(1, Dl, MVT::i32)); |
11327 | } else { |
11328 | SDValue Vec = DAG.getBitcast( |
11329 | MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op)); |
11330 | HighWord = DAG.getNode( |
11331 | ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec, |
11332 | DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl)); |
11333 | } |
11334 | QuietMask = 0x80000; |
11335 | } else if (VT == MVT::f32) { |
11336 | HighWord = DAG.getBitcast(MVT::i32, Op); |
11337 | QuietMask = 0x400000; |
11338 | } |
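    // The quiet bit is the most significant fraction bit: bit 22 of an f32
    // (0x400000), bit 51 of an f64 (0x80000 within its high 32-bit word),
    // and bit 111 of an f128 (0x8000 within its highest 32-bit word).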
11339 | SDValue NanRes = DAG.getSetCC( |
11340 | Dl, MVT::i1, |
11341 | DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord, |
11342 | DAG.getConstant(QuietMask, Dl, MVT::i32)), |
11343 | DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ); |
11344 | NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes); |
11345 | if (Mask == fcQNan || Mask == fcSNan) |
11346 | return NanRes; |
11347 | |
11348 | return DAG.getNode(ISD::OR, Dl, MVT::i1, |
11349 | getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget), |
11350 | NanRes); |
11351 | } |
11352 | |
11353 | unsigned NativeMask = 0; |
11354 | if ((Mask & fcNan) == fcNan) |
11355 | NativeMask |= DC_NAN; |
11356 | if (Mask & fcNegInf) |
11357 | NativeMask |= DC_NEG_INF; |
11358 | if (Mask & fcPosInf) |
11359 | NativeMask |= DC_POS_INF; |
11360 | if (Mask & fcNegZero) |
11361 | NativeMask |= DC_NEG_ZERO; |
11362 | if (Mask & fcPosZero) |
11363 | NativeMask |= DC_POS_ZERO; |
11364 | if (Mask & fcNegSubnormal) |
11365 | NativeMask |= DC_NEG_SUBNORM; |
11366 | if (Mask & fcPosSubnormal) |
11367 | NativeMask |= DC_POS_SUBNORM; |
11368 | return SDValue( |
11369 | DAG.getMachineNode( |
11370 | TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, |
11371 | SDValue(DAG.getMachineNode( |
11372 | TestOp, Dl, MVT::i32, |
11373 | DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op), |
11374 | 0), |
11375 | DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)), |
11376 | 0); |
11377 | } |
11378 | |
11379 | SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op, |
11380 | SelectionDAG &DAG) const { |
  assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
  SDValue LHS = Op.getOperand(0);
  uint64_t RHSC = Op.getConstantOperandVal(1);
  SDLoc Dl(Op);
  FPClassTest Category = static_cast<FPClassTest>(RHSC);
  return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11387 | } |
11388 | |
11389 | SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, |
11390 | SelectionDAG &DAG) const { |
11391 | SDLoc dl(Op); |
11392 | // Create a stack slot that is 16-byte aligned. |
11393 | MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); |
  int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  // Store the input value into Value#0 of the stack slot.
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
                               MachinePointerInfo());
  // Load it out.
  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
11403 | } |
11404 | |
11405 | SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, |
11406 | SelectionDAG &DAG) const { |
11407 | assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && |
11408 | "Should only be called for ISD::INSERT_VECTOR_ELT" ); |
11409 | |
11410 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2)); |
11411 | |
11412 | EVT VT = Op.getValueType(); |
11413 | SDLoc dl(Op); |
11414 | SDValue V1 = Op.getOperand(i: 0); |
11415 | SDValue V2 = Op.getOperand(i: 1); |
11416 | |
11417 | if (VT == MVT::v2f64 && C) |
11418 | return Op; |
11419 | |
11420 | if (Subtarget.hasP9Vector()) { |
    // An f32 load feeding a v4f32 insert_vector_elt is handled this way
    // because on P10 it lets this specific load pattern use the refactored
    // load and store infrastructure and thereby exploit prefixed loads.
    // On targets with inexpensive direct moves (Power9 and up), a
    // (insert_vector_elt v4f32:$vec, (f32 load)) is always better done as an
    // integer load, since a single-precision load converts to double
    // precision on load and back to single precision afterwards.
11429 | if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) && |
11430 | (isa<LoadSDNode>(V2))) { |
11431 | SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1); |
11432 | SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2); |
11433 | SDValue InsVecElt = |
11434 | DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector, |
11435 | BitcastLoad, Op.getOperand(2)); |
11436 | return DAG.getBitcast(MVT::v4f32, InsVecElt); |
11437 | } |
11438 | } |
11439 | |
11440 | if (Subtarget.isISA3_1()) { |
11441 | if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64()) |
11442 | return SDValue(); |
11443 | // On P10, we have legal lowering for constant and variable indices for |
11444 | // all vectors. |
11445 | if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 || |
11446 | VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64) |
11447 | return Op; |
11448 | } |
11449 | |
11450 | // Before P10, we have legal lowering for constant indices but not for |
11451 | // variable ones. |
11452 | if (!C) |
11453 | return SDValue(); |
11454 | |
11455 | // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types. |
11456 | if (VT == MVT::v8i16 || VT == MVT::v16i8) { |
    SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
11458 | unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8; |
11459 | unsigned InsertAtElement = C->getZExtValue(); |
11460 | unsigned InsertAtByte = InsertAtElement * BytesInEachElement; |
11461 | if (Subtarget.isLittleEndian()) { |
11462 | InsertAtByte = (16 - BytesInEachElement) - InsertAtByte; |
11463 | } |
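    // For example, inserting into element 0 of a v8i16 on little endian
    // yields InsertAtByte == (16 - 2) - 0 == 14, i.e. the byte offset
    // counted from the big-endian end of the register.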
11464 | return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz, |
11465 | DAG.getConstant(InsertAtByte, dl, MVT::i32)); |
11466 | } |
11467 | return Op; |
11468 | } |
11469 | |
11470 | SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, |
11471 | SelectionDAG &DAG) const { |
11472 | SDLoc dl(Op); |
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
11474 | SDValue LoadChain = LN->getChain(); |
11475 | SDValue BasePtr = LN->getBasePtr(); |
11476 | EVT VT = Op.getValueType(); |
11477 | |
11478 | if (VT != MVT::v256i1 && VT != MVT::v512i1) |
11479 | return Op; |
11480 | |
11481 | // Type v256i1 is used for pairs and v512i1 is used for accumulators. |
11482 | // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in |
11483 | // 2 or 4 vsx registers. |
11484 | assert((VT != MVT::v512i1 || Subtarget.hasMMA()) && |
11485 | "Type unsupported without MMA" ); |
11486 | assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) && |
11487 | "Type unsupported without paired vector support" ); |
11488 | Align Alignment = LN->getAlign(); |
11489 | SmallVector<SDValue, 4> Loads; |
11490 | SmallVector<SDValue, 4> LoadChains; |
11491 | unsigned NumVecs = VT.getSizeInBits() / 128; |
11492 | for (unsigned Idx = 0; Idx < NumVecs; ++Idx) { |
11493 | SDValue Load = |
11494 | DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr, |
11495 | LN->getPointerInfo().getWithOffset(Idx * 16), |
11496 | commonAlignment(Alignment, Idx * 16), |
11497 | LN->getMemOperand()->getFlags(), LN->getAAInfo()); |
    BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                          DAG.getConstant(16, dl, BasePtr.getValueType()));
    Loads.push_back(Load);
    LoadChains.push_back(Load.getValue(1));
11502 | } |
11503 | if (Subtarget.isLittleEndian()) { |
    std::reverse(Loads.begin(), Loads.end());
    std::reverse(LoadChains.begin(), LoadChains.end());
11506 | } |
11507 | SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); |
11508 | SDValue Value = |
11509 | DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD, |
11510 | dl, VT, Loads); |
11511 | SDValue RetOps[] = {Value, TF}; |
  return DAG.getMergeValues(RetOps, dl);
11513 | } |
11514 | |
11515 | SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, |
11516 | SelectionDAG &DAG) const { |
11517 | SDLoc dl(Op); |
  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
11519 | SDValue StoreChain = SN->getChain(); |
11520 | SDValue BasePtr = SN->getBasePtr(); |
11521 | SDValue Value = SN->getValue(); |
11522 | SDValue Value2 = SN->getValue(); |
11523 | EVT StoreVT = Value.getValueType(); |
11524 | |
11525 | if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1) |
11526 | return Op; |
11527 | |
11528 | // Type v256i1 is used for pairs and v512i1 is used for accumulators. |
11529 | // Here we create 2 or 4 v16i8 stores to store the pair or accumulator |
11530 | // underlying registers individually. |
11531 | assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) && |
11532 | "Type unsupported without MMA" ); |
11533 | assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) && |
11534 | "Type unsupported without paired vector support" ); |
11535 | Align Alignment = SN->getAlign(); |
11536 | SmallVector<SDValue, 4> Stores; |
11537 | unsigned NumVecs = 2; |
11538 | if (StoreVT == MVT::v512i1) { |
11539 | if (Subtarget.isISAFuture()) { |
11540 | EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1}; |
11541 | MachineSDNode *ExtNode = DAG.getMachineNode( |
11542 | PPC::DMXXEXTFDMR512, dl, ArrayRef(ReturnTypes, 2), Op.getOperand(1)); |
11543 | |
11544 | Value = SDValue(ExtNode, 0); |
11545 | Value2 = SDValue(ExtNode, 1); |
11546 | } else |
11547 | Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value); |
11548 | NumVecs = 4; |
11549 | } |
11550 | for (unsigned Idx = 0; Idx < NumVecs; ++Idx) { |
11551 | unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx; |
11552 | SDValue Elt; |
11553 | if (Subtarget.isISAFuture()) { |
11554 | VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2); |
11555 | Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, |
11556 | Idx > 1 ? Value2 : Value, |
11557 | DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout()))); |
11558 | } else |
11559 | Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value, |
11560 | DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout()))); |
11561 | |
11562 | SDValue Store = |
        DAG.getStore(StoreChain, dl, Elt, BasePtr,
                     SN->getPointerInfo().getWithOffset(Idx * 16),
                     commonAlignment(Alignment, Idx * 16),
                     SN->getMemOperand()->getFlags(), SN->getAAInfo());
    BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                          DAG.getConstant(16, dl, BasePtr.getValueType()));
    Stores.push_back(Store);
11570 | } |
  SDValue TF = DAG.getTokenFactor(dl, Stores);
11572 | return TF; |
11573 | } |
11574 | |
11575 | SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { |
11576 | SDLoc dl(Op); |
11577 | if (Op.getValueType() == MVT::v4i32) { |
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11579 | |
11580 | SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl); |
11581 | // +16 as shift amt. |
11582 | SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl); |
11583 | SDValue RHSSwap = // = vrlw RHS, 16 |
11584 | BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl); |
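    // Splitting each 32-bit lane as a = aH * 2^16 + aL and b = bH * 2^16 + bL,
    // the low 32 bits of a * b equal aL*bL + ((aL*bH + aH*bL) << 16); LoProd
    // below computes the first term and HiProd the bracketed sum.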
11585 | |
11586 | // Shrinkify inputs to v8i16. |
11587 | LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS); |
11588 | RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS); |
11589 | RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap); |
11590 | |
11591 | // Low parts multiplied together, generating 32-bit results (we ignore the |
11592 | // top parts). |
11593 | SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, |
11594 | LHS, RHS, DAG, dl, MVT::v4i32); |
11595 | |
11596 | SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, |
11597 | LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32); |
11598 | // Shift the high parts up 16 bits. |
11599 | HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, |
11600 | Neg16, DAG, dl); |
11601 | return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd); |
11602 | } else if (Op.getValueType() == MVT::v16i8) { |
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11604 | bool isLittleEndian = Subtarget.isLittleEndian(); |
11605 | |
11606 | // Multiply the even 8-bit parts, producing 16-bit sums. |
11607 | SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, |
11608 | LHS, RHS, DAG, dl, MVT::v8i16); |
11609 | EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts); |
11610 | |
11611 | // Multiply the odd 8-bit parts, producing 16-bit sums. |
11612 | SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, |
11613 | LHS, RHS, DAG, dl, MVT::v8i16); |
11614 | OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts); |
11615 | |
11616 | // Merge the results together. Because vmuleub and vmuloub are |
11617 | // instructions with a big-endian bias, we must reverse the |
11618 | // element numbering and reverse the meaning of "odd" and "even" |
11619 | // when generating little endian code. |
11620 | int Ops[16]; |
11621 | for (unsigned i = 0; i != 8; ++i) { |
11622 | if (isLittleEndian) { |
11623 | Ops[i*2 ] = 2*i; |
11624 | Ops[i*2+1] = 2*i+16; |
11625 | } else { |
11626 | Ops[i*2 ] = 2*i+1; |
11627 | Ops[i*2+1] = 2*i+1+16; |
11628 | } |
11629 | } |
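    // For big endian this builds the mask {1, 17, 3, 19, ...}: byte 2*i+1
    // of each v8i16 lane is the low byte of the i-th 16-bit product, so the
    // shuffle interleaves the low bytes of the even and odd products.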
11630 | if (isLittleEndian) |
11631 | return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops); |
11632 | else |
11633 | return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); |
11634 | } else { |
11635 | llvm_unreachable("Unknown mul to lower!" ); |
11636 | } |
11637 | } |
11638 | |
11639 | SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { |
11640 | bool IsStrict = Op->isStrictFPOpcode(); |
11641 | if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 && |
11642 | !Subtarget.hasP9Vector()) |
11643 | return SDValue(); |
11644 | |
11645 | return Op; |
11646 | } |
11647 | |
// Custom lowering for fpext v2f32 to v2f64
11649 | SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { |
11650 | |
  assert(Op.getOpcode() == ISD::FP_EXTEND &&
         "Should only be called for ISD::FP_EXTEND");
11653 | |
11654 | // FIXME: handle extends from half precision float vectors on P9. |
11655 | // We only want to custom lower an extend from v2f32 to v2f64. |
11656 | if (Op.getValueType() != MVT::v2f64 || |
11657 | Op.getOperand(0).getValueType() != MVT::v2f32) |
11658 | return SDValue(); |
11659 | |
11660 | SDLoc dl(Op); |
  SDValue Op0 = Op.getOperand(0);
11662 | |
11663 | switch (Op0.getOpcode()) { |
11664 | default: |
11665 | return SDValue(); |
11666 | case ISD::EXTRACT_SUBVECTOR: { |
11667 | assert(Op0.getNumOperands() == 2 && |
11668 | isa<ConstantSDNode>(Op0->getOperand(1)) && |
11669 | "Node should have 2 operands with second one being a constant!" ); |
11670 | |
11671 | if (Op0.getOperand(0).getValueType() != MVT::v4f32) |
11672 | return SDValue(); |
11673 | |
11674 | // Custom lower is only done for high or low doubleword. |
    int Idx = Op0.getConstantOperandVal(1);
11676 | if (Idx % 2 != 0) |
11677 | return SDValue(); |
11678 | |
11679 | // Since input is v4f32, at this point Idx is either 0 or 2. |
11680 | // Shift to get the doubleword position we want. |
11681 | int DWord = Idx >> 1; |
11682 | |
11683 | // High and low word positions are different on little endian. |
11684 | if (Subtarget.isLittleEndian()) |
11685 | DWord ^= 0x1; |
11686 | |
11687 | return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, |
11688 | Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32)); |
11689 | } |
11690 | case ISD::FADD: |
11691 | case ISD::FMUL: |
11692 | case ISD::FSUB: { |
11693 | SDValue NewLoad[2]; |
11694 | for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) { |
      // Ensure both inputs are loads.
      SDValue LdOp = Op0.getOperand(i);
      if (LdOp.getOpcode() != ISD::LOAD)
        return SDValue();
      // Generate a new load node.
      LoadSDNode *LD = cast<LoadSDNode>(LdOp);
11701 | SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()}; |
11702 | NewLoad[i] = DAG.getMemIntrinsicNode( |
11703 | PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps, |
11704 | LD->getMemoryVT(), LD->getMemOperand()); |
11705 | } |
11706 | SDValue NewOp = |
11707 | DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0], |
11708 | NewLoad[1], Op0.getNode()->getFlags()); |
11709 | return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp, |
11710 | DAG.getConstant(0, dl, MVT::i32)); |
11711 | } |
11712 | case ISD::LOAD: { |
    LoadSDNode *LD = cast<LoadSDNode>(Op0);
11714 | SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()}; |
11715 | SDValue NewLd = DAG.getMemIntrinsicNode( |
11716 | PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps, |
11717 | LD->getMemoryVT(), LD->getMemOperand()); |
11718 | return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd, |
11719 | DAG.getConstant(0, dl, MVT::i32)); |
11720 | } |
11721 | } |
11722 | llvm_unreachable("ERROR:Should return for all cases within swtich." ); |
11723 | } |
11724 | |
11725 | /// LowerOperation - Provide custom lowering hooks for some operations. |
11726 | /// |
11727 | SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { |
11728 | switch (Op.getOpcode()) { |
11729 | default: llvm_unreachable("Wasn't expecting to be able to lower this!" ); |
11730 | case ISD::FPOW: return lowerPow(Op, DAG); |
11731 | case ISD::FSIN: return lowerSin(Op, DAG); |
11732 | case ISD::FCOS: return lowerCos(Op, DAG); |
11733 | case ISD::FLOG: return lowerLog(Op, DAG); |
11734 | case ISD::FLOG10: return lowerLog10(Op, DAG); |
11735 | case ISD::FEXP: return lowerExp(Op, DAG); |
11736 | case ISD::ConstantPool: return LowerConstantPool(Op, DAG); |
11737 | case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); |
11738 | case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); |
11739 | case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); |
11740 | case ISD::JumpTable: return LowerJumpTable(Op, DAG); |
11741 | case ISD::STRICT_FSETCC: |
11742 | case ISD::STRICT_FSETCCS: |
11743 | case ISD::SETCC: return LowerSETCC(Op, DAG); |
11744 | case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); |
11745 | case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); |
11746 | |
11747 | case ISD::INLINEASM: |
11748 | case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG); |
11749 | // Variable argument lowering. |
11750 | case ISD::VASTART: return LowerVASTART(Op, DAG); |
11751 | case ISD::VAARG: return LowerVAARG(Op, DAG); |
11752 | case ISD::VACOPY: return LowerVACOPY(Op, DAG); |
11753 | |
11754 | case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG); |
11755 | case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); |
11756 | case ISD::GET_DYNAMIC_AREA_OFFSET: |
11757 | return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG); |
11758 | |
11759 | // Exception handling lowering. |
11760 | case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG); |
11761 | case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); |
11762 | case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); |
11763 | |
11764 | case ISD::LOAD: return LowerLOAD(Op, DAG); |
11765 | case ISD::STORE: return LowerSTORE(Op, DAG); |
11766 | case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); |
11767 | case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); |
11768 | case ISD::STRICT_FP_TO_UINT: |
11769 | case ISD::STRICT_FP_TO_SINT: |
11770 | case ISD::FP_TO_UINT: |
  case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
11772 | case ISD::STRICT_UINT_TO_FP: |
11773 | case ISD::STRICT_SINT_TO_FP: |
11774 | case ISD::UINT_TO_FP: |
11775 | case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); |
11776 | case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG); |
11777 | |
11778 | // Lower 64-bit shifts. |
11779 | case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG); |
11780 | case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG); |
11781 | case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG); |
11782 | |
11783 | case ISD::FSHL: return LowerFunnelShift(Op, DAG); |
11784 | case ISD::FSHR: return LowerFunnelShift(Op, DAG); |
11785 | |
11786 | // Vector-related lowering. |
11787 | case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); |
11788 | case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); |
11789 | case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); |
11790 | case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); |
11791 | case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); |
11792 | case ISD::MUL: return LowerMUL(Op, DAG); |
11793 | case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); |
11794 | case ISD::STRICT_FP_ROUND: |
11795 | case ISD::FP_ROUND: |
11796 | return LowerFP_ROUND(Op, DAG); |
11797 | case ISD::ROTL: return LowerROTL(Op, DAG); |
11798 | |
11799 | // For counter-based loop handling. |
11800 | case ISD::INTRINSIC_W_CHAIN: return SDValue(); |
11801 | |
11802 | case ISD::BITCAST: return LowerBITCAST(Op, DAG); |
11803 | |
11804 | // Frame & Return address. |
11805 | case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); |
11806 | case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); |
11807 | |
11808 | case ISD::INTRINSIC_VOID: |
11809 | return LowerINTRINSIC_VOID(Op, DAG); |
11810 | case ISD::BSWAP: |
11811 | return LowerBSWAP(Op, DAG); |
11812 | case ISD::ATOMIC_CMP_SWAP: |
11813 | return LowerATOMIC_CMP_SWAP(Op, DAG); |
11814 | case ISD::ATOMIC_STORE: |
11815 | return LowerATOMIC_LOAD_STORE(Op, DAG); |
11816 | case ISD::IS_FPCLASS: |
11817 | return LowerIS_FPCLASS(Op, DAG); |
11818 | } |
11819 | } |
11820 | |
11821 | void PPCTargetLowering::ReplaceNodeResults(SDNode *N, |
                                           SmallVectorImpl<SDValue> &Results,
11823 | SelectionDAG &DAG) const { |
11824 | SDLoc dl(N); |
11825 | switch (N->getOpcode()) { |
11826 | default: |
11827 | llvm_unreachable("Do not know how to custom type legalize this operation!" ); |
11828 | case ISD::ATOMIC_LOAD: { |
    SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
    Results.push_back(Res);
    Results.push_back(Res.getValue(1));
11832 | break; |
11833 | } |
11834 | case ISD::READCYCLECOUNTER: { |
11835 | SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); |
    SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
11837 | |
11838 | Results.push_back( |
11839 | DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1))); |
    Results.push_back(RTB.getValue(2));
11841 | break; |
11842 | } |
11843 | case ISD::INTRINSIC_W_CHAIN: { |
11844 | if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement) |
11845 | break; |
11846 | |
11847 | assert(N->getValueType(0) == MVT::i1 && |
11848 | "Unexpected result type for CTR decrement intrinsic" ); |
11849 | EVT SVT = getSetCCResultType(DL: DAG.getDataLayout(), C&: *DAG.getContext(), |
11850 | VT: N->getValueType(ResNo: 0)); |
11851 | SDVTList VTs = DAG.getVTList(SVT, MVT::Other); |
11852 | SDValue NewInt = DAG.getNode(Opcode: N->getOpcode(), DL: dl, VTList: VTs, N1: N->getOperand(Num: 0), |
11853 | N2: N->getOperand(Num: 1)); |
11854 | |
11855 | Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt)); |
    Results.push_back(NewInt.getValue(1));
11857 | break; |
11858 | } |
11859 | case ISD::INTRINSIC_WO_CHAIN: { |
    switch (N->getConstantOperandVal(0)) {
11861 | case Intrinsic::ppc_pack_longdouble: |
11862 | Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128, |
11863 | N->getOperand(2), N->getOperand(1))); |
11864 | break; |
11865 | case Intrinsic::ppc_maxfe: |
11866 | case Intrinsic::ppc_minfe: |
11867 | case Intrinsic::ppc_fnmsub: |
11868 | case Intrinsic::ppc_convert_f128_to_ppcf128: |
      Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
11870 | break; |
11871 | } |
11872 | break; |
11873 | } |
11874 | case ISD::VAARG: { |
11875 | if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64()) |
11876 | return; |
11877 | |
    EVT VT = N->getValueType(0);

    if (VT == MVT::i64) {
      SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);

      Results.push_back(NewNode);
      Results.push_back(NewNode.getValue(1));
11885 | } |
11886 | return; |
11887 | } |
11888 | case ISD::STRICT_FP_TO_SINT: |
11889 | case ISD::STRICT_FP_TO_UINT: |
11890 | case ISD::FP_TO_SINT: |
11891 | case ISD::FP_TO_UINT: { |
11892 | // LowerFP_TO_INT() can only handle f32 and f64. |
11893 | if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() == |
11894 | MVT::ppcf128) |
11895 | return; |
    SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
    Results.push_back(LoweredValue);
    if (N->isStrictFPOpcode())
      Results.push_back(LoweredValue.getValue(1));
11900 | return; |
11901 | } |
11902 | case ISD::TRUNCATE: { |
    if (!N->getValueType(0).isVector())
      return;
    SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
    if (Lowered)
      Results.push_back(Lowered);
11908 | return; |
11909 | } |
11910 | case ISD::FSHL: |
11911 | case ISD::FSHR: |
11912 | // Don't handle funnel shifts here. |
11913 | return; |
11914 | case ISD::BITCAST: |
11915 | // Don't handle bitcast here. |
11916 | return; |
11917 | case ISD::FP_EXTEND: |
    SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
    if (Lowered)
      Results.push_back(Lowered);
11921 | return; |
11922 | } |
11923 | } |
11924 | |
11925 | //===----------------------------------------------------------------------===// |
11926 | // Other Lowering Code |
11927 | //===----------------------------------------------------------------------===// |
11928 | |
11929 | static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) { |
11930 | Module *M = Builder.GetInsertBlock()->getParent()->getParent(); |
  Function *Func = Intrinsic::getDeclaration(M, Id);
  return Builder.CreateCall(Func, {});
11933 | } |
11934 | |
// The mappings for emitLeadingFence and emitTrailingFence are taken from
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
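// For example, a seq_cst access gets a leading full sync, a release access
// only needs a leading lwsync, and acquire loads are handled by the trailing
// fence emitted below.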
11937 | Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder, |
11938 | Instruction *Inst, |
11939 | AtomicOrdering Ord) const { |
11940 | if (Ord == AtomicOrdering::SequentiallyConsistent) |
11941 | return callIntrinsic(Builder, Intrinsic::ppc_sync); |
11942 | if (isReleaseOrStronger(Ord)) |
11943 | return callIntrinsic(Builder, Intrinsic::ppc_lwsync); |
11944 | return nullptr; |
11945 | } |
11946 | |
11947 | Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder, |
11948 | Instruction *Inst, |
11949 | AtomicOrdering Ord) const { |
  if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
11951 | // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and |
11952 | // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html |
11953 | // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification. |
11954 | if (isa<LoadInst>(Inst)) |
11955 | return Builder.CreateCall( |
11956 | Intrinsic::getDeclaration( |
11957 | Builder.GetInsertBlock()->getParent()->getParent(), |
11958 | Intrinsic::ppc_cfence, {Inst->getType()}), |
11959 | {Inst}); |
    // FIXME: Could use isync for RMW operations.
11961 | return callIntrinsic(Builder, Intrinsic::ppc_lwsync); |
11962 | } |
11963 | return nullptr; |
11964 | } |
11965 | |
11966 | MachineBasicBlock * |
11967 | PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, |
11968 | unsigned AtomicSize, |
11969 | unsigned BinOpcode, |
11970 | unsigned CmpOpcode, |
11971 | unsigned CmpPred) const { |
11972 | // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. |
11973 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
11974 | |
11975 | auto LoadMnemonic = PPC::LDARX; |
11976 | auto StoreMnemonic = PPC::STDCX; |
11977 | switch (AtomicSize) { |
11978 | default: |
11979 | llvm_unreachable("Unexpected size of atomic entity" ); |
11980 | case 1: |
11981 | LoadMnemonic = PPC::LBARX; |
11982 | StoreMnemonic = PPC::STBCX; |
    assert(Subtarget.hasPartwordAtomics() &&
           "Partword atomics require hasPartwordAtomics()");
11984 | break; |
11985 | case 2: |
11986 | LoadMnemonic = PPC::LHARX; |
11987 | StoreMnemonic = PPC::STHCX; |
    assert(Subtarget.hasPartwordAtomics() &&
           "Partword atomics require hasPartwordAtomics()");
11989 | break; |
11990 | case 4: |
11991 | LoadMnemonic = PPC::LWARX; |
11992 | StoreMnemonic = PPC::STWCX; |
11993 | break; |
11994 | case 8: |
11995 | LoadMnemonic = PPC::LDARX; |
11996 | StoreMnemonic = PPC::STDCX; |
11997 | break; |
11998 | } |
11999 | |
12000 | const BasicBlock *LLVM_BB = BB->getBasicBlock(); |
12001 | MachineFunction *F = BB->getParent(); |
12002 | MachineFunction::iterator It = ++BB->getIterator(); |
12003 | |
  Register dest = MI.getOperand(0).getReg();
  Register ptrA = MI.getOperand(1).getReg();
  Register ptrB = MI.getOperand(2).getReg();
  Register incr = MI.getOperand(3).getReg();
12008 | DebugLoc dl = MI.getDebugLoc(); |
12009 | |
  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB =
      CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  if (CmpOpcode)
    F->insert(It, loop2MBB);
  F->insert(It, exitMBB);
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12021 | |
12022 | MachineRegisterInfo &RegInfo = F->getRegInfo(); |
  Register TmpReg = (!BinOpcode) ? incr :
    RegInfo.createVirtualRegister(AtomicSize == 8 ? &PPC::G8RCRegClass
                                                  : &PPC::GPRCRegClass);
12026 | |
12027 | // thisMBB: |
12028 | // ... |
12029 | // fallthrough --> loopMBB |
  BB->addSuccessor(loopMBB);
12031 | |
12032 | // loopMBB: |
12033 | // l[wd]arx dest, ptr |
12034 | // add r0, dest, incr |
12035 | // st[wd]cx. r0, ptr |
12036 | // bne- loopMBB |
12037 | // fallthrough --> exitMBB |
12038 | |
12039 | // For max/min... |
12040 | // loopMBB: |
12041 | // l[wd]arx dest, ptr |
12042 | // cmpl?[wd] dest, incr |
12043 | // bgt exitMBB |
12044 | // loop2MBB: |
12045 | // st[wd]cx. dest, ptr |
12046 | // bne- loopMBB |
12047 | // fallthrough --> exitMBB |
12048 | |
12049 | BB = loopMBB; |
  BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
      .addReg(ptrA).addReg(ptrB);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
12054 | if (CmpOpcode) { |
12055 | Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); |
12056 | // Signed comparisons of byte or halfword values must be sign-extended. |
12057 | if (CmpOpcode == PPC::CMPW && AtomicSize < 4) { |
12058 | Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); |
12059 | BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH), |
12060 | ExtReg).addReg(dest); |
      BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
    } else
      BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
12064 | |
12065 | BuildMI(BB, dl, TII->get(PPC::BCC)) |
12066 | .addImm(CmpPred) |
12067 | .addReg(CrReg) |
12068 | .addMBB(exitMBB); |
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);
12071 | BB = loop2MBB; |
12072 | } |
  BuildMI(BB, dl, TII->get(StoreMnemonic))
12074 | .addReg(TmpReg).addReg(ptrA).addReg(ptrB); |
12075 | BuildMI(BB, dl, TII->get(PPC::BCC)) |
12076 | .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); |
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);
12079 | |
12080 | // exitMBB: |
12081 | // ... |
12082 | BB = exitMBB; |
12083 | return BB; |
12084 | } |
12085 | |
12086 | static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) { |
  switch (MI.getOpcode()) {
12088 | default: |
12089 | return false; |
12090 | case PPC::COPY: |
    return TII->isSignExtended(MI.getOperand(1).getReg(),
                               &MI.getMF()->getRegInfo());
12093 | case PPC::LHA: |
12094 | case PPC::LHA8: |
12095 | case PPC::LHAU: |
12096 | case PPC::LHAU8: |
12097 | case PPC::LHAUX: |
12098 | case PPC::LHAUX8: |
12099 | case PPC::LHAX: |
12100 | case PPC::LHAX8: |
12101 | case PPC::LWA: |
12102 | case PPC::LWAUX: |
12103 | case PPC::LWAX: |
12104 | case PPC::LWAX_32: |
12105 | case PPC::LWA_32: |
12106 | case PPC::PLHA: |
12107 | case PPC::PLHA8: |
12108 | case PPC::PLHA8pc: |
12109 | case PPC::PLHApc: |
12110 | case PPC::PLWA: |
12111 | case PPC::PLWA8: |
12112 | case PPC::PLWA8pc: |
12113 | case PPC::PLWApc: |
12114 | case PPC::EXTSB: |
12115 | case PPC::EXTSB8: |
12116 | case PPC::EXTSB8_32_64: |
12117 | case PPC::EXTSB8_rec: |
12118 | case PPC::EXTSB_rec: |
12119 | case PPC::EXTSH: |
12120 | case PPC::EXTSH8: |
12121 | case PPC::EXTSH8_32_64: |
12122 | case PPC::EXTSH8_rec: |
12123 | case PPC::EXTSH_rec: |
12124 | case PPC::EXTSW: |
12125 | case PPC::EXTSWSLI: |
12126 | case PPC::EXTSWSLI_32_64: |
12127 | case PPC::EXTSWSLI_32_64_rec: |
12128 | case PPC::EXTSWSLI_rec: |
12129 | case PPC::EXTSW_32: |
12130 | case PPC::EXTSW_32_64: |
12131 | case PPC::EXTSW_32_64_rec: |
12132 | case PPC::EXTSW_rec: |
12133 | case PPC::SRAW: |
12134 | case PPC::SRAWI: |
12135 | case PPC::SRAWI_rec: |
12136 | case PPC::SRAW_rec: |
12137 | return true; |
12138 | } |
12139 | return false; |
12140 | } |
12141 | |
12142 | MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary( |
12143 | MachineInstr &MI, MachineBasicBlock *BB, |
12144 | bool is8bit, // operation |
12145 | unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const { |
12146 | // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. |
12147 | const PPCInstrInfo *TII = Subtarget.getInstrInfo(); |
12148 | |
12149 | // If this is a signed comparison and the value being compared is not known |
12150 | // to be sign extended, sign extend it here. |
12151 | DebugLoc dl = MI.getDebugLoc(); |
12152 | MachineFunction *F = BB->getParent(); |
12153 | MachineRegisterInfo &RegInfo = F->getRegInfo(); |
  Register incr = MI.getOperand(3).getReg();
  bool IsSignExtended =
      incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
12157 | |
12158 | if (CmpOpcode == PPC::CMPW && !IsSignExtended) { |
12159 | Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); |
12160 | BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg) |
12161 | .addReg(MI.getOperand(3).getReg()); |
    MI.getOperand(3).setReg(ValueReg);
12163 | incr = ValueReg; |
12164 | } |
  // If we support part-word atomic mnemonics, just use them.
  if (Subtarget.hasPartwordAtomics())
    return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
                            CmpPred);
12169 | |
  // In 64-bit mode we have to use 64-bit addresses, even though lwarx/stwcx.
  // operate on 32-bit data. The 32-bit atomics can use address registers
  // without caring whether they hold 32- or 64-bit values, but here we do
  // actual arithmetic on the addresses.
12174 | bool is64bit = Subtarget.isPPC64(); |
12175 | bool isLittleEndian = Subtarget.isLittleEndian(); |
12176 | unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; |
12177 | |
12178 | const BasicBlock *LLVM_BB = BB->getBasicBlock(); |
12179 | MachineFunction::iterator It = ++BB->getIterator(); |
12180 | |
  Register dest = MI.getOperand(0).getReg();
  Register ptrA = MI.getOperand(1).getReg();
  Register ptrB = MI.getOperand(2).getReg();
12184 | |
  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB =
      CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  if (CmpOpcode)
    F->insert(It, loop2MBB);
  F->insert(It, exitMBB);
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12196 | |
12197 | const TargetRegisterClass *RC = |
12198 | is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; |
12199 | const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; |
12200 | |
  Register PtrReg = RegInfo.createVirtualRegister(RC);
  Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
  Register ShiftReg =
      isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
  Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
  Register MaskReg = RegInfo.createVirtualRegister(GPRC);
  Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
  Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
  Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
  Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
  Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
  Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
  Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
  Register Ptr1Reg;
  Register TmpReg =
      (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
12217 | |
12218 | // thisMBB: |
12219 | // ... |
12220 | // fallthrough --> loopMBB |
  BB->addSuccessor(loopMBB);
12222 | |
12223 | // The 4-byte load must be aligned, while a char or short may be |
12224 | // anywhere in the word. Hence all this nasty bookkeeping code. |
12225 | // add ptr1, ptrA, ptrB [copy if ptrA==0] |
12226 | // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] |
12227 | // xori shift, shift1, 24 [16] |
12228 | // rlwinm ptr, ptr1, 0, 0, 29 |
12229 | // slw incr2, incr, shift |
12230 | // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] |
12231 | // slw mask, mask2, shift |
12232 | // loopMBB: |
12233 | // lwarx tmpDest, ptr |
12234 | // add tmp, tmpDest, incr2 |
12235 | // andc tmp2, tmpDest, mask |
12236 | // and tmp3, tmp, mask |
12237 | // or tmp4, tmp3, tmp2 |
12238 | // stwcx. tmp4, ptr |
12239 | // bne- loopMBB |
12240 | // fallthrough --> exitMBB |
12241 | // srw SrwDest, tmpDest, shift |
12242 | // rlwinm SrwDest, SrwDest, 0, 24 [16], 31 |
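  // For example, a byte whose address has low two bits 0b01 on a
  // little-endian target gets shift1 == shift == 8, incr2 == incr << 8 and
  // mask == 0xFF << 8, so only that byte of the aligned word is modified.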
12243 | if (ptrA != ZeroReg) { |
    Ptr1Reg = RegInfo.createVirtualRegister(RC);
12245 | BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) |
12246 | .addReg(ptrA) |
12247 | .addReg(ptrB); |
12248 | } else { |
12249 | Ptr1Reg = ptrB; |
12250 | } |
  // We need to use the 32-bit subregister to avoid a register-class mismatch
  // in 64-bit mode.
12253 | BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg) |
12254 | .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0) |
12255 | .addImm(3) |
12256 | .addImm(27) |
12257 | .addImm(is8bit ? 28 : 27); |
12258 | if (!isLittleEndian) |
12259 | BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg) |
12260 | .addReg(Shift1Reg) |
12261 | .addImm(is8bit ? 24 : 16); |
12262 | if (is64bit) |
12263 | BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) |
12264 | .addReg(Ptr1Reg) |
12265 | .addImm(0) |
12266 | .addImm(61); |
12267 | else |
12268 | BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) |
12269 | .addReg(Ptr1Reg) |
12270 | .addImm(0) |
12271 | .addImm(0) |
12272 | .addImm(29); |
12273 | BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg); |
12274 | if (is8bit) |
12275 | BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); |
12276 | else { |
12277 | BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); |
12278 | BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg) |
12279 | .addReg(Mask3Reg) |
12280 | .addImm(65535); |
12281 | } |
12282 | BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) |
12283 | .addReg(Mask2Reg) |
12284 | .addReg(ShiftReg); |
12285 | |
12286 | BB = loopMBB; |
12287 | BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) |
12288 | .addReg(ZeroReg) |
12289 | .addReg(PtrReg); |
12290 | if (BinOpcode) |
12291 | BuildMI(BB, dl, TII->get(BinOpcode), TmpReg) |
12292 | .addReg(Incr2Reg) |
12293 | .addReg(TmpDestReg); |
12294 | BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg) |
12295 | .addReg(TmpDestReg) |
12296 | .addReg(MaskReg); |
12297 | BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg); |
12298 | if (CmpOpcode) { |
12299 | // For unsigned comparisons, we can directly compare the shifted values. |
12300 | // For signed comparisons we shift and sign extend. |
    Register SReg = RegInfo.createVirtualRegister(GPRC);
12302 | Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); |
12303 | BuildMI(BB, dl, TII->get(PPC::AND), SReg) |
12304 | .addReg(TmpDestReg) |
12305 | .addReg(MaskReg); |
12306 | unsigned ValueReg = SReg; |
12307 | unsigned CmpReg = Incr2Reg; |
12308 | if (CmpOpcode == PPC::CMPW) { |
      ValueReg = RegInfo.createVirtualRegister(GPRC);
12310 | BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg) |
12311 | .addReg(SReg) |
12312 | .addReg(ShiftReg); |
      Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
12314 | BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg) |
12315 | .addReg(ValueReg); |
12316 | ValueReg = ValueSReg; |
12317 | CmpReg = incr; |
12318 | } |
12319 | BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg); |
12320 | BuildMI(BB, dl, TII->get(PPC::BCC)) |
12321 | .addImm(CmpPred) |
12322 | .addReg(CrReg) |
12323 | .addMBB(exitMBB); |
12324 | BB->addSuccessor(Succ: loop2MBB); |
12325 | BB->addSuccessor(Succ: exitMBB); |
12326 | BB = loop2MBB; |
12327 | } |
  BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
  BuildMI(BB, dl, TII->get(PPC::STWCX))
      .addReg(Tmp4Reg)
      .addReg(ZeroReg)
      .addReg(PtrReg);
  BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE)
      .addReg(PPC::CR0)
      .addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  // exitMBB:
  //   ...
  BB = exitMBB;
  // Since the shift amount is not a constant, we need to clear
  // the upper bits with a separate RLWINM.
  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
      .addReg(SrwDestReg)
      .addImm(0)
      .addImm(is8bit ? 24 : 16)
      .addImm(31);
  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
      .addReg(TmpDestReg)
      .addReg(ShiftReg);
  return BB;
}

llvm::MachineBasicBlock *
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *BB = MBB->getBasicBlock();
  MachineFunction::iterator I = ++MBB->getIterator();

  Register DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
  Register mainDstReg = MRI.createVirtualRegister(RC);
  Register restoreDstReg = MRI.createVirtualRegister(RC);

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
  // For v = setjmp(buf), we generate
  //
  // thisMBB:
  //  SjLjSetup mainMBB
  //  bl mainMBB
  //  v_restore = 1
  //  b sinkMBB
  //
  // mainMBB:
  //  buf[LabelOffset] = LR
  //  v_main = 0
  //
  // sinkMBB:
  //  v = phi(main, restore)
  //
  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
  MF->insert(I, mainMBB);
  MF->insert(I, sinkMBB);

  MachineInstrBuilder MIB;

  // Transfer the remainder of BB and its successor edges to sinkMBB.
  sinkMBB->splice(sinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Note that the structure of the jmp_buf used here is not compatible
  // with that used by libc, and is not designed to be. Specifically, it
  // stores only those 'reserved' registers that LLVM does not otherwise
  // understand how to spill. Also, by convention, by the time this
  // intrinsic is called, Clang has already stored the frame address in the
  // first slot of the buffer and stack address in the third. Following the
  // X86 target code, we'll store the jump address in the second slot. We also
  // need to save the TOC pointer (R2) to handle jumps between shared
  // libraries, and that will be stored in the fourth slot. The thread
  // identifier (R13) is not affected.
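  //
  // As an illustrative sketch (slot indices in pointer-sized units, matching
  // the offsets computed below):
  //   buf[0] = frame address  (stored by Clang)
  //   buf[1] = jump address   (LR, stored in mainMBB below)
  //   buf[2] = stack address  (stored by Clang)
  //   buf[3] = TOC pointer    (R2, 64-bit ELF ABI only)
  //   buf[4] = base pointer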

  // thisMBB:
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t TOCOffset = 3 * PVT.getStoreSize();
  const int64_t BPOffset = 4 * PVT.getStoreSize();

  // Prepare the IP in a register.
  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
  Register LabelReg = MRI.createVirtualRegister(PtrRC);
  Register BufReg = MI.getOperand(1).getReg();

  if (Subtarget.is64BitELFABI()) {
    setUsesTOCBasePtr(*MBB->getParent());
    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
              .addReg(PPC::X2)
              .addImm(TOCOffset)
              .addReg(BufReg)
              .cloneMemRefs(MI);
  }

  // Naked functions never have a base pointer, and so we use r1. For all
  // other functions, this decision must be delayed until PEI.
  unsigned BaseReg;
  if (MF->getFunction().hasFnAttribute(Attribute::Naked))
    BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
  else
    BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;

  MIB = BuildMI(*thisMBB, MI, DL,
                TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
            .addReg(BaseReg)
            .addImm(BPOffset)
            .addReg(BufReg)
            .cloneMemRefs(MI);

  // Setup
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
  MIB.addRegMask(TRI->getNoPreservedMask());

  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);

  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
            .addMBB(mainMBB);
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);

  thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
  thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());

  // mainMBB:
  //  mainDstReg = 0
  MIB =
      BuildMI(mainMBB, DL,
              TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);

  // Store IP
  if (Subtarget.isPPC64()) {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
              .addReg(LabelReg)
              .addImm(LabelOffset)
              .addReg(BufReg);
  } else {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
              .addReg(LabelReg)
              .addImm(LabelOffset)
              .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
  mainMBB->addSuccessor(sinkMBB);

  // sinkMBB:
  BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(PPC::PHI), DstReg)
      .addReg(mainDstReg)
      .addMBB(mainMBB)
      .addReg(restoreDstReg)
      .addMBB(thisMBB);

  MI.eraseFromParent();
  return sinkMBB;
}

MachineBasicBlock *
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");

  const TargetRegisterClass *RC =
      (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  Register Tmp = MRI.createVirtualRegister(RC);
  // Since FP is only updated here but NOT referenced, it's treated as GPR.
  unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
  unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
  unsigned BP =
      (PVT == MVT::i64)
          ? PPC::X30
          : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
                                                              : PPC::R30);

  MachineInstrBuilder MIB;

  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t SPOffset = 2 * PVT.getStoreSize();
  const int64_t TOCOffset = 3 * PVT.getStoreSize();
  const int64_t BPOffset = 4 * PVT.getStoreSize();

  Register BufReg = MI.getOperand(0).getReg();

  // Reload FP (the jumped-to function may not have had a
  // frame pointer, and if so, then its r31 will be restored
  // as necessary).
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
              .addImm(0)
              .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
              .addImm(0)
              .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  // Reload IP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
              .addImm(LabelOffset)
              .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
              .addImm(LabelOffset)
              .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  // Reload SP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
              .addImm(SPOffset)
              .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
              .addImm(SPOffset)
              .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  // Reload BP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
              .addImm(BPOffset)
              .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
              .addImm(BPOffset)
              .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  // Reload TOC
  if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
    setUsesTOCBasePtr(*MBB->getParent());
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
              .addImm(TOCOffset)
              .addReg(BufReg)
              .cloneMemRefs(MI);
  }

  // Jump
  BuildMI(*MBB, MI, DL,
          TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
  BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));

  MI.eraseFromParent();
  return MBB;
}

bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
  // If the function specifically requests inline stack probes, emit them.
  if (MF.getFunction().hasFnAttribute("probe-stack"))
    return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
           "inline-asm";
  return false;
}

unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
  unsigned StackAlign = TFI->getStackAlignment();
  assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
         "Unexpected stack alignment");
  // The default stack probe size is 4096 if the function has no
  // stack-probe-size attribute.
  const Function &Fn = MF.getFunction();
  unsigned StackProbeSize =
      Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
  // Round down to the stack alignment.
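  // For example (illustration only): a requested "stack-probe-size" of 4100
  // with 16-byte stack alignment rounds down to 4096, while a requested size
  // smaller than the alignment yields 0 and falls back to StackAlign below.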
  StackProbeSize &= ~(StackAlign - 1);
  return StackProbeSize ? StackProbeSize : StackAlign;
}

// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
// into three phases. In the first phase, it uses the pseudo instruction
// PREPARE_PROBED_ALLOCA to get the future results of the actual FramePointer
// and FinalStackPtr. In the second phase, it generates a loop that probes the
// blocks. At last, it uses the pseudo instruction DYNAREAOFFSET to get the
// future result of MaxCallFrameSize so that it can calculate the correct data
// area pointer.
MachineBasicBlock *
PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
  const bool isPPC64 = Subtarget.isPPC64();
  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  const unsigned ProbeSize = getStackProbeSize(*MF);
  const BasicBlock *ProbedBB = MBB->getBasicBlock();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  // The CFG of probing stack looks like:
  //         +-----+
  //         | MBB |
  //         +--+--+
  //            |
  //       +----v----+
  //  +--->+ TestMBB +---+
  //  |    +----+----+   |
  //  |         |        |
  //  |   +-----v----+   |
  //  +---+ BlockMBB |   |
  //      +----------+   |
  //                     |
  //       +---------+   |
  //       | TailMBB +<--+
  //       +---------+
  // In MBB, calculate the previous frame pointer and the final stack pointer.
  // In TestMBB, test whether sp is equal to the final stack pointer; if so,
  // jump to TailMBB. In BlockMBB, update sp atomically and jump back to
  // TestMBB. TailMBB is spliced via \p MI.
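  //
  // Worked example (illustration only): with ProbeSize = 4096 and an
  // allocation of 10000 bytes, the residual 10000 % 4096 = 1808 bytes are
  // probed first in MBB, then the TestMBB/BlockMBB loop touches the two
  // remaining 4096-byte blocks.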
  MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
  MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
  MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);

  MachineFunction::iterator MBBIter = ++MBB->getIterator();
  MF->insert(MBBIter, TestMBB);
  MF->insert(MBBIter, BlockMBB);
  MF->insert(MBBIter, TailMBB);

  const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;

  Register DstReg = MI.getOperand(0).getReg();
  Register NegSizeReg = MI.getOperand(1).getReg();
  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
  Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
  Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
  Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);

  // Since the value of NegSizeReg might be realigned during prologue/epilogue
  // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
  // actual FramePointer and NegSize.
  unsigned ProbeOpc;
  if (!MRI.hasOneNonDBGUse(NegSizeReg))
    ProbeOpc =
        isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
  else
    // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG, ActualNegSizeReg
    // and NegSizeReg will be allocated to the same physical register to avoid
    // a redundant copy when NegSizeReg has only one use, namely the current
    // MI, which will be replaced by PREPARE_PROBED_ALLOCA.
    ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
                       : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
  BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
      .addDef(ActualNegSizeReg)
      .addReg(NegSizeReg)
      .add(MI.getOperand(2))
      .add(MI.getOperand(3));

  // Calculate the final stack pointer, which equals SP + ActualNegSize.
  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
          FinalStackPtr)
      .addReg(SPReg)
      .addReg(ActualNegSizeReg);

  // Materialize a scratch register for update.
  int64_t NegProbeSize = -(int64_t)ProbeSize;
  assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
  Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
  if (!isInt<16>(NegProbeSize)) {
    Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
        .addImm(NegProbeSize >> 16);
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
            ScratchReg)
        .addReg(TempReg)
        .addImm(NegProbeSize & 0xFFFF);
  } else
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
        .addImm(NegProbeSize);

  {
    // Probing leading residual part.
    Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
        .addReg(ActualNegSizeReg)
        .addReg(ScratchReg);
    Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
        .addReg(Div)
        .addReg(ScratchReg);
    Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
        .addReg(Mul)
        .addReg(ActualNegSizeReg);
    BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
        .addReg(FramePointer)
        .addReg(SPReg)
        .addReg(NegMod);
  }

  {
    // The remaining part should be a multiple of ProbeSize.
    Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
    BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
        .addReg(SPReg)
        .addReg(FinalStackPtr);
    BuildMI(TestMBB, DL, TII->get(PPC::BCC))
        .addImm(PPC::PRED_EQ)
        .addReg(CmpResult)
        .addMBB(TailMBB);
    TestMBB->addSuccessor(BlockMBB);
    TestMBB->addSuccessor(TailMBB);
  }

  {
    // Touch the block.
    // |P...|P...|P...
    BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
        .addReg(FramePointer)
        .addReg(SPReg)
        .addReg(ScratchReg);
    BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
    BlockMBB->addSuccessor(TestMBB);
  }

  // Calculation of MaxCallFrameSize is deferred to prologue/epilogue
  // insertion; use the DYNAREAOFFSET pseudo instruction to get the future
  // result.
  Register MaxCallFrameSizeReg =
      MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
  BuildMI(TailMBB, DL,
          TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
          MaxCallFrameSizeReg)
      .add(MI.getOperand(2))
      .add(MI.getOperand(3));
  BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
      .addReg(SPReg)
      .addReg(MaxCallFrameSizeReg);

  // Splice instructions after MI to TailMBB.
  TailMBB->splice(TailMBB->end(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
  MBB->addSuccessor(TestMBB);

  // Delete the pseudo instruction.
  MI.eraseFromParent();

  ++NumDynamicAllocaProbed;
  return TailMBB;
}

static bool IsSelectCC(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case PPC::SELECT_CC_I4:
  case PPC::SELECT_CC_I8:
  case PPC::SELECT_CC_F4:
  case PPC::SELECT_CC_F8:
  case PPC::SELECT_CC_F16:
  case PPC::SELECT_CC_VRRC:
  case PPC::SELECT_CC_VSFRC:
  case PPC::SELECT_CC_VSSRC:
  case PPC::SELECT_CC_VSRC:
  case PPC::SELECT_CC_SPE4:
  case PPC::SELECT_CC_SPE:
    return true;
  default:
    return false;
  }
}

static bool IsSelect(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case PPC::SELECT_I4:
  case PPC::SELECT_I8:
  case PPC::SELECT_F4:
  case PPC::SELECT_F8:
  case PPC::SELECT_F16:
  case PPC::SELECT_SPE:
  case PPC::SELECT_SPE4:
  case PPC::SELECT_VRRC:
  case PPC::SELECT_VSFRC:
  case PPC::SELECT_VSSRC:
  case PPC::SELECT_VSRC:
    return true;
  default:
    return false;
  }
}

MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                               MachineBasicBlock *BB) const {
  if (MI.getOpcode() == TargetOpcode::STACKMAP ||
      MI.getOpcode() == TargetOpcode::PATCHPOINT) {
    if (Subtarget.is64BitELFABI() &&
        MI.getOpcode() == TargetOpcode::PATCHPOINT &&
        !Subtarget.isUsingPCRelativeCalls()) {
      // Call lowering should have added an r2 operand to indicate a dependence
      // on the TOC base pointer value. It can't, however, because there is no
      // way to mark the dependence as implicit there, and so the stackmap code
      // will confuse it with a regular operand. Instead, add the dependence
      // here.
      MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
    }

    return emitPatchPoint(MI, BB);
  }

  if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
      MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
    return emitEHSjLjSetJmp(MI, BB);
  } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
             MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
    return emitEHSjLjLongJmp(MI, BB);
  }

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  // To "insert" these instructions we actually have to insert their
  // control-flow patterns.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = ++BB->getIterator();

  MachineFunction *F = BB->getParent();
  MachineRegisterInfo &MRI = F->getRegInfo();

  if (Subtarget.hasISEL() &&
      (MI.getOpcode() == PPC::SELECT_CC_I4 ||
       MI.getOpcode() == PPC::SELECT_CC_I8 ||
       MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
    SmallVector<MachineOperand, 2> Cond;
    if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
        MI.getOpcode() == PPC::SELECT_CC_I8)
      Cond.push_back(MI.getOperand(4));
    else
      Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
    Cond.push_back(MI.getOperand(1));

    DebugLoc dl = MI.getDebugLoc();
    TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
                      MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
  } else if (IsSelectCC(MI) || IsSelect(MI)) {
    // The incoming instruction knows the destination vreg to set, the
    // condition code register to branch on, the true/false values to
    // select between, and a branch opcode to use.

    // thisMBB:
    // ...
    //  TrueVal = ...
    //  cmpTY ccX, r1, r2
    //  bCC sinkMBB
    //  fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
    DebugLoc dl = MI.getDebugLoc();
    F->insert(It, copy0MBB);
    F->insert(It, sinkMBB);

    // Set the call frame size on entry to the new basic blocks.
    // See https://reviews.llvm.org/D156113.
    unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
    copy0MBB->setCallFrameSize(CallFrameSize);
    sinkMBB->setCallFrameSize(CallFrameSize);

    // Transfer the remainder of BB and its successor edges to sinkMBB.
    sinkMBB->splice(sinkMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);

    // Next, add the true and fallthrough blocks as its successors.
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    if (IsSelect(MI)) {
      BuildMI(BB, dl, TII->get(PPC::BC))
          .addReg(MI.getOperand(1).getReg())
          .addMBB(sinkMBB);
    } else {
      unsigned SelectPred = MI.getOperand(4).getImm();
      BuildMI(BB, dl, TII->get(PPC::BCC))
          .addImm(SelectPred)
          .addReg(MI.getOperand(1).getReg())
          .addMBB(sinkMBB);
    }

    // copy0MBB:
    //  %FalseValue = ...
    //  # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    // sinkMBB:
    //  %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
        .addReg(MI.getOperand(3).getReg())
        .addMBB(copy0MBB)
        .addReg(MI.getOperand(2).getReg())
        .addMBB(thisMBB);
  } else if (MI.getOpcode() == PPC::ReadTB) {
    // To read the 64-bit time-base register on a 32-bit target, we read the
    // two halves. Should the counter have wrapped while it was being read, we
    // need to try again.
    // ...
    // readLoop:
    //  mfspr Rx,TBU  # load from TBU
    //  mfspr Ry,TB   # load from TB
    //  mfspr Rz,TBU  # load from TBU
    //  cmpw crX,Rx,Rz  # check if 'old'='new'
    //  bne readLoop    # branch if they're not equal
    // ...

    MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
    DebugLoc dl = MI.getDebugLoc();
    F->insert(It, readMBB);
    F->insert(It, sinkMBB);

    // Transfer the remainder of BB and its successor edges to sinkMBB.
    sinkMBB->splice(sinkMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);

    BB->addSuccessor(readMBB);
    BB = readMBB;

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
    Register LoReg = MI.getOperand(0).getReg();
    Register HiReg = MI.getOperand(1).getReg();

    BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
    BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
    BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);

    Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);

    BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
        .addReg(HiReg)
        .addReg(ReadAgainReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE)
        .addReg(CmpReg)
        .addMBB(readMBB);

    BB->addSuccessor(readMBB);
    BB->addSuccessor(sinkMBB);
  } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);

  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0);
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0);
  else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
           MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
           (Subtarget.hasPartwordAtomics() &&
            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
           (Subtarget.hasPartwordAtomics() &&
            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
    bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;

    auto LoadMnemonic = PPC::LDARX;
    auto StoreMnemonic = PPC::STDCX;
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("Compare and swap of unknown size");
    case PPC::ATOMIC_CMP_SWAP_I8:
      LoadMnemonic = PPC::LBARX;
      StoreMnemonic = PPC::STBCX;
      assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
      break;
    case PPC::ATOMIC_CMP_SWAP_I16:
      LoadMnemonic = PPC::LHARX;
      StoreMnemonic = PPC::STHCX;
      assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
      break;
    case PPC::ATOMIC_CMP_SWAP_I32:
      LoadMnemonic = PPC::LWARX;
      StoreMnemonic = PPC::STWCX;
      break;
    case PPC::ATOMIC_CMP_SWAP_I64:
      LoadMnemonic = PPC::LDARX;
      StoreMnemonic = PPC::STDCX;
      break;
    }
    MachineRegisterInfo &RegInfo = F->getRegInfo();
    Register dest = MI.getOperand(0).getReg();
    Register ptrA = MI.getOperand(1).getReg();
    Register ptrB = MI.getOperand(2).getReg();
    Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
    Register oldval = MI.getOperand(3).getReg();
    Register newval = MI.getOperand(4).getReg();
    DebugLoc dl = MI.getDebugLoc();

    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, loop1MBB);
    F->insert(It, loop2MBB);
    F->insert(It, exitMBB);
    exitMBB->splice(exitMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);

    // thisMBB:
    // ...
    // fallthrough --> loopMBB
    BB->addSuccessor(loop1MBB);

    // loop1MBB:
    //  l[bhwd]arx dest, ptr
    //  cmp[wd] dest, oldval
    //  bne- exitBB
    // loop2MBB:
    //  st[bhwd]cx. newval, ptr
    //  bne- loopMBB
    //  b exitBB
    // exitBB:
    BB = loop1MBB;
    BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
        .addReg(dest)
        .addReg(oldval);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE)
        .addReg(CrReg)
        .addMBB(exitMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);

    BB = loop2MBB;
    BuildMI(BB, dl, TII->get(StoreMnemonic))
        .addReg(newval)
        .addReg(ptrA)
        .addReg(ptrB);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE)
        .addReg(PPC::CR0)
        .addMBB(loop1MBB);
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
    BB->addSuccessor(loop1MBB);
    BB->addSuccessor(exitMBB);

    // exitMBB:
    // ...
    BB = exitMBB;
  } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
    // We must use 64-bit registers for addresses when targeting 64-bit,
    // since we're actually doing arithmetic on them. Other registers
    // can be 32-bit.
    bool is64bit = Subtarget.isPPC64();
    bool isLittleEndian = Subtarget.isLittleEndian();
    bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;

    Register dest = MI.getOperand(0).getReg();
    Register ptrA = MI.getOperand(1).getReg();
    Register ptrB = MI.getOperand(2).getReg();
    Register oldval = MI.getOperand(3).getReg();
    Register newval = MI.getOperand(4).getReg();
    DebugLoc dl = MI.getDebugLoc();

    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, loop1MBB);
    F->insert(It, loop2MBB);
    F->insert(It, exitMBB);
    exitMBB->splice(exitMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    const TargetRegisterClass *RC =
        is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
    const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;

    Register PtrReg = RegInfo.createVirtualRegister(RC);
    Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
    Register ShiftReg =
        isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
    Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
    Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
    Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
    Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
    Register MaskReg = RegInfo.createVirtualRegister(GPRC);
    Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
    Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
    Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
    Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
    Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
    Register Ptr1Reg;
    Register TmpReg = RegInfo.createVirtualRegister(GPRC);
    Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
    Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
    // thisMBB:
    // ...
    // fallthrough --> loopMBB
    BB->addSuccessor(loop1MBB);
    // The 4-byte load must be aligned, while a char or short may be
    // anywhere in the word. Hence all this nasty bookkeeping code:
    //   add ptr1, ptrA, ptrB [copy if ptrA==0]
    //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
    //   xori shift, shift1, 24 [16]
    //   rlwinm ptr, ptr1, 0, 0, 29
    //   slw newval2, newval, shift
    //   slw oldval2, oldval, shift
    //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
    //   slw mask, mask2, shift
    //   and newval3, newval2, mask
    //   and oldval3, oldval2, mask
    //  loop1MBB:
    //   lwarx tmpDest, ptr
    //   and tmp, tmpDest, mask
    //   cmpw tmp, oldval3
    //   bne- exitBB
    //  loop2MBB:
    //   andc tmp2, tmpDest, mask
    //   or tmp4, tmp2, newval3
    //   stwcx. tmp4, ptr
    //   bne- loop1MBB
    //   b exitBB
    //  exitBB:
    //   srw dest, tmpDest, shift
    if (ptrA != ZeroReg) {
      Ptr1Reg = RegInfo.createVirtualRegister(RC);
      BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
          .addReg(ptrA)
          .addReg(ptrB);
    } else {
      Ptr1Reg = ptrB;
    }

    // We need to use a 32-bit subregister here to avoid a register class
    // mismatch in 64-bit mode.
    BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
        .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
        .addImm(3)
        .addImm(27)
        .addImm(is8bit ? 28 : 27);
    if (!isLittleEndian)
      BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
          .addReg(Shift1Reg)
          .addImm(is8bit ? 24 : 16);
    if (is64bit)
      BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
          .addReg(Ptr1Reg)
          .addImm(0)
          .addImm(61);
    else
      BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
          .addReg(Ptr1Reg)
          .addImm(0)
          .addImm(0)
          .addImm(29);
    BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
        .addReg(newval)
        .addReg(ShiftReg);
    BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
        .addReg(oldval)
        .addReg(ShiftReg);
    if (is8bit)
      BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
    else {
      BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
      BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
          .addReg(Mask3Reg)
          .addImm(65535);
    }
    BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
        .addReg(Mask2Reg)
        .addReg(ShiftReg);
    BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
        .addReg(NewVal2Reg)
        .addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
        .addReg(OldVal2Reg)
        .addReg(MaskReg);

    BB = loop1MBB;
    BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
        .addReg(ZeroReg)
        .addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
        .addReg(TmpDestReg)
        .addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
        .addReg(TmpReg)
        .addReg(OldVal3Reg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE)
        .addReg(CrReg)
        .addMBB(exitMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);

    BB = loop2MBB;
    BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
        .addReg(TmpDestReg)
        .addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
        .addReg(Tmp2Reg)
        .addReg(NewVal3Reg);
    BuildMI(BB, dl, TII->get(PPC::STWCX))
        .addReg(Tmp4Reg)
        .addReg(ZeroReg)
        .addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE)
        .addReg(PPC::CR0)
        .addMBB(loop1MBB);
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
    BB->addSuccessor(loop1MBB);
    BB->addSuccessor(exitMBB);

    // exitMBB:
    // ...
    BB = exitMBB;
    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
        .addReg(TmpReg)
        .addReg(ShiftReg);
  } else if (MI.getOpcode() == PPC::FADDrtz) {
    // This pseudo performs an FADD with rounding mode temporarily forced
    // to round-to-zero. We emit this via custom inserter since the FPSCR
    // is not modeled at the SelectionDAG level.
    Register Dest = MI.getOperand(0).getReg();
    Register Src1 = MI.getOperand(1).getReg();
    Register Src2 = MI.getOperand(2).getReg();
    DebugLoc dl = MI.getDebugLoc();

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);

    // Save FPSCR value.
    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);

    // Set rounding mode to round-to-zero.
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
        .addImm(31)
        .addReg(PPC::RM, RegState::ImplicitDefine);

    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
        .addImm(30)
        .addReg(PPC::RM, RegState::ImplicitDefine);

    // Perform addition.
    auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
                   .addReg(Src1)
                   .addReg(Src2);
    if (MI.getFlag(MachineInstr::NoFPExcept))
      MIB.setMIFlag(MachineInstr::NoFPExcept);

    // Restore FPSCR value.
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
  } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
             MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
             MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
             MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
    unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
                       MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
                          ? PPC::ANDI8_rec
                          : PPC::ANDI_rec;
    bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
                 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    Register Dest = RegInfo.createVirtualRegister(
        Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);

    DebugLoc Dl = MI.getDebugLoc();
    BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
        .addReg(MI.getOperand(1).getReg())
        .addImm(1);
    BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
            MI.getOperand(0).getReg())
        .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
  } else if (MI.getOpcode() == PPC::TCHECK_RET) {
    DebugLoc Dl = MI.getDebugLoc();
    MachineRegisterInfo &RegInfo = F->getRegInfo();
    Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
    BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
    BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
            MI.getOperand(0).getReg())
        .addReg(CRReg);
  } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
    DebugLoc Dl = MI.getDebugLoc();
    unsigned Imm = MI.getOperand(1).getImm();
    BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
    BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
            MI.getOperand(0).getReg())
        .addReg(PPC::CR0EQ);
  } else if (MI.getOpcode() == PPC::SETRNDi) {
    DebugLoc dl = MI.getDebugLoc();
    Register OldFPSCRReg = MI.getOperand(0).getReg();

    // Save FPSCR value.
    if (MRI.use_empty(OldFPSCRReg))
      BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
    else
      BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);

    // The floating-point rounding mode is in bits 62:63 of the FPSCR and has
    // the following settings:
    //   00 Round to nearest
    //   01 Round to 0
    //   10 Round to +inf
    //   11 Round to -inf

    // When the operand is an immediate, use its two least significant bits to
    // set bits 62:63 of the FPSCR.
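    // For example (illustration only): Mode = 2 requests round to +inf, so
    // bit 63 is cleared (MTFSB0 31, since Mode & 1 == 0) and bit 62 is set
    // (MTFSB1 30, since Mode & 2 != 0).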
    unsigned Mode = MI.getOperand(1).getImm();
    BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
        .addImm(31)
        .addReg(PPC::RM, RegState::ImplicitDefine);

    BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
        .addImm(30)
        .addReg(PPC::RM, RegState::ImplicitDefine);
  } else if (MI.getOpcode() == PPC::SETRND) {
    DebugLoc dl = MI.getDebugLoc();

    // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
    // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
    // If the target doesn't have DirectMove, we should use the stack to do
    // the conversion, because the target doesn't have instructions like
    // mtvsrd or mfvsrd to do this conversion directly.
    auto copyRegFromG8RCOrF8RC = [&](unsigned DestReg, unsigned SrcReg) {
      if (Subtarget.hasDirectMove()) {
        BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
            .addReg(SrcReg);
      } else {
        // Use the stack to do the register copy.
        unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
        MachineRegisterInfo &RegInfo = F->getRegInfo();
        const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
        if (RC == &PPC::F8RCRegClass) {
          // Copy register from F8RCRegClass to G8RCRegClass.
          assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
                 "Unsupported RegClass.");

          StoreOp = PPC::STFD;
          LoadOp = PPC::LD;
        } else {
          // Copy register from G8RCRegClass to F8RCRegClass.
          assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
                 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
                 "Unsupported RegClass.");
        }

        MachineFrameInfo &MFI = F->getFrameInfo();
        int FrameIdx = MFI.CreateStackObject(8, Align(8), false);

        MachineMemOperand *MMOStore = F->getMachineMemOperand(
            MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
            MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
            MFI.getObjectAlign(FrameIdx));

        // Store the SrcReg into the stack.
        BuildMI(*BB, MI, dl, TII->get(StoreOp))
            .addReg(SrcReg)
            .addImm(0)
            .addFrameIndex(FrameIdx)
            .addMemOperand(MMOStore);

        MachineMemOperand *MMOLoad = F->getMachineMemOperand(
            MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
            MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
            MFI.getObjectAlign(FrameIdx));

        // Load from the stack where SrcReg is stored, and save to DestReg,
        // so we have done the RegClass conversion from RegClass::SrcReg to
        // RegClass::DestReg.
        BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
            .addImm(0)
            .addFrameIndex(FrameIdx)
            .addMemOperand(MMOLoad);
      }
    };

    Register OldFPSCRReg = MI.getOperand(0).getReg();

    // Save FPSCR value.
    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);

    // When the operand is a GPR, use its two least significant bits together
    // with the mtfsf instruction to set bits 62:63 of the FPSCR:
    //
    //   copy OldFPSCRTmpReg, OldFPSCRReg
    //   (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
    //   rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
    //   copy NewFPSCRReg, NewFPSCRTmpReg
    //   mtfsf 255, NewFPSCRReg
    MachineOperand SrcOp = MI.getOperand(1);
    MachineRegisterInfo &RegInfo = F->getRegInfo();
    Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);

    copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);

    Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
    Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);

    // The first operand of INSERT_SUBREG should be a register which has
    // subregisters; we only care about its RegClass, so we use an
    // IMPLICIT_DEF register.
    BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
    BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
        .addReg(ImDefReg)
        .add(SrcOp)
        .addImm(1);

    Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
    BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
        .addReg(OldFPSCRTmpReg)
        .addReg(ExtSrcReg)
        .addImm(0)
        .addImm(62);

    Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
    copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);

    // The mask value of 255 means that bits 32:63 of NewFPSCRReg are placed
    // into bits 32:63 of the FPSCR.
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
        .addImm(255)
        .addReg(NewFPSCRReg)
        .addImm(0)
        .addImm(0);
  } else if (MI.getOpcode() == PPC::SETFLM) {
    DebugLoc Dl = MI.getDebugLoc();

    // The result of setflm is the previous FPSCR content, so we need to save
    // it first.
    Register OldFPSCRReg = MI.getOperand(0).getReg();
    if (MRI.use_empty(OldFPSCRReg))
      BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
    else
      BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);

    // Put bits 32:63 into the FPSCR.
    Register NewFPSCRReg = MI.getOperand(1).getReg();
    BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
        .addImm(255)
        .addReg(NewFPSCRReg)
        .addImm(0)
        .addImm(0);
13581 | } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 || |
13582 | MI.getOpcode() == PPC::PROBED_ALLOCA_64) { |
13583 | return emitProbedAlloca(MI, MBB: BB); |
13584 | } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) { |
13585 | DebugLoc DL = MI.getDebugLoc(); |
13586 | Register Src = MI.getOperand(i: 2).getReg(); |
13587 | Register Lo = MI.getOperand(i: 0).getReg(); |
13588 | Register Hi = MI.getOperand(i: 1).getReg(); |
13589 | BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY)) |
13590 | .addDef(Lo) |
13591 | .addUse(Src, 0, PPC::sub_gp8_x1); |
13592 | BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY)) |
13593 | .addDef(Hi) |
13594 | .addUse(Src, 0, PPC::sub_gp8_x0); |
13595 | } else if (MI.getOpcode() == PPC::LQX_PSEUDO || |
13596 | MI.getOpcode() == PPC::STQX_PSEUDO) { |
13597 | DebugLoc DL = MI.getDebugLoc(); |
13598 | // Ptr is used as the ptr_rc_no_r0 part |
13599 | // of LQ/STQ's memory operand and adding result of RA and RB, |
13600 | // so it has to be g8rc_and_g8rc_nox0. |
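    // Schematically, the expansion built below is:
    //   Ptr = ADD8 RA, RB
    //   Val = LQ 0(Ptr)      (or STQ Val, 0(Ptr) for the store pseudo)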
    Register Ptr =
        F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
    Register Val = MI.getOperand(0).getReg();
    Register RA = MI.getOperand(1).getReg();
    Register RB = MI.getOperand(2).getReg();
    BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
    BuildMI(*BB, MI, DL,
            MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
                                              : TII->get(PPC::STQ))
        .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
        .addImm(0)
        .addReg(Ptr);
  } else {
    llvm_unreachable("Unexpected instr type to insert");
  }

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
  // For the estimates, convergence is quadratic, so we essentially double the
  // number of correct digits after every iteration. For both FRE and FRSQRTE,
  // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
  // this is 2^-14. An IEEE float has 23 mantissa digits and a double has 52.
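  // For example, without hasRecipPrec(), three iterations refine 2^-5 to
  // 2^-10, 2^-20 and then 2^-40, covering the 23-bit float mantissa; the
  // extra step added for f64 below reaches 2^-80, covering its 52 bits.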
  int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
  if (VT.getScalarType() == MVT::f64)
    RefinementSteps++;
  return RefinementSteps;
}

SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
                                            const DenormalMode &Mode) const {
  // We only have VSX Vector Test for software Square Root.
  EVT VT = Op.getValueType();
  if (!isTypeLegal(MVT::i1) ||
      (VT != MVT::f64 &&
       ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
    return TargetLowering::getSqrtInputTest(Op, DAG, Mode);

  SDLoc DL(Op);
  // The output register of FTSQRT is a CR field.
  SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
  // ftsqrt BF,FRB
  // Let e_b be the unbiased exponent of the double-precision
  // floating-point operand in register FRB.
  // fe_flag is set to 1 if either of the following conditions occurs.
  //   - The double-precision floating-point operand in register FRB is a
  //     zero, a NaN, an infinity, or a negative value.
  //   - e_b is less than or equal to -970.
  // Otherwise fe_flag is set to 0.
  // Both VSX and non-VSX versions would set the EQ bit in the CR if the
  // number is not eligible for iteration (zero/negative/infinity/NaN or the
  // unbiased exponent is less than -970).
  SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
  return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
                                    FTSQRT, SRIdxVal),
                 0);
}

SDValue
PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
                                               SelectionDAG &DAG) const {
  // We only have VSX Vector Square Root.
  EVT VT = Op.getValueType();
  if (VT != MVT::f64 &&
      ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
    return TargetLowering::getSqrtResultForDenormInput(Op, DAG);

  return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
}

SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
                                           int Enabled, int &RefinementSteps,
                                           bool &UseOneConstNR,
                                           bool Reciprocal) const {
  EVT VT = Operand.getValueType();
  if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
      (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
      (VT == MVT::v2f64 && Subtarget.hasVSX())) {
    if (RefinementSteps == ReciprocalEstimate::Unspecified)
      RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);

    // The Newton-Raphson computation with a single constant does not provide
    // enough accuracy on some CPUs.
    UseOneConstNR = !Subtarget.needsTwoConstNR();
    return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
  }
  return SDValue();
}

SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
                                            int Enabled,
                                            int &RefinementSteps) const {
  EVT VT = Operand.getValueType();
  if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
      (VT == MVT::f64 && Subtarget.hasFRE()) ||
      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
      (VT == MVT::v2f64 && Subtarget.hasVSX())) {
    if (RefinementSteps == ReciprocalEstimate::Unspecified)
      RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
    return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
  }
  return SDValue();
}

unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
  // Note: This functionality is used only when unsafe-fp-math is enabled, and
  // on cores with reciprocal estimates (which are used when unsafe-fp-math is
  // enabled for division), this functionality is redundant with the default
  // combiner logic (once the division -> reciprocal/multiply transformation
  // has taken place). As a result, this matters more for older cores than for
  // newer ones.

  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
  // reciprocal if there are two or more FDIVs (for embedded cores with only
  // one FP pipeline) or three or more FDIVs (for generic OOO cores).
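  //
  // For example, on a core with this threshold of 2, the pair of divisions
  // x/z and y/z can become r = 1.0/z; x*r; y*r, trading the second division
  // for multiplies.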
  switch (Subtarget.getCPUDirective()) {
  default:
    return 3;
  case PPC::DIR_440:
  case PPC::DIR_A2:
  case PPC::DIR_E500:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
    return 2;
  }
}

// isConsecutiveLSLoc needs to work even if all adds have not yet been
// collapsed, and so we need to look through chains of them.
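// For example, (add (add X, 8), 8) yields Base = X and accumulates the two
// constants into Offset = 16.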
static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
                                      int64_t& Offset, SelectionDAG &DAG) {
  if (DAG.isBaseWithConstantOffset(Loc)) {
    Base = Loc.getOperand(0);
    Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();

    // The base might itself be a base plus an offset, and if so, accumulate
    // that as well.
    getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
  }
}

static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
                               unsigned Bytes, int Dist,
                               SelectionDAG &DAG) {
  if (VT.getSizeInBits() / 8 != Bytes)
    return false;

  SDValue BaseLoc = Base->getBasePtr();
  if (Loc.getOpcode() == ISD::FrameIndex) {
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
      return false;
    const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
    int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
    int FS = MFI.getObjectSize(FI);
    int BFS = MFI.getObjectSize(BFI);
    if (FS != BFS || FS != (int)Bytes) return false;
    return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
  }

  SDValue Base1 = Loc, Base2 = BaseLoc;
  int64_t Offset1 = 0, Offset2 = 0;
  getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
  getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
  if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
    return true;

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const GlobalValue *GV1 = nullptr;
  const GlobalValue *GV2 = nullptr;
  Offset1 = 0;
  Offset2 = 0;
  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
  if (isGA1 && isGA2 && GV1 == GV2)
    return Offset1 == (Offset2 + Dist*Bytes);
  return false;
}

// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
// not enforce equality of the chain operands.
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
                            unsigned Bytes, int Dist,
                            SelectionDAG &DAG) {
  if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
    EVT VT = LS->getMemoryVT();
    SDValue Loc = LS->getBasePtr();
    return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
  }

  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
    EVT VT;
    switch (N->getConstantOperandVal(1)) {
    default: return false;
    case Intrinsic::ppc_altivec_lvx:
    case Intrinsic::ppc_altivec_lvxl:
    case Intrinsic::ppc_vsx_lxvw4x:
    case Intrinsic::ppc_vsx_lxvw4x_be:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
    case Intrinsic::ppc_vsx_lxvd2x_be:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
  }

  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
    EVT VT;
    switch (N->getConstantOperandVal(1)) {
    default: return false;
    case Intrinsic::ppc_altivec_stvx:
    case Intrinsic::ppc_altivec_stvxl:
    case Intrinsic::ppc_vsx_stxvw4x:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_vsx_stxvw4x_be:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x_be:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
  }

  return false;
}

// Return true if there is a nearby consecutive load to the one provided
// (regardless of alignment). We search up and down the chain, looking through
// token factors and other loads (but nothing else). As a result, a true result
// indicates that it is safe to create a new consecutive load adjacent to the
// load provided.
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
  SDValue Chain = LD->getChain();
  EVT VT = LD->getMemoryVT();

  SmallSet<SDNode *, 16> LoadRoots;
  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
  SmallSet<SDNode *, 16> Visited;

  // First, search up the chain, branching to follow all token-factor operands.
  // If we find a consecutive load, then we're done, otherwise, record all
  // nodes just above the top-level loads and token factors.
  while (!Queue.empty()) {
    SDNode *ChainNext = Queue.pop_back_val();
    if (!Visited.insert(ChainNext).second)
      continue;

    if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
      if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
        return true;

      if (!Visited.count(ChainLD->getChain().getNode()))
        Queue.push_back(ChainLD->getChain().getNode());
    } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
      for (const SDUse &O : ChainNext->ops())
        if (!Visited.count(O.getNode()))
          Queue.push_back(O.getNode());
    } else
      LoadRoots.insert(ChainNext);
  }

  // Second, search down the chain, starting from the top-level nodes recorded
  // in the first phase. These top-level nodes are the nodes just above all
  // loads and token factors. Starting with their uses, recursively look
  // through all loads (just the chain uses) and token factors to find a
  // consecutive load.
  Visited.clear();
  Queue.clear();

  for (SDNode *I : LoadRoots) {
    Queue.push_back(I);

    while (!Queue.empty()) {
      SDNode *LoadRoot = Queue.pop_back_val();
      if (!Visited.insert(LoadRoot).second)
        continue;

      if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
        if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
          return true;

      for (SDNode *U : LoadRoot->uses())
        if (((isa<MemSDNode>(U) &&
              cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
             U->getOpcode() == ISD::TokenFactor) &&
            !Visited.count(U))
          Queue.push_back(U);
    }
  }

  return false;
}

/// This function is called when we have proved that a SETCC node can be
/// replaced by subtraction (and other supporting instructions) so that the
/// result of the comparison is kept in a GPR instead of a CR. This function is
/// purely for codegen purposes and has some flags to guide the codegen
/// process.
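///
/// For example, with 32-bit operands, x <u y can be computed by
/// zero-extending both operands to 64 bits, forming (sub x64, y64), and
/// shifting the sign bit (bit 63) of the subtraction down to bit 0.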
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
                                     bool Swap, SDLoc &DL, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");

  // Zero extend the operands to the largest legal integer. Originally, they
  // must be of a strictly smaller size.
  auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
                         DAG.getConstant(Size, DL, MVT::i32));
  auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
                         DAG.getConstant(Size, DL, MVT::i32));

  // Swap if needed, depending on the condition code.
  if (Swap)
    std::swap(Op0, Op1);

  // Subtract the extended integers.
  auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);

  // Move the sign bit to the least significant position and zero out the rest.
  // Now the least significant bit carries the result of the original
  // comparison.
  auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
                             DAG.getConstant(Size - 1, DL, MVT::i32));
  auto Final = Shifted;

  // Complement the result if needed, based on the condition code.
  if (Complement)
    Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
                        DAG.getConstant(1, DL, MVT::i64));

  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
}

SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);

  // The size of the integers being compared has a critical role in the
  // following analysis, so we prefer to do this when all types are legal.
  if (!DCI.isAfterLegalizeDAG())
    return SDValue();

  // If all users of the SETCC extend its value to a legal integer type, then
  // we replace the SETCC with a subtraction.
  for (const SDNode *U : N->uses())
    if (U->getOpcode() != ISD::ZERO_EXTEND)
      return SDValue();

  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  auto OpSize = N->getOperand(0).getValueSizeInBits();

  unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();

  if (OpSize < Size) {
    switch (CC) {
    default: break;
    case ISD::SETULT:
      return generateEquivalentSub(N, Size, false, false, DL, DAG);
    case ISD::SETULE:
      return generateEquivalentSub(N, Size, true, true, DL, DAG);
    case ISD::SETUGT:
      return generateEquivalentSub(N, Size, false, true, DL, DAG);
    case ISD::SETUGE:
      return generateEquivalentSub(N, Size, true, false, DL, DAG);
    }
  }

  return SDValue();
}

SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
  // If we're tracking CR bits, we need to be careful that we don't have:
  //   trunc(binary-ops(zext(x), zext(y)))
  // or
  //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...))
  // such that we're unnecessarily moving things into GPRs when it would be
  // better to keep them in CR bits.

  // Note that trunc here can be an actual i1 trunc, or can be the effective
  // truncation that comes from a setcc or select_cc.
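  //
  // For example, (trunc:i1 (xor:i32 (zext:i32 a:i1), (zext:i32 b:i1))) can be
  // rewritten as (xor:i1 a, b), keeping the whole computation in CR bits.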
  if (N->getOpcode() == ISD::TRUNCATE &&
      N->getValueType(0) != MVT::i1)
    return SDValue();

  if (N->getOperand(0).getValueType() != MVT::i32 &&
      N->getOperand(0).getValueType() != MVT::i64)
    return SDValue();

  if (N->getOpcode() == ISD::SETCC ||
      N->getOpcode() == ISD::SELECT_CC) {
    // If we're looking at a comparison, then we need to make sure that the
    // high bits (all except for the first) don't affect the result.
    ISD::CondCode CC =
      cast<CondCodeSDNode>(N->getOperand(
        N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
    unsigned OpBits = N->getOperand(0).getValueSizeInBits();

    if (ISD::isSignedIntSetCC(CC)) {
      if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
          DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
        return SDValue();
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      if (!DAG.MaskedValueIsZero(N->getOperand(0),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
          !DAG.MaskedValueIsZero(N->getOperand(1),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)))
        return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
                                             : SDValue());
    } else {
      // This is neither a signed nor an unsigned comparison, just make sure
      // that the high bits are equal.
      KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
      KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));

      // We don't really care about what is known about the first bit (if
      // anything), so pretend that it is known zero for both to ensure that
      // they can be compared as constants.
      Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
      Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);

      if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
          Op1Known.getConstant() != Op2Known.getConstant())
        return SDValue();
    }
  }

  // We now know that the higher-order bits are irrelevant; we just need to
  // make sure that all of the intermediate operations are bit operations, and
  // all inputs are extensions.
  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
      N->getOperand(1).getOpcode() != ISD::AND &&
      N->getOperand(1).getOpcode() != ISD::OR &&
      N->getOperand(1).getOpcode() != ISD::XOR &&
      N->getOperand(1).getOpcode() != ISD::SELECT &&
      N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps, PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  for (unsigned i = 0; i < 2; ++i) {
    if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
         N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
        isa<ConstantSDNode>(N->getOperand(i)))
      Inputs.push_back(N->getOperand(i));
    else
      BinOps.push_back(N->getOperand(i));

    if (N->getOpcode() == ISD::TRUNCATE)
      break;
  }

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by extensions.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.pop_back_val();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
           BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
                 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
                 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not an extension or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (const SDNode *User : Inputs[i].getNode()->uses()) {
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i] ||
            User->getOperand(1) == Inputs[i])
          return SDValue();
      }
    }
  }

  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (const SDNode *User : PromOps[i].getNode()->uses()) {
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i] ||
            User->getOperand(1) == PromOps[i])
          return SDValue();
      }
    }
  }

  // Replace all inputs with the extension operand.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constants may have users outside the cluster of to-be-promoted nodes,
    // and so we need to replace those as we do the promotions.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
  }

  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all operations (these are all the same, but have a different
  // (i1) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first. Any intermediate truncations or
  // extensions disappear.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    if (PromOp.getOpcode() == ISD::TRUNCATE ||
        PromOp.getOpcode() == ISD::SIGN_EXTEND ||
        PromOp.getOpcode() == ISD::ZERO_EXTEND ||
        PromOp.getOpcode() == ISD::ANY_EXTEND) {
      if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
          PromOp.getOperand(0).getValueType() != MVT::i1) {
        // The operand is not yet ready (see comment below).
        PromOpHandles.emplace_front(PromOp);
        continue;
      }

      SDValue RepValue = PromOp.getOperand(0);
      if (isa<ConstantSDNode>(RepValue))
        RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);

      DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
      continue;
    }

    unsigned C;
    switch (PromOp.getOpcode()) {
    default: C = 0; break;
    case ISD::SELECT: C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != MVT::i1) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If there are any constant inputs, make sure they're replaced now.
    for (unsigned i = 0; i < 2; ++i)
      if (isa<ConstantSDNode>(Ops[C+i]))
        Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);

    DAG.ReplaceAllUsesOfValueWith(PromOp,
        DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
  }

  // Now we're left with the initial truncation itself.
  if (N->getOpcode() == ISD::TRUNCATE)
    return N->getOperand(0);

  // Otherwise, this is a comparison. The operands to be compared have just
  // changed type (to i1), but everything else is the same.
  return SDValue(N, 0);
}

SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  // If we're tracking CR bits, we need to be careful that we don't have:
  //   zext(binary-ops(trunc(x), trunc(y)))
  // or
  //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...))
  // such that we're unnecessarily moving things into CR bits that can more
  // efficiently stay in GPRs. Note that if we're not certain that the high
  // bits are set as required by the final extension, we still may need to do
  // some masking to get the proper behavior.
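  //
  // For example, (zext:i64 (and:i32 (trunc:i32 x:i64), (trunc:i32 y:i64)))
  // can become (and:i64 x, y), followed by whatever masking the final
  // extension requires.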

  // This same functionality is important on PPC64 when dealing with
  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
  // the return values of functions. Because it is so similar, it is handled
  // here as well.

  if (N->getValueType(0) != MVT::i32 &&
      N->getValueType(0) != MVT::i64)
    return SDValue();

  if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
        (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
    return SDValue();

  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC)
    return SDValue();

  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by truncations.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.pop_back_val();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not a truncation or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // The operands of a select that must be truncated when the select is
  // promoted because the operand is actually part of the to-be-promoted set.
  DenseMap<SDNode *, EVT> SelectTruncOp[2];

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (SDNode *User : Inputs[i].getNode()->uses()) {
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) or SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                  User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                  User->getOperand(0).getValueType()));
        if (User->getOperand(1) == Inputs[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                  User->getOperand(1).getValueType()));
      }
    }
  }

  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode *User : PromOps[i].getNode()->uses()) {
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) or SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                  User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                  User->getOperand(0).getValueType()));
        if (User->getOperand(1) == PromOps[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                  User->getOperand(1).getValueType()));
      }
    }
  }

  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
  bool ReallyNeedsExt = false;
  if (N->getOpcode() != ISD::ANY_EXTEND) {
    // If all of the inputs are not already sign/zero extended, then
    // we'll still need to do that at the end.
    for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
      if (isa<ConstantSDNode>(Inputs[i]))
        continue;

      unsigned OpBits =
        Inputs[i].getOperand(0).getValueSizeInBits();
      assert(PromBits < OpBits && "Truncation not to a smaller bit count?");

      if ((N->getOpcode() == ISD::ZERO_EXTEND &&
           !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
                                  APInt::getHighBitsSet(OpBits,
                                                        OpBits-PromBits))) ||
          (N->getOpcode() == ISD::SIGN_EXTEND &&
           DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
             (OpBits-(PromBits-1)))) {
        ReallyNeedsExt = true;
        break;
      }
    }
  }

  // Replace all inputs, either with the truncation operand, or a
  // truncation or extension to the final output type.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constant inputs need to be replaced with the to-be-promoted nodes that
    // use them because they might have users outside of the cluster of
    // promoted nodes.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    SDValue InSrc = Inputs[i].getOperand(0);
    if (Inputs[i].getValueType() == N->getValueType(0))
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
    else if (N->getOpcode() == ISD::SIGN_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else if (N->getOpcode() == ISD::ZERO_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
  }

  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all operations (these are all the same, but have a different
  // (promoted) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    unsigned C;
    switch (PromOp.getOpcode()) {
    default: C = 0; break;
    case ISD::SELECT: C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    // For SELECT and SELECT_CC nodes, we do a similar check for any
    // to-be-promoted comparison inputs.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      if ((SelectTruncOp[0].count(PromOp.getNode()) &&
           PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
          (SelectTruncOp[1].count(PromOp.getNode()) &&
           PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
        PromOpHandles.emplace_front(PromOp);
        continue;
      }
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If this node has constant inputs, then they'll need to be promoted here.
    for (unsigned i = 0; i < 2; ++i) {
      if (!isa<ConstantSDNode>(Ops[C+i]))
        continue;
      if (Ops[C+i].getValueType() == N->getValueType(0))
        continue;

      if (N->getOpcode() == ISD::SIGN_EXTEND)
        Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else if (N->getOpcode() == ISD::ZERO_EXTEND)
        Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else
        Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
    }

    // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
    // truncate them again to the original value type.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
      if (SI0 != SelectTruncOp[0].end())
        Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
      auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
      if (SI1 != SelectTruncOp[1].end())
        Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
    }

    DAG.ReplaceAllUsesOfValueWith(PromOp,
        DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
  }


  // Now we're left with the initial extension itself.
  if (!ReallyNeedsExt)
    return N->getOperand(0);

  // To zero extend, just mask off everything except for the first bit (in the
  // i1 case).
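  // For example, when extending from i1 to i64, this produces (and x, 1); the
  // sign-extension case below instead produces (sra (shl x, 63), 63).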
  if (N->getOpcode() == ISD::ZERO_EXTEND)
    return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
                       DAG.getConstant(APInt::getLowBitsSet(
                                           N->getValueSizeInBits(0), PromBits),
                                       dl, N->getValueType(0)));

  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
         "Invalid extension type");
  EVT ShiftAmountTy =
      getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
  SDValue ShiftCst =
    DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
  return DAG.getNode(
      ISD::SRA, dl, N->getValueType(0),
      DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
      ShiftCst);
}

SDValue PPCTargetLowering::combineSetCC(SDNode *N,
                                        DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::SETCC &&
         "Should be called with a SETCC node");

  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  if (CC == ISD::SETNE || CC == ISD::SETEQ) {
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);

    // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
    if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
        LHS.hasOneUse())
      std::swap(LHS, RHS);

    // x == 0-y --> x+y == 0
    // x != 0-y --> x+y != 0
    if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
        RHS.hasOneUse()) {
      SDLoc DL(N);
      SelectionDAG &DAG = DCI.DAG;
      EVT VT = N->getValueType(0);
      EVT OpVT = LHS.getValueType();
      SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
      return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
    }
  }

  return DAGCombineTruncBoolExt(N, DCI);
}

// Is this an extending load from an f32 to an f64?
static bool isFPExtLoad(SDValue Op) {
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
    return LD->getExtensionType() == ISD::EXTLOAD &&
           Op.getValueType() == MVT::f64;
  return false;
}

/// Reduces the number of fp-to-int conversions when building a vector.
///
/// If this vector is built out of floating-point-to-integer conversions,
/// transform it to a vector built out of floating-point values followed by a
/// single floating-point-to-integer conversion of the vector.
/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
/// becomes (fptosi (build_vector ($A, $B, ...)))
SDValue PPCTargetLowering::
combineElementTruncationToVectorTruncation(SDNode *N,
                                           DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  SDValue FirstInput = N->getOperand(0);
  assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
         "The input operand must be an fp-to-int conversion.");

  // This combine happens after legalization so the fp_to_[su]i nodes are
  // already converted to PPCISD nodes.
  unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
  if (FirstConversion == PPCISD::FCTIDZ ||
      FirstConversion == PPCISD::FCTIDUZ ||
      FirstConversion == PPCISD::FCTIWZ ||
      FirstConversion == PPCISD::FCTIWUZ) {
    bool IsSplat = true;
    bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
      FirstConversion == PPCISD::FCTIWUZ;
    EVT SrcVT = FirstInput.getOperand(0).getValueType();
    SmallVector<SDValue, 4> Ops;
    EVT TargetVT = N->getValueType(0);
    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
      SDValue NextOp = N->getOperand(i);
      if (NextOp.getOpcode() != PPCISD::MFVSR)
        return SDValue();
      unsigned NextConversion = NextOp.getOperand(0).getOpcode();
      if (NextConversion != FirstConversion)
        return SDValue();
      // If we are converting to 32-bit integers, we need to add an FP_ROUND.
      // This is not valid if the input was originally double precision. It is
      // also not profitable to do unless this is an extending load in which
      // case doing this combine will allow us to combine consecutive loads.
      if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
        return SDValue();
      if (N->getOperand(i) != FirstInput)
        IsSplat = false;
    }

    // If this is a splat, we leave it as-is since there will be only a single
    // fp-to-int conversion followed by a splat of the integer. This is better
    // for 32-bit and smaller ints and neutral for 64-bit ints.
    if (IsSplat)
      return SDValue();

    // Now that we know we have the right type of node, get its operands.
    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
      SDValue In = N->getOperand(i).getOperand(0);
      if (Is32Bit) {
        // For 32-bit values, we need to add an FP_ROUND node (if we made it
        // here, we know that all inputs are extending loads so this is safe).
        if (In.isUndef())
          Ops.push_back(DAG.getUNDEF(SrcVT));
        else {
          SDValue Trunc =
            DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
                        DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
          Ops.push_back(Trunc);
        }
      } else
        Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
    }

    unsigned Opcode;
    if (FirstConversion == PPCISD::FCTIDZ ||
        FirstConversion == PPCISD::FCTIWZ)
      Opcode = ISD::FP_TO_SINT;
    else
      Opcode = ISD::FP_TO_UINT;

    EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
    SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
    return DAG.getNode(Opcode, dl, TargetVT, BV);
  }
  return SDValue();
}

/// Reduce the number of loads when building a vector.
///
/// Building a vector out of multiple loads can be converted to a load
/// of the vector type if the loads are consecutive. If the loads are
/// consecutive but in descending order, a shuffle is added at the end
/// to reorder the vector.
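///
/// For example, (build_vector (load p), (load p+4), (load p+8), (load p+12))
/// with i32 elements can become a single v4i32 load of p; if the same loads
/// appear in descending address order, the wide load is taken from the lowest
/// address and followed by a reversing vector_shuffle.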
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SDLoc dl(N);

  // Return early for non-byte-sized types, as they can't be consecutive.
  if (!N->getValueType(0).getVectorElementType().isByteSized())
    return SDValue();

  bool InputsAreConsecutiveLoads = true;
  bool InputsAreReverseConsecutive = true;
  unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
  SDValue FirstInput = N->getOperand(0);
  bool IsRoundOfExtLoad = false;
  LoadSDNode *FirstLoad = nullptr;

  if (FirstInput.getOpcode() == ISD::FP_ROUND &&
      FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
    FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
    IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
  }
  // Not a build vector of (possibly fp_rounded) loads.
  if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
      N->getNumOperands() == 1)
    return SDValue();

  if (!IsRoundOfExtLoad)
    FirstLoad = cast<LoadSDNode>(FirstInput);

  SmallVector<LoadSDNode *, 4> InputLoads;
  InputLoads.push_back(FirstLoad);
  for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
    // If any inputs are fp_round(extload), they all must be.
    if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
      return SDValue();

    SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
                                           N->getOperand(i);
    if (NextInput.getOpcode() != ISD::LOAD)
      return SDValue();

    SDValue PreviousInput =
      IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
    LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
    LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);

    // If any inputs are fp_round(extload), they all must be.
    if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
      return SDValue();

    // We only care about regular loads. The PPC-specific load intrinsics
    // will not lead to a merge opportunity.
    if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
      InputsAreConsecutiveLoads = false;
    if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
      InputsAreReverseConsecutive = false;

    // Exit early if the loads are neither consecutive nor reverse consecutive.
    if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
      return SDValue();
    InputLoads.push_back(LD2);
  }

  assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
         "The loads cannot be both consecutive and reverse consecutive.");

  SDValue WideLoad;
  SDValue ReturnSDVal;
  if (InputsAreConsecutiveLoads) {
    assert(FirstLoad && "Input needs to be a LoadSDNode.");
    WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
                           FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
                           FirstLoad->getAlign());
    ReturnSDVal = WideLoad;
  } else if (InputsAreReverseConsecutive) {
    LoadSDNode *LastLoad = InputLoads.back();
    assert(LastLoad && "Input needs to be a LoadSDNode.");
    WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
                           LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
                           LastLoad->getAlign());
    SmallVector<int, 16> Ops;
    for (int i = N->getNumOperands() - 1; i >= 0; i--)
      Ops.push_back(i);

    ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
                                       DAG.getUNDEF(N->getValueType(0)), Ops);
  } else
    return SDValue();

  for (auto *LD : InputLoads)
    DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
  return ReturnSDVal;
}

// This function adds the vector_shuffle needed to get the elements of the
// vector extracts into the correct position, as specified by the CorrectElems
// encoding.
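// For example, a little-endian byte-to-word extend requires the extracted
// bytes to sit at element indices 0, 4, 8 and 12 of the input vector; any
// other arrangement is shuffled into that form first.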
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
                                      SDValue Input, uint64_t Elems,
                                      uint64_t CorrectElems) {
  SDLoc dl(N);

  unsigned NumElems = Input.getValueType().getVectorNumElements();
  SmallVector<int, 16> ShuffleMask(NumElems, -1);

  // Knowing the element indices being extracted from the original
  // vector and the order in which they're being inserted, just put
  // them at element indices required for the instruction.
  for (unsigned i = 0; i < N->getNumOperands(); i++) {
    if (DAG.getDataLayout().isLittleEndian())
      ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
    else
      ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
    CorrectElems = CorrectElems >> 8;
    Elems = Elems >> 8;
  }

  SDValue Shuffle =
    DAG.getVectorShuffle(Input.getValueType(), dl, Input,
                         DAG.getUNDEF(Input.getValueType()), ShuffleMask);

  EVT VT = N->getValueType(0);
  SDValue Conv = DAG.getBitcast(VT, Shuffle);

  EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               Input.getValueType().getVectorElementType(),
                               VT.getVectorNumElements());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
                     DAG.getValueType(ExtVT));
}

14821 | // Look for build vector patterns where input operands come from sign |
14822 | // extended vector_extract elements of specific indices. If the correct indices |
// aren't used, add a vector shuffle to fix up the indices and create a
// SIGN_EXTEND_INREG node, which selects the vector sign extend instructions
14825 | // during instruction selection. |
14826 | static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) { |
14827 | // This array encodes the indices that the vector sign extend instructions |
14828 | // extract from when extending from one type to another for both BE and LE. |
// The right nibble of each byte corresponds to the LE indices,
// and the left nibble of each byte corresponds to the BE indices.
14831 | // For example: 0x3074B8FC byte->word |
14832 | // For LE: the allowed indices are: 0x0,0x4,0x8,0xC |
14833 | // For BE: the allowed indices are: 0x3,0x7,0xB,0xF |
14834 | // For example: 0x000070F8 byte->double word |
14835 | // For LE: the allowed indices are: 0x0,0x8 |
14836 | // For BE: the allowed indices are: 0x7,0xF |
14837 | uint64_t TargetElems[] = { |
14838 | 0x3074B8FC, // b->w |
14839 | 0x000070F8, // b->d |
14840 | 0x10325476, // h->w |
14841 | 0x00003074, // h->d |
14842 | 0x00001032, // w->d |
14843 | }; |
14844 | |
14845 | uint64_t Elems = 0; |
14846 | int Index; |
14847 | SDValue Input; |
14848 | |
  auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
14850 | if (!Op) |
14851 | return false; |
14852 | if (Op.getOpcode() != ISD::SIGN_EXTEND && |
14853 | Op.getOpcode() != ISD::SIGN_EXTEND_INREG) |
14854 | return false; |
14855 | |
14856 | // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value |
14857 | // of the right width. |
    SDValue Extract = Op.getOperand(i: 0);
14859 | if (Extract.getOpcode() == ISD::ANY_EXTEND) |
14860 | Extract = Extract.getOperand(i: 0); |
14861 | if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT) |
14862 | return false; |
14863 | |
14864 | ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Val: Extract.getOperand(i: 1)); |
14865 | if (!ExtOp) |
14866 | return false; |
14867 | |
14868 | Index = ExtOp->getZExtValue(); |
14869 | if (Input && Input != Extract.getOperand(i: 0)) |
14870 | return false; |
14871 | |
14872 | if (!Input) |
14873 | Input = Extract.getOperand(i: 0); |
14874 | |
14875 | Elems = Elems << 8; |
14876 | Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4; |
14877 | Elems |= Index; |
14878 | |
14879 | return true; |
14880 | }; |
14881 | |
  // If the build vector operands aren't sign-extended vector extracts
  // of the same input vector, then return.
14884 | for (unsigned i = 0; i < N->getNumOperands(); i++) { |
14885 | if (!isSExtOfVecExtract(N->getOperand(Num: i))) { |
14886 | return SDValue(); |
14887 | } |
14888 | } |
14889 | |
  // If the vector extract indices are not correct, add the appropriate
14891 | // vector_shuffle. |
14892 | int TgtElemArrayIdx; |
14893 | int InputSize = Input.getValueType().getScalarSizeInBits(); |
14894 | int OutputSize = N->getValueType(ResNo: 0).getScalarSizeInBits(); |
14895 | if (InputSize + OutputSize == 40) |
14896 | TgtElemArrayIdx = 0; |
14897 | else if (InputSize + OutputSize == 72) |
14898 | TgtElemArrayIdx = 1; |
14899 | else if (InputSize + OutputSize == 48) |
14900 | TgtElemArrayIdx = 2; |
14901 | else if (InputSize + OutputSize == 80) |
14902 | TgtElemArrayIdx = 3; |
14903 | else if (InputSize + OutputSize == 96) |
14904 | TgtElemArrayIdx = 4; |
14905 | else |
14906 | return SDValue(); |
14907 | |
14908 | uint64_t CorrectElems = TargetElems[TgtElemArrayIdx]; |
14909 | CorrectElems = DAG.getDataLayout().isLittleEndian() |
14910 | ? CorrectElems & 0x0F0F0F0F0F0F0F0F |
14911 | : CorrectElems & 0xF0F0F0F0F0F0F0F0; |
14912 | if (Elems != CorrectElems) { |
14913 | return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems); |
14914 | } |
14915 | |
14916 | // Regular lowering will catch cases where a shuffle is not needed. |
14917 | return SDValue(); |
14918 | } |
14919 | |
14920 | // Look for the pattern of a load from a narrow width to i128, feeding |
14921 | // into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node |
14922 | // (LXVRZX). This node represents a zero extending load that will be matched |
14923 | // to the Load VSX Vector Rightmost instructions. |
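// A sketch of the pattern being matched, assuming a doubleword narrow load:
//   (v1i128 (build_vector (i128 (zextload <ptr>, i64))))
// becomes
//   (v1i128 (LXVRZX <chain>, <ptr>, 64))
// where the last operand is the width of the narrow load in bits.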
14924 | static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) { |
14925 | SDLoc DL(N); |
14926 | |
14927 | // This combine is only eligible for a BUILD_VECTOR of v1i128. |
14928 | if (N->getValueType(0) != MVT::v1i128) |
14929 | return SDValue(); |
14930 | |
14931 | SDValue Operand = N->getOperand(Num: 0); |
14932 | // Proceed with the transformation if the operand to the BUILD_VECTOR |
14933 | // is a load instruction. |
14934 | if (Operand.getOpcode() != ISD::LOAD) |
14935 | return SDValue(); |
14936 | |
14937 | auto *LD = cast<LoadSDNode>(Val&: Operand); |
14938 | EVT MemoryType = LD->getMemoryVT(); |
14939 | |
  // This transformation is only valid if we are loading either a byte,
14941 | // halfword, word, or doubleword. |
14942 | bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 || |
14943 | MemoryType == MVT::i32 || MemoryType == MVT::i64; |
14944 | |
14945 | // Ensure that the load from the narrow width is being zero extended to i128. |
14946 | if (!ValidLDType || |
14947 | (LD->getExtensionType() != ISD::ZEXTLOAD && |
14948 | LD->getExtensionType() != ISD::EXTLOAD)) |
14949 | return SDValue(); |
14950 | |
14951 | SDValue LoadOps[] = { |
14952 | LD->getChain(), LD->getBasePtr(), |
14953 | DAG.getIntPtrConstant(Val: MemoryType.getScalarSizeInBits(), DL)}; |
14954 | |
14955 | return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL, |
14956 | DAG.getVTList(MVT::v1i128, MVT::Other), |
14957 | LoadOps, MemoryType, LD->getMemOperand()); |
14958 | } |
14959 | |
14960 | SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N, |
14961 | DAGCombinerInfo &DCI) const { |
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");
14964 | |
14965 | SelectionDAG &DAG = DCI.DAG; |
14966 | SDLoc dl(N); |
14967 | |
14968 | if (!Subtarget.hasVSX()) |
14969 | return SDValue(); |
14970 | |
14971 | // The target independent DAG combiner will leave a build_vector of |
14972 | // float-to-int conversions intact. We can generate MUCH better code for |
14973 | // a float-to-int conversion of a vector of floats. |
14974 | SDValue FirstInput = N->getOperand(Num: 0); |
14975 | if (FirstInput.getOpcode() == PPCISD::MFVSR) { |
14976 | SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI); |
14977 | if (Reduced) |
14978 | return Reduced; |
14979 | } |
14980 | |
14981 | // If we're building a vector out of consecutive loads, just load that |
14982 | // vector type. |
14983 | SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG); |
14984 | if (Reduced) |
14985 | return Reduced; |
14986 | |
14987 | // If we're building a vector out of extended elements from another vector |
14988 | // we have P9 vector integer extend instructions. The code assumes legal |
14989 | // input types (i.e. it can't handle things like v4i16) so do not run before |
14990 | // legalization. |
14991 | if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) { |
14992 | Reduced = combineBVOfVecSExt(N, DAG); |
14993 | if (Reduced) |
14994 | return Reduced; |
14995 | } |
14996 | |
14997 | // On Power10, the Load VSX Vector Rightmost instructions can be utilized |
14998 | // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR |
14999 | // is a load from <valid narrow width> to i128. |
15000 | if (Subtarget.isISA3_1()) { |
15001 | SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG); |
15002 | if (BVOfZLoad) |
15003 | return BVOfZLoad; |
15004 | } |
15005 | |
15006 | if (N->getValueType(0) != MVT::v2f64) |
15007 | return SDValue(); |
15008 | |
15009 | // Looking for: |
15010 | // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1)) |
15011 | if (FirstInput.getOpcode() != ISD::SINT_TO_FP && |
15012 | FirstInput.getOpcode() != ISD::UINT_TO_FP) |
15013 | return SDValue(); |
15014 | if (N->getOperand(Num: 1).getOpcode() != ISD::SINT_TO_FP && |
15015 | N->getOperand(Num: 1).getOpcode() != ISD::UINT_TO_FP) |
15016 | return SDValue(); |
15017 | if (FirstInput.getOpcode() != N->getOperand(Num: 1).getOpcode()) |
15018 | return SDValue(); |
15019 | |
15020 | SDValue Ext1 = FirstInput.getOperand(i: 0); |
15021 | SDValue Ext2 = N->getOperand(Num: 1).getOperand(i: 0); |
  if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15023 | Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT) |
15024 | return SDValue(); |
15025 | |
15026 | ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Val: Ext1.getOperand(i: 1)); |
15027 | ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Val: Ext2.getOperand(i: 1)); |
15028 | if (!Ext1Op || !Ext2Op) |
15029 | return SDValue(); |
15030 | if (Ext1.getOperand(0).getValueType() != MVT::v4i32 || |
15031 | Ext1.getOperand(0) != Ext2.getOperand(0)) |
15032 | return SDValue(); |
15033 | |
15034 | int FirstElem = Ext1Op->getZExtValue(); |
15035 | int SecondElem = Ext2Op->getZExtValue(); |
15036 | int SubvecIdx; |
15037 | if (FirstElem == 0 && SecondElem == 1) |
15038 | SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0; |
15039 | else if (FirstElem == 2 && SecondElem == 3) |
15040 | SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1; |
15041 | else |
15042 | return SDValue(); |
15043 | |
15044 | SDValue SrcVec = Ext1.getOperand(i: 0); |
15045 | auto NodeType = (N->getOperand(Num: 1).getOpcode() == ISD::SINT_TO_FP) ? |
15046 | PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP; |
15047 | return DAG.getNode(NodeType, dl, MVT::v2f64, |
15048 | SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl)); |
15049 | } |
15050 | |
15051 | SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N, |
15052 | DAGCombinerInfo &DCI) const { |
  assert((N->getOpcode() == ISD::SINT_TO_FP ||
          N->getOpcode() == ISD::UINT_TO_FP) &&
         "Need an int -> FP conversion node here");
15056 | |
15057 | if (useSoftFloat() || !Subtarget.has64BitSupport()) |
15058 | return SDValue(); |
15059 | |
15060 | SelectionDAG &DAG = DCI.DAG; |
15061 | SDLoc dl(N); |
15062 | SDValue Op(N, 0); |
15063 | |
15064 | // Don't handle ppc_fp128 here or conversions that are out-of-range capable |
15065 | // from the hardware. |
15066 | if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) |
15067 | return SDValue(); |
15068 | if (!Op.getOperand(i: 0).getValueType().isSimple()) |
15069 | return SDValue(); |
15070 | if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) || |
15071 | Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64)) |
15072 | return SDValue(); |
15073 | |
15074 | SDValue FirstOperand(Op.getOperand(i: 0)); |
15075 | bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD && |
15076 | (FirstOperand.getValueType() == MVT::i8 || |
15077 | FirstOperand.getValueType() == MVT::i16); |
15078 | if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) { |
15079 | bool Signed = N->getOpcode() == ISD::SINT_TO_FP; |
15080 | bool DstDouble = Op.getValueType() == MVT::f64; |
15081 | unsigned ConvOp = Signed ? |
15082 | (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) : |
15083 | (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS); |
15084 | SDValue WidthConst = |
15085 | DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2, |
15086 | dl, false); |
15087 | LoadSDNode *LDN = cast<LoadSDNode>(Val: FirstOperand.getNode()); |
15088 | SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst }; |
15089 | SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl, |
15090 | DAG.getVTList(MVT::f64, MVT::Other), |
15091 | Ops, MVT::i8, LDN->getMemOperand()); |
15092 | DAG.makeEquivalentMemoryOrdering(OldLoad: LDN, NewMemOp: Ld); |
15093 | |
15094 | // For signed conversion, we need to sign-extend the value in the VSR |
15095 | if (Signed) { |
15096 | SDValue ExtOps[] = { Ld, WidthConst }; |
15097 | SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps); |
15098 | return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext); |
15099 | } else |
15100 | return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld); |
15101 | } |
15102 | |
15103 | |
15104 | // For i32 intermediate values, unfortunately, the conversion functions |
  // leave the upper 32 bits of the value undefined. Within the set of
15106 | // scalar instructions, we have no method for zero- or sign-extending the |
15107 | // value. Thus, we cannot handle i32 intermediate values here. |
15108 | if (Op.getOperand(0).getValueType() == MVT::i32) |
15109 | return SDValue(); |
15110 | |
  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");
15113 | |
15114 | // If we have FCFIDS, then use it when converting to single-precision. |
15115 | // Otherwise, convert to double-precision and then round. |
15116 | unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) |
15117 | ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS |
15118 | : PPCISD::FCFIDS) |
15119 | : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU |
15120 | : PPCISD::FCFID); |
15121 | MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) |
15122 | ? MVT::f32 |
15123 | : MVT::f64; |
15124 | |
  // If we're converting from a float to an int and back to a float again,
  // we don't need the store/load pair at all.
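  // For example, (f64 (sint_to_fp (i64 (fp_to_sint f64:$src)))) becomes
  // (f64 (FCFID (FCTIDZ f64:$src))), keeping the value in floating-point
  // registers throughout.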
15127 | if ((Op.getOperand(i: 0).getOpcode() == ISD::FP_TO_UINT && |
15128 | Subtarget.hasFPCVT()) || |
15129 | (Op.getOperand(i: 0).getOpcode() == ISD::FP_TO_SINT)) { |
15130 | SDValue Src = Op.getOperand(i: 0).getOperand(i: 0); |
15131 | if (Src.getValueType() == MVT::f32) { |
15132 | Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); |
15133 | DCI.AddToWorklist(N: Src.getNode()); |
15134 | } else if (Src.getValueType() != MVT::f64) { |
15135 | // Make sure that we don't pick up a ppc_fp128 source value. |
15136 | return SDValue(); |
15137 | } |
15138 | |
15139 | unsigned FCTOp = |
15140 | Op.getOperand(i: 0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ : |
15141 | PPCISD::FCTIDUZ; |
15142 | |
15143 | SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src); |
15144 | SDValue FP = DAG.getNode(Opcode: FCFOp, DL: dl, VT: FCFTy, Operand: Tmp); |
15145 | |
15146 | if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) { |
15147 | FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, |
15148 | DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)); |
15149 | DCI.AddToWorklist(N: FP.getNode()); |
15150 | } |
15151 | |
15152 | return FP; |
15153 | } |
15154 | |
15155 | return SDValue(); |
15156 | } |
15157 | |
15158 | // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for |
15159 | // builtins) into loads with swaps. |
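// For example, a little endian (load v4i32:$ptr) becomes
//   (v4i32 (bitcast (XXSWAPD (LXVD2X $ptr))))
// since the load and swap are always done as v2f64, with a bitcast back to
// the requested type where needed.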
15160 | SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N, |
15161 | DAGCombinerInfo &DCI) const { |
15162 | // Delay VSX load for LE combine until after LegalizeOps to prioritize other |
15163 | // load combines. |
15164 | if (DCI.isBeforeLegalizeOps()) |
15165 | return SDValue(); |
15166 | |
15167 | SelectionDAG &DAG = DCI.DAG; |
15168 | SDLoc dl(N); |
15169 | SDValue Chain; |
15170 | SDValue Base; |
15171 | MachineMemOperand *MMO; |
15172 | |
15173 | switch (N->getOpcode()) { |
15174 | default: |
    llvm_unreachable("Unexpected opcode for little endian VSX load");
15176 | case ISD::LOAD: { |
15177 | LoadSDNode *LD = cast<LoadSDNode>(Val: N); |
15178 | Chain = LD->getChain(); |
15179 | Base = LD->getBasePtr(); |
15180 | MMO = LD->getMemOperand(); |
15181 | // If the MMO suggests this isn't a load of a full vector, leave |
15182 | // things alone. For a built-in, we have to make the change for |
15183 | // correctness, so if there is a size problem that will be a bug. |
15184 | if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16) |
15185 | return SDValue(); |
15186 | break; |
15187 | } |
15188 | case ISD::INTRINSIC_W_CHAIN: { |
15189 | MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(Val: N); |
15190 | Chain = Intrin->getChain(); |
15191 | // Similarly to the store case below, Intrin->getBasePtr() doesn't get |
15192 | // us what we want. Get operand 2 instead. |
15193 | Base = Intrin->getOperand(Num: 2); |
15194 | MMO = Intrin->getMemOperand(); |
15195 | break; |
15196 | } |
15197 | } |
15198 | |
15199 | MVT VecTy = N->getValueType(ResNo: 0).getSimpleVT(); |
15200 | |
15201 | SDValue LoadOps[] = { Chain, Base }; |
15202 | SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl, |
15203 | DAG.getVTList(MVT::v2f64, MVT::Other), |
15204 | LoadOps, MVT::v2f64, MMO); |
15205 | |
15206 | DCI.AddToWorklist(N: Load.getNode()); |
15207 | Chain = Load.getValue(R: 1); |
15208 | SDValue Swap = DAG.getNode( |
15209 | PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load); |
15210 | DCI.AddToWorklist(N: Swap.getNode()); |
15211 | |
15212 | // Add a bitcast if the resulting load type doesn't match v2f64. |
15213 | if (VecTy != MVT::v2f64) { |
15214 | SDValue N = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: VecTy, Operand: Swap); |
15215 | DCI.AddToWorklist(N: N.getNode()); |
15216 | // Package {bitcast value, swap's chain} to match Load's shape. |
15217 | return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other), |
15218 | N, Swap.getValue(1)); |
15219 | } |
15220 | |
15221 | return Swap; |
15222 | } |
15223 | |
15224 | // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for |
15225 | // builtins) into stores with swaps. |
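// For example, a little endian (store v4i32:$val, $ptr) becomes
//   (STXVD2X (XXSWAPD (bitcast v2f64 $val)), $ptr)
// since all such stores are performed as v2f64 with a swap.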
15226 | SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N, |
15227 | DAGCombinerInfo &DCI) const { |
15228 | // Delay VSX store for LE combine until after LegalizeOps to prioritize other |
15229 | // store combines. |
15230 | if (DCI.isBeforeLegalizeOps()) |
15231 | return SDValue(); |
15232 | |
15233 | SelectionDAG &DAG = DCI.DAG; |
15234 | SDLoc dl(N); |
15235 | SDValue Chain; |
15236 | SDValue Base; |
15237 | unsigned SrcOpnd; |
15238 | MachineMemOperand *MMO; |
15239 | |
15240 | switch (N->getOpcode()) { |
15241 | default: |
    llvm_unreachable("Unexpected opcode for little endian VSX store");
15243 | case ISD::STORE: { |
15244 | StoreSDNode *ST = cast<StoreSDNode>(Val: N); |
15245 | Chain = ST->getChain(); |
15246 | Base = ST->getBasePtr(); |
15247 | MMO = ST->getMemOperand(); |
15248 | SrcOpnd = 1; |
15249 | // If the MMO suggests this isn't a store of a full vector, leave |
15250 | // things alone. For a built-in, we have to make the change for |
15251 | // correctness, so if there is a size problem that will be a bug. |
15252 | if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16) |
15253 | return SDValue(); |
15254 | break; |
15255 | } |
15256 | case ISD::INTRINSIC_VOID: { |
15257 | MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(Val: N); |
15258 | Chain = Intrin->getChain(); |
15259 | // Intrin->getBasePtr() oddly does not get what we want. |
15260 | Base = Intrin->getOperand(Num: 3); |
15261 | MMO = Intrin->getMemOperand(); |
15262 | SrcOpnd = 2; |
15263 | break; |
15264 | } |
15265 | } |
15266 | |
15267 | SDValue Src = N->getOperand(Num: SrcOpnd); |
15268 | MVT VecTy = Src.getValueType().getSimpleVT(); |
15269 | |
15270 | // All stores are done as v2f64 and possible bit cast. |
15271 | if (VecTy != MVT::v2f64) { |
15272 | Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src); |
15273 | DCI.AddToWorklist(N: Src.getNode()); |
15274 | } |
15275 | |
15276 | SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl, |
15277 | DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src); |
15278 | DCI.AddToWorklist(N: Swap.getNode()); |
15279 | Chain = Swap.getValue(R: 1); |
15280 | SDValue StoreOps[] = { Chain, Swap, Base }; |
15281 | SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl, |
15282 | DAG.getVTList(MVT::Other), |
15283 | StoreOps, VecTy, MMO); |
15284 | DCI.AddToWorklist(N: Store.getNode()); |
15285 | return Store; |
15286 | } |
15287 | |
15288 | // Handle DAG combine for STORE (FP_TO_INT F). |
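// For example, (store (fp_to_sint f64:$val), $ptr) becomes a
// ST_VSR_SCAL_INT node that stores the converted value directly from a
// VSR, avoiding a round trip through a GPR; the conversion itself is
// produced by convertFPToInt below.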
15289 | SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N, |
15290 | DAGCombinerInfo &DCI) const { |
15291 | SelectionDAG &DAG = DCI.DAG; |
15292 | SDLoc dl(N); |
15293 | unsigned Opcode = N->getOperand(Num: 1).getOpcode(); |
15294 | (void)Opcode; |
15295 | bool Strict = N->getOperand(Num: 1)->isStrictFPOpcode(); |
15296 | |
  assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
          Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
         && "Not a FP_TO_INT Instruction!");
15300 | |
15301 | SDValue Val = N->getOperand(Num: 1).getOperand(i: Strict ? 1 : 0); |
15302 | EVT Op1VT = N->getOperand(Num: 1).getValueType(); |
15303 | EVT ResVT = Val.getValueType(); |
15304 | |
15305 | if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(VT: ResVT)) |
15306 | return SDValue(); |
15307 | |
15308 | // Only perform combine for conversion to i64/i32 or power9 i16/i8. |
15309 | bool ValidTypeForStoreFltAsInt = |
15310 | (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) || |
15311 | (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8))); |
15312 | |
15313 | // TODO: Lower conversion from f128 on all VSX targets |
15314 | if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector())) |
15315 | return SDValue(); |
15316 | |
15317 | if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) || |
15318 | cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt) |
15319 | return SDValue(); |
15320 | |
15321 | Val = convertFPToInt(Op: N->getOperand(Num: 1), DAG, Subtarget); |
15322 | |
15323 | // Set number of bytes being converted. |
15324 | unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8; |
15325 | SDValue Ops[] = {N->getOperand(Num: 0), Val, N->getOperand(Num: 2), |
15326 | DAG.getIntPtrConstant(Val: ByteSize, DL: dl, isTarget: false), |
15327 | DAG.getValueType(Op1VT)}; |
15328 | |
15329 | Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl, |
15330 | DAG.getVTList(MVT::Other), Ops, |
15331 | cast<StoreSDNode>(N)->getMemoryVT(), |
15332 | cast<StoreSDNode>(N)->getMemOperand()); |
15333 | |
15334 | return Val; |
15335 | } |
15336 | |
15337 | static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) { |
15338 | // Check that the source of the element keeps flipping |
  // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
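  // For example, with NumElts = 4, the mask <0,5,1,6> alternates between
  // the two source vectors and is accepted, while <0,1,5,6> is not.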
15340 | bool PrevElemFromFirstVec = Mask[0] < NumElts; |
15341 | for (int i = 1, e = Mask.size(); i < e; i++) { |
15342 | if (PrevElemFromFirstVec && Mask[i] < NumElts) |
15343 | return false; |
15344 | if (!PrevElemFromFirstVec && Mask[i] >= NumElts) |
15345 | return false; |
15346 | PrevElemFromFirstVec = !PrevElemFromFirstVec; |
15347 | } |
15348 | return true; |
15349 | } |
15350 | |
15351 | static bool isSplatBV(SDValue Op) { |
15352 | if (Op.getOpcode() != ISD::BUILD_VECTOR) |
15353 | return false; |
15354 | SDValue FirstOp; |
15355 | |
15356 | // Find first non-undef input. |
15357 | for (int i = 0, e = Op.getNumOperands(); i < e; i++) { |
15358 | FirstOp = Op.getOperand(i); |
15359 | if (!FirstOp.isUndef()) |
15360 | break; |
15361 | } |
15362 | |
15363 | // All inputs are undef or the same as the first non-undef input. |
15364 | for (int i = 1, e = Op.getNumOperands(); i < e; i++) |
15365 | if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef()) |
15366 | return false; |
15367 | return true; |
15368 | } |
15369 | |
15370 | static SDValue isScalarToVec(SDValue Op) { |
15371 | if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR) |
15372 | return Op; |
15373 | if (Op.getOpcode() != ISD::BITCAST) |
15374 | return SDValue(); |
15375 | Op = Op.getOperand(i: 0); |
15376 | if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR) |
15377 | return Op; |
15378 | return SDValue(); |
15379 | } |
15380 | |
15381 | // Fix up the shuffle mask to account for the fact that the result of |
15382 | // scalar_to_vector is not in lane zero. This just takes all values in |
15383 | // the ranges specified by the min/max indices and adds the number of |
15384 | // elements required to ensure each element comes from the respective |
15385 | // position in the valid lane. |
15386 | // On little endian, that's just the corresponding element in the other |
15387 | // half of the vector. On big endian, it is in the same half but right |
15388 | // justified rather than left justified in that half. |
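// For example, for a v4i32 shuffle whose LHS comes from a permuted
// scalar_to_vector (HalfVec = 2, ValidLaneWidth = 1), a mask entry of 0
// becomes 2 on little endian (the other half of the vector) and 1 on big
// endian (right justified in the left half).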
15389 | static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV, |
15390 | int LHSMaxIdx, int RHSMinIdx, |
15391 | int RHSMaxIdx, int HalfVec, |
15392 | unsigned ValidLaneWidth, |
15393 | const PPCSubtarget &Subtarget) { |
15394 | for (int i = 0, e = ShuffV.size(); i < e; i++) { |
15395 | int Idx = ShuffV[i]; |
15396 | if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx)) |
15397 | ShuffV[i] += |
15398 | Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth; |
15399 | } |
15400 | } |
15401 | |
15402 | // Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if |
15403 | // the original is: |
15404 | // (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C)))) |
15405 | // In such a case, just change the shuffle mask to extract the element |
15406 | // from the permuted index. |
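// For example, on little endian with v4i32:
//   (v4i32 (scalar_to_vector (i32 (extract_elt v4i32:$a, 3))))
// becomes vector_shuffle<-1,-1,3,-1> $a, $a, which reads element 3 directly
// into the permuted element-zero position.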
15407 | static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, |
15408 | const PPCSubtarget &Subtarget) { |
15409 | SDLoc dl(OrigSToV); |
15410 | EVT VT = OrigSToV.getValueType(); |
  assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
         "Expecting a SCALAR_TO_VECTOR here");
15413 | SDValue Input = OrigSToV.getOperand(i: 0); |
15414 | |
15415 | if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { |
15416 | ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Val: Input.getOperand(i: 1)); |
15417 | SDValue OrigVector = Input.getOperand(i: 0); |
15418 | |
15419 | // Can't handle non-const element indices or different vector types |
15420 | // for the input to the extract and the output of the scalar_to_vector. |
15421 | if (Idx && VT == OrigVector.getValueType()) { |
15422 | unsigned NumElts = VT.getVectorNumElements(); |
      assert(
          NumElts > 1 &&
          "Cannot produce a permuted scalar_to_vector for one element vector");
15426 | SmallVector<int, 16> NewMask(NumElts, -1); |
15427 | unsigned ResultInElt = NumElts / 2; |
15428 | ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1; |
15429 | NewMask[ResultInElt] = Idx->getZExtValue(); |
15430 | return DAG.getVectorShuffle(VT, dl, N1: OrigVector, N2: OrigVector, Mask: NewMask); |
15431 | } |
15432 | } |
15433 | return DAG.getNode(Opcode: PPCISD::SCALAR_TO_VECTOR_PERMUTED, DL: dl, VT, |
15434 | Operand: OrigSToV.getOperand(i: 0)); |
15435 | } |
15436 | |
15437 | // On little endian subtargets, combine shuffles such as: |
15438 | // vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b |
15439 | // into: |
15440 | // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b |
15441 | // because the latter can be matched to a single instruction merge. |
15442 | // Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute |
15443 | // to put the value into element zero. Adjust the shuffle mask so that the |
15444 | // vector can remain in permuted form (to prevent a swap prior to a shuffle). |
15445 | // On big endian targets, this is still useful for SCALAR_TO_VECTOR |
15446 | // nodes with elements smaller than doubleword because all the ways |
15447 | // of getting scalar data into a vector register put the value in the |
15448 | // rightmost element of the left half of the vector. |
15449 | SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN, |
15450 | SelectionDAG &DAG) const { |
15451 | SDValue LHS = SVN->getOperand(Num: 0); |
15452 | SDValue RHS = SVN->getOperand(Num: 1); |
15453 | auto Mask = SVN->getMask(); |
15454 | int NumElts = LHS.getValueType().getVectorNumElements(); |
15455 | SDValue Res(SVN, 0); |
15456 | SDLoc dl(SVN); |
15457 | bool IsLittleEndian = Subtarget.isLittleEndian(); |
15458 | |
15459 | // On big endian targets this is only useful for subtargets with direct moves. |
15460 | // On little endian targets it would be useful for all subtargets with VSX. |
15461 | // However adding special handling for LE subtargets without direct moves |
15462 | // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8) |
15463 | // which includes direct moves. |
15464 | if (!Subtarget.hasDirectMove()) |
15465 | return Res; |
15466 | |
15467 | // If this is not a shuffle of a shuffle and the first element comes from |
15468 | // the second vector, canonicalize to the commuted form. This will make it |
15469 | // more likely to match one of the single instruction patterns. |
15470 | if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE && |
15471 | RHS.getOpcode() != ISD::VECTOR_SHUFFLE) { |
15472 | std::swap(a&: LHS, b&: RHS); |
15473 | Res = DAG.getCommutedVectorShuffle(SV: *SVN); |
15474 | Mask = cast<ShuffleVectorSDNode>(Val&: Res)->getMask(); |
15475 | } |
15476 | |
15477 | // Adjust the shuffle mask if either input vector comes from a |
15478 | // SCALAR_TO_VECTOR and keep the respective input vector in permuted |
15479 | // form (to prevent the need for a swap). |
15480 | SmallVector<int, 16> ShuffV(Mask); |
15481 | SDValue SToVLHS = isScalarToVec(Op: LHS); |
15482 | SDValue SToVRHS = isScalarToVec(Op: RHS); |
15483 | if (SToVLHS || SToVRHS) { |
15484 | // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the |
15485 | // same type and have differing element sizes, then do not perform |
15486 | // the following transformation. The current transformation for |
15487 | // SCALAR_TO_VECTOR assumes that both input vectors have the same |
15488 | // element size. This will be updated in the future to account for |
15489 | // differing sizes of the LHS and RHS. |
15490 | if (SToVLHS && SToVRHS && |
15491 | (SToVLHS.getValueType().getScalarSizeInBits() != |
15492 | SToVRHS.getValueType().getScalarSizeInBits())) |
15493 | return Res; |
15494 | |
15495 | int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements() |
15496 | : SToVRHS.getValueType().getVectorNumElements(); |
15497 | int NumEltsOut = ShuffV.size(); |
15498 | // The width of the "valid lane" (i.e. the lane that contains the value that |
15499 | // is vectorized) needs to be expressed in terms of the number of elements |
    // of the shuffle. It is therefore the ratio of the element sizes before
    // and after any bitcast.
15502 | unsigned ValidLaneWidth = |
15503 | SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() / |
15504 | LHS.getValueType().getScalarSizeInBits() |
15505 | : SToVRHS.getValueType().getScalarSizeInBits() / |
15506 | RHS.getValueType().getScalarSizeInBits(); |
15507 | |
15508 | // Initially assume that neither input is permuted. These will be adjusted |
15509 | // accordingly if either input is. |
15510 | int LHSMaxIdx = -1; |
15511 | int RHSMinIdx = -1; |
15512 | int RHSMaxIdx = -1; |
15513 | int HalfVec = LHS.getValueType().getVectorNumElements() / 2; |
15514 | |
15515 | // Get the permuted scalar to vector nodes for the source(s) that come from |
15516 | // ISD::SCALAR_TO_VECTOR. |
15517 | // On big endian systems, this only makes sense for element sizes smaller |
15518 | // than 64 bits since for 64-bit elements, all instructions already put |
    // the value into element zero. Since the scalar sizes of the LHS and RHS
    // may differ after isScalarToVec, this should be checked using their own
    // sizes.
15521 | if (SToVLHS) { |
15522 | if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64) |
15523 | return Res; |
15524 | // Set up the values for the shuffle vector fixup. |
15525 | LHSMaxIdx = NumEltsOut / NumEltsIn; |
15526 | SToVLHS = getSToVPermuted(OrigSToV: SToVLHS, DAG, Subtarget); |
15527 | if (SToVLHS.getValueType() != LHS.getValueType()) |
15528 | SToVLHS = DAG.getBitcast(VT: LHS.getValueType(), V: SToVLHS); |
15529 | LHS = SToVLHS; |
15530 | } |
15531 | if (SToVRHS) { |
15532 | if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64) |
15533 | return Res; |
15534 | RHSMinIdx = NumEltsOut; |
15535 | RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx; |
15536 | SToVRHS = getSToVPermuted(OrigSToV: SToVRHS, DAG, Subtarget); |
15537 | if (SToVRHS.getValueType() != RHS.getValueType()) |
15538 | SToVRHS = DAG.getBitcast(VT: RHS.getValueType(), V: SToVRHS); |
15539 | RHS = SToVRHS; |
15540 | } |
15541 | |
15542 | // Fix up the shuffle mask to reflect where the desired element actually is. |
15543 | // The minimum and maximum indices that correspond to element zero for both |
15544 | // the LHS and RHS are computed and will control which shuffle mask entries |
15545 | // are to be changed. For example, if the RHS is permuted, any shuffle mask |
15546 | // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted. |
15547 | fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx, |
15548 | HalfVec, ValidLaneWidth, Subtarget); |
15549 | Res = DAG.getVectorShuffle(VT: SVN->getValueType(ResNo: 0), dl, N1: LHS, N2: RHS, Mask: ShuffV); |
15550 | |
15551 | // We may have simplified away the shuffle. We won't be able to do anything |
15552 | // further with it here. |
15553 | if (!isa<ShuffleVectorSDNode>(Val: Res)) |
15554 | return Res; |
15555 | Mask = cast<ShuffleVectorSDNode>(Val&: Res)->getMask(); |
15556 | } |
15557 | |
15558 | SDValue TheSplat = IsLittleEndian ? RHS : LHS; |
15559 | // The common case after we commuted the shuffle is that the RHS is a splat |
15560 | // and we have elements coming in from the splat at indices that are not |
15561 | // conducive to using a merge. |
15562 | // Example: |
15563 | // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero> |
15564 | if (!isSplatBV(Op: TheSplat)) |
15565 | return Res; |
15566 | |
15567 | // We are looking for a mask such that all even elements are from |
15568 | // one vector and all odd elements from the other. |
15569 | if (!isAlternatingShuffMask(Mask, NumElts)) |
15570 | return Res; |
15571 | |
15572 | // Adjust the mask so we are pulling in the same index from the splat |
15573 | // as the index from the interesting vector in consecutive elements. |
15574 | if (IsLittleEndian) { |
15575 | // Example (even elements from first vector): |
15576 | // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero> |
15577 | if (Mask[0] < NumElts) |
15578 | for (int i = 1, e = Mask.size(); i < e; i += 2) { |
15579 | if (ShuffV[i] < 0) |
15580 | continue; |
15581 | // If element from non-splat is undef, pick first element from splat. |
15582 | ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts; |
15583 | } |
15584 | // Example (odd elements from first vector): |
15585 | // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero> |
15586 | else |
15587 | for (int i = 0, e = Mask.size(); i < e; i += 2) { |
15588 | if (ShuffV[i] < 0) |
15589 | continue; |
15590 | // If element from non-splat is undef, pick first element from splat. |
15591 | ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts; |
15592 | } |
15593 | } else { |
15594 | // Example (even elements from first vector): |
15595 | // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1 |
15596 | if (Mask[0] < NumElts) |
15597 | for (int i = 0, e = Mask.size(); i < e; i += 2) { |
15598 | if (ShuffV[i] < 0) |
15599 | continue; |
15600 | // If element from non-splat is undef, pick first element from splat. |
15601 | ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0; |
15602 | } |
15603 | // Example (odd elements from first vector): |
15604 | // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1 |
15605 | else |
15606 | for (int i = 1, e = Mask.size(); i < e; i += 2) { |
15607 | if (ShuffV[i] < 0) |
15608 | continue; |
15609 | // If element from non-splat is undef, pick first element from splat. |
15610 | ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0; |
15611 | } |
15612 | } |
15613 | |
15614 | // If the RHS has undefs, we need to remove them since we may have created |
15615 | // a shuffle that adds those instead of the splat value. |
15616 | SDValue SplatVal = |
15617 | cast<BuildVectorSDNode>(Val: TheSplat.getNode())->getSplatValue(); |
15618 | TheSplat = DAG.getSplatBuildVector(VT: TheSplat.getValueType(), DL: dl, Op: SplatVal); |
15619 | |
15620 | if (IsLittleEndian) |
15621 | RHS = TheSplat; |
15622 | else |
15623 | LHS = TheSplat; |
15624 | return DAG.getVectorShuffle(VT: SVN->getValueType(ResNo: 0), dl, N1: LHS, N2: RHS, Mask: ShuffV); |
15625 | } |
15626 | |
15627 | SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN, |
15628 | LSBaseSDNode *LSBase, |
15629 | DAGCombinerInfo &DCI) const { |
  assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
         "Not a reverse memop pattern!");
15632 | |
15633 | auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool { |
15634 | auto Mask = SVN->getMask(); |
15635 | int i = 0; |
15636 | auto I = Mask.rbegin(); |
15637 | auto E = Mask.rend(); |
15638 | |
15639 | for (; I != E; ++I) { |
15640 | if (*I != i) |
15641 | return false; |
15642 | i++; |
15643 | } |
15644 | return true; |
15645 | }; |
15646 | |
15647 | SelectionDAG &DAG = DCI.DAG; |
15648 | EVT VT = SVN->getValueType(ResNo: 0); |
15649 | |
15650 | if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX()) |
15651 | return SDValue(); |
15652 | |
  // Before P9, we have the PPCVSXSwapRemoval pass to hack the element order.
  // See the comment in PPCVSXSwapRemoval.cpp.
  // This transformation conflicts with that pass, so we don't do it here.
15656 | if (!Subtarget.hasP9Vector()) |
15657 | return SDValue(); |
15658 | |
  if (!IsElementReverse(SVN))
15660 | return SDValue(); |
15661 | |
15662 | if (LSBase->getOpcode() == ISD::LOAD) { |
    // If result 0 of the load has any user other than the shufflevector
    // instruction, it is not profitable to replace the shufflevector with a
    // reverse load.
15666 | for (SDNode::use_iterator UI = LSBase->use_begin(), UE = LSBase->use_end(); |
15667 | UI != UE; ++UI) |
15668 | if (UI.getUse().getResNo() == 0 && UI->getOpcode() != ISD::VECTOR_SHUFFLE) |
15669 | return SDValue(); |
15670 | |
15671 | SDLoc dl(LSBase); |
15672 | SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()}; |
15673 | return DAG.getMemIntrinsicNode( |
15674 | PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps, |
15675 | LSBase->getMemoryVT(), LSBase->getMemOperand()); |
15676 | } |
15677 | |
15678 | if (LSBase->getOpcode() == ISD::STORE) { |
15679 | // If there are other uses of the shuffle, the swap cannot be avoided. |
15680 | // Forcing the use of an X-Form (since swapped stores only have |
15681 | // X-Forms) without removing the swap is unprofitable. |
15682 | if (!SVN->hasOneUse()) |
15683 | return SDValue(); |
15684 | |
15685 | SDLoc dl(LSBase); |
15686 | SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(Num: 0), |
15687 | LSBase->getBasePtr()}; |
15688 | return DAG.getMemIntrinsicNode( |
15689 | PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps, |
15690 | LSBase->getMemoryVT(), LSBase->getMemOperand()); |
15691 | } |
15692 | |
15693 | llvm_unreachable("Expected a load or store node here" ); |
15694 | } |
15695 | |
15696 | static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) { |
15697 | unsigned IntrinsicID = Intrin.getConstantOperandVal(i: 1); |
15698 | if (IntrinsicID == Intrinsic::ppc_stdcx) |
15699 | StoreWidth = 8; |
15700 | else if (IntrinsicID == Intrinsic::ppc_stwcx) |
15701 | StoreWidth = 4; |
15702 | else if (IntrinsicID == Intrinsic::ppc_sthcx) |
15703 | StoreWidth = 2; |
15704 | else if (IntrinsicID == Intrinsic::ppc_stbcx) |
15705 | StoreWidth = 1; |
15706 | else |
15707 | return false; |
15708 | return true; |
15709 | } |
15710 | |
15711 | SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, |
15712 | DAGCombinerInfo &DCI) const { |
15713 | SelectionDAG &DAG = DCI.DAG; |
15714 | SDLoc dl(N); |
15715 | switch (N->getOpcode()) { |
15716 | default: break; |
15717 | case ISD::ADD: |
15718 | return combineADD(N, DCI); |
15719 | case ISD::AND: { |
15720 | // We don't want (and (zext (shift...)), C) if C fits in the width of the |
15721 | // original input as that will prevent us from selecting optimal rotates. |
15722 | // This only matters if the input to the extend is i32 widened to i64. |
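    // For example, (i64 (and (zext (srl i32:$x, 3)), 255)) is rewritten
    // below as (zext (and (srl i32:$x, 3), 255)) so the mask can fold into
    // a 32-bit rotate-and-mask instruction.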
15723 | SDValue Op1 = N->getOperand(Num: 0); |
15724 | SDValue Op2 = N->getOperand(Num: 1); |
15725 | if ((Op1.getOpcode() != ISD::ZERO_EXTEND && |
15726 | Op1.getOpcode() != ISD::ANY_EXTEND) || |
15727 | !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 || |
15728 | Op1.getOperand(0).getValueType() != MVT::i32) |
15729 | break; |
15730 | SDValue NarrowOp = Op1.getOperand(i: 0); |
15731 | if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL && |
15732 | NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR) |
15733 | break; |
15734 | |
15735 | uint64_t Imm = Op2->getAsZExtVal(); |
15736 | // Make sure that the constant is narrow enough to fit in the narrow type. |
15737 | if (!isUInt<32>(x: Imm)) |
15738 | break; |
15739 | SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32); |
15740 | SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp); |
15741 | return DAG.getZExtOrTrunc(Op: NarrowAnd, DL: dl, VT: N->getValueType(ResNo: 0)); |
15742 | } |
15743 | case ISD::SHL: |
15744 | return combineSHL(N, DCI); |
15745 | case ISD::SRA: |
15746 | return combineSRA(N, DCI); |
15747 | case ISD::SRL: |
15748 | return combineSRL(N, DCI); |
15749 | case ISD::MUL: |
15750 | return combineMUL(N, DCI); |
15751 | case ISD::FMA: |
15752 | case PPCISD::FNMSUB: |
15753 | return combineFMALike(N, DCI); |
15754 | case PPCISD::SHL: |
15755 | if (isNullConstant(V: N->getOperand(Num: 0))) // 0 << V -> 0. |
15756 | return N->getOperand(Num: 0); |
15757 | break; |
15758 | case PPCISD::SRL: |
15759 | if (isNullConstant(V: N->getOperand(Num: 0))) // 0 >>u V -> 0. |
15760 | return N->getOperand(Num: 0); |
15761 | break; |
15762 | case PPCISD::SRA: |
15763 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 0))) { |
15764 | if (C->isZero() || // 0 >>s V -> 0. |
15765 | C->isAllOnes()) // -1 >>s V -> -1. |
15766 | return N->getOperand(Num: 0); |
15767 | } |
15768 | break; |
15769 | case ISD::SIGN_EXTEND: |
15770 | case ISD::ZERO_EXTEND: |
15771 | case ISD::ANY_EXTEND: |
15772 | return DAGCombineExtBoolTrunc(N, DCI); |
15773 | case ISD::TRUNCATE: |
15774 | return combineTRUNCATE(N, DCI); |
15775 | case ISD::SETCC: |
15776 | if (SDValue CSCC = combineSetCC(N, DCI)) |
15777 | return CSCC; |
15778 | [[fallthrough]]; |
15779 | case ISD::SELECT_CC: |
15780 | return DAGCombineTruncBoolExt(N, DCI); |
15781 | case ISD::SINT_TO_FP: |
15782 | case ISD::UINT_TO_FP: |
15783 | return combineFPToIntToFP(N, DCI); |
15784 | case ISD::VECTOR_SHUFFLE: |
15785 | if (ISD::isNormalLoad(N: N->getOperand(Num: 0).getNode())) { |
15786 | LSBaseSDNode* LSBase = cast<LSBaseSDNode>(Val: N->getOperand(Num: 0)); |
15787 | return combineVReverseMemOP(SVN: cast<ShuffleVectorSDNode>(Val: N), LSBase, DCI); |
15788 | } |
15789 | return combineVectorShuffle(SVN: cast<ShuffleVectorSDNode>(Val: N), DAG&: DCI.DAG); |
15790 | case ISD::STORE: { |
15791 | |
15792 | EVT Op1VT = N->getOperand(Num: 1).getValueType(); |
15793 | unsigned Opcode = N->getOperand(Num: 1).getOpcode(); |
15794 | |
15795 | if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT || |
15796 | Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) { |
15797 | SDValue Val = combineStoreFPToInt(N, DCI); |
15798 | if (Val) |
15799 | return Val; |
15800 | } |
15801 | |
15802 | if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) { |
15803 | ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Val: N->getOperand(Num: 1)); |
      SDValue Val = combineVReverseMemOP(SVN, LSBase: cast<LSBaseSDNode>(Val: N), DCI);
15805 | if (Val) |
15806 | return Val; |
15807 | } |
15808 | |
15809 | // Turn STORE (BSWAP) -> sthbrx/stwbrx. |
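    // For example, (store (bswap i32:$val), $ptr) becomes
    // (STBRX $val, $ptr, i32), which selects to stwbrx.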
15810 | if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP && |
15811 | N->getOperand(1).getNode()->hasOneUse() && |
15812 | (Op1VT == MVT::i32 || Op1VT == MVT::i16 || |
15813 | (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) { |
15814 | |
      // STBRX can only handle simple types, and it makes no sense to store
      // fewer than two bytes in byte-reversed order.
15817 | EVT mVT = cast<StoreSDNode>(Val: N)->getMemoryVT(); |
15818 | if (mVT.isExtended() || mVT.getSizeInBits() < 16) |
15819 | break; |
15820 | |
15821 | SDValue BSwapOp = N->getOperand(Num: 1).getOperand(i: 0); |
15822 | // Do an any-extend to 32-bits if this is a half-word input. |
15823 | if (BSwapOp.getValueType() == MVT::i16) |
15824 | BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp); |
15825 | |
      // If the type of the BSWAP operand is wider than the stored memory
      // width, it needs to be shifted right before STBRX.
15828 | if (Op1VT.bitsGT(VT: mVT)) { |
15829 | int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits(); |
15830 | BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp, |
15831 | DAG.getConstant(Shift, dl, MVT::i32)); |
15832 | // Need to truncate if this is a bswap of i64 stored as i32/i16. |
15833 | if (Op1VT == MVT::i64) |
15834 | BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp); |
15835 | } |
15836 | |
15837 | SDValue Ops[] = { |
15838 | N->getOperand(Num: 0), BSwapOp, N->getOperand(Num: 2), DAG.getValueType(mVT) |
15839 | }; |
15840 | return |
15841 | DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other), |
15842 | Ops, cast<StoreSDNode>(N)->getMemoryVT(), |
15843 | cast<StoreSDNode>(N)->getMemOperand()); |
15844 | } |
15845 | |
15846 | // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0> |
    // This can increase the chance of CSEing the constant construction.
15848 | if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() && |
15849 | isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) { |
      // Need to sign-extend to 64 bits to handle negative values.
15851 | EVT MemVT = cast<StoreSDNode>(Val: N)->getMemoryVT(); |
15852 | uint64_t Val64 = SignExtend64(X: N->getConstantOperandVal(Num: 1), |
15853 | B: MemVT.getSizeInBits()); |
15854 | SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64); |
15855 | |
15856 | // DAG.getTruncStore() can't be used here because it doesn't accept |
15857 | // the general (base + offset) addressing mode. |
15858 | // So we use UpdateNodeOperands and setTruncatingStore instead. |
15859 | DAG.UpdateNodeOperands(N, Op1: N->getOperand(Num: 0), Op2: Const64, Op3: N->getOperand(Num: 2), |
15860 | Op4: N->getOperand(Num: 3)); |
15861 | cast<StoreSDNode>(Val: N)->setTruncatingStore(true); |
15862 | return SDValue(N, 0); |
15863 | } |
15864 | |
  // For little endian, VSX stores require generating xxswapd/stxvd2x.
15866 | // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. |
15867 | if (Op1VT.isSimple()) { |
15868 | MVT StoreVT = Op1VT.getSimpleVT(); |
15869 | if (Subtarget.needsSwapsForVSXMemOps() && |
15870 | (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 || |
15871 | StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32)) |
15872 | return expandVSXStoreForLE(N, DCI); |
15873 | } |
15874 | break; |
15875 | } |
15876 | case ISD::LOAD: { |
15877 | LoadSDNode *LD = cast<LoadSDNode>(Val: N); |
15878 | EVT VT = LD->getValueType(ResNo: 0); |
15879 | |
15880 | // For little endian, VSX loads require generating lxvd2x/xxswapd. |
15881 | // Not needed on ISA 3.0 based CPUs since we have a non-permuting load. |
15882 | if (VT.isSimple()) { |
15883 | MVT LoadVT = VT.getSimpleVT(); |
15884 | if (Subtarget.needsSwapsForVSXMemOps() && |
15885 | (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 || |
15886 | LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32)) |
15887 | return expandVSXLoadForLE(N, DCI); |
15888 | } |
15889 | |
15890 | // We sometimes end up with a 64-bit integer load, from which we extract |
15891 | // two single-precision floating-point numbers. This happens with |
15892 | // std::complex<float>, and other similar structures, because of the way we |
15893 | // canonicalize structure copies. However, if we lack direct moves, |
15894 | // then the final bitcasts from the extracted integer values to the |
15895 | // floating-point numbers turn into store/load pairs. Even with direct moves, |
15896 | // just loading the two floating-point numbers is likely better. |
15897 | auto ReplaceTwoFloatLoad = [&]() { |
15898 | if (VT != MVT::i64) |
15899 | return false; |
15900 | |
15901 | if (LD->getExtensionType() != ISD::NON_EXTLOAD || |
15902 | LD->isVolatile()) |
15903 | return false; |
15904 | |
15905 | // We're looking for a sequence like this: |
15906 | // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64 |
15907 | // t16: i64 = srl t13, Constant:i32<32> |
15908 | // t17: i32 = truncate t16 |
15909 | // t18: f32 = bitcast t17 |
15910 | // t19: i32 = truncate t13 |
15911 | // t20: f32 = bitcast t19 |
15912 | |
15913 | if (!LD->hasNUsesOfValue(NUses: 2, Value: 0)) |
15914 | return false; |
15915 | |
15916 | auto UI = LD->use_begin(); |
15917 | while (UI.getUse().getResNo() != 0) ++UI; |
15918 | SDNode *Trunc = *UI++; |
15919 | while (UI.getUse().getResNo() != 0) ++UI; |
15920 | SDNode *RightShift = *UI; |
15921 | if (Trunc->getOpcode() != ISD::TRUNCATE) |
15922 | std::swap(a&: Trunc, b&: RightShift); |
15923 | |
15924 | if (Trunc->getOpcode() != ISD::TRUNCATE || |
15925 | Trunc->getValueType(0) != MVT::i32 || |
15926 | !Trunc->hasOneUse()) |
15927 | return false; |
15928 | if (RightShift->getOpcode() != ISD::SRL || |
15929 | !isa<ConstantSDNode>(Val: RightShift->getOperand(Num: 1)) || |
15930 | RightShift->getConstantOperandVal(Num: 1) != 32 || |
15931 | !RightShift->hasOneUse()) |
15932 | return false; |
15933 | |
15934 | SDNode *Trunc2 = *RightShift->use_begin(); |
15935 | if (Trunc2->getOpcode() != ISD::TRUNCATE || |
15936 | Trunc2->getValueType(0) != MVT::i32 || |
15937 | !Trunc2->hasOneUse()) |
15938 | return false; |
15939 | |
15940 | SDNode *Bitcast = *Trunc->use_begin(); |
15941 | SDNode *Bitcast2 = *Trunc2->use_begin(); |
15942 | |
15943 | if (Bitcast->getOpcode() != ISD::BITCAST || |
15944 | Bitcast->getValueType(0) != MVT::f32) |
15945 | return false; |
15946 | if (Bitcast2->getOpcode() != ISD::BITCAST || |
15947 | Bitcast2->getValueType(0) != MVT::f32) |
15948 | return false; |
15949 | |
15950 | if (Subtarget.isLittleEndian()) |
15951 | std::swap(a&: Bitcast, b&: Bitcast2); |
15952 | |
15953 | // Bitcast has the second float (in memory-layout order) and Bitcast2 |
15954 | // has the first one. |
15955 | |
15956 | SDValue BasePtr = LD->getBasePtr(); |
15957 | if (LD->isIndexed()) { |
        assert(LD->getAddressingMode() == ISD::PRE_INC &&
               "Non-pre-inc AM on PPC?");
15960 | BasePtr = |
15961 | DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr, |
15962 | N2: LD->getOffset()); |
15963 | } |
15964 | |
15965 | auto MMOFlags = |
15966 | LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile; |
15967 | SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr, |
15968 | LD->getPointerInfo(), LD->getAlign(), |
15969 | MMOFlags, LD->getAAInfo()); |
15970 | SDValue AddPtr = |
15971 | DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), |
15972 | N1: BasePtr, N2: DAG.getIntPtrConstant(Val: 4, DL: dl)); |
15973 | SDValue FloatLoad2 = DAG.getLoad( |
15974 | MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr, |
15975 | LD->getPointerInfo().getWithOffset(4), |
15976 | commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo()); |
15977 | |
15978 | if (LD->isIndexed()) { |
15979 | // Note that DAGCombine should re-form any pre-increment load(s) from |
15980 | // what is produced here if that makes sense. |
15981 | DAG.ReplaceAllUsesOfValueWith(From: SDValue(LD, 1), To: BasePtr); |
15982 | } |
15983 | |
15984 | DCI.CombineTo(N: Bitcast2, Res: FloatLoad); |
15985 | DCI.CombineTo(N: Bitcast, Res: FloatLoad2); |
15986 | |
15987 | DAG.ReplaceAllUsesOfValueWith(From: SDValue(LD, LD->isIndexed() ? 2 : 1), |
15988 | To: SDValue(FloatLoad2.getNode(), 1)); |
15989 | return true; |
15990 | }; |
15991 | |
15992 | if (ReplaceTwoFloatLoad()) |
15993 | return SDValue(N, 0); |
15994 | |
15995 | EVT MemVT = LD->getMemoryVT(); |
15996 | Type *Ty = MemVT.getTypeForEVT(Context&: *DAG.getContext()); |
15997 | Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty); |
15998 | if (LD->isUnindexed() && VT.isVector() && |
15999 | ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) && |
16000 | // P8 and later hardware should just use LOAD. |
16001 | !Subtarget.hasP8Vector() && |
16002 | (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 || |
16003 | VT == MVT::v4f32))) && |
16004 | LD->getAlign() < ABIAlignment) { |
16005 | // This is a type-legal unaligned Altivec load. |
16006 | SDValue Chain = LD->getChain(); |
16007 | SDValue Ptr = LD->getBasePtr(); |
16008 | bool isLittleEndian = Subtarget.isLittleEndian(); |
16009 | |
16010 | // This implements the loading of unaligned vectors as described in |
16011 | // the venerable Apple Velocity Engine overview. Specifically: |
16012 | // https://developer.apple.com/hardwaredrivers/ve/alignment.html |
16013 | // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html |
16014 | // |
16015 | // The general idea is to expand a sequence of one or more unaligned |
16016 | // loads into an alignment-based permutation-control instruction (lvsl |
16017 | // or lvsr), a series of regular vector loads (which always truncate |
16018 | // their input address to an aligned address), and a series of |
16019 | // permutations. The results of these permutations are the requested |
16020 | // loaded values. The trick is that the last "extra" load is not taken |
16021 | // from the address you might suspect (sizeof(vector) bytes after the |
16022 | // last requested load), but rather sizeof(vector) - 1 bytes after the |
16023 | // last requested vector. The point of this is to avoid a page fault if |
16024 | // the base address happened to be aligned. This works because if the |
16025 | // base address is aligned, then adding less than a full vector length |
16026 | // will cause the last vector in the sequence to be (re)loaded. |
16027 | // Otherwise, the next vector will be fetched as you might suspect was |
16028 | // necessary. |
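      // Conceptually, an unaligned 16-byte load from Ptr expands to:
      //   V1   = lvx(Ptr)      // loads from Ptr rounded down to 16 bytes
      //   V2   = lvx(Ptr + 15) // next vector, or the same one if aligned
      //   Cntl = lvsl(Ptr)     // lvsr with swapped inputs on little endian
      //   Result = vperm(V1, V2, Cntl)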
16029 | |
16030 | // We might be able to reuse the permutation generation from |
16031 | // a different base address offset from this one by an aligned amount. |
16032 | // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this |
16033 | // optimization later. |
16034 | Intrinsic::ID Intr, IntrLD, IntrPerm; |
16035 | MVT PermCntlTy, PermTy, LDTy; |
16036 | Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr |
16037 | : Intrinsic::ppc_altivec_lvsl; |
16038 | IntrLD = Intrinsic::ppc_altivec_lvx; |
16039 | IntrPerm = Intrinsic::ppc_altivec_vperm; |
16040 | PermCntlTy = MVT::v16i8; |
16041 | PermTy = MVT::v4i32; |
16042 | LDTy = MVT::v4i32; |
16043 | |
16044 | SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy); |
16045 | |
16046 | // Create the new MMO for the new base load. It is like the original MMO, |
16047 | // but represents an area in memory almost twice the vector size centered |
16048 | // on the original address. If the address is unaligned, we might start |
16049 | // reading up to (sizeof(vector)-1) bytes below the address of the |
16050 | // original unaligned load. |
16051 | MachineFunction &MF = DAG.getMachineFunction(); |
16052 | MachineMemOperand *BaseMMO = |
16053 | MF.getMachineMemOperand(MMO: LD->getMemOperand(), |
16054 | Offset: -(int64_t)MemVT.getStoreSize()+1, |
16055 | Size: 2*MemVT.getStoreSize()-1); |
16056 | |
16057 | // Create the new base load. |
16058 | SDValue LDXIntID = |
16059 | DAG.getTargetConstant(Val: IntrLD, DL: dl, VT: getPointerTy(DL: MF.getDataLayout())); |
16060 | SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr }; |
16061 | SDValue BaseLoad = |
16062 | DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, |
16063 | DAG.getVTList(PermTy, MVT::Other), |
16064 | BaseLoadOps, LDTy, BaseMMO); |
16065 | |
16066 | // Note that the value of IncOffset (which is provided to the next |
16067 | // load's pointer info offset value, and thus used to calculate the |
16068 | // alignment), and the value of IncValue (which is actually used to |
16069 | // increment the pointer value) are different! This is because we |
16070 | // require the next load to appear to be aligned, even though it |
16071 | // is actually offset from the base pointer by a lesser amount. |
16072 | int IncOffset = VT.getSizeInBits() / 8; |
16073 | int IncValue = IncOffset; |
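    // For example, for a v4i32 load IncOffset is 16, while IncValue may be
    // reduced to 15 below (the sizeof(vector) - 1 trick described above).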
16074 | |
16075 | // Walk (both up and down) the chain looking for another load at the real |
16076 | // (aligned) offset (the alignment of the other load does not matter in |
16077 | // this case). If found, then do not use the offset reduction trick, as |
16078 | // that will prevent the loads from being later combined (as they would |
16079 | // otherwise be duplicates). |
16080 | if (!findConsecutiveLoad(LD, DAG)) |
16081 | --IncValue; |
16082 | |
16083 | SDValue Increment = |
16084 | DAG.getConstant(Val: IncValue, DL: dl, VT: getPointerTy(DL: MF.getDataLayout())); |
16085 | Ptr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: Ptr.getValueType(), N1: Ptr, N2: Increment); |
16086 | |
      MachineMemOperand *ExtraMMO =
        MF.getMachineMemOperand(MMO: LD->getMemOperand(),
                                Offset: 1, Size: 2*MemVT.getStoreSize()-1);
      SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
      SDValue ExtraLoad =
16092 | DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, |
16093 | DAG.getVTList(PermTy, MVT::Other), |
16094 | ExtraLoadOps, LDTy, ExtraMMO); |
16095 | |
16096 | SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, |
16097 | BaseLoad.getValue(1), ExtraLoad.getValue(1)); |
16098 | |
16099 | // Because vperm has a big-endian bias, we must reverse the order |
16100 | // of the input vectors and complement the permute control vector |
16101 | // when generating little endian code. We have already handled the |
16102 | // latter by using lvsr instead of lvsl, so just reverse BaseLoad |
16103 | // and ExtraLoad here. |
16104 | SDValue Perm; |
16105 | if (isLittleEndian) |
16106 | Perm = BuildIntrinsicOp(IntrPerm, |
16107 | ExtraLoad, BaseLoad, PermCntl, DAG, dl); |
16108 | else |
16109 | Perm = BuildIntrinsicOp(IntrPerm, |
16110 | BaseLoad, ExtraLoad, PermCntl, DAG, dl); |
16111 | |
16112 | if (VT != PermTy) |
16113 | Perm = Subtarget.hasAltivec() |
16114 | ? DAG.getNode(ISD::BITCAST, dl, VT, Perm) |
16115 | : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, |
16116 | DAG.getTargetConstant(1, dl, MVT::i64)); |
16117 | // second argument is 1 because this rounding |
16118 | // is always exact. |
16119 | |
16120 | // The output of the permutation is our loaded result, the TokenFactor is |
16121 | // our new chain. |
16122 | DCI.CombineTo(N, Res0: Perm, Res1: TF); |
16123 | return SDValue(N, 0); |
16124 | } |
16125 | } |
16126 | break; |
16127 | case ISD::INTRINSIC_WO_CHAIN: { |
16128 | bool isLittleEndian = Subtarget.isLittleEndian(); |
16129 | unsigned IID = N->getConstantOperandVal(Num: 0); |
16130 | Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr |
16131 | : Intrinsic::ppc_altivec_lvsl); |
16132 | if (IID == Intr && N->getOperand(Num: 1)->getOpcode() == ISD::ADD) { |
16133 | SDValue Add = N->getOperand(Num: 1); |
16134 | |
16135 | int Bits = 4 /* 16 byte alignment */; |
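    // lvsl/lvsr depend only on the low four bits of the address, so adding a
    // 16-byte-aligned amount leaves the permute control unchanged.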
16136 | |
16137 | if (DAG.MaskedValueIsZero(Op: Add->getOperand(Num: 1), |
16138 | Mask: APInt::getAllOnes(numBits: Bits /* alignment */) |
16139 | .zext(width: Add.getScalarValueSizeInBits()))) { |
16140 | SDNode *BasePtr = Add->getOperand(Num: 0).getNode(); |
16141 | for (SDNode *U : BasePtr->uses()) { |
16142 | if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN && |
16143 | U->getConstantOperandVal(Num: 0) == IID) { |
16144 | // We've found another LVSL/LVSR, and this address is an aligned |
16145 | // multiple of that one. The results will be the same, so use the |
16146 | // one we've just found instead. |
16147 | |
16148 | return SDValue(U, 0); |
16149 | } |
16150 | } |
16151 | } |
16152 | |
16153 | if (isa<ConstantSDNode>(Val: Add->getOperand(Num: 1))) { |
16154 | SDNode *BasePtr = Add->getOperand(Num: 0).getNode(); |
16155 | for (SDNode *U : BasePtr->uses()) { |
16156 | if (U->getOpcode() == ISD::ADD && |
16157 | isa<ConstantSDNode>(Val: U->getOperand(Num: 1)) && |
16158 | (Add->getConstantOperandVal(Num: 1) - U->getConstantOperandVal(Num: 1)) % |
16159 | (1ULL << Bits) == |
16160 | 0) { |
16161 | SDNode *OtherAdd = U; |
16162 | for (SDNode *V : OtherAdd->uses()) { |
16163 | if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN && |
16164 | V->getConstantOperandVal(Num: 0) == IID) { |
16165 | return SDValue(V, 0); |
16166 | } |
16167 | } |
16168 | } |
16169 | } |
16170 | } |
16171 | } |
16172 | |
    // Combine vmaxsw/h/b(a, a's negation) to abs(a).
    // This exposes the vabsduw/h/b opportunity to downstream combines.
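    // max(a, 0 - a) computed lanewise is |a| for every element (including
    // INT_MIN, which wraps to itself, matching ISD::ABS semantics).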
16175 | if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() && |
16176 | (IID == Intrinsic::ppc_altivec_vmaxsw || |
16177 | IID == Intrinsic::ppc_altivec_vmaxsh || |
16178 | IID == Intrinsic::ppc_altivec_vmaxsb)) { |
16179 | SDValue V1 = N->getOperand(Num: 1); |
16180 | SDValue V2 = N->getOperand(Num: 2); |
16181 | if ((V1.getSimpleValueType() == MVT::v4i32 || |
16182 | V1.getSimpleValueType() == MVT::v8i16 || |
16183 | V1.getSimpleValueType() == MVT::v16i8) && |
16184 | V1.getSimpleValueType() == V2.getSimpleValueType()) { |
16185 | // (0-a, a) |
16186 | if (V1.getOpcode() == ISD::SUB && |
16187 | ISD::isBuildVectorAllZeros(N: V1.getOperand(i: 0).getNode()) && |
16188 | V1.getOperand(i: 1) == V2) { |
16189 | return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT: V2.getValueType(), Operand: V2); |
16190 | } |
16191 | // (a, 0-a) |
16192 | if (V2.getOpcode() == ISD::SUB && |
16193 | ISD::isBuildVectorAllZeros(N: V2.getOperand(i: 0).getNode()) && |
16194 | V2.getOperand(i: 1) == V1) { |
16195 | return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT: V1.getValueType(), Operand: V1); |
16196 | } |
16197 | // (x-y, y-x) |
16198 | if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB && |
16199 | V1.getOperand(i: 0) == V2.getOperand(i: 1) && |
16200 | V1.getOperand(i: 1) == V2.getOperand(i: 0)) { |
16201 | return DAG.getNode(Opcode: ISD::ABS, DL: dl, VT: V1.getValueType(), Operand: V1); |
16202 | } |
16203 | } |
16204 | } |
16205 | } |
16206 | |
16207 | break; |
16208 | case ISD::INTRINSIC_W_CHAIN: |
16209 | switch (N->getConstantOperandVal(Num: 1)) { |
16210 | default: |
16211 | break; |
16212 | case Intrinsic::ppc_altivec_vsum4sbs: |
16213 | case Intrinsic::ppc_altivec_vsum4shs: |
16214 | case Intrinsic::ppc_altivec_vsum4ubs: { |
16215 | // These sum-across intrinsics only have a chain due to the side effect |
16216 | // that they may set the SAT bit. If we know the SAT bit will not be set |
16217 | // for some inputs, we can replace any uses of their chain with the |
16218 | // input chain. |
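      // For example, with a zero accumulator vsum4sbs cannot saturate: the
      // four i8 partial sums lie within [-512, 508], far inside i32 range.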
16219 | if (BuildVectorSDNode *BVN = |
16220 | dyn_cast<BuildVectorSDNode>(Val: N->getOperand(Num: 3))) { |
16221 | APInt APSplatBits, APSplatUndef; |
16222 | unsigned SplatBitSize; |
16223 | bool HasAnyUndefs; |
16224 | bool BVNIsConstantSplat = BVN->isConstantSplat( |
16225 | SplatValue&: APSplatBits, SplatUndef&: APSplatUndef, SplatBitSize, HasAnyUndefs, MinSplatBits: 0, |
16226 | isBigEndian: !Subtarget.isLittleEndian()); |
16227 | // If the constant splat vector is 0, the SAT bit will not be set. |
16228 | if (BVNIsConstantSplat && APSplatBits == 0) |
16229 | DAG.ReplaceAllUsesOfValueWith(From: SDValue(N, 1), To: N->getOperand(Num: 0)); |
16230 | } |
16231 | return SDValue(); |
16232 | } |
16233 | case Intrinsic::ppc_vsx_lxvw4x: |
16234 | case Intrinsic::ppc_vsx_lxvd2x: |
16235 | // For little endian, VSX loads require generating lxvd2x/xxswapd. |
16236 | // Not needed on ISA 3.0 based CPUs since we have a non-permuting load. |
16237 | if (Subtarget.needsSwapsForVSXMemOps()) |
16238 | return expandVSXLoadForLE(N, DCI); |
16239 | break; |
16240 | } |
16241 | break; |
16242 | case ISD::INTRINSIC_VOID: |
16243 | // For little endian, VSX stores require generating xxswapd/stxvd2x. |
16244 | // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. |
16245 | if (Subtarget.needsSwapsForVSXMemOps()) { |
16246 | switch (N->getConstantOperandVal(Num: 1)) { |
16247 | default: |
16248 | break; |
16249 | case Intrinsic::ppc_vsx_stxvw4x: |
16250 | case Intrinsic::ppc_vsx_stxvd2x: |
16251 | return expandVSXStoreForLE(N, DCI); |
16252 | } |
16253 | } |
16254 | break; |
16255 | case ISD::BSWAP: { |
16256 | // Turn BSWAP (LOAD) -> lhbrx/lwbrx. |
16257 | // For subtargets without LDBRX, we can still do better than the default |
16258 | // expansion even for 64-bit BSWAP (LOAD). |
16259 | bool Is64BitBswapOn64BitTgt = |
16260 | Subtarget.isPPC64() && N->getValueType(0) == MVT::i64; |
16261 | bool IsSingleUseNormalLd = ISD::isNormalLoad(N: N->getOperand(Num: 0).getNode()) && |
16262 | N->getOperand(Num: 0).hasOneUse(); |
16263 | if (IsSingleUseNormalLd && |
16264 | (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 || |
16265 | (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) { |
16266 | SDValue Load = N->getOperand(Num: 0); |
16267 | LoadSDNode *LD = cast<LoadSDNode>(Val&: Load); |
16268 | // Create the byte-swapping load. |
16269 | SDValue Ops[] = { |
16270 | LD->getChain(), // Chain |
16271 | LD->getBasePtr(), // Ptr |
16272 | DAG.getValueType(N->getValueType(ResNo: 0)) // VT |
16273 | }; |
16274 | SDValue BSLoad = |
16275 | DAG.getMemIntrinsicNode(PPCISD::LBRX, dl, |
16276 | DAG.getVTList(N->getValueType(0) == MVT::i64 ? |
16277 | MVT::i64 : MVT::i32, MVT::Other), |
16278 | Ops, LD->getMemoryVT(), LD->getMemOperand()); |
16279 | |
16280 | // If this is an i16 load, insert the truncate. |
16281 | SDValue ResVal = BSLoad; |
16282 | if (N->getValueType(0) == MVT::i16) |
16283 | ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad); |
16284 | |
16285 | // First, combine the bswap away. This makes the value produced by the |
16286 | // load dead. |
16287 | DCI.CombineTo(N, Res: ResVal); |
16288 | |
      // Next, combine the load away; we give it a bogus result value but a
      // real chain result. The result value is dead because the bswap is dead.
16291 | DCI.CombineTo(N: Load.getNode(), Res0: ResVal, Res1: BSLoad.getValue(R: 1)); |
16292 | |
16293 | // Return N so it doesn't get rechecked! |
16294 | return SDValue(N, 0); |
16295 | } |
16296 | // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only |
16297 | // before legalization so that the BUILD_PAIR is handled correctly. |
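    // i.e. bswap(i64 load [P]) becomes two byte-reversed i32 halves,
    // bswap(load [P]) and bswap(load [P+4]), joined by a BUILD_PAIR whose
    // operand order depends on endianness (see below).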
16298 | if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt || |
16299 | !IsSingleUseNormalLd) |
16300 | return SDValue(); |
16301 | LoadSDNode *LD = cast<LoadSDNode>(Val: N->getOperand(Num: 0)); |
16302 | |
16303 | // Can't split volatile or atomic loads. |
16304 | if (!LD->isSimple()) |
16305 | return SDValue(); |
16306 | SDValue BasePtr = LD->getBasePtr(); |
16307 | SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, |
16308 | LD->getPointerInfo(), LD->getAlign()); |
16309 | Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo); |
16310 | BasePtr = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: BasePtr.getValueType(), N1: BasePtr, |
16311 | N2: DAG.getIntPtrConstant(Val: 4, DL: dl)); |
16312 | MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand( |
16313 | MMO: LD->getMemOperand(), Offset: 4, Size: 4); |
16314 | SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO); |
16315 | Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi); |
16316 | SDValue Res; |
16317 | if (Subtarget.isLittleEndian()) |
16318 | Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo); |
16319 | else |
16320 | Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); |
16321 | SDValue TF = |
16322 | DAG.getNode(ISD::TokenFactor, dl, MVT::Other, |
16323 | Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1)); |
16324 | DAG.ReplaceAllUsesOfValueWith(From: SDValue(LD, 1), To: TF); |
16325 | return Res; |
16326 | } |
16327 | case PPCISD::VCMP: |
16328 | // If a VCMP_rec node already exists with exactly the same operands as this |
16329 | // node, use its result instead of this node (VCMP_rec computes both a CR6 |
16330 | // and a normal output). |
16331 | // |
16332 | if (!N->getOperand(Num: 0).hasOneUse() && |
16333 | !N->getOperand(Num: 1).hasOneUse() && |
16334 | !N->getOperand(Num: 2).hasOneUse()) { |
16335 | |
16336 | // Scan all of the users of the LHS, looking for VCMP_rec's that match. |
16337 | SDNode *VCMPrecNode = nullptr; |
16338 | |
16339 | SDNode *LHSN = N->getOperand(Num: 0).getNode(); |
16340 | for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); |
16341 | UI != E; ++UI) |
16342 | if (UI->getOpcode() == PPCISD::VCMP_rec && |
16343 | UI->getOperand(Num: 1) == N->getOperand(Num: 1) && |
16344 | UI->getOperand(Num: 2) == N->getOperand(Num: 2) && |
16345 | UI->getOperand(Num: 0) == N->getOperand(Num: 0)) { |
16346 | VCMPrecNode = *UI; |
16347 | break; |
16348 | } |
16349 | |
16350 | // If there is no VCMP_rec node, or if the flag value has a single use, |
16351 | // don't transform this. |
16352 | if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(NUses: 0, Value: 1)) |
16353 | break; |
16354 | |
16355 | // Look at the (necessarily single) use of the flag value. If it has a |
16356 | // chain, this transformation is more complex. Note that multiple things |
16357 | // could use the value result, which we should ignore. |
16358 | SDNode *FlagUser = nullptr; |
16359 | for (SDNode::use_iterator UI = VCMPrecNode->use_begin(); |
16360 | FlagUser == nullptr; ++UI) { |
16361 | assert(UI != VCMPrecNode->use_end() && "Didn't find user!" ); |
16362 | SDNode *User = *UI; |
16363 | for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { |
16364 | if (User->getOperand(Num: i) == SDValue(VCMPrecNode, 1)) { |
16365 | FlagUser = User; |
16366 | break; |
16367 | } |
16368 | } |
16369 | } |
16370 | |
16371 | // If the user is a MFOCRF instruction, we know this is safe. |
16372 | // Otherwise we give up for right now. |
16373 | if (FlagUser->getOpcode() == PPCISD::MFOCRF) |
16374 | return SDValue(VCMPrecNode, 0); |
16375 | } |
16376 | break; |
16377 | case ISD::BR_CC: { |
16378 | // If this is a branch on an altivec predicate comparison, lower this so |
16379 | // that we don't have to do a MFOCRF: instead, branch directly on CR6. This |
16380 | // lowering is done pre-legalize, because the legalizer lowers the predicate |
16381 | // compare down to code that is difficult to reassemble. |
16382 | // This code also handles branches that depend on the result of a store |
16383 | // conditional. |
16384 | ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 1))->get(); |
16385 | SDValue LHS = N->getOperand(Num: 2), RHS = N->getOperand(Num: 3); |
16386 | |
16387 | int CompareOpc; |
16388 | bool isDot; |
16389 | |
16390 | if (!isa<ConstantSDNode>(Val: RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE)) |
16391 | break; |
16392 | |
16393 | // Since we are doing this pre-legalize, the RHS can be a constant of |
16394 | // arbitrary bitwidth which may cause issues when trying to get the value |
16395 | // from the underlying APInt. |
16396 | auto RHSAPInt = RHS->getAsAPIntVal(); |
16397 | if (!RHSAPInt.isIntN(N: 64)) |
16398 | break; |
16399 | |
16400 | unsigned Val = RHSAPInt.getZExtValue(); |
16401 | auto isImpossibleCompare = [&]() { |
16402 | // If this is a comparison against something other than 0/1, then we know |
16403 | // that the condition is never/always true. |
16404 | if (Val != 0 && Val != 1) { |
16405 | if (CC == ISD::SETEQ) // Cond never true, remove branch. |
16406 | return N->getOperand(Num: 0); |
16407 | // Always !=, turn it into an unconditional branch. |
16408 | return DAG.getNode(ISD::BR, dl, MVT::Other, |
16409 | N->getOperand(0), N->getOperand(4)); |
16410 | } |
16411 | return SDValue(); |
16412 | }; |
16413 | // Combine branches fed by store conditional instructions (st[bhwd]cx). |
16414 | unsigned StoreWidth = 0; |
16415 | if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN && |
16416 | isStoreConditional(Intrin: LHS, StoreWidth)) { |
16417 | if (SDValue Impossible = isImpossibleCompare()) |
16418 | return Impossible; |
16419 | PPC::Predicate CompOpc; |
16420 | // eq 0 => ne |
16421 | // ne 0 => eq |
16422 | // eq 1 => eq |
16423 | // ne 1 => ne |
16424 | if (Val == 0) |
16425 | CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ; |
16426 | else |
16427 | CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE; |
16428 | |
16429 | SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3), |
16430 | DAG.getConstant(StoreWidth, dl, MVT::i32)}; |
16431 | auto *MemNode = cast<MemSDNode>(Val&: LHS); |
16432 | SDValue ConstSt = DAG.getMemIntrinsicNode( |
16433 | PPCISD::STORE_COND, dl, |
16434 | DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops, |
16435 | MemNode->getMemoryVT(), MemNode->getMemOperand()); |
16436 | |
16437 | SDValue InChain; |
16438 | // Unchain the branch from the original store conditional. |
16439 | if (N->getOperand(Num: 0) == LHS.getValue(R: 1)) |
16440 | InChain = LHS.getOperand(i: 0); |
16441 | else if (N->getOperand(Num: 0).getOpcode() == ISD::TokenFactor) { |
16442 | SmallVector<SDValue, 4> InChains; |
16443 | SDValue InTF = N->getOperand(Num: 0); |
16444 | for (int i = 0, e = InTF.getNumOperands(); i < e; i++) |
16445 | if (InTF.getOperand(i) != LHS.getValue(R: 1)) |
16446 | InChains.push_back(Elt: InTF.getOperand(i)); |
16447 | InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains); |
16448 | } |
16449 | |
16450 | return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain, |
16451 | DAG.getConstant(CompOpc, dl, MVT::i32), |
16452 | DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4), |
16453 | ConstSt.getValue(2)); |
16454 | } |
16455 | |
16456 | if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && |
16457 | getVectorCompareInfo(Intrin: LHS, CompareOpc, isDot, Subtarget)) { |
16458 | assert(isDot && "Can't compare against a vector result!" ); |
16459 | |
16460 | if (SDValue Impossible = isImpossibleCompare()) |
16461 | return Impossible; |
16462 | |
16463 | bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); |
16464 | // Create the PPCISD altivec 'dot' comparison node. |
16465 | SDValue Ops[] = { |
16466 | LHS.getOperand(2), // LHS of compare |
16467 | LHS.getOperand(3), // RHS of compare |
16468 | DAG.getConstant(CompareOpc, dl, MVT::i32) |
16469 | }; |
16470 | EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue }; |
16471 | SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops); |
16472 | |
16473 | // Unpack the result based on how the target uses it. |
16474 | PPC::Predicate CompOpc; |
16475 | switch (LHS.getConstantOperandVal(i: 1)) { |
16476 | default: // Can't happen, don't crash on invalid number though. |
16477 | case 0: // Branch on the value of the EQ bit of CR6. |
16478 | CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE; |
16479 | break; |
16480 | case 1: // Branch on the inverted value of the EQ bit of CR6. |
16481 | CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ; |
16482 | break; |
16483 | case 2: // Branch on the value of the LT bit of CR6. |
16484 | CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE; |
16485 | break; |
16486 | case 3: // Branch on the inverted value of the LT bit of CR6. |
16487 | CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT; |
16488 | break; |
16489 | } |
16490 | |
16491 | return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0), |
16492 | DAG.getConstant(CompOpc, dl, MVT::i32), |
16493 | DAG.getRegister(PPC::CR6, MVT::i32), |
16494 | N->getOperand(4), CompNode.getValue(1)); |
16495 | } |
16496 | break; |
16497 | } |
16498 | case ISD::BUILD_VECTOR: |
16499 | return DAGCombineBuildVector(N, DCI); |
16500 | } |
16501 | |
16502 | return SDValue(); |
16503 | } |
16504 | |
16505 | SDValue |
16506 | PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, |
16507 | SelectionDAG &DAG, |
16508 | SmallVectorImpl<SDNode *> &Created) const { |
16509 | // fold (sdiv X, pow2) |
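  // PPC lowers this to an arithmetic shift right that records in CA whether
  // rounding is needed for negative dividends, followed by addze. As an
  // illustrative example, X / 8 becomes:
  //   srawi r, X, 3
  //   addze r, r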
16510 | EVT VT = N->getValueType(ResNo: 0); |
16511 | if (VT == MVT::i64 && !Subtarget.isPPC64()) |
16512 | return SDValue(); |
16513 | if ((VT != MVT::i32 && VT != MVT::i64) || |
16514 | !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2())) |
16515 | return SDValue(); |
16516 | |
16517 | SDLoc DL(N); |
16518 | SDValue N0 = N->getOperand(Num: 0); |
16519 | |
16520 | bool IsNegPow2 = Divisor.isNegatedPowerOf2(); |
16521 | unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero(); |
16522 | SDValue ShiftAmt = DAG.getConstant(Val: Lg2, DL, VT); |
16523 | |
16524 | SDValue Op = DAG.getNode(Opcode: PPCISD::SRA_ADDZE, DL, VT, N1: N0, N2: ShiftAmt); |
16525 | Created.push_back(Elt: Op.getNode()); |
16526 | |
16527 | if (IsNegPow2) { |
16528 | Op = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT), N2: Op); |
16529 | Created.push_back(Elt: Op.getNode()); |
16530 | } |
16531 | |
16532 | return Op; |
16533 | } |
16534 | |
16535 | //===----------------------------------------------------------------------===// |
16536 | // Inline Assembly Support |
16537 | //===----------------------------------------------------------------------===// |
16538 | |
16539 | void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, |
16540 | KnownBits &Known, |
16541 | const APInt &DemandedElts, |
16542 | const SelectionDAG &DAG, |
16543 | unsigned Depth) const { |
16544 | Known.resetAll(); |
16545 | switch (Op.getOpcode()) { |
16546 | default: break; |
16547 | case PPCISD::LBRX: { |
16548 | // lhbrx is known to have the top bits cleared out. |
16549 | if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16) |
16550 | Known.Zero = 0xFFFF0000; |
16551 | break; |
16552 | } |
16553 | case ISD::INTRINSIC_WO_CHAIN: { |
16554 | switch (Op.getConstantOperandVal(i: 0)) { |
16555 | default: break; |
16556 | case Intrinsic::ppc_altivec_vcmpbfp_p: |
16557 | case Intrinsic::ppc_altivec_vcmpeqfp_p: |
16558 | case Intrinsic::ppc_altivec_vcmpequb_p: |
16559 | case Intrinsic::ppc_altivec_vcmpequh_p: |
16560 | case Intrinsic::ppc_altivec_vcmpequw_p: |
16561 | case Intrinsic::ppc_altivec_vcmpequd_p: |
16562 | case Intrinsic::ppc_altivec_vcmpequq_p: |
16563 | case Intrinsic::ppc_altivec_vcmpgefp_p: |
16564 | case Intrinsic::ppc_altivec_vcmpgtfp_p: |
16565 | case Intrinsic::ppc_altivec_vcmpgtsb_p: |
16566 | case Intrinsic::ppc_altivec_vcmpgtsh_p: |
16567 | case Intrinsic::ppc_altivec_vcmpgtsw_p: |
16568 | case Intrinsic::ppc_altivec_vcmpgtsd_p: |
16569 | case Intrinsic::ppc_altivec_vcmpgtsq_p: |
16570 | case Intrinsic::ppc_altivec_vcmpgtub_p: |
16571 | case Intrinsic::ppc_altivec_vcmpgtuh_p: |
16572 | case Intrinsic::ppc_altivec_vcmpgtuw_p: |
16573 | case Intrinsic::ppc_altivec_vcmpgtud_p: |
16574 | case Intrinsic::ppc_altivec_vcmpgtuq_p: |
16575 | Known.Zero = ~1U; // All bits but the low one are known to be zero. |
16576 | break; |
16577 | } |
16578 | break; |
16579 | } |
16580 | case ISD::INTRINSIC_W_CHAIN: { |
16581 | switch (Op.getConstantOperandVal(i: 1)) { |
16582 | default: |
16583 | break; |
16584 | case Intrinsic::ppc_load2r: |
16585 | // Top bits are cleared for load2r (which is the same as lhbrx). |
16586 | Known.Zero = 0xFFFF0000; |
16587 | break; |
16588 | } |
16589 | break; |
16590 | } |
16591 | } |
16592 | } |
16593 | |
16594 | Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { |
16595 | switch (Subtarget.getCPUDirective()) { |
16596 | default: break; |
16597 | case PPC::DIR_970: |
16598 | case PPC::DIR_PWR4: |
16599 | case PPC::DIR_PWR5: |
16600 | case PPC::DIR_PWR5X: |
16601 | case PPC::DIR_PWR6: |
16602 | case PPC::DIR_PWR6X: |
16603 | case PPC::DIR_PWR7: |
16604 | case PPC::DIR_PWR8: |
16605 | case PPC::DIR_PWR9: |
16606 | case PPC::DIR_PWR10: |
16607 | case PPC::DIR_PWR_FUTURE: { |
16608 | if (!ML) |
16609 | break; |
16610 | |
16611 | if (!DisableInnermostLoopAlign32) { |
      // If the nested loop is an innermost loop, prefer a 32-byte alignment,
16613 | // so that we can decrease cache misses and branch-prediction misses. |
16614 | // Actual alignment of the loop will depend on the hotness check and other |
16615 | // logic in alignBlocks. |
16616 | if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty()) |
16617 | return Align(32); |
16618 | } |
16619 | |
16620 | const PPCInstrInfo *TII = Subtarget.getInstrInfo(); |
16621 | |
16622 | // For small loops (between 5 and 8 instructions), align to a 32-byte |
16623 | // boundary so that the entire loop fits in one instruction-cache line. |
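    // e.g. a 28-byte (7 instruction) loop aligned to 32 bytes fits in a
    // single 32-byte fetch block; left unaligned it could straddle two.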
16624 | uint64_t LoopSize = 0; |
16625 | for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I) |
16626 | for (const MachineInstr &J : **I) { |
16627 | LoopSize += TII->getInstSizeInBytes(MI: J); |
16628 | if (LoopSize > 32) |
16629 | break; |
16630 | } |
16631 | |
16632 | if (LoopSize > 16 && LoopSize <= 32) |
16633 | return Align(32); |
16634 | |
16635 | break; |
16636 | } |
16637 | } |
16638 | |
16639 | return TargetLowering::getPrefLoopAlignment(ML); |
16640 | } |
16641 | |
16642 | /// getConstraintType - Given a constraint, return the type of |
16643 | /// constraint it is for this target. |
16644 | PPCTargetLowering::ConstraintType |
16645 | PPCTargetLowering::getConstraintType(StringRef Constraint) const { |
16646 | if (Constraint.size() == 1) { |
16647 | switch (Constraint[0]) { |
16648 | default: break; |
16649 | case 'b': |
16650 | case 'r': |
16651 | case 'f': |
16652 | case 'd': |
16653 | case 'v': |
16654 | case 'y': |
16655 | return C_RegisterClass; |
16656 | case 'Z': |
16657 | // FIXME: While Z does indicate a memory constraint, it specifically |
16658 | // indicates an r+r address (used in conjunction with the 'y' modifier |
16659 | // in the replacement string). Currently, we're forcing the base |
16660 | // register to be r0 in the asm printer (which is interpreted as zero) |
16661 | // and forming the complete address in the second register. This is |
16662 | // suboptimal. |
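      // An illustrative use (assuming GCC-style operand modifiers):
      //   asm("lwbrx %0, %y1" : "=r"(v) : "Z"(*p));
      // where %y prints the memory operand in indexed "rA, rB" form.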
16663 | return C_Memory; |
16664 | } |
16665 | } else if (Constraint == "wc" ) { // individual CR bits. |
16666 | return C_RegisterClass; |
16667 | } else if (Constraint == "wa" || Constraint == "wd" || |
16668 | Constraint == "wf" || Constraint == "ws" || |
16669 | Constraint == "wi" || Constraint == "ww" ) { |
16670 | return C_RegisterClass; // VSX registers. |
16671 | } |
16672 | return TargetLowering::getConstraintType(Constraint); |
16673 | } |
16674 | |
16675 | /// Examine constraint type and operand type and determine a weight value. |
16676 | /// This object must already have been set up with the operand type |
16677 | /// and the current alternative constraint selected. |
16678 | TargetLowering::ConstraintWeight |
16679 | PPCTargetLowering::getSingleConstraintMatchWeight( |
16680 | AsmOperandInfo &info, const char *constraint) const { |
16681 | ConstraintWeight weight = CW_Invalid; |
16682 | Value *CallOperandVal = info.CallOperandVal; |
16683 | // If we don't have a value, we can't do a match, |
16684 | // but allow it at the lowest weight. |
16685 | if (!CallOperandVal) |
16686 | return CW_Default; |
16687 | Type *type = CallOperandVal->getType(); |
16688 | |
16689 | // Look at the constraint type. |
16690 | if (StringRef(constraint) == "wc" && type->isIntegerTy(Bitwidth: 1)) |
16691 | return CW_Register; // an individual CR bit. |
16692 | else if ((StringRef(constraint) == "wa" || |
16693 | StringRef(constraint) == "wd" || |
16694 | StringRef(constraint) == "wf" ) && |
16695 | type->isVectorTy()) |
16696 | return CW_Register; |
16697 | else if (StringRef(constraint) == "wi" && type->isIntegerTy(Bitwidth: 64)) |
    return CW_Register; // VSX register that holds 64-bit integer data.
16699 | else if (StringRef(constraint) == "ws" && type->isDoubleTy()) |
16700 | return CW_Register; |
16701 | else if (StringRef(constraint) == "ww" && type->isFloatTy()) |
16702 | return CW_Register; |
16703 | |
16704 | switch (*constraint) { |
16705 | default: |
16706 | weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); |
16707 | break; |
16708 | case 'b': |
16709 | if (type->isIntegerTy()) |
16710 | weight = CW_Register; |
16711 | break; |
16712 | case 'f': |
16713 | if (type->isFloatTy()) |
16714 | weight = CW_Register; |
16715 | break; |
16716 | case 'd': |
16717 | if (type->isDoubleTy()) |
16718 | weight = CW_Register; |
16719 | break; |
16720 | case 'v': |
16721 | if (type->isVectorTy()) |
16722 | weight = CW_Register; |
16723 | break; |
16724 | case 'y': |
16725 | weight = CW_Register; |
16726 | break; |
16727 | case 'Z': |
16728 | weight = CW_Memory; |
16729 | break; |
16730 | } |
16731 | return weight; |
16732 | } |
16733 | |
16734 | std::pair<unsigned, const TargetRegisterClass *> |
16735 | PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, |
16736 | StringRef Constraint, |
16737 | MVT VT) const { |
16738 | if (Constraint.size() == 1) { |
16739 | // GCC RS6000 Constraint Letters |
16740 | switch (Constraint[0]) { |
16741 | case 'b': // R1-R31 |
16742 | if (VT == MVT::i64 && Subtarget.isPPC64()) |
16743 | return std::make_pair(0U, &PPC::G8RC_NOX0RegClass); |
16744 | return std::make_pair(0U, &PPC::GPRC_NOR0RegClass); |
16745 | case 'r': // R0-R31 |
16746 | if (VT == MVT::i64 && Subtarget.isPPC64()) |
16747 | return std::make_pair(0U, &PPC::G8RCRegClass); |
16748 | return std::make_pair(0U, &PPC::GPRCRegClass); |
    // 'd' and 'f' constraints are both defined to be "the floating point
    // registers", where one is for 32-bit and the other for 64-bit. We don't
    // care overly much here, so just give them all the same reg classes.
16752 | case 'd': |
16753 | case 'f': |
16754 | if (Subtarget.hasSPE()) { |
16755 | if (VT == MVT::f32 || VT == MVT::i32) |
16756 | return std::make_pair(0U, &PPC::GPRCRegClass); |
16757 | if (VT == MVT::f64 || VT == MVT::i64) |
16758 | return std::make_pair(0U, &PPC::SPERCRegClass); |
16759 | } else { |
16760 | if (VT == MVT::f32 || VT == MVT::i32) |
16761 | return std::make_pair(0U, &PPC::F4RCRegClass); |
16762 | if (VT == MVT::f64 || VT == MVT::i64) |
16763 | return std::make_pair(0U, &PPC::F8RCRegClass); |
16764 | } |
16765 | break; |
16766 | case 'v': |
16767 | if (Subtarget.hasAltivec() && VT.isVector()) |
16768 | return std::make_pair(0U, &PPC::VRRCRegClass); |
16769 | else if (Subtarget.hasVSX()) |
16770 | // Scalars in Altivec registers only make sense with VSX. |
16771 | return std::make_pair(0U, &PPC::VFRCRegClass); |
16772 | break; |
16773 | case 'y': // crrc |
16774 | return std::make_pair(0U, &PPC::CRRCRegClass); |
16775 | } |
16776 | } else if (Constraint == "wc" && Subtarget.useCRBits()) { |
16777 | // An individual CR bit. |
16778 | return std::make_pair(0U, &PPC::CRBITRCRegClass); |
16779 | } else if ((Constraint == "wa" || Constraint == "wd" || |
16780 | Constraint == "wf" || Constraint == "wi" ) && |
16781 | Subtarget.hasVSX()) { |
16782 | // A VSX register for either a scalar (FP) or vector. There is no |
16783 | // support for single precision scalars on subtargets prior to Power8. |
16784 | if (VT.isVector()) |
16785 | return std::make_pair(0U, &PPC::VSRCRegClass); |
16786 | if (VT == MVT::f32 && Subtarget.hasP8Vector()) |
16787 | return std::make_pair(0U, &PPC::VSSRCRegClass); |
16788 | return std::make_pair(0U, &PPC::VSFRCRegClass); |
16789 | } else if ((Constraint == "ws" || Constraint == "ww" ) && Subtarget.hasVSX()) { |
16790 | if (VT == MVT::f32 && Subtarget.hasP8Vector()) |
16791 | return std::make_pair(0U, &PPC::VSSRCRegClass); |
16792 | else |
16793 | return std::make_pair(0U, &PPC::VSFRCRegClass); |
16794 | } else if (Constraint == "lr" ) { |
16795 | if (VT == MVT::i64) |
16796 | return std::make_pair(0U, &PPC::LR8RCRegClass); |
16797 | else |
16798 | return std::make_pair(0U, &PPC::LRRCRegClass); |
16799 | } |
16800 | |
16801 | // Handle special cases of physical registers that are not properly handled |
16802 | // by the base class. |
16803 | if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') { |
16804 | // If we name a VSX register, we can't defer to the base class because it |
16805 | // will not recognize the correct register (their names will be VSL{0-31} |
16806 | // and V{0-31} so they won't match). So we match them here. |
16807 | if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') { |
16808 | int VSNum = atoi(nptr: Constraint.data() + 3); |
16809 | assert(VSNum >= 0 && VSNum <= 63 && |
16810 | "Attempted to access a vsr out of range" ); |
16811 | if (VSNum < 32) |
16812 | return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass); |
16813 | return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass); |
16814 | } |
16815 | |
16816 | // For float registers, we can't defer to the base class as it will match |
16817 | // the SPILLTOVSRRC class. |
16818 | if (Constraint.size() > 3 && Constraint[1] == 'f') { |
16819 | int RegNum = atoi(nptr: Constraint.data() + 2); |
16820 | if (RegNum > 31 || RegNum < 0) |
16821 | report_fatal_error(reason: "Invalid floating point register number" ); |
16822 | if (VT == MVT::f32 || VT == MVT::i32) |
16823 | return Subtarget.hasSPE() |
16824 | ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass) |
16825 | : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass); |
16826 | if (VT == MVT::f64 || VT == MVT::i64) |
16827 | return Subtarget.hasSPE() |
16828 | ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass) |
16829 | : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass); |
16830 | } |
16831 | } |
16832 | |
16833 | std::pair<unsigned, const TargetRegisterClass *> R = |
16834 | TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); |
16835 | |
16836 | // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers |
16837 | // (which we call X[0-9]+). If a 64-bit value has been requested, and a |
16838 | // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent |
16839 | // register. |
16840 | // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use |
16841 | // the AsmName field from *RegisterInfo.td, then this would not be necessary. |
16842 | if (R.first && VT == MVT::i64 && Subtarget.isPPC64() && |
16843 | PPC::GPRCRegClass.contains(R.first)) |
16844 | return std::make_pair(TRI->getMatchingSuperReg(R.first, |
16845 | PPC::sub_32, &PPC::G8RCRegClass), |
16846 | &PPC::G8RCRegClass); |
16847 | |
16848 | // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same. |
16849 | if (!R.second && StringRef("{cc}" ).equals_insensitive(RHS: Constraint)) { |
16850 | R.first = PPC::CR0; |
16851 | R.second = &PPC::CRRCRegClass; |
16852 | } |
16853 | // FIXME: This warning should ideally be emitted in the front end. |
16854 | const auto &TM = getTargetMachine(); |
16855 | if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) { |
16856 | if (((R.first >= PPC::V20 && R.first <= PPC::V31) || |
16857 | (R.first >= PPC::VF20 && R.first <= PPC::VF31)) && |
16858 | (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass)) |
      errs() << "warning: vector registers 20 to 31 are reserved in the "
16860 | "default AIX AltiVec ABI and cannot be used\n" ; |
16861 | } |
16862 | |
16863 | return R; |
16864 | } |
16865 | |
16866 | /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops |
16867 | /// vector. If it is invalid, don't add anything to Ops. |
16868 | void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, |
16869 | StringRef Constraint, |
16870 | std::vector<SDValue> &Ops, |
16871 | SelectionDAG &DAG) const { |
16872 | SDValue Result; |
16873 | |
16874 | // Only support length 1 constraints. |
16875 | if (Constraint.size() > 1) |
16876 | return; |
16877 | |
16878 | char Letter = Constraint[0]; |
16879 | switch (Letter) { |
16880 | default: break; |
16881 | case 'I': |
16882 | case 'J': |
16883 | case 'K': |
16884 | case 'L': |
16885 | case 'M': |
16886 | case 'N': |
16887 | case 'O': |
16888 | case 'P': { |
16889 | ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Val&: Op); |
16890 | if (!CST) return; // Must be an immediate to match. |
16891 | SDLoc dl(Op); |
16892 | int64_t Value = CST->getSExtValue(); |
16893 | EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative |
16894 | // numbers are printed as such. |
16895 | switch (Letter) { |
16896 | default: llvm_unreachable("Unknown constraint letter!" ); |
16897 | case 'I': // "I" is a signed 16-bit constant. |
16898 | if (isInt<16>(x: Value)) |
16899 | Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT); |
16900 | break; |
16901 | case 'J': // "J" is a constant with only the high-order 16 bits nonzero. |
16902 | if (isShiftedUInt<16, 16>(x: Value)) |
16903 | Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT); |
16904 | break; |
16905 | case 'L': // "L" is a signed 16-bit constant shifted left 16 bits. |
16906 | if (isShiftedInt<16, 16>(x: Value)) |
16907 | Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT); |
16908 | break; |
16909 | case 'K': // "K" is a constant with only the low-order 16 bits nonzero. |
16910 | if (isUInt<16>(x: Value)) |
16911 | Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT); |
16912 | break; |
16913 | case 'M': // "M" is a constant that is greater than 31. |
16914 | if (Value > 31) |
16915 | Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT); |
16916 | break; |
16917 | case 'N': // "N" is a positive constant that is an exact power of two. |
16918 | if (Value > 0 && isPowerOf2_64(Value)) |
16919 | Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT); |
16920 | break; |
16921 | case 'O': // "O" is the constant zero. |
16922 | if (Value == 0) |
16923 | Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT); |
16924 | break; |
16925 | case 'P': // "P" is a constant whose negation is a signed 16-bit constant. |
16926 | if (isInt<16>(x: -Value)) |
16927 | Result = DAG.getTargetConstant(Val: Value, DL: dl, VT: TCVT); |
16928 | break; |
16929 | } |
16930 | break; |
16931 | } |
16932 | } |
16933 | |
16934 | if (Result.getNode()) { |
16935 | Ops.push_back(x: Result); |
16936 | return; |
16937 | } |
16938 | |
16939 | // Handle standard constraint letters. |
16940 | TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); |
16941 | } |
16942 | |
16943 | void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I, |
16944 | SmallVectorImpl<SDValue> &Ops, |
16945 | SelectionDAG &DAG) const { |
16946 | if (I.getNumOperands() <= 1) |
16947 | return; |
16948 | if (!isa<ConstantSDNode>(Val: Ops[1].getNode())) |
16949 | return; |
16950 | auto IntrinsicID = Ops[1].getNode()->getAsZExtVal(); |
16951 | if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw && |
16952 | IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap) |
16953 | return; |
16954 | |
16955 | if (MDNode *MDN = I.getMetadata(KindID: LLVMContext::MD_annotation)) |
16956 | Ops.push_back(Elt: DAG.getMDNode(MD: MDN)); |
16957 | } |
16958 | |
16959 | // isLegalAddressingMode - Return true if the addressing mode represented |
16960 | // by AM is legal for this target, for a load/store of the specified type. |
16961 | bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL, |
16962 | const AddrMode &AM, Type *Ty, |
16963 | unsigned AS, |
16964 | Instruction *I) const { |
  // Vector type r+i form is supported since Power9 as DQ form. We don't check
  // the offset against the DQ form requirement (off % 16 == 0), because on
  // PowerPC the imm form is preferred and the offset can be adjusted to use
  // imm form later in the PPCLoopInstrFormPrep pass. Also, in LSR, a single
  // LSRUse uses its min and max offsets to check for a legal addressing mode,
  // so we should be a little aggressive in accepting other offsets for that
  // LSRUse.
16971 | if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector()) |
16972 | return false; |
16973 | |
16974 | // PPC allows a sign-extended 16-bit immediate field. |
16975 | if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) |
16976 | return false; |
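  // e.g. a D-form access such as "lwz r3, -32768(r4)" carries the offset in
  // that signed 16-bit displacement field.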
16977 | |
16978 | // No global is ever allowed as a base. |
16979 | if (AM.BaseGV) |
16980 | return false; |
16981 | |
  // PPC only supports r+r,
16983 | switch (AM.Scale) { |
16984 | case 0: // "r+i" or just "i", depending on HasBaseReg. |
16985 | break; |
16986 | case 1: |
16987 | if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. |
16988 | return false; |
16989 | // Otherwise we have r+r or r+i. |
16990 | break; |
16991 | case 2: |
16992 | if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. |
16993 | return false; |
16994 | // Allow 2*r as r+r. |
16995 | break; |
16996 | default: |
16997 | // No other scales are supported. |
16998 | return false; |
16999 | } |
17000 | |
17001 | return true; |
17002 | } |
17003 | |
17004 | SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, |
17005 | SelectionDAG &DAG) const { |
17006 | MachineFunction &MF = DAG.getMachineFunction(); |
17007 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
17008 | MFI.setReturnAddressIsTaken(true); |
17009 | |
17010 | if (verifyReturnAddressArgumentIsConstant(Op, DAG)) |
17011 | return SDValue(); |
17012 | |
17013 | SDLoc dl(Op); |
17014 | unsigned Depth = Op.getConstantOperandVal(i: 0); |
17015 | |
17016 | // Make sure the function does not optimize away the store of the RA to |
17017 | // the stack. |
17018 | PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); |
17019 | FuncInfo->setLRStoreRequired(); |
17020 | bool isPPC64 = Subtarget.isPPC64(); |
17021 | auto PtrVT = getPointerTy(DL: MF.getDataLayout()); |
17022 | |
17023 | if (Depth > 0) { |
17024 | // The link register (return address) is saved in the caller's frame |
17025 | // not the callee's stack frame. So we must get the caller's frame |
17026 | // address and load the return address at the LR offset from there. |
17027 | SDValue FrameAddr = |
17028 | DAG.getLoad(VT: Op.getValueType(), dl, Chain: DAG.getEntryNode(), |
17029 | Ptr: LowerFRAMEADDR(Op, DAG), PtrInfo: MachinePointerInfo()); |
17030 | SDValue Offset = |
17031 | DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl, |
17032 | isPPC64 ? MVT::i64 : MVT::i32); |
17033 | return DAG.getLoad(VT: PtrVT, dl, Chain: DAG.getEntryNode(), |
17034 | Ptr: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: PtrVT, N1: FrameAddr, N2: Offset), |
17035 | PtrInfo: MachinePointerInfo()); |
17036 | } |
17037 | |
17038 | // Just load the return address off the stack. |
17039 | SDValue RetAddrFI = getReturnAddrFrameIndex(DAG); |
17040 | return DAG.getLoad(VT: PtrVT, dl, Chain: DAG.getEntryNode(), Ptr: RetAddrFI, |
17041 | PtrInfo: MachinePointerInfo()); |
17042 | } |
17043 | |
17044 | SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, |
17045 | SelectionDAG &DAG) const { |
17046 | SDLoc dl(Op); |
17047 | unsigned Depth = Op.getConstantOperandVal(i: 0); |
17048 | |
17049 | MachineFunction &MF = DAG.getMachineFunction(); |
17050 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
17051 | MFI.setFrameAddressIsTaken(true); |
17052 | |
17053 | EVT PtrVT = getPointerTy(DL: MF.getDataLayout()); |
17054 | bool isPPC64 = PtrVT == MVT::i64; |
17055 | |
17056 | // Naked functions never have a frame pointer, and so we use r1. For all |
  // other functions, this decision must be delayed until PEI.
17058 | unsigned FrameReg; |
17059 | if (MF.getFunction().hasFnAttribute(Attribute::Naked)) |
17060 | FrameReg = isPPC64 ? PPC::X1 : PPC::R1; |
17061 | else |
17062 | FrameReg = isPPC64 ? PPC::FP8 : PPC::FP; |
17063 | |
17064 | SDValue FrameAddr = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl, Reg: FrameReg, |
17065 | VT: PtrVT); |
17066 | while (Depth--) |
17067 | FrameAddr = DAG.getLoad(VT: Op.getValueType(), dl, Chain: DAG.getEntryNode(), |
17068 | Ptr: FrameAddr, PtrInfo: MachinePointerInfo()); |
17069 | return FrameAddr; |
17070 | } |
17071 | |
17072 | // FIXME? Maybe this could be a TableGen attribute on some registers and |
17073 | // this table could be generated automatically from RegInfo. |
17074 | Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT, |
17075 | const MachineFunction &MF) const { |
17076 | bool isPPC64 = Subtarget.isPPC64(); |
17077 | |
17078 | bool is64Bit = isPPC64 && VT == LLT::scalar(SizeInBits: 64); |
17079 | if (!is64Bit && VT != LLT::scalar(SizeInBits: 32)) |
17080 | report_fatal_error(reason: "Invalid register global variable type" ); |
17081 | |
17082 | Register Reg = StringSwitch<Register>(RegName) |
17083 | .Case("r1" , is64Bit ? PPC::X1 : PPC::R1) |
17084 | .Case("r2" , isPPC64 ? Register() : PPC::R2) |
17085 | .Case("r13" , (is64Bit ? PPC::X13 : PPC::R13)) |
17086 | .Default(Register()); |
17087 | |
17088 | if (Reg) |
17089 | return Reg; |
17090 | report_fatal_error(reason: "Invalid register name global variable" ); |
17091 | } |
17092 | |
17093 | bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const { |
  // The 32-bit SVR4 ABI accesses everything as got-indirect.
17095 | if (Subtarget.is32BitELFABI()) |
17096 | return true; |
17097 | |
17098 | // AIX accesses everything indirectly through the TOC, which is similar to |
17099 | // the GOT. |
17100 | if (Subtarget.isAIXABI()) |
17101 | return true; |
17102 | |
17103 | CodeModel::Model CModel = getTargetMachine().getCodeModel(); |
17104 | // If it is small or large code model, module locals are accessed |
17105 | // indirectly by loading their address from .toc/.got. |
17106 | if (CModel == CodeModel::Small || CModel == CodeModel::Large) |
17107 | return true; |
17108 | |
17109 | // JumpTable and BlockAddress are accessed as got-indirect. |
17110 | if (isa<JumpTableSDNode>(Val: GA) || isa<BlockAddressSDNode>(Val: GA)) |
17111 | return true; |
17112 | |
17113 | if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Val&: GA)) |
17114 | return Subtarget.isGVIndirectSymbol(GV: G->getGlobal()); |
17115 | |
17116 | return false; |
17117 | } |
17118 | |
17119 | bool |
17120 | PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { |
17121 | // The PowerPC target isn't yet aware of offsets. |
17122 | return false; |
17123 | } |
17124 | |
17125 | bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, |
17126 | const CallInst &I, |
17127 | MachineFunction &MF, |
17128 | unsigned Intrinsic) const { |
17129 | switch (Intrinsic) { |
17130 | case Intrinsic::ppc_atomicrmw_xchg_i128: |
17131 | case Intrinsic::ppc_atomicrmw_add_i128: |
17132 | case Intrinsic::ppc_atomicrmw_sub_i128: |
17133 | case Intrinsic::ppc_atomicrmw_nand_i128: |
17134 | case Intrinsic::ppc_atomicrmw_and_i128: |
17135 | case Intrinsic::ppc_atomicrmw_or_i128: |
17136 | case Intrinsic::ppc_atomicrmw_xor_i128: |
17137 | case Intrinsic::ppc_cmpxchg_i128: |
17138 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
17139 | Info.memVT = MVT::i128; |
17140 | Info.ptrVal = I.getArgOperand(i: 0); |
17141 | Info.offset = 0; |
17142 | Info.align = Align(16); |
17143 | Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore | |
17144 | MachineMemOperand::MOVolatile; |
17145 | return true; |
17146 | case Intrinsic::ppc_atomic_load_i128: |
17147 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
17148 | Info.memVT = MVT::i128; |
17149 | Info.ptrVal = I.getArgOperand(i: 0); |
17150 | Info.offset = 0; |
17151 | Info.align = Align(16); |
17152 | Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile; |
17153 | return true; |
17154 | case Intrinsic::ppc_atomic_store_i128: |
17155 | Info.opc = ISD::INTRINSIC_VOID; |
17156 | Info.memVT = MVT::i128; |
17157 | Info.ptrVal = I.getArgOperand(i: 2); |
17158 | Info.offset = 0; |
17159 | Info.align = Align(16); |
17160 | Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile; |
17161 | return true; |
17162 | case Intrinsic::ppc_altivec_lvx: |
17163 | case Intrinsic::ppc_altivec_lvxl: |
17164 | case Intrinsic::ppc_altivec_lvebx: |
17165 | case Intrinsic::ppc_altivec_lvehx: |
17166 | case Intrinsic::ppc_altivec_lvewx: |
17167 | case Intrinsic::ppc_vsx_lxvd2x: |
17168 | case Intrinsic::ppc_vsx_lxvw4x: |
17169 | case Intrinsic::ppc_vsx_lxvd2x_be: |
17170 | case Intrinsic::ppc_vsx_lxvw4x_be: |
17171 | case Intrinsic::ppc_vsx_lxvl: |
17172 | case Intrinsic::ppc_vsx_lxvll: { |
17173 | EVT VT; |
17174 | switch (Intrinsic) { |
17175 | case Intrinsic::ppc_altivec_lvebx: |
17176 | VT = MVT::i8; |
17177 | break; |
17178 | case Intrinsic::ppc_altivec_lvehx: |
17179 | VT = MVT::i16; |
17180 | break; |
17181 | case Intrinsic::ppc_altivec_lvewx: |
17182 | VT = MVT::i32; |
17183 | break; |
17184 | case Intrinsic::ppc_vsx_lxvd2x: |
17185 | case Intrinsic::ppc_vsx_lxvd2x_be: |
17186 | VT = MVT::v2f64; |
17187 | break; |
17188 | default: |
17189 | VT = MVT::v4i32; |
17190 | break; |
17191 | } |
17192 | |
17193 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
17194 | Info.memVT = VT; |
17195 | Info.ptrVal = I.getArgOperand(i: 0); |
17196 | Info.offset = -VT.getStoreSize()+1; |
17197 | Info.size = 2*VT.getStoreSize()-1; |
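    // lvx-style loads ignore the low address bits (the access comes from the
    // containing 16-byte-aligned block), so model the conservative byte range
    // [P - (size - 1), P + (size - 1)], mirroring the unaligned-load lowering
    // earlier in this file.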
17198 | Info.align = Align(1); |
17199 | Info.flags = MachineMemOperand::MOLoad; |
17200 | return true; |
17201 | } |
17202 | case Intrinsic::ppc_altivec_stvx: |
17203 | case Intrinsic::ppc_altivec_stvxl: |
17204 | case Intrinsic::ppc_altivec_stvebx: |
17205 | case Intrinsic::ppc_altivec_stvehx: |
17206 | case Intrinsic::ppc_altivec_stvewx: |
17207 | case Intrinsic::ppc_vsx_stxvd2x: |
17208 | case Intrinsic::ppc_vsx_stxvw4x: |
17209 | case Intrinsic::ppc_vsx_stxvd2x_be: |
17210 | case Intrinsic::ppc_vsx_stxvw4x_be: |
17211 | case Intrinsic::ppc_vsx_stxvl: |
17212 | case Intrinsic::ppc_vsx_stxvll: { |
17213 | EVT VT; |
17214 | switch (Intrinsic) { |
17215 | case Intrinsic::ppc_altivec_stvebx: |
17216 | VT = MVT::i8; |
17217 | break; |
17218 | case Intrinsic::ppc_altivec_stvehx: |
17219 | VT = MVT::i16; |
17220 | break; |
17221 | case Intrinsic::ppc_altivec_stvewx: |
17222 | VT = MVT::i32; |
17223 | break; |
17224 | case Intrinsic::ppc_vsx_stxvd2x: |
17225 | case Intrinsic::ppc_vsx_stxvd2x_be: |
17226 | VT = MVT::v2f64; |
17227 | break; |
17228 | default: |
17229 | VT = MVT::v4i32; |
17230 | break; |
17231 | } |
17232 | |
17233 | Info.opc = ISD::INTRINSIC_VOID; |
17234 | Info.memVT = VT; |
17235 | Info.ptrVal = I.getArgOperand(i: 1); |
17236 | Info.offset = -VT.getStoreSize()+1; |
17237 | Info.size = 2*VT.getStoreSize()-1; |
17238 | Info.align = Align(1); |
17239 | Info.flags = MachineMemOperand::MOStore; |
17240 | return true; |
17241 | } |
17242 | case Intrinsic::ppc_stdcx: |
17243 | case Intrinsic::ppc_stwcx: |
17244 | case Intrinsic::ppc_sthcx: |
17245 | case Intrinsic::ppc_stbcx: { |
17246 | EVT VT; |
17247 | auto Alignment = Align(8); |
17248 | switch (Intrinsic) { |
17249 | case Intrinsic::ppc_stdcx: |
17250 | VT = MVT::i64; |
17251 | break; |
17252 | case Intrinsic::ppc_stwcx: |
17253 | VT = MVT::i32; |
17254 | Alignment = Align(4); |
17255 | break; |
17256 | case Intrinsic::ppc_sthcx: |
17257 | VT = MVT::i16; |
17258 | Alignment = Align(2); |
17259 | break; |
17260 | case Intrinsic::ppc_stbcx: |
17261 | VT = MVT::i8; |
17262 | Alignment = Align(1); |
17263 | break; |
17264 | } |
17265 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
17266 | Info.memVT = VT; |
17267 | Info.ptrVal = I.getArgOperand(i: 0); |
17268 | Info.offset = 0; |
17269 | Info.align = Alignment; |
17270 | Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile; |
17271 | return true; |
17272 | } |
17273 | default: |
17274 | break; |
17275 | } |
17276 | |
17277 | return false; |
17278 | } |
17279 | |
17280 | /// It returns EVT::Other if the type should be determined using generic |
17281 | /// target-independent logic. |
17282 | EVT PPCTargetLowering::getOptimalMemOpType( |
17283 | const MemOp &Op, const AttributeList &FuncAttributes) const { |
17284 | if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) { |
17285 | // We should use Altivec/VSX loads and stores when available. For unaligned |
17286 | // addresses, unaligned VSX loads are only fast starting with the P8. |
17287 | if (Subtarget.hasAltivec() && Op.size() >= 16) { |
17288 | if (Op.isMemset() && Subtarget.hasVSX()) { |
17289 | uint64_t TailSize = Op.size() % 16; |
      // For memset lowering, EXTRACT_VECTOR_ELT tries to return a constant
      // element if the vector element type matches the tail store. For tail
      // sizes 3/4 the tail store is i32, so v4i32 cannot be used and another
      // legal type is needed.
17293 | if (TailSize > 2 && TailSize <= 4) { |
17294 | return MVT::v8i16; |
17295 | } |
17296 | return MVT::v4i32; |
17297 | } |
17298 | if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector()) |
17299 | return MVT::v4i32; |
17300 | } |
17301 | } |
17302 | |
17303 | if (Subtarget.isPPC64()) { |
17304 | return MVT::i64; |
17305 | } |
17306 | |
17307 | return MVT::i32; |
17308 | } |
17309 | |
17310 | /// Returns true if it is beneficial to convert a load of a constant |
17311 | /// to just the constant itself. |
17312 | bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, |
17313 | Type *Ty) const { |
17314 | assert(Ty->isIntegerTy()); |
17315 | |
17316 | unsigned BitSize = Ty->getPrimitiveSizeInBits(); |
17317 | return !(BitSize == 0 || BitSize > 64); |
17318 | } |
17319 | |
17320 | bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { |
17321 | if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) |
17322 | return false; |
17323 | unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); |
17324 | unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); |
17325 | return NumBits1 == 64 && NumBits2 == 32; |
17326 | } |
17327 | |
17328 | bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { |
17329 | if (!VT1.isInteger() || !VT2.isInteger()) |
17330 | return false; |
17331 | unsigned NumBits1 = VT1.getSizeInBits(); |
17332 | unsigned NumBits2 = VT2.getSizeInBits(); |
17333 | return NumBits1 == 64 && NumBits2 == 32; |
17334 | } |
17335 | |
17336 | bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { |
17337 | // Generally speaking, zexts are not free, but they are free when they can be |
17338 | // folded with other operations. |
17339 | if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) { |
17340 | EVT MemVT = LD->getMemoryVT(); |
17341 | if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 || |
17342 | (Subtarget.isPPC64() && MemVT == MVT::i32)) && |
17343 | (LD->getExtensionType() == ISD::NON_EXTLOAD || |
17344 | LD->getExtensionType() == ISD::ZEXTLOAD)) |
17345 | return true; |
17346 | } |
17347 | |
17348 | // FIXME: Add other cases... |
17349 | // - 32-bit shifts with a zext to i64 |
17350 | // - zext after ctlz, bswap, etc. |
17351 | // - zext after and by a constant mask |
17352 | |
17353 | return TargetLowering::isZExtFree(Val, VT2); |
17354 | } |
17355 | |
17356 | bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const { |
17357 | assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() && |
17358 | "invalid fpext types" ); |
17359 | // Extending to float128 is not free. |
17360 | if (DestVT == MVT::f128) |
17361 | return false; |
17362 | return true; |
17363 | } |
17364 | |
17365 | bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const { |
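  // cmpwi/cmpdi take a signed 16-bit immediate; cmplwi/cmpldi take an
  // unsigned 16-bit immediate.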
17366 | return isInt<16>(x: Imm) || isUInt<16>(x: Imm); |
17367 | } |
17368 | |
17369 | bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const { |
17370 | return isInt<16>(x: Imm) || isUInt<16>(x: Imm); |
17371 | } |
17372 | |
17373 | bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align, |
17374 | MachineMemOperand::Flags, |
17375 | unsigned *Fast) const { |
17376 | if (DisablePPCUnaligned) |
17377 | return false; |
17378 | |
17379 | // PowerPC supports unaligned memory access for simple non-vector types. |
17380 | // Although accessing unaligned addresses is not as efficient as accessing |
17381 | // aligned addresses, it is generally more efficient than manual expansion, |
17382 | // and generally only traps for software emulation when crossing page |
17383 | // boundaries. |
17384 | |
17385 | if (!VT.isSimple()) |
17386 | return false; |
17387 | |
17388 | if (VT.isFloatingPoint() && !VT.isVector() && |
17389 | !Subtarget.allowsUnalignedFPAccess()) |
17390 | return false; |
17391 | |
17392 | if (VT.getSimpleVT().isVector()) { |
17393 | if (Subtarget.hasVSX()) { |
17394 | if (VT != MVT::v2f64 && VT != MVT::v2i64 && |
17395 | VT != MVT::v4f32 && VT != MVT::v4i32) |
17396 | return false; |
17397 | } else { |
17398 | return false; |
17399 | } |
17400 | } |
17401 | |
17402 | if (VT == MVT::ppcf128) |
17403 | return false; |
17404 | |
17405 | if (Fast) |
17406 | *Fast = 1; |
17407 | |
17408 | return true; |
17409 | } |
17410 | |
17411 | bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, |
17412 | SDValue C) const { |
17413 | // Check integral scalar types. |
17414 | if (!VT.isScalarInteger()) |
17415 | return false; |
  if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
    if (!ConstNode->getAPIntValue().isSignedIntN(64))
17418 | return false; |
    // This transformation will generate >= 2 operations. But the following
    // cases will generate <= 2 instructions during ISEL. So exclude them.
    // 1. If the constant multiplier fits 16 bits, it can be handled by one
    //    HW instruction, ie. MULLI
    // 2. If the multiplier after being shifted fits 16 bits, one more shift
    //    instruction is needed than in case 1, ie. MULLI and RLDICR
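    //    For example, 10 = 5 << 1 strips to 5, which fits 16 bits, so we
    //    return false (MULLI, plus RLDICR for the shifted case); 65537 does
    //    not fit, but 65537 - 1 is a power of two, so decomposing into
    //    shl + add pays off and we return true.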
17425 | int64_t Imm = ConstNode->getSExtValue(); |
    unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
17427 | Imm >>= Shift; |
    if (isInt<16>(Imm))
17429 | return false; |
17430 | uint64_t UImm = static_cast<uint64_t>(Imm); |
    if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
        isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
17433 | return true; |
17434 | } |
17435 | return false; |
17436 | } |
17437 | |
17438 | bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, |
17439 | EVT VT) const { |
17440 | return isFMAFasterThanFMulAndFAdd( |
      MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
17442 | } |
17443 | |
17444 | bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F, |
17445 | Type *Ty) const { |
17446 | if (Subtarget.hasSPE() || Subtarget.useSoftFloat()) |
17447 | return false; |
17448 | switch (Ty->getScalarType()->getTypeID()) { |
17449 | case Type::FloatTyID: |
17450 | case Type::DoubleTyID: |
17451 | return true; |
17452 | case Type::FP128TyID: |
17453 | return Subtarget.hasP9Vector(); |
17454 | default: |
17455 | return false; |
17456 | } |
17457 | } |
17458 | |
17459 | // FIXME: add more patterns which are not profitable to hoist. |
17460 | bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const { |
17461 | if (!I->hasOneUse()) |
17462 | return true; |
17463 | |
17464 | Instruction *User = I->user_back(); |
  assert(User && "A single use instruction with no uses.");
17466 | |
17467 | switch (I->getOpcode()) { |
17468 | case Instruction::FMul: { |
17469 | // Don't break FMA, PowerPC prefers FMA. |
17470 | if (User->getOpcode() != Instruction::FSub && |
17471 | User->getOpcode() != Instruction::FAdd) |
17472 | return true; |
17473 | |
17474 | const TargetOptions &Options = getTargetMachine().Options; |
17475 | const Function *F = I->getFunction(); |
17476 | const DataLayout &DL = F->getParent()->getDataLayout(); |
    Type *Ty = User->getOperand(0)->getType();
17478 | |
17479 | return !( |
        isFMAFasterThanFMulAndFAdd(*F, Ty) &&
        isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
        (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
17483 | } |
17484 | case Instruction::Load: { |
17485 | // Don't break "store (load float*)" pattern, this pattern will be combined |
17486 | // to "store (load int32)" in later InstCombine pass. See function |
17487 | // combineLoadToOperationType. On PowerPC, loading a float point takes more |
17488 | // cycles than loading a 32 bit integer. |
17489 | LoadInst *LI = cast<LoadInst>(Val: I); |
    // For loads that combineLoadToOperationType leaves alone, such as ordered
    // loads, it should be profitable to hoist them.
    // A swifterror load can only be used through a pointer-to-pointer type, so
    // the type check below gets rid of that case.
17494 | if (!LI->isUnordered()) |
17495 | return true; |
17496 | |
17497 | if (User->getOpcode() != Instruction::Store) |
17498 | return true; |
17499 | |
17500 | if (I->getType()->getTypeID() != Type::FloatTyID) |
17501 | return true; |
17502 | |
17503 | return false; |
17504 | } |
17505 | default: |
17506 | return true; |
17507 | } |
17508 | return true; |
17509 | } |
17510 | |
17511 | const MCPhysReg * |
17512 | PPCTargetLowering::getScratchRegisters(CallingConv::ID) const { |
17513 | // LR is a callee-save register, but we must treat it as clobbered by any call |
17514 | // site. Hence we include LR in the scratch registers, which are in turn added |
17515 | // as implicit-defs for stackmaps and patchpoints. The same reasoning applies |
17516 | // to CTR, which is used by any indirect call. |
17517 | static const MCPhysReg ScratchRegs[] = { |
17518 | PPC::X12, PPC::LR8, PPC::CTR8, 0 |
17519 | }; |
17520 | |
17521 | return ScratchRegs; |
17522 | } |
17523 | |
17524 | Register PPCTargetLowering::getExceptionPointerRegister( |
17525 | const Constant *PersonalityFn) const { |
17526 | return Subtarget.isPPC64() ? PPC::X3 : PPC::R3; |
17527 | } |
17528 | |
17529 | Register PPCTargetLowering::getExceptionSelectorRegister( |
17530 | const Constant *PersonalityFn) const { |
17531 | return Subtarget.isPPC64() ? PPC::X4 : PPC::R4; |
17532 | } |
17533 | |
17534 | bool |
17535 | PPCTargetLowering::shouldExpandBuildVectorWithShuffles( |
    EVT VT, unsigned DefinedValues) const {
17537 | if (VT == MVT::v2i64) |
17538 | return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves |
17539 | |
17540 | if (Subtarget.hasVSX()) |
17541 | return true; |
17542 | |
17543 | return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); |
17544 | } |
17545 | |
17546 | Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { |
17547 | if (DisableILPPref || Subtarget.enableMachineScheduler()) |
17548 | return TargetLowering::getSchedulingPreference(N); |
17549 | |
17550 | return Sched::ILP; |
17551 | } |
17552 | |
17553 | // Create a fast isel object. |
17554 | FastISel * |
17555 | PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo, |
17556 | const TargetLibraryInfo *LibInfo) const { |
17557 | return PPC::createFastISel(FuncInfo, LibInfo); |
17558 | } |
17559 | |
17560 | // 'Inverted' means the FMA opcode after negating one multiplicand. |
17561 | // For example, (fma -a b c) = (fnmsub a b c) |
17562 | static unsigned invertFMAOpcode(unsigned Opc) { |
17563 | switch (Opc) { |
17564 | default: |
17565 | llvm_unreachable("Invalid FMA opcode for PowerPC!" ); |
17566 | case ISD::FMA: |
17567 | return PPCISD::FNMSUB; |
17568 | case PPCISD::FNMSUB: |
17569 | return ISD::FMA; |
17570 | } |
17571 | } |
17572 | |
17573 | SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, |
17574 | bool LegalOps, bool OptForSize, |
17575 | NegatibleCost &Cost, |
17576 | unsigned Depth) const { |
17577 | if (Depth > SelectionDAG::MaxRecursionDepth) |
17578 | return SDValue(); |
17579 | |
17580 | unsigned Opc = Op.getOpcode(); |
17581 | EVT VT = Op.getValueType(); |
17582 | SDNodeFlags Flags = Op.getNode()->getFlags(); |
17583 | |
17584 | switch (Opc) { |
17585 | case PPCISD::FNMSUB: |
17586 | if (!Op.hasOneUse() || !isTypeLegal(VT)) |
17587 | break; |
17588 | |
17589 | const TargetOptions &Options = getTargetMachine().Options; |
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    SDValue N2 = Op.getOperand(2);
    SDLoc Loc(Op);

    NegatibleCost N2Cost = NegatibleCost::Expensive;
    SDValue NegN2 =
        getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
17598 | |
17599 | if (!NegN2) |
17600 | return SDValue(); |
17601 | |
17602 | // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c)) |
17603 | // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c)) |
17604 | // These transformations may change sign of zeroes. For example, |
17605 | // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1. |
17606 | if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) { |
17607 | // Try and choose the cheaper one to negate. |
      NegatibleCost N0Cost = NegatibleCost::Expensive;
      SDValue NegN0 =
          getNegatedExpression(N0, DAG, LegalOps, OptForSize, N0Cost, Depth + 1);

      NegatibleCost N1Cost = NegatibleCost::Expensive;
      SDValue NegN1 =
          getNegatedExpression(N1, DAG, LegalOps, OptForSize, N1Cost, Depth + 1);

      if (NegN0 && N0Cost <= N1Cost) {
        Cost = std::min(N0Cost, N2Cost);
        return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
      } else if (NegN1) {
        Cost = std::min(N1Cost, N2Cost);
        return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
      }
    }

    // (fneg (fnmsub a b c)) => (fma a b (fneg c))
    if (isOperationLegal(ISD::FMA, VT)) {
      Cost = N2Cost;
      return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
    }
17629 | } |
17630 | |
17631 | break; |
17632 | } |
17633 | |
17634 | return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize, |
17635 | Cost, Depth); |
17636 | } |
17637 | |
17638 | // Override to enable LOAD_STACK_GUARD lowering on Linux. |
17639 | bool PPCTargetLowering::useLoadStackGuardNode() const { |
17640 | if (!Subtarget.isTargetLinux()) |
17641 | return TargetLowering::useLoadStackGuardNode(); |
17642 | return true; |
17643 | } |
17644 | |
17645 | // Override to disable global variable loading on Linux and insert AIX canary |
17646 | // word declaration. |
17647 | void PPCTargetLowering::insertSSPDeclarations(Module &M) const { |
17648 | if (Subtarget.isAIXABI()) { |
    M.getOrInsertGlobal(AIXSSPCanaryWordName,
                        PointerType::getUnqual(M.getContext()));
17651 | return; |
17652 | } |
17653 | if (!Subtarget.isTargetLinux()) |
17654 | return TargetLowering::insertSSPDeclarations(M); |
17655 | } |
17656 | |
17657 | Value *PPCTargetLowering::getSDagStackGuard(const Module &M) const { |
17658 | if (Subtarget.isAIXABI()) |
    return M.getGlobalVariable(AIXSSPCanaryWordName);
17660 | return TargetLowering::getSDagStackGuard(M); |
17661 | } |
17662 | |
17663 | bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, |
17664 | bool ForCodeSize) const { |
17665 | if (!VT.isSimple() || !Subtarget.hasVSX()) |
17666 | return false; |
17667 | |
  switch (VT.getSimpleVT().SimpleTy) {
17669 | default: |
17670 | // For FP types that are currently not supported by PPC backend, return |
17671 | // false. Examples: f16, f80. |
17672 | return false; |
17673 | case MVT::f32: |
17674 | case MVT::f64: { |
    if (Subtarget.hasPrefixInstrs()) {
      // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
      return true;
    }
    bool IsExact;
    APSInt IntResult(16, false);
    // The rounding mode doesn't really matter because we only care about
    // floats that can be converted to integers exactly.
    Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
17684 | // For exact values in the range [-16, 15] we can materialize the float. |
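    // For example, +3.0 converts exactly to 3 and is in range, so it can be
    // materialized; 3.5 (inexact) and 100.0 (out of range) cannot.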
17685 | if (IsExact && IntResult <= 15 && IntResult >= -16) |
17686 | return true; |
17687 | return Imm.isZero(); |
17688 | } |
17689 | case MVT::ppcf128: |
17690 | return Imm.isPosZero(); |
17691 | } |
17692 | } |
17693 | |
17694 | // For vector shift operation op, fold |
17695 | // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y) |
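// For example, (shl v4i32:x, (and v4i32:y, 31)) becomes (PPCISD::SHL x, y):
// the hardware vector shift only reads the low log2(numbits) bits of each
// shift amount, so the explicit masking is redundant.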
17696 | static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, |
17697 | SelectionDAG &DAG) { |
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
17700 | EVT VT = N0.getValueType(); |
17701 | unsigned OpSizeInBits = VT.getScalarSizeInBits(); |
17702 | unsigned Opcode = N->getOpcode(); |
17703 | unsigned TargetOpcode; |
17704 | |
17705 | switch (Opcode) { |
17706 | default: |
17707 | llvm_unreachable("Unexpected shift operation" ); |
17708 | case ISD::SHL: |
17709 | TargetOpcode = PPCISD::SHL; |
17710 | break; |
17711 | case ISD::SRL: |
17712 | TargetOpcode = PPCISD::SRL; |
17713 | break; |
17714 | case ISD::SRA: |
17715 | TargetOpcode = PPCISD::SRA; |
17716 | break; |
17717 | } |
17718 | |
  if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
      N1->getOpcode() == ISD::AND)
    if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
      if (Mask->getZExtValue() == OpSizeInBits - 1)
        return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
17724 | |
17725 | return SDValue(); |
17726 | } |
17727 | |
17728 | SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const { |
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17730 | return Value; |
17731 | |
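  // Fold (shl (sext i32:x to i64), imm) into PPCISD::EXTSWSLI, so the
  // sign-extension and the shift are done by a single extswsli (ISA 3.0).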
  SDValue N0 = N->getOperand(0);
  ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17734 | if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() || |
17735 | N0.getOpcode() != ISD::SIGN_EXTEND || |
17736 | N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr || |
17737 | N->getValueType(0) != MVT::i64) |
17738 | return SDValue(); |
17739 | |
17740 | // We can't save an operation here if the value is already extended, and |
17741 | // the existing shift is easier to combine. |
  SDValue ExtsSrc = N0.getOperand(0);
  if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
      ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
17745 | return SDValue(); |
17746 | |
17747 | SDLoc DL(N0); |
17748 | SDValue ShiftBy = SDValue(CN1, 0); |
  // We want the shift amount to be i32 on the extswsli, but the shift amount
  // could be i64.
  if (ShiftBy.getValueType() == MVT::i64)
    ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
17753 | |
17754 | return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0), |
17755 | ShiftBy); |
17756 | } |
17757 | |
17758 | SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const { |
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17760 | return Value; |
17761 | |
17762 | return SDValue(); |
17763 | } |
17764 | |
17765 | SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const { |
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17767 | return Value; |
17768 | |
17769 | return SDValue(); |
17770 | } |
17771 | |
17772 | // Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1)) |
17773 | // Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0)) |
17774 | // When C is zero, the equation (addi Z, -C) can be simplified to Z |
17775 | // Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types |
17776 | static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, |
17777 | const PPCSubtarget &Subtarget) { |
17778 | if (!Subtarget.isPPC64()) |
17779 | return SDValue(); |
17780 | |
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
17783 | |
17784 | auto isZextOfCompareWithConstant = [](SDValue Op) { |
17785 | if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() || |
17786 | Op.getValueType() != MVT::i64) |
17787 | return false; |
17788 | |
    SDValue Cmp = Op.getOperand(0);
17790 | if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() || |
17791 | Cmp.getOperand(0).getValueType() != MVT::i64) |
17792 | return false; |
17793 | |
    if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
17795 | int64_t NegConstant = 0 - Constant->getSExtValue(); |
17796 | // Due to the limitations of the addi instruction, |
17797 | // -C is required to be [-32768, 32767]. |
      return isInt<16>(NegConstant);
17799 | } |
17800 | |
17801 | return false; |
17802 | }; |
17803 | |
17804 | bool LHSHasPattern = isZextOfCompareWithConstant(LHS); |
17805 | bool RHSHasPattern = isZextOfCompareWithConstant(RHS); |
17806 | |
17807 | // If there is a pattern, canonicalize a zext operand to the RHS. |
17808 | if (LHSHasPattern && !RHSHasPattern) |
    std::swap(LHS, RHS);
17810 | else if (!LHSHasPattern && !RHSHasPattern) |
17811 | return SDValue(); |
17812 | |
17813 | SDLoc DL(N); |
17814 | SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue); |
  SDValue Cmp = RHS.getOperand(0);
  SDValue Z = Cmp.getOperand(0);
  auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
17818 | int64_t NegConstant = 0 - Constant->getSExtValue(); |
17819 | |
  switch (cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
17821 | default: break; |
17822 | case ISD::SETNE: { |
17823 | // when C == 0 |
17824 | // --> addze X, (addic Z, -1).carry |
17825 | // / |
17826 | // add X, (zext(setne Z, C))-- |
17827 | // \ when -32768 <= -C <= 32767 && C != 0 |
17828 | // --> addze X, (addic (addi Z, -C), -1).carry |
17829 | SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z, |
17830 | DAG.getConstant(NegConstant, DL, MVT::i64)); |
17831 | SDValue AddOrZ = NegConstant != 0 ? Add : Z; |
17832 | SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue), |
17833 | AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64)); |
17834 | return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64), |
17835 | SDValue(Addc.getNode(), 1)); |
17836 | } |
17837 | case ISD::SETEQ: { |
17838 | // when C == 0 |
17839 | // --> addze X, (subfic Z, 0).carry |
17840 | // / |
17841 | // add X, (zext(sete Z, C))-- |
17842 | // \ when -32768 <= -C <= 32767 && C != 0 |
17843 | // --> addze X, (subfic (addi Z, -C), 0).carry |
17844 | SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z, |
17845 | DAG.getConstant(NegConstant, DL, MVT::i64)); |
17846 | SDValue AddOrZ = NegConstant != 0 ? Add : Z; |
17847 | SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue), |
17848 | DAG.getConstant(0, DL, MVT::i64), AddOrZ); |
17849 | return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64), |
17850 | SDValue(Subc.getNode(), 1)); |
17851 | } |
17852 | } |
17853 | |
17854 | return SDValue(); |
17855 | } |
17856 | |
17857 | // Transform |
17858 | // (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to |
17859 | // (MAT_PCREL_ADDR GlobalAddr+(C1+C2)) |
17860 | // In this case both C1 and C2 must be known constants. |
17861 | // C1+C2 must fit into a 34 bit signed integer. |
17862 | static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, |
17863 | const PPCSubtarget &Subtarget) { |
17864 | if (!Subtarget.isUsingPCRelativeCalls()) |
17865 | return SDValue(); |
17866 | |
17867 | // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node. |
17868 | // If we find that node try to cast the Global Address and the Constant. |
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
17871 | |
17872 | if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR) |
    std::swap(LHS, RHS);
17874 | |
17875 | if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR) |
17876 | return SDValue(); |
17877 | |
17878 | // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node. |
  GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
  ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(RHS);
17881 | |
17882 | // Check that both casts succeeded. |
17883 | if (!GSDN || !ConstNode) |
17884 | return SDValue(); |
17885 | |
17886 | int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue(); |
17887 | SDLoc DL(GSDN); |
17888 | |
17889 | // The signed int offset needs to fit in 34 bits. |
  if (!isInt<34>(NewOffset))
17891 | return SDValue(); |
17892 | |
17893 | // The new global address is a copy of the old global address except |
17894 | // that it has the updated Offset. |
  SDValue GA =
      DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
                                 NewOffset, GSDN->getTargetFlags());
  SDValue MatPCRel =
      DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
17900 | return MatPCRel; |
17901 | } |
17902 | |
17903 | SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const { |
  if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
17905 | return Value; |
17906 | |
  if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
17908 | return Value; |
17909 | |
17910 | return SDValue(); |
17911 | } |
17912 | |
17913 | // Detect TRUNCATE operations on bitcasts of float128 values. |
// What we are looking for here is the situation where we extract a subset
17915 | // of bits from a 128 bit float. |
17916 | // This can be of two forms: |
17917 | // 1) BITCAST of f128 feeding TRUNCATE |
17918 | // 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE |
17919 | // The reason this is required is because we do not have a legal i128 type |
17920 | // and so we want to prevent having to store the f128 and then reload part |
17921 | // of it. |
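// For example, on little-endian:
//   (i64 (truncate (i128 (bitcast f128:x))))           extracts element 0
//   (i64 (truncate (srl (i128 (bitcast f128:x)), 64))) extracts element 1
// of (v2i64 (bitcast f128:x)).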
17922 | SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N, |
17923 | DAGCombinerInfo &DCI) const { |
17924 | // If we are using CRBits then try that first. |
17925 | if (Subtarget.useCRBits()) { |
17926 | // Check if CRBits did anything and return that if it did. |
17927 | if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI)) |
17928 | return CRTruncValue; |
17929 | } |
17930 | |
17931 | SDLoc dl(N); |
  SDValue Op0 = N->getOperand(0);
17933 | |
17934 | // Looking for a truncate of i128 to i64. |
17935 | if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64) |
17936 | return SDValue(); |
17937 | |
  int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
17939 | |
17940 | // SRL feeding TRUNCATE. |
17941 | if (Op0.getOpcode() == ISD::SRL) { |
    ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
17943 | // The right shift has to be by 64 bits. |
17944 | if (!ConstNode || ConstNode->getZExtValue() != 64) |
17945 | return SDValue(); |
17946 | |
17947 | // Switch the element number to extract. |
17948 | EltToExtract = EltToExtract ? 0 : 1; |
17949 | // Update Op0 past the SRL. |
    Op0 = Op0.getOperand(0);
17951 | } |
17952 | |
17953 | // BITCAST feeding a TRUNCATE possibly via SRL. |
17954 | if (Op0.getOpcode() == ISD::BITCAST && |
17955 | Op0.getValueType() == MVT::i128 && |
17956 | Op0.getOperand(0).getValueType() == MVT::f128) { |
17957 | SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0)); |
17958 | return DCI.DAG.getNode( |
17959 | ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast, |
17960 | DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32)); |
17961 | } |
17962 | return SDValue(); |
17963 | } |
17964 | |
17965 | SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const { |
17966 | SelectionDAG &DAG = DCI.DAG; |
17967 | |
  ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
17969 | if (!ConstOpOrElement) |
17970 | return SDValue(); |
17971 | |
17972 | // An imul is usually smaller than the alternative sequence for legal type. |
17973 | if (DAG.getMachineFunction().getFunction().hasMinSize() && |
      isOperationLegal(ISD::MUL, N->getValueType(0)))
17975 | return SDValue(); |
17976 | |
17977 | auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool { |
17978 | switch (this->Subtarget.getCPUDirective()) { |
17979 | default: |
17980 | // TODO: enhance the condition for subtarget before pwr8 |
17981 | return false; |
17982 | case PPC::DIR_PWR8: |
17983 | // type mul add shl |
17984 | // scalar 4 1 1 |
17985 | // vector 7 2 2 |
17986 | return true; |
17987 | case PPC::DIR_PWR9: |
17988 | case PPC::DIR_PWR10: |
17989 | case PPC::DIR_PWR_FUTURE: |
17990 | // type mul add shl |
17991 | // scalar 5 2 2 |
17992 | // vector 7 2 2 |
17993 | |
      // The cycle ratios of the related operations are shown in the table
      // above. Because mul is 5 (scalar) / 7 (vector) while add/sub/shl are
      // all 2 for both scalar and vector types, 2-instruction patterns
      // (add/sub + shl, cost 4) are always profitable; but 3-instruction
      // patterns like (mul x, -(2^N + 1)) => -(add (shl x, N), x)
      // (sub + add + shl, cost 6) are only profitable for vector types.
18000 | return IsAddOne && IsNeg ? VT.isVector() : true; |
18001 | } |
18002 | }; |
18003 | |
  EVT VT = N->getValueType(0);
18005 | SDLoc DL(N); |
18006 | |
18007 | const APInt &MulAmt = ConstOpOrElement->getAPIntValue(); |
18008 | bool IsNeg = MulAmt.isNegative(); |
18009 | APInt MulAmtAbs = MulAmt.abs(); |
18010 | |
18011 | if ((MulAmtAbs - 1).isPowerOf2()) { |
18012 | // (mul x, 2^N + 1) => (add (shl x, N), x) |
18013 | // (mul x, -(2^N + 1)) => -(add (shl x, N), x) |
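    // For example, (mul x, 9) becomes (add (shl x, 3), x), and (mul x, -9)
    // becomes (sub 0, (add (shl x, 3), x)).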
18014 | |
18015 | if (!IsProfitable(IsNeg, true, VT)) |
18016 | return SDValue(); |
18017 | |
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 =
        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                    DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
    SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
18023 | |
18024 | if (!IsNeg) |
18025 | return Res; |
18026 | |
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
18028 | } else if ((MulAmtAbs + 1).isPowerOf2()) { |
18029 | // (mul x, 2^N - 1) => (sub (shl x, N), x) |
18030 | // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) |
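    // For example, (mul x, 7) becomes (sub (shl x, 3), x), and (mul x, -7)
    // becomes (sub x, (shl x, 3)).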
18031 | |
18032 | if (!IsProfitable(IsNeg, false, VT)) |
18033 | return SDValue(); |
18034 | |
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 =
        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                    DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
18039 | |
    if (!IsNeg)
      return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
    else
      return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
18044 | |
18045 | } else { |
18046 | return SDValue(); |
18047 | } |
18048 | } |
18049 | |
18050 | // Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this |
18051 | // in combiner since we need to check SD flags and other subtarget features. |
18052 | SDValue PPCTargetLowering::combineFMALike(SDNode *N, |
18053 | DAGCombinerInfo &DCI) const { |
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDNodeFlags Flags = N->getFlags();
  EVT VT = N->getValueType(0);
18059 | SelectionDAG &DAG = DCI.DAG; |
18060 | const TargetOptions &Options = getTargetMachine().Options; |
18061 | unsigned Opc = N->getOpcode(); |
18062 | bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize(); |
18063 | bool LegalOps = !DCI.isBeforeLegalizeOps(); |
18064 | SDLoc Loc(N); |
18065 | |
  if (!isOperationLegal(ISD::FMA, VT))
18067 | return SDValue(); |
18068 | |
18069 | // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0 |
18070 | // since (fnmsub a b c)=-0 while c-ab=+0. |
18071 | if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath) |
18072 | return SDValue(); |
18073 | |
18074 | // (fma (fneg a) b c) => (fnmsub a b c) |
18075 | // (fnmsub (fneg a) b c) => (fma a b c) |
  if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
    return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
18078 | |
18079 | // (fma a (fneg b) c) => (fnmsub a b c) |
18080 | // (fnmsub a (fneg b) c) => (fma a b c) |
  if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
    return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
18083 | |
18084 | return SDValue(); |
18085 | } |
18086 | |
18087 | bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { |
  // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
18089 | if (!Subtarget.is64BitELFABI()) |
18090 | return false; |
18091 | |
18092 | // If not a tail call then no need to proceed. |
18093 | if (!CI->isTailCall()) |
18094 | return false; |
18095 | |
18096 | // If sibling calls have been disabled and tail-calls aren't guaranteed |
18097 | // there is no reason to duplicate. |
18098 | auto &TM = getTargetMachine(); |
18099 | if (!TM.Options.GuaranteedTailCallOpt && DisableSCO) |
18100 | return false; |
18101 | |
18102 | // Can't tail call a function called indirectly, or if it has variadic args. |
18103 | const Function *Callee = CI->getCalledFunction(); |
18104 | if (!Callee || Callee->isVarArg()) |
18105 | return false; |
18106 | |
18107 | // Make sure the callee and caller calling conventions are eligible for tco. |
18108 | const Function *Caller = CI->getParent()->getParent(); |
  if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
                                           CI->getCallingConv()))
18111 | return false; |
18112 | |
18113 | // If the function is local then we have a good chance at tail-calling it |
  return getTargetMachine().shouldAssumeDSOLocal(Callee);
18115 | } |
18116 | |
18117 | bool PPCTargetLowering:: |
18118 | isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const { |
  const Value *Mask = AndI.getOperand(1);
18120 | // If the mask is suitable for andi. or andis. we should sink the and. |
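  // For example, a mask of 0x00FF fits the andi. immediate, and a mask of
  // 0x00FF0000 (low 16 bits clear) fits andis.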
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
18122 | // Can't handle constants wider than 64-bits. |
18123 | if (CI->getBitWidth() > 64) |
18124 | return false; |
18125 | int64_t ConstVal = CI->getZExtValue(); |
    return isUInt<16>(ConstVal) ||
           (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
18128 | } |
18129 | |
18130 | // For non-constant masks, we can always use the record-form and. |
18131 | return true; |
18132 | } |
18133 | |
/// getAddrModeForFlags - Based on the set of address flags, select the
/// optimal instruction format to match by.
18136 | PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const { |
18137 | // This is not a node we should be handling here. |
18138 | if (Flags == PPC::MOF_None) |
18139 | return PPC::AM_None; |
18140 | // Unaligned D-Forms are tried first, followed by the aligned D-Forms. |
  for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
    if ((Flags & FlagSet) == FlagSet)
      return PPC::AM_DForm;
  for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
    if ((Flags & FlagSet) == FlagSet)
      return PPC::AM_DSForm;
  for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
    if ((Flags & FlagSet) == FlagSet)
      return PPC::AM_DQForm;
  for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
    if ((Flags & FlagSet) == FlagSet)
      return PPC::AM_PrefixDForm;
18153 | // If no other forms are selected, return an X-Form as it is the most |
18154 | // general addressing mode. |
18155 | return PPC::AM_XForm; |
18156 | } |
18157 | |
18158 | /// Set alignment flags based on whether or not the Frame Index is aligned. |
18159 | /// Utilized when computing flags for address computation when selecting |
18160 | /// load and store instructions. |
18161 | static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, |
18162 | SelectionDAG &DAG) { |
18163 | bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR)); |
  FrameIndexSDNode *FI =
      dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
18165 | if (!FI) |
18166 | return; |
18167 | const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); |
  unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
18169 | // If this is (add $FI, $S16Imm), the alignment flags are already set |
18170 | // based on the immediate. We just need to clear the alignment flags |
18171 | // if the FI alignment is weaker. |
18172 | if ((FrameIndexAlign % 4) != 0) |
18173 | FlagSet &= ~PPC::MOF_RPlusSImm16Mult4; |
18174 | if ((FrameIndexAlign % 16) != 0) |
18175 | FlagSet &= ~PPC::MOF_RPlusSImm16Mult16; |
18176 | // If the address is a plain FrameIndex, set alignment flags based on |
18177 | // FI alignment. |
18178 | if (!IsAdd) { |
18179 | if ((FrameIndexAlign % 4) == 0) |
18180 | FlagSet |= PPC::MOF_RPlusSImm16Mult4; |
18181 | if ((FrameIndexAlign % 16) == 0) |
18182 | FlagSet |= PPC::MOF_RPlusSImm16Mult16; |
18183 | } |
18184 | } |
18185 | |
18186 | /// Given a node, compute flags that are used for address computation when |
18187 | /// selecting load and store instructions. The flags computed are stored in |
/// FlagSet. This function takes into account whether the node is a constant,
/// an ADD or an OR, or something else, and computes the address flags
/// accordingly.
18190 | static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, |
18191 | SelectionDAG &DAG) { |
18192 | // Set the alignment flags for the node depending on if the node is |
18193 | // 4-byte or 16-byte aligned. |
18194 | auto SetAlignFlagsForImm = [&](uint64_t Imm) { |
18195 | if ((Imm & 0x3) == 0) |
18196 | FlagSet |= PPC::MOF_RPlusSImm16Mult4; |
18197 | if ((Imm & 0xf) == 0) |
18198 | FlagSet |= PPC::MOF_RPlusSImm16Mult16; |
18199 | }; |
18200 | |
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
18202 | // All 32-bit constants can be computed as LIS + Disp. |
18203 | const APInt &ConstImm = CN->getAPIntValue(); |
    if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
18205 | FlagSet |= PPC::MOF_AddrIsSImm32; |
18206 | SetAlignFlagsForImm(ConstImm.getZExtValue()); |
18207 | setAlignFlagsForFI(N, FlagSet, DAG); |
18208 | } |
    if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
18210 | FlagSet |= PPC::MOF_RPlusSImm34; |
18211 | else // Let constant materialization handle large constants. |
18212 | FlagSet |= PPC::MOF_NotAddNorCst; |
18213 | } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) { |
18214 | // This address can be represented as an addition of: |
18215 | // - Register + Imm16 (possibly a multiple of 4/16) |
18216 | // - Register + Imm34 |
18217 | // - Register + PPCISD::Lo |
18218 | // - Register + Register |
18219 | // In any case, we won't have to match this as Base + Zero. |
    SDValue RHS = N.getOperand(1);
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
      const APInt &ConstImm = CN->getAPIntValue();
      if (ConstImm.isSignedIntN(16)) {
18224 | FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates. |
18225 | SetAlignFlagsForImm(ConstImm.getZExtValue()); |
18226 | setAlignFlagsForFI(N, FlagSet, DAG); |
18227 | } |
      if (ConstImm.isSignedIntN(34))
18229 | FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates. |
18230 | else |
18231 | FlagSet |= PPC::MOF_RPlusR; // Register. |
    } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
18233 | FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo. |
18234 | else |
18235 | FlagSet |= PPC::MOF_RPlusR; |
18236 | } else { // The address computation is not a constant or an addition. |
18237 | setAlignFlagsForFI(N, FlagSet, DAG); |
18238 | FlagSet |= PPC::MOF_NotAddNorCst; |
18239 | } |
18240 | } |
18241 | |
18242 | static bool isPCRelNode(SDValue N) { |
18243 | return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR || |
18244 | isValidPCRelNode<ConstantPoolSDNode>(N) || |
18245 | isValidPCRelNode<GlobalAddressSDNode>(N) || |
18246 | isValidPCRelNode<JumpTableSDNode>(N) || |
18247 | isValidPCRelNode<BlockAddressSDNode>(N)); |
18248 | } |
18249 | |
/// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
18251 | /// the address flags of the load/store instruction that is to be matched. |
18252 | unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N, |
18253 | SelectionDAG &DAG) const { |
18254 | unsigned FlagSet = PPC::MOF_None; |
18255 | |
18256 | // Compute subtarget flags. |
18257 | if (!Subtarget.hasP9Vector()) |
18258 | FlagSet |= PPC::MOF_SubtargetBeforeP9; |
18259 | else { |
18260 | FlagSet |= PPC::MOF_SubtargetP9; |
18261 | if (Subtarget.hasPrefixInstrs()) |
18262 | FlagSet |= PPC::MOF_SubtargetP10; |
18263 | } |
18264 | if (Subtarget.hasSPE()) |
18265 | FlagSet |= PPC::MOF_SubtargetSPE; |
18266 | |
18267 | // Check if we have a PCRel node and return early. |
18268 | if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N)) |
18269 | return FlagSet; |
18270 | |
18271 | // If the node is the paired load/store intrinsics, compute flags for |
18272 | // address computation and return early. |
18273 | unsigned ParentOp = Parent->getOpcode(); |
18274 | if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) || |
18275 | (ParentOp == ISD::INTRINSIC_VOID))) { |
    unsigned ID = Parent->getConstantOperandVal(1);
18277 | if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) { |
18278 | SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp) |
18279 | ? Parent->getOperand(2) |
18280 | : Parent->getOperand(3); |
      computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
18282 | FlagSet |= PPC::MOF_Vector; |
18283 | return FlagSet; |
18284 | } |
18285 | } |
18286 | |
18287 | // Mark this as something we don't want to handle here if it is atomic |
18288 | // or pre-increment instruction. |
  if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
18290 | if (LSB->isIndexed()) |
18291 | return PPC::MOF_None; |
18292 | |
18293 | // Compute in-memory type flags. This is based on if there are scalars, |
18294 | // floats or vectors. |
  const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
  assert(MN && "Parent should be a MemSDNode!");
18297 | EVT MemVT = MN->getMemoryVT(); |
18298 | unsigned Size = MemVT.getSizeInBits(); |
18299 | if (MemVT.isScalarInteger()) { |
    assert(Size <= 128 &&
           "Not expecting scalar integers larger than 16 bytes!");
18302 | if (Size < 32) |
18303 | FlagSet |= PPC::MOF_SubWordInt; |
18304 | else if (Size == 32) |
18305 | FlagSet |= PPC::MOF_WordInt; |
18306 | else |
18307 | FlagSet |= PPC::MOF_DoubleWordInt; |
18308 | } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors. |
18309 | if (Size == 128) |
18310 | FlagSet |= PPC::MOF_Vector; |
18311 | else if (Size == 256) { |
      assert(Subtarget.pairedVectorMemops() &&
             "256-bit vectors are only available when paired vector memops is "
             "enabled!");
18315 | FlagSet |= PPC::MOF_Vector; |
18316 | } else |
18317 | llvm_unreachable("Not expecting illegal vectors!" ); |
18318 | } else { // Floating point type: can be scalar, f128 or vector types. |
18319 | if (Size == 32 || Size == 64) |
18320 | FlagSet |= PPC::MOF_ScalarFloat; |
18321 | else if (MemVT == MVT::f128 || MemVT.isVector()) |
18322 | FlagSet |= PPC::MOF_Vector; |
18323 | else |
18324 | llvm_unreachable("Not expecting illegal scalar floats!" ); |
18325 | } |
18326 | |
18327 | // Compute flags for address computation. |
18328 | computeFlagsForAddressComputation(N, FlagSet, DAG); |
18329 | |
18330 | // Compute type extension flags. |
  if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
18332 | switch (LN->getExtensionType()) { |
18333 | case ISD::SEXTLOAD: |
18334 | FlagSet |= PPC::MOF_SExt; |
18335 | break; |
18336 | case ISD::EXTLOAD: |
18337 | case ISD::ZEXTLOAD: |
18338 | FlagSet |= PPC::MOF_ZExt; |
18339 | break; |
18340 | case ISD::NON_EXTLOAD: |
18341 | FlagSet |= PPC::MOF_NoExt; |
18342 | break; |
18343 | } |
18344 | } else |
18345 | FlagSet |= PPC::MOF_NoExt; |
18346 | |
18347 | // For integers, no extension is the same as zero extension. |
18348 | // We set the extension mode to zero extension so we don't have |
18349 | // to add separate entries in AddrModesMap for loads and stores. |
18350 | if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) { |
18351 | FlagSet |= PPC::MOF_ZExt; |
18352 | FlagSet &= ~PPC::MOF_NoExt; |
18353 | } |
18354 | |
18355 | // If we don't have prefixed instructions, 34-bit constants should be |
18356 | // treated as PPC::MOF_NotAddNorCst so they can match D-Forms. |
18357 | bool IsNonP1034BitConst = |
18358 | ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) & |
18359 | FlagSet) == PPC::MOF_RPlusSImm34; |
18360 | if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR && |
18361 | IsNonP1034BitConst) |
18362 | FlagSet |= PPC::MOF_NotAddNorCst; |
18363 | |
18364 | return FlagSet; |
18365 | } |
18366 | |
18367 | /// SelectForceXFormMode - Given the specified address, force it to be |
18368 | /// represented as an indexed [r+r] operation (an XForm instruction). |
18369 | PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp, |
18370 | SDValue &Base, |
18371 | SelectionDAG &DAG) const { |
18372 | |
18373 | PPC::AddrMode Mode = PPC::AM_XForm; |
18374 | int16_t ForceXFormImm = 0; |
18375 | if (provablyDisjointOr(DAG, N) && |
      !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
    Disp = N.getOperand(0);
    Base = N.getOperand(1);
18379 | return Mode; |
18380 | } |
18381 | |
18382 | // If the address is the result of an add, we will utilize the fact that the |
18383 | // address calculation includes an implicit add. However, we can reduce |
18384 | // register pressure if we do not materialize a constant just for use as the |
18385 | // index register. We only get rid of the add if it is not an add of a |
18386 | // value and a 16-bit signed constant and both have a single use. |
18387 | if (N.getOpcode() == ISD::ADD && |
      (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
       !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
    Disp = N.getOperand(0);
    Base = N.getOperand(1);
18392 | return Mode; |
18393 | } |
18394 | |
18395 | // Otherwise, use R0 as the base register. |
18396 | Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, |
18397 | N.getValueType()); |
18398 | Base = N; |
18399 | |
18400 | return Mode; |
18401 | } |
18402 | |
18403 | bool PPCTargetLowering::splitValueIntoRegisterParts( |
18404 | SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, |
18405 | unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const { |
18406 | EVT ValVT = Val.getValueType(); |
18407 | // If we are splitting a scalar integer into f64 parts (i.e. so they |
18408 | // can be placed into VFRC registers), we need to zero extend and |
18409 | // bitcast the values. This will ensure the value is placed into a |
18410 | // VSR using direct moves or stack operations as needed. |
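  // For example, an incoming i32 value V is lowered to
  // (f64 (bitcast (zext V to i64))), so the 32-bit value occupies the
  // low-order half of the 64-bit bit pattern.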
18411 | if (PartVT == MVT::f64 && |
18412 | (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) { |
18413 | Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val); |
18414 | Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val); |
18415 | Parts[0] = Val; |
18416 | return true; |
18417 | } |
18418 | return false; |
18419 | } |
18420 | |
18421 | SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op, |
18422 | SelectionDAG &DAG) const { |
18423 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
18424 | TargetLowering::CallLoweringInfo CLI(DAG); |
18425 | EVT RetVT = Op.getValueType(); |
  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  SDValue Callee =
      DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
  bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, false);
18430 | TargetLowering::ArgListTy Args; |
18431 | TargetLowering::ArgListEntry Entry; |
18432 | for (const SDValue &N : Op->op_values()) { |
18433 | EVT ArgVT = N.getValueType(); |
    Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
    Entry.Node = N;
    Entry.Ty = ArgTy;
    Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, SignExtend);
    Entry.IsZExt = !Entry.IsSExt;
    Args.push_back(Entry);
18440 | } |
18441 | |
18442 | SDValue InChain = DAG.getEntryNode(); |
18443 | SDValue TCChain = InChain; |
18444 | const Function &F = DAG.getMachineFunction().getFunction(); |
18445 | bool isTailCall = |
      TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
18447 | (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy()); |
18448 | if (isTailCall) |
18449 | InChain = TCChain; |
18450 | CLI.setDebugLoc(SDLoc(Op)) |
18451 | .setChain(InChain) |
      .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
18453 | .setTailCall(isTailCall) |
18454 | .setSExtResult(SignExtend) |
18455 | .setZExtResult(!SignExtend) |
18456 | .setIsPostTypeLegalization(true); |
18457 | return TLI.LowerCallTo(CLI).first; |
18458 | } |
18459 | |
18460 | SDValue PPCTargetLowering::lowerLibCallBasedOnType( |
18461 | const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op, |
18462 | SelectionDAG &DAG) const { |
18463 | if (Op.getValueType() == MVT::f32) |
    return lowerToLibCall(LibCallFloatName, Op, DAG);
18465 | |
18466 | if (Op.getValueType() == MVT::f64) |
    return lowerToLibCall(LibCallDoubleName, Op, DAG);
18468 | |
18469 | return SDValue(); |
18470 | } |
18471 | |
18472 | bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const { |
18473 | SDNodeFlags Flags = Op.getNode()->getFlags(); |
18474 | return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() && |
18475 | Flags.hasNoNaNs() && Flags.hasNoInfs(); |
18476 | } |
18477 | |
18478 | bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const { |
18479 | return Op.getNode()->getFlags().hasApproximateFuncs(); |
18480 | } |
18481 | |
18482 | bool PPCTargetLowering::isScalarMASSConversionEnabled() const { |
18483 | return getTargetMachine().Options.PPCGenScalarMASSEntries; |
18484 | } |
18485 | |
18486 | SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName, |
18487 | const char *LibCallFloatName, |
18488 | const char *LibCallDoubleNameFinite, |
18489 | const char *LibCallFloatNameFinite, |
18490 | SDValue Op, |
18491 | SelectionDAG &DAG) const { |
18492 | if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op)) |
18493 | return SDValue(); |
18494 | |
18495 | if (!isLowringToMASSFiniteSafe(Op)) |
18496 | return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op, |
18497 | DAG); |
18498 | |
  return lowerLibCallBasedOnType(LibCallFloatNameFinite,
                                 LibCallDoubleNameFinite, Op, DAG);
18501 | } |
18502 | |
18503 | SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const { |
  return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
                          "__xl_powf_finite", Op, DAG);
}

SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
  return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
                          "__xl_sinf_finite", Op, DAG);
}

SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
  return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
                          "__xl_cosf_finite", Op, DAG);
}

SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
  return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
                          "__xl_logf_finite", Op, DAG);
}

SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
  return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
                          "__xl_log10f_finite", Op, DAG);
}

SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
  return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
                          "__xl_expf_finite", Op, DAG);
18531 | } |
18532 | |
18533 | // If we happen to match to an aligned D-Form, check if the Frame Index is |
18534 | // adequately aligned. If it is not, reset the mode to match to X-Form. |
18535 | static void setXFormForUnalignedFI(SDValue N, unsigned Flags, |
18536 | PPC::AddrMode &Mode) { |
  if (!isa<FrameIndexSDNode>(N))
18538 | return; |
18539 | if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) || |
18540 | (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16))) |
18541 | Mode = PPC::AM_XForm; |
18542 | } |
18543 | |
/// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
18545 | /// compute the address flags of the node, get the optimal address mode based |
18546 | /// on the flags, and set the Base and Disp based on the address mode. |
18547 | PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent, |
18548 | SDValue N, SDValue &Disp, |
18549 | SDValue &Base, |
18550 | SelectionDAG &DAG, |
18551 | MaybeAlign Align) const { |
18552 | SDLoc DL(Parent); |
18553 | |
18554 | // Compute the address flags. |
18555 | unsigned Flags = computeMOFlags(Parent, N, DAG); |
18556 | |
18557 | // Get the optimal address mode based on the Flags. |
18558 | PPC::AddrMode Mode = getAddrModeForFlags(Flags); |
18559 | |
18560 | // If the address mode is DS-Form or DQ-Form, check if the FI is aligned. |
18561 | // Select an X-Form load if it is not. |
18562 | setXFormForUnalignedFI(N, Flags, Mode); |
18563 | |
18564 | // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node. |
18565 | if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) { |
18566 | assert(Subtarget.isUsingPCRelativeCalls() && |
18567 | "Must be using PC-Relative calls when a valid PC-Relative node is " |
18568 | "present!" ); |
18569 | Mode = PPC::AM_PCRel; |
18570 | } |
18571 | |
18572 | // Set Base and Disp accordingly depending on the address mode. |
18573 | switch (Mode) { |
18574 | case PPC::AM_DForm: |
18575 | case PPC::AM_DSForm: |
18576 | case PPC::AM_DQForm: { |
18577 | // This is a register plus a 16-bit immediate. The base will be the |
18578 | // register and the displacement will be the immediate unless it |
18579 | // isn't sufficiently aligned. |
18580 | if (Flags & PPC::MOF_RPlusSImm16) { |
      SDValue Op0 = N.getOperand(0);
      SDValue Op1 = N.getOperand(1);
      int16_t Imm = Op1->getAsZExtVal();
      if (!Align || isAligned(*Align, Imm)) {
        Disp = DAG.getTargetConstant(Imm, DL, N.getValueType());
        Base = Op0;
        if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18590 | } |
18591 | break; |
18592 | } |
18593 | } |
18594 | // This is a register plus the @lo relocation. The base is the register |
18595 | // and the displacement is the global address. |
18596 | else if (Flags & PPC::MOF_RPlusLo) { |
      Disp = N.getOperand(1).getOperand(0); // The global address.
18598 | assert(Disp.getOpcode() == ISD::TargetGlobalAddress || |
18599 | Disp.getOpcode() == ISD::TargetGlobalTLSAddress || |
18600 | Disp.getOpcode() == ISD::TargetConstantPool || |
18601 | Disp.getOpcode() == ISD::TargetJumpTable); |
      Base = N.getOperand(0);
18603 | break; |
18604 | } |
18605 | // This is a constant address at most 32 bits. The base will be |
18606 | // zero or load-immediate-shifted and the displacement will be |
18607 | // the low 16 bits of the address. |
18608 | else if (Flags & PPC::MOF_AddrIsSImm32) { |
      auto *CN = cast<ConstantSDNode>(N);
      EVT CNType = CN->getValueType(0);
18611 | uint64_t CNImm = CN->getZExtValue(); |
18612 | // If this address fits entirely in a 16-bit sext immediate field, codegen |
18613 | // this as "d, 0". |
18614 | int16_t Imm; |
      if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
        Disp = DAG.getTargetConstant(Imm, DL, CNType);
18617 | Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, |
18618 | CNType); |
18619 | break; |
18620 | } |
18621 | // Handle 32-bit sext immediate with LIS + Addr mode. |
18622 | if ((CNType == MVT::i32 || isInt<32>(CNImm)) && |
18623 | (!Align || isAligned(*Align, CNImm))) { |
18624 | int32_t Addr = (int32_t)CNImm; |
18625 | // Otherwise, break this down into LIS + Disp. |
18626 | Disp = DAG.getTargetConstant((int16_t)Addr, DL, MVT::i32); |
18627 | Base = |
18628 | DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32); |
18629 | uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8; |
        Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
18631 | break; |
18632 | } |
18633 | } |
18634 | // Otherwise, the PPC:MOF_NotAdd flag is set. Load/Store is Non-foldable. |
    Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
      Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18639 | } else |
18640 | Base = N; |
18641 | break; |
18642 | } |
18643 | case PPC::AM_PrefixDForm: { |
18644 | int64_t Imm34 = 0; |
18645 | unsigned Opcode = N.getOpcode(); |
18646 | if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) && |
        (isIntS34Immediate(N.getOperand(1), Imm34))) {
      // N is an Add/OR Node, and its operand is a 34-bit signed immediate.
      Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      else
        Base = N.getOperand(0);
    } else if (isIntS34Immediate(N, Imm34)) {
      // The address is a 34-bit signed immediate.
      Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18657 | Base = DAG.getRegister(PPC::ZERO8, N.getValueType()); |
18658 | } |
18659 | break; |
18660 | } |
18661 | case PPC::AM_PCRel: { |
18662 | // When selecting PC-Relative instructions, "Base" is not utilized as |
18663 | // we select the address as [PC+imm]. |
18664 | Disp = N; |
18665 | break; |
18666 | } |
18667 | case PPC::AM_None: |
18668 | break; |
18669 | default: { // By default, X-Form is always available to be selected. |
18670 | // When a frame index is not aligned, we also match by XForm. |
    FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
    Base = FI ? N : N.getOperand(1);
18673 | Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, |
18674 | N.getValueType()) |
18675 | : N.getOperand(0); |
18676 | break; |
18677 | } |
18678 | } |
18679 | return Mode; |
18680 | } |
18681 | |
18682 | CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC, |
18683 | bool Return, |
18684 | bool IsVarArg) const { |
18685 | switch (CC) { |
18686 | case CallingConv::Cold: |
18687 | return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF); |
18688 | default: |
18689 | return CC_PPC64_ELF; |
18690 | } |
18691 | } |
18692 | |
18693 | bool PPCTargetLowering::shouldInlineQuadwordAtomics() const { |
18694 | return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics(); |
18695 | } |

TargetLowering::AtomicExpansionKind
PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (shouldInlineQuadwordAtomics() && Size == 128)
    return AtomicExpansionKind::MaskedIntrinsic;

  switch (AI->getOperation()) {
  case AtomicRMWInst::UIncWrap:
  case AtomicRMWInst::UDecWrap:
    return AtomicExpansionKind::CmpXChg;
  default:
    return TargetLowering::shouldExpandAtomicRMWInIR(AI);
  }

  llvm_unreachable("unreachable atomicrmw operation");
}
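
// Illustrative example (not from the original source): with quadword atomics
// available, IR such as
//   %r = atomicrmw add ptr %p, i128 %v monotonic
// takes the MaskedIntrinsic path above and is rewritten by
// emitMaskedAtomicRMWIntrinsic below into a call to
// llvm.ppc.atomicrmw.add.i128 operating on split 64-bit halves.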

TargetLowering::AtomicExpansionKind
PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
  unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
  if (shouldInlineQuadwordAtomics() && Size == 128)
    return AtomicExpansionKind::MaskedIntrinsic;
  return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
}

static Intrinsic::ID
getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Xchg:
    return Intrinsic::ppc_atomicrmw_xchg_i128;
  case AtomicRMWInst::Add:
    return Intrinsic::ppc_atomicrmw_add_i128;
  case AtomicRMWInst::Sub:
    return Intrinsic::ppc_atomicrmw_sub_i128;
  case AtomicRMWInst::And:
    return Intrinsic::ppc_atomicrmw_and_i128;
  case AtomicRMWInst::Or:
    return Intrinsic::ppc_atomicrmw_or_i128;
  case AtomicRMWInst::Xor:
    return Intrinsic::ppc_atomicrmw_xor_i128;
  case AtomicRMWInst::Nand:
    return Intrinsic::ppc_atomicrmw_nand_i128;
  }
}

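// The IR emitted below is roughly (illustrative sketch, not from the
// original source):
//   %incr_lo = trunc i128 %incr to i64
//   %incr_hi = trunc i128 (lshr i128 %incr, 64) to i64
//   %lohi = call { i64, i64 } @llvm.ppc.atomicrmw.<op>.i128(
//               ptr %aligned_addr, i64 %incr_lo, i64 %incr_hi)
//   %val64 = or i128 (zext %lo to i128), (shl (zext %hi to i128), 64)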
Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  Type *ValTy = Incr->getType();
  assert(ValTy->getPrimitiveSizeInBits() == 128);
  Function *RMW = Intrinsic::getDeclaration(
      M, getIntrinsicForAtomicRMWBinOp128(AI->getOperation()));
  Type *Int64Ty = Type::getInt64Ty(M->getContext());
  Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
  Value *IncrHi =
      Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
  Value *LoHi = Builder.CreateCall(RMW, {AlignedAddr, IncrLo, IncrHi});
  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
  return Builder.CreateOr(
      Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
}

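// Same splitting scheme as the RMW case (illustrative sketch, not from the
// original source): the compare and new values are each split into 64-bit
// halves, llvm.ppc.cmpxchg.i128 is called between the leading and trailing
// fences, and the { i64, i64 } result is recombined into an i128.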
Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  Type *ValTy = CmpVal->getType();
  assert(ValTy->getPrimitiveSizeInBits() == 128);
  Function *IntCmpXchg =
      Intrinsic::getDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
  Type *Int64Ty = Type::getInt64Ty(M->getContext());
  Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
  Value *CmpHi =
      Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
  Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
  Value *NewHi =
      Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
  emitLeadingFence(Builder, CI, Ord);
  Value *LoHi =
      Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
  emitTrailingFence(Builder, CI, Ord);
  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
  return Builder.CreateOr(
      Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
}
