PPCISelDAGToDAG.cpp source code [llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp]

1	//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file defines a pattern matching instruction selector for PowerPC,
10	// converting from a legalized dag to a PPC dag.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "MCTargetDesc/PPCMCTargetDesc.h"
15	#include "MCTargetDesc/PPCPredicates.h"
16	#include "PPC.h"
17	#include "PPCISelLowering.h"
18	#include "PPCMachineFunctionInfo.h"
19	#include "PPCSubtarget.h"
20	#include "PPCTargetMachine.h"
21	#include "llvm/ADT/APInt.h"
22	#include "llvm/ADT/APSInt.h"
23	#include "llvm/ADT/DenseMap.h"
24	#include "llvm/ADT/STLExtras.h"
25	#include "llvm/ADT/SmallPtrSet.h"
26	#include "llvm/ADT/SmallVector.h"
27	#include "llvm/ADT/Statistic.h"
28	#include "llvm/Analysis/BranchProbabilityInfo.h"
29	#include "llvm/CodeGen/FunctionLoweringInfo.h"
30	#include "llvm/CodeGen/ISDOpcodes.h"
31	#include "llvm/CodeGen/MachineBasicBlock.h"
32	#include "llvm/CodeGen/MachineFrameInfo.h"
33	#include "llvm/CodeGen/MachineFunction.h"
34	#include "llvm/CodeGen/MachineInstrBuilder.h"
35	#include "llvm/CodeGen/MachineRegisterInfo.h"
36	#include "llvm/CodeGen/SelectionDAG.h"
37	#include "llvm/CodeGen/SelectionDAGISel.h"
38	#include "llvm/CodeGen/SelectionDAGNodes.h"
39	#include "llvm/CodeGen/TargetInstrInfo.h"
40	#include "llvm/CodeGen/TargetRegisterInfo.h"
41	#include "llvm/CodeGen/ValueTypes.h"
42	#include "llvm/CodeGenTypes/MachineValueType.h"
43	#include "llvm/IR/BasicBlock.h"
44	#include "llvm/IR/DebugLoc.h"
45	#include "llvm/IR/Function.h"
46	#include "llvm/IR/GlobalValue.h"
47	#include "llvm/IR/InlineAsm.h"
48	#include "llvm/IR/InstrTypes.h"
49	#include "llvm/IR/IntrinsicsPowerPC.h"
50	#include "llvm/IR/Module.h"
51	#include "llvm/Support/Casting.h"
52	#include "llvm/Support/CodeGen.h"
53	#include "llvm/Support/CommandLine.h"
54	#include "llvm/Support/Compiler.h"
55	#include "llvm/Support/Debug.h"
56	#include "llvm/Support/ErrorHandling.h"
57	#include "llvm/Support/KnownBits.h"
58	#include "llvm/Support/MathExtras.h"
59	#include "llvm/Support/raw_ostream.h"
60	#include <algorithm>
61	#include <cassert>
62	#include <cstdint>
63	#include <iterator>
64	#include <limits>
65	#include <memory>
66	#include <new>
67	#include <tuple>
68	#include <utility>
69
70	using namespace llvm;
71
72	#define DEBUG_TYPE "ppc-isel"
73	#define PASS_NAME "PowerPC DAG->DAG Pattern Instruction Selection"
74
// Counters declared through the STATISTIC macro. Each entry pairs a counter
// name with its human-readable description string. DEBUG_TYPE is defined
// above, before these declarations.
STATISTIC(NumSextSetcc,
          "Number of (sext(setcc)) nodes expanded into GPR sequence.");
STATISTIC(NumZextSetcc,
          "Number of (zext(setcc)) nodes expanded into GPR sequence.");
STATISTIC(SignExtensionsAdded,
          "Number of sign extensions for compare inputs added.");
STATISTIC(ZeroExtensionsAdded,
          "Number of zero extensions for compare inputs added.");
STATISTIC(NumLogicOpsOnComparison,
          "Number of logical ops on i1 values calculated in GPR.");
STATISTIC(OmittedForNonExtendUses,
          "Number of compares not eliminated as they have non-extending uses.");
STATISTIC(NumP9Setb,
          "Number of compares lowered to setb.");
89
90	// FIXME: Remove this once the bug has been fixed!
91	cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
92	cl::desc ("expose the ANDI glue bug on PPC"), cl::Hidden);
93
94	static cl::opt<bool>
95	UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(Val: true),
96	cl::desc ("use aggressive ppc isel for bit permutations"),
97	cl::Hidden);
98	static cl::opt<bool> BPermRewriterNoMasking(
99	"ppc-bit-perm-rewriter-stress-rotates",
100	cl::desc ("stress rotate selection in aggressive ppc isel for "
101	"bit permutations"),
102	cl::Hidden);
103
104	static cl::opt<bool> EnableBranchHint(
105	"ppc-use-branch-hint", cl::init(Val: true),
106	cl::desc ("Enable static hinting of branches on ppc"),
107	cl::Hidden);
108
109	static cl::opt<bool> EnableTLSOpt(
110	"ppc-tls-opt", cl::init(Val: true),
111	cl::desc ("Enable tls optimization peephole"),
112	cl::Hidden);
113
114	enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64,
115	ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32,
116	ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 };
117
118	static cl::opt<ICmpInGPRType> CmpInGPR(
119	"ppc-gpr-icmps", cl::Hidden, cl::init(Val: ICGPR_All),
120	cl::desc ("Specify the types of comparisons to emit GPR-only code for."),
121	cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
122	clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
123	clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
124	clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
125	clEnumValN(ICGPR_NonExtIn, "nonextin",
126	"Only comparisons where inputs don't need [sz]ext."),
127	clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
128	clEnumValN(ICGPR_ZextI32, "zexti32",
129	"Only i32 comparisons with zext result."),
130	clEnumValN(ICGPR_ZextI64, "zexti64",
131	"Only i64 comparisons with zext result."),
132	clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
133	clEnumValN(ICGPR_SextI32, "sexti32",
134	"Only i32 comparisons with sext result."),
135	clEnumValN(ICGPR_SextI64, "sexti64",
136	"Only i64 comparisons with sext result.")));
137	namespace {
138
139	//===--------------------------------------------------------------------===//
140	/// PPCDAGToDAGISel - PPC specific code to select PPC machine
141	/// instructions for SelectionDAG operations.
142	///
143	class PPCDAGToDAGISel : public SelectionDAGISel {
144	const PPCTargetMachine &TM;
145	const PPCSubtarget Subtarget = nullptr*;
146	const PPCTargetLowering PPCLowering = nullptr*;
147	unsigned GlobalBaseReg = `0`;
148
149	public:
150	static char ID;
151
152	PPCDAGToDAGISel() = delete;
153
154	explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOptLevel OptLevel)
155	: SelectionDAGISel (ID, tm, OptLevel), TM(tm) {}
156
157	bool runOnMachineFunction(MachineFunction &MF) override {
158	// Make sure we re-emit a set of the global base reg if necessary
159	GlobalBaseReg = `0`;
160	Subtarget = &MF.getSubtarget<PPCSubtarget>();
161	PPCLowering = Subtarget->getTargetLowering();
162	if (Subtarget->hasROPProtect()) {
163	// Create a place on the stack for the ROP Protection Hash.
164	// The ROP Protection Hash will always be 8 bytes and aligned to 8
165	// bytes.
166	MachineFrameInfo &MFI = MF.getFrameInfo();
167	PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
168	const int Result = MFI.CreateStackObject(Size: `8`, Alignment: Align (`8`), isSpillSlot: false);
169	FI->setROPProtectionHashSaveIndex(Result);
170	}
171	SelectionDAGISel::runOnMachineFunction(MF);
172
173	return true;
174	}
175
176	void PreprocessISelDAG() override;
177	void PostprocessISelDAG() override;
178
179	/// getI16Imm - Return a target constant with the specified value, of type
180	/// i16.
181	inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
182	return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
183	}
184
185	/// getI32Imm - Return a target constant with the specified value, of type
186	/// i32.
187	inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
188	return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
189	}
190
191	/// getI64Imm - Return a target constant with the specified value, of type
192	/// i64.
193	inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
194	return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
195	}
196
197	/// getSmallIPtrImm - Return a target constant of pointer type.
198	inline SDValue getSmallIPtrImm(uint64_t Imm, const SDLoc &dl) {
199	return CurDAG->getTargetConstant(
200	Val: Imm, DL: dl, VT: PPCLowering->getPointerTy(DL: CurDAG->getDataLayout()));
201	}
202
203	/// isRotateAndMask - Returns true if Mask and Shift can be folded into a
204	/// rotate and mask opcode and mask operation.
205	static bool isRotateAndMask(SDNode N, unsigned* Mask, bool isShiftMask,
206	unsigned &SH, unsigned &MB, unsigned &ME);
207
208	/// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
209	/// base register. Return the virtual register that holds this value.
210	SDNode *getGlobalBaseReg();
211
212	void selectFrameIndex(SDNode SN, SDNode N, uint64_t Offset = `0`);
213
214	// Select - Convert the specified operand from a target-independent to a
215	// target-specific node if it hasn't already been changed.
216	void Select(SDNode *N) override;
217
218	bool tryBitfieldInsert(SDNode *N);
219	bool tryBitPermutation(SDNode *N);
220	bool tryIntCompareInGPR(SDNode *N);
221
222	// tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
223	// an X-Form load instruction with the offset being a relocation coming from
224	// the PPCISD::ADD_TLS.
225	bool tryTLSXFormLoad(LoadSDNode *N);
226	// tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
227	// an X-Form store instruction with the offset being a relocation coming from
228	// the PPCISD::ADD_TLS.
229	bool tryTLSXFormStore(StoreSDNode *N);
230	/// SelectCC - Select a comparison of the specified values with the
231	/// specified condition code, returning the CR# of the expression.
232	SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
233	const SDLoc &dl, SDValue Chain = SDValue ());
234
235	/// SelectAddrImmOffs - Return true if the operand is valid for a preinc
236	/// immediate field. Note that the operand at this point is already the
237	/// result of a prior SelectAddressRegImm call.
238	bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
239	if (N.getOpcode() == ISD::TargetConstant \|\|
240	N.getOpcode() == ISD::TargetGlobalAddress) {
241	Out = N;
242	return true;
243	}
244
245	return false;
246	}
247
248	/// SelectDSForm - Returns true if address N can be represented by the
249	/// addressing mode of DSForm instructions (a base register, plus a signed
250	/// 16-bit displacement that is a multiple of 4.
251	bool SelectDSForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
252	return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, DAG&: *CurDAG,
253	Align: Align (`4`)) == PPC::AM_DSForm;
254	}
255
256	/// SelectDQForm - Returns true if address N can be represented by the
257	/// addressing mode of DQForm instructions (a base register, plus a signed
258	/// 16-bit displacement that is a multiple of 16.
259	bool SelectDQForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
260	return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, DAG&: *CurDAG,
261	Align: Align (`16`)) == PPC::AM_DQForm;
262	}
263
264	/// SelectDForm - Returns true if address N can be represented by
265	/// the addressing mode of DForm instructions (a base register, plus a
266	/// signed 16-bit immediate.
267	bool SelectDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
268	return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, DAG&: *CurDAG,
269	Align: std::nullopt) == PPC::AM_DForm;
270	}
271
272	/// SelectPCRelForm - Returns true if address N can be represented by
273	/// PC-Relative addressing mode.
274	bool SelectPCRelForm(SDNode *Parent, SDValue N, SDValue &Disp,
275	SDValue &Base) {
276	return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, DAG&: *CurDAG,
277	Align: std::nullopt) == PPC::AM_PCRel;
278	}
279
280	/// SelectPDForm - Returns true if address N can be represented by Prefixed
281	/// DForm addressing mode (a base register, plus a signed 34-bit immediate.
282	bool SelectPDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
283	return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, DAG&: *CurDAG,
284	Align: std::nullopt) ==
285	PPC::AM_PrefixDForm;
286	}
287
288	/// SelectXForm - Returns true if address N can be represented by the
289	/// addressing mode of XForm instructions (an indexed [r+r] operation).
290	bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
291	return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, DAG&: *CurDAG,
292	Align: std::nullopt) == PPC::AM_XForm;
293	}
294
295	/// SelectForceXForm - Given the specified address, force it to be
296	/// represented as an indexed [r+r] operation (an XForm instruction).
297	bool SelectForceXForm(SDNode *Parent, SDValue N, SDValue &Disp,
298	SDValue &Base) {
299	return PPCLowering->SelectForceXFormMode(N, Disp, Base, DAG&: *CurDAG) ==
300	PPC::AM_XForm;
301	}
302
303	/// SelectAddrIdx - Given the specified address, check to see if it can be
304	/// represented as an indexed [r+r] operation.
305	/// This is for xform instructions whose associated displacement form is D.
306	/// The last parameter \p 0 means associated D form has no requirment for 16
307	/// bit signed displacement.
308	/// Returns false if it can be represented by [r+imm], which are preferred.
309	bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
310	return PPCLowering->SelectAddressRegReg(N, Base, Index, DAG&: *CurDAG,
311	EncodingAlignment: std::nullopt);
312	}
313
314	/// SelectAddrIdx4 - Given the specified address, check to see if it can be
315	/// represented as an indexed [r+r] operation.
316	/// This is for xform instructions whose associated displacement form is DS.
317	/// The last parameter \p 4 means associated DS form 16 bit signed
318	/// displacement must be a multiple of 4.
319	/// Returns false if it can be represented by [r+imm], which are preferred.
320	bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
321	return PPCLowering->SelectAddressRegReg(N, Base, Index, DAG&: *CurDAG,
322	EncodingAlignment: Align (`4`));
323	}
324
325	/// SelectAddrIdx16 - Given the specified address, check to see if it can be
326	/// represented as an indexed [r+r] operation.
327	/// This is for xform instructions whose associated displacement form is DQ.
328	/// The last parameter \p 16 means associated DQ form 16 bit signed
329	/// displacement must be a multiple of 16.
330	/// Returns false if it can be represented by [r+imm], which are preferred.
331	bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
332	return PPCLowering->SelectAddressRegReg(N, Base, Index, DAG&: *CurDAG,
333	EncodingAlignment: Align (`16`));
334	}
335
336	/// SelectAddrIdxOnly - Given the specified address, force it to be
337	/// represented as an indexed [r+r] operation.
338	bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
339	return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, DAG&: *CurDAG);
340	}
341
342	/// SelectAddrImm - Returns true if the address N can be represented by
343	/// a base register plus a signed 16-bit displacement [r+imm].
344	/// The last parameter \p 0 means D form has no requirment for 16 bit signed
345	/// displacement.
346	bool SelectAddrImm(SDValue N, SDValue &Disp,
347	SDValue &Base) {
348	return PPCLowering->SelectAddressRegImm(N, Disp, Base, DAG&: *CurDAG,
349	EncodingAlignment: std::nullopt);
350	}
351
352	/// SelectAddrImmX4 - Returns true if the address N can be represented by
353	/// a base register plus a signed 16-bit displacement that is a multiple of
354	/// 4 (last parameter). Suitable for use by STD and friends.
355	bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
356	return PPCLowering->SelectAddressRegImm(N, Disp, Base, DAG&: *CurDAG, EncodingAlignment: Align (`4`));
357	}
358
359	/// SelectAddrImmX16 - Returns true if the address N can be represented by
360	/// a base register plus a signed 16-bit displacement that is a multiple of
361	/// 16(last parameter). Suitable for use by STXV and friends.
362	bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
363	return PPCLowering->SelectAddressRegImm(N, Disp, Base, DAG&: *CurDAG,
364	EncodingAlignment: Align (`16`));
365	}
366
367	/// SelectAddrImmX34 - Returns true if the address N can be represented by
368	/// a base register plus a signed 34-bit displacement. Suitable for use by
369	/// PSTXVP and friends.
370	bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) {
371	return PPCLowering->SelectAddressRegImm34(N, Disp, Base, DAG&: *CurDAG);
372	}
373
374	// Select an address into a single register.
375	bool SelectAddr(SDValue N, SDValue &Base) {
376	Base = N;
377	return true;
378	}
379
380	bool SelectAddrPCRel(SDValue N, SDValue &Base) {
381	return PPCLowering->SelectAddressPCRel(N, Base);
382	}
383
384	/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
385	/// inline asm expressions. It is always correct to compute the value into
386	/// a register. The case of adding a (possibly relocatable) constant to a
387	/// register can be improved, but it is wrong to substitute Reg+Reg for
388	/// Reg in an asm, because the load or store opcode would have to change.
389	bool SelectInlineAsmMemoryOperand(const SDValue &Op,
390	InlineAsm::ConstraintCode ConstraintID,
391	std::vector<SDValue> &OutOps) override {
392	switch(ConstraintID) {
393	default:
394	errs() << "ConstraintID: "
395	<< InlineAsm::getMemConstraintName(C: ConstraintID) << "\n";
396	llvm_unreachable("Unexpected asm memory constraint");
397	case InlineAsm::ConstraintCode::es:
398	case InlineAsm::ConstraintCode::m:
399	case InlineAsm::ConstraintCode::o:
400	case InlineAsm::ConstraintCode::Q:
401	case InlineAsm::ConstraintCode::Z:
402	case InlineAsm::ConstraintCode::Zy:
403	// We need to make sure that this one operand does not end up in r0
404	// (because we might end up lowering this as 0(%op)).
405	const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
406	const TargetRegisterClass TRC = TRI->getPointerRegClass(MF: MF, /Kind=/`1`);
407	SDLoc dl(Op);
408	SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
409	SDValue NewOp =
410	SDValue (CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS,
411	dl, VT: Op.getValueType(),
412	Op1: Op, Op2: RC), `0`);
413
414	OutOps.push_back(NewOp);
415	return false;
416	}
417	return true;
418	}
419
420	// Include the pieces autogenerated from the target description.
421	#include "PPCGenDAGISel.inc"
422
423	private:
424	bool trySETCC(SDNode *N);
425	bool tryFoldSWTestBRCC(SDNode *N);
426	bool trySelectLoopCountIntrinsic(SDNode *N);
427	bool tryAsSingleRLDICL(SDNode *N);
428	bool tryAsSingleRLDCL(SDNode *N);
429	bool tryAsSingleRLDICR(SDNode *N);
430	bool tryAsSingleRLWINM(SDNode *N);
431	bool tryAsSingleRLWINM8(SDNode *N);
432	bool tryAsSingleRLWIMI(SDNode *N);
433	bool tryAsPairOfRLDICL(SDNode *N);
434	bool tryAsSingleRLDIMI(SDNode *N);
435
436	void PeepholePPC64();
437	void PeepholePPC64ZExt();
438	void PeepholeCROps();
439
440	SDValue combineToCMPB(SDNode *N);
441	void foldBoolExts(SDValue &Res, SDNode *&N);
442
443	bool AllUsersSelectZero(SDNode *N);
444	void SwapAllSelectUsers(SDNode *N);
445
446	bool isOffsetMultipleOf(SDNode N, unsigned* Val) const;
447	void transferMemOperands(SDNode N, SDNode Result);
448	};
449
450	} // end anonymous namespace
451
452	char PPCDAGToDAGISel::ID = `0`;
453
454	INITIALIZE_PASS(PPCDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)
455
456	/// getGlobalBaseReg - Output the instructions required to put the
457	/// base address to use for accessing globals into a register.
458	///
459	SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
460	if (!GlobalBaseReg) {
461	const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
462	// Insert the set of GlobalBaseReg into the first MBB of the function
463	MachineBasicBlock &FirstMBB = MF->front();
464	MachineBasicBlock::iterator MBBI = FirstMBB.begin();
465	const Module *M = MF->getFunction().getParent();
466	DebugLoc dl;
467
468	if (PPCLowering->getPointerTy(DL: CurDAG->getDataLayout()) == MVT::i32) {
469	if (Subtarget->isTargetELF()) {
470	GlobalBaseReg = PPC::R30;
471	if (!Subtarget->isSecurePlt() &&
472	M->getPICLevel() == PICLevel::SmallPIC) {
473	BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::Opcode: MoveGOTtoLR));
474	BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::Opcode: MFLR), GlobalBaseReg);
475	MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
476	} else {
477	BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::Opcode: MovePCtoLR));
478	BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::Opcode: MFLR), GlobalBaseReg);
479	Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
480	BuildMI(FirstMBB, MBBI, dl,
481	TII.get(PPC::Opcode: UpdateGBR), GlobalBaseReg)
482	.addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
483	MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
484	}
485	} else {
486	GlobalBaseReg =
487	RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
488	BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::Opcode: MovePCtoLR));
489	BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::Opcode: MFLR), GlobalBaseReg);
490	}
491	} else {
492	// We must ensure that this sequence is dominated by the prologue.
493	// FIXME: This is a bit of a big hammer since we don't get the benefits
494	// of shrink-wrapping whenever we emit this instruction. Considering
495	// this is used in any function where we emit a jump table, this may be
496	// a significant limitation. We should consider inserting this in the
497	// block where it is used and then commoning this sequence up if it
498	// appears in multiple places.
499	// Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
500	// MovePCtoLR8.
501	MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);
502	GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
503	BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::Opcode: MovePCtoLR8));
504	BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::Opcode: MFLR8), GlobalBaseReg);
505	}
506	}
507	return CurDAG->getRegister(Reg: GlobalBaseReg,
508	VT: PPCLowering->getPointerTy(DL: CurDAG->getDataLayout()))
509	.getNode();
510	}
511
512	// Check if a SDValue has the toc-data attribute.
513	static bool hasTocDataAttr(SDValue Val) {
514	GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val);
515	if (!GA)
516	return false;
517
518	const GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(Val: GA->getGlobal());
519	if (!GV)
520	return false;
521
522	if (!GV->hasAttribute(Kind: "toc-data"))
523	return false;
524	return true;
525	}
526
527	static CodeModel::Model getCodeModel(const PPCSubtarget &Subtarget,
528	const TargetMachine &TM,
529	const SDNode *Node) {
530	// If there isn't an attribute to override the module code model
531	// this will be the effective code model.
532	CodeModel::Model ModuleModel = TM.getCodeModel();
533
534	GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val: Node->getOperand(Num: `0`));
535	if (!GA)
536	return ModuleModel;
537
538	const GlobalValue *GV = GA->getGlobal();
539	if (!GV)
540	return ModuleModel;
541
542	return Subtarget.getCodeModel(TM, GV);
543	}
544
545	/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
546	/// operand. If so Imm will receive the 32-bit value.
547	static bool isInt32Immediate(SDNode N, unsigned* &Imm) {
548	if (N->getOpcode() == ISD::Constant && N->getValueType(ResNo: `0`) == MVT::i32) {
549	Imm = N->getAsZExtVal();
550	return true;
551	}
552	return false;
553	}
554
555	/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
556	/// operand. If so Imm will receive the 64-bit value.
557	static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
558	if (N->getOpcode() == ISD::Constant && N->getValueType(ResNo: `0`) == MVT::i64) {
559	Imm = N->getAsZExtVal();
560	return true;
561	}
562	return false;
563	}
564
565	// isInt32Immediate - This method tests to see if a constant operand.
566	// If so Imm will receive the 32 bit value.
567	static bool isInt32Immediate(SDValue N, unsigned &Imm) {
568	return isInt32Immediate(N: N.getNode(), Imm);
569	}
570
571	/// isInt64Immediate - This method tests to see if the value is a 64-bit
572	/// constant operand. If so Imm will receive the 64-bit value.
573	static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
574	return isInt64Immediate(N: N.getNode(), Imm);
575	}
576
577	static unsigned getBranchHint(unsigned PCC,
578	const FunctionLoweringInfo &FuncInfo,
579	const SDValue &DestMBB) {
580	assert(isa<BasicBlockSDNode>(DestMBB));
581
582	if (!FuncInfo.BPI) return PPC::BR_NO_HINT;
583
584	const BasicBlock *BB = FuncInfo.MBB->getBasicBlock();
585	const Instruction *BBTerm = BB->getTerminator();
586
587	if (BBTerm->getNumSuccessors() != `2`) return PPC::BR_NO_HINT;
588
589	const BasicBlock *TBB = BBTerm->getSuccessor(Idx: `0`);
590	const BasicBlock *FBB = BBTerm->getSuccessor(Idx: `1`);
591
592	auto TProb = FuncInfo.BPI->getEdgeProbability(Src: BB, Dst: TBB);
593	auto FProb = FuncInfo.BPI->getEdgeProbability(Src: BB, Dst: FBB);
594
595	// We only want to handle cases which are easy to predict at static time, e.g.
596	// C++ throw statement, that is very likely not taken, or calling never
597	// returned function, e.g. stdlib exit(). So we set Threshold to filter
598	// unwanted cases.
599	//
600	// Below is LLVM branch weight table, we only want to handle case 1, 2
601	//
602	// Case Taken:Nontaken Example
603	// 1. Unreachable 1048575:1 C++ throw, stdlib exit(),
604	// 2. Invoke-terminating 1:1048575
605	// 3. Coldblock 4:64 __builtin_expect
606	// 4. Loop Branch 124:4 For loop
607	// 5. PH/ZH/FPH 20:12
608	const uint32_t Threshold = `10000`;
609
610	if (std::max(a: TProb, b: FProb) / Threshold < std::min(a: TProb, b: FProb))
611	return PPC::BR_NO_HINT;
612
613	LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName()
614	<< "::" << BB->getName() << "'\n"
615	<< " -> " << TBB->getName() << ": " << TProb << "\n"
616	<< " -> " << FBB->getName() << ": " << FProb << "\n");
617
618	const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(Val: DestMBB);
619
620	// If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
621	// because we want 'TProb' stands for 'branch probability' to Dest BasicBlock
622	if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
623	std::swap(a&: TProb, b&: FProb);
624
625	return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
626	}
627
628	// isOpcWithIntImmediate - This method tests to see if the node is a specific
629	// opcode and that it has a immediate integer right operand.
630	// If so Imm will receive the 32 bit value.
631	static bool isOpcWithIntImmediate(SDNode N, unsigned* Opc, unsigned& Imm) {
632	return N->getOpcode() == Opc
633	&& isInt32Immediate(N: N->getOperand(Num: `1`).getNode(), Imm);
634	}
635
636	void PPCDAGToDAGISel::selectFrameIndex(SDNode SN, SDNode N, uint64_t Offset) {
637	SDLoc dl(SN);
638	int FI = cast<FrameIndexSDNode>(Val: N)->getIndex();
639	SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT: N->getValueType(ResNo: `0`));
640	unsigned Opc = N->getValueType(ResNo: `0`) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
641	if (SN->hasOneUse())
642	CurDAG->SelectNodeTo(N: SN, MachineOpc: Opc, VT: N->getValueType(ResNo: `0`), Op1: TFI,
643	Op2: getSmallIPtrImm(Imm: Offset, dl));
644	else
645	ReplaceNode(F: SN, T: CurDAG->getMachineNode(Opcode: Opc, dl, VT: N->getValueType(ResNo: `0`), Op1: TFI,
646	Op2: getSmallIPtrImm(Imm: Offset, dl)));
647	}
648
649	bool PPCDAGToDAGISel::isRotateAndMask(SDNode N, unsigned* Mask,
650	bool isShiftMask, unsigned &SH,
651	unsigned &MB, unsigned &ME) {
652	// Don't even go down this path for i64, since different logic will be
653	// necessary for rldicl/rldicr/rldimi.
654	if (N->getValueType(ResNo: `0`) != MVT::i32)
655	return false;
656
657	unsigned Shift = `32`;
658	unsigned Indeterminant = ~`0`; // bit mask marking indeterminant results
659	unsigned Opcode = N->getOpcode();
660	if (N->getNumOperands() != `2` \|\|
661	!isInt32Immediate(N: N->getOperand(Num: `1`).getNode(), Imm&: Shift) \|\| (Shift > `31`))
662	return false;
663
664	if (Opcode == ISD::SHL) {
665	// apply shift left to mask if it comes first
666	if (isShiftMask) Mask = Mask << Shift;
667	// determine which bits are made indeterminant by shift
668	Indeterminant = ~(`0xFFFFFFFFu` << Shift);
669	} else if (Opcode == ISD::SRL) {
670	// apply shift right to mask if it comes first
671	if (isShiftMask) Mask = Mask >> Shift;
672	// determine which bits are made indeterminant by shift
673	Indeterminant = ~(`0xFFFFFFFFu` >> Shift);
674	// adjust for the left rotate
675	Shift = `32` - Shift;
676	} else if (Opcode == ISD::ROTL) {
677	Indeterminant = `0`;
678	} else {
679	return false;
680	}
681
682	// if the mask doesn't intersect any Indeterminant bits
683	if (Mask && !(Mask & Indeterminant)) {
684	SH = Shift & `31`;
685	// make sure the mask is still a mask (wrap arounds may not be)
686	return isRunOfOnes(Val: Mask, MB, ME);
687	}
688	return false;
689	}
690
691	// isThreadPointerAcquisitionNode - Check if the operands of an ADD_TLS
692	// instruction use the thread pointer.
693	static bool isThreadPointerAcquisitionNode(SDValue Base, SelectionDAG *CurDAG) {
694	assert(
695	Base.getOpcode() == PPCISD::ADD_TLS &&
696	"Only expecting the ADD_TLS instruction to acquire the thread pointer!");
697	const PPCSubtarget &Subtarget =
698	CurDAG->getMachineFunction().getSubtarget<PPCSubtarget>();
699	SDValue ADDTLSOp1 = Base.getOperand(i: `0`);
700	unsigned ADDTLSOp1Opcode = ADDTLSOp1.getOpcode();
701
702	// Account for when ADD_TLS is used for the initial-exec TLS model on Linux.
703	//
704	// Although ADD_TLS does not explicitly use the thread pointer
705	// register when LD_GOT_TPREL_L is one of it's operands, the LD_GOT_TPREL_L
706	// instruction will have a relocation specifier, @got@tprel, that is used to
707	// generate a GOT entry. The linker replaces this entry with an offset for a
708	// for a thread local variable, which will be relative to the thread pointer.
709	if (ADDTLSOp1Opcode == PPCISD::LD_GOT_TPREL_L)
710	return true;
711	// When using PC-Relative instructions for initial-exec, a MAT_PCREL_ADDR
712	// node is produced instead to represent the aforementioned situation.
713	LoadSDNode *LD = dyn_cast<LoadSDNode>(Val&: ADDTLSOp1);
714	if (LD && LD->getBasePtr().getOpcode() == PPCISD::MAT_PCREL_ADDR)
715	return true;
716
717	// A GET_TPOINTER PPCISD node (only produced on AIX 32-bit mode) as an operand
718	// to ADD_TLS represents a call to .__get_tpointer to get the thread pointer,
719	// later returning it into R3.
720	if (ADDTLSOp1Opcode == PPCISD::GET_TPOINTER)
721	return true;
722
723	// The ADD_TLS note is explicitly acquiring the thread pointer (X13/R13).
724	RegisterSDNode *AddFirstOpReg =
725	dyn_cast_or_null<RegisterSDNode>(Val: ADDTLSOp1.getNode());
726	if (AddFirstOpReg &&
727	AddFirstOpReg->getReg() == Subtarget.getThreadPointerRegister())
728	return true;
729
730	return false;
731	}
732
733	// canOptimizeTLSDFormToXForm - Optimize TLS accesses when an ADD_TLS
734	// instruction is present. An ADD_TLS instruction, followed by a D-Form memory
735	// operation, can be optimized to use an X-Form load or store, allowing the
736	// ADD_TLS node to be removed completely.
737	static bool canOptimizeTLSDFormToXForm(SelectionDAG *CurDAG, SDValue Base) {
738
739	// Do not do this transformation at -O0.
740	if (CurDAG->getTarget().getOptLevel() == CodeGenOptLevel::None)
741	return false;
742
743	// In order to perform this optimization inside tryTLSXForm[Load\|Store],
744	// Base is expected to be an ADD_TLS node.
745	if (Base.getOpcode() != PPCISD::ADD_TLS)
746	return false;
747	for (auto *ADDTLSUse : Base.getNode()->uses()) {
748	// The optimization to convert the D-Form load/store into its X-Form
749	// counterpart should only occur if the source value offset of the load/
750	// store is 0. This also means that The offset should always be undefined.
751	if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: ADDTLSUse)) {
752	if (LD->getSrcValueOffset() != `0` \|\| !LD->getOffset().isUndef())
753	return false;
754	} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Val: ADDTLSUse)) {
755	if (ST->getSrcValueOffset() != `0` \|\| !ST->getOffset().isUndef())
756	return false;
757	} else // Don't optimize if there are ADD_TLS users that aren't load/stores.
758	return false;
759	}
760
761	if (Base.getOperand(i: `1`).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
762	return false;
763
764	// Does the ADD_TLS node of the load/store use the thread pointer?
765	// If the thread pointer is not used as one of the operands of ADD_TLS,
766	// then this optimization is not valid.
767	return isThreadPointerAcquisitionNode(Base, CurDAG);
768	}
769
770	bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
771	SDValue Base = ST->getBasePtr();
772	if (!canOptimizeTLSDFormToXForm(CurDAG, Base))
773	return false;
774
775	SDLoc dl(ST);
776	EVT MemVT = ST->getMemoryVT();
777	EVT RegVT = ST->getValue().getValueType();
778
779	unsigned Opcode;
780	switch (MemVT.getSimpleVT().SimpleTy) {
781	default:
782	return false;
783	case MVT::i8: {
784	Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
785	break;
786	}
787	case MVT::i16: {
788	Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
789	break;
790	}
791	case MVT::i32: {
792	Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
793	break;
794	}
795	case MVT::i64: {
796	Opcode = PPC::STDXTLS;
797	break;
798	}
799	case MVT::f32: {
800	Opcode = PPC::STFSXTLS;
801	break;
802	}
803	case MVT::f64: {
804	Opcode = PPC::STFDXTLS;
805	break;
806	}
807	}
808	SDValue Chain = ST->getChain();
809	SDVTList VTs = ST->getVTList();
810	SDValue Ops[] = {ST->getValue(), Base.getOperand(i: `0`), Base.getOperand(i: `1`),
811	Chain};
812	SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
813	transferMemOperands(N: ST, Result: MN);
814	ReplaceNode(F: ST, T: MN);
815	return true;
816	}
817
818	bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
819	SDValue Base = LD->getBasePtr();
820	if (!canOptimizeTLSDFormToXForm(CurDAG, Base))
821	return false;
822
823	SDLoc dl(LD);
824	EVT MemVT = LD->getMemoryVT();
825	EVT RegVT = LD->getValueType(ResNo: `0`);
826	bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
827	unsigned Opcode;
828	switch (MemVT.getSimpleVT().SimpleTy) {
829	default:
830	return false;
831	case MVT::i8: {
832	Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
833	break;
834	}
835	case MVT::i16: {
836	if (RegVT == MVT::i32)
837	Opcode = isSExt ? PPC::LHAXTLS_32 : PPC::LHZXTLS_32;
838	else
839	Opcode = isSExt ? PPC::LHAXTLS : PPC::LHZXTLS;
840	break;
841	}
842	case MVT::i32: {
843	if (RegVT == MVT::i32)
844	Opcode = isSExt ? PPC::LWAXTLS_32 : PPC::LWZXTLS_32;
845	else
846	Opcode = isSExt ? PPC::LWAXTLS : PPC::LWZXTLS;
847	break;
848	}
849	case MVT::i64: {
850	Opcode = PPC::LDXTLS;
851	break;
852	}
853	case MVT::f32: {
854	Opcode = PPC::LFSXTLS;
855	break;
856	}
857	case MVT::f64: {
858	Opcode = PPC::LFDXTLS;
859	break;
860	}
861	}
862	SDValue Chain = LD->getChain();
863	SDVTList VTs = LD->getVTList();
864	SDValue Ops[] = {Base.getOperand(i: `0`), Base.getOperand(i: `1`), Chain};
865	SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
866	transferMemOperands(N: LD, Result: MN);
867	ReplaceNode(F: LD, T: MN);
868	return true;
869	}
870
871	/// Turn an or of two masked values into the rotate left word immediate then
872	/// mask insert (rlwimi) instruction.
873	bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
874	SDValue Op0 = N->getOperand(Num: `0`);
875	SDValue Op1 = N->getOperand(Num: `1`);
876	SDLoc dl(N);
877
878	KnownBits LKnown = CurDAG->computeKnownBits(Op: Op0);
879	KnownBits RKnown = CurDAG->computeKnownBits(Op: Op1);
880
881	unsigned TargetMask = LKnown.Zero.getZExtValue();
882	unsigned InsertMask = RKnown.Zero.getZExtValue();
883
884	if ((TargetMask \| InsertMask) == `0xFFFFFFFF`) {
885	unsigned Op0Opc = Op0.getOpcode();
886	unsigned Op1Opc = Op1.getOpcode();
887	unsigned Value, SH = `0`;
888	TargetMask = ~TargetMask;
889	InsertMask = ~InsertMask;
890
891	// If the LHS has a foldable shift and the RHS does not, then swap it to the
892	// RHS so that we can fold the shift into the insert.
893	if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
894	if (Op0.getOperand(i: `0`).getOpcode() == ISD::SHL \|\|
895	Op0.getOperand(i: `0`).getOpcode() == ISD::SRL) {
896	if (Op1.getOperand(i: `0`).getOpcode() != ISD::SHL &&
897	Op1.getOperand(i: `0`).getOpcode() != ISD::SRL) {
898	std::swap(a&: Op0, b&: Op1);
899	std::swap(a&: Op0Opc, b&: Op1Opc);
900	std::swap(a&: TargetMask, b&: InsertMask);
901	}
902	}
903	} else if (Op0Opc == ISD::SHL \|\| Op0Opc == ISD::SRL) {
904	if (Op1Opc == ISD::AND && Op1.getOperand(i: `0`).getOpcode() != ISD::SHL &&
905	Op1.getOperand(i: `0`).getOpcode() != ISD::SRL) {
906	std::swap(a&: Op0, b&: Op1);
907	std::swap(a&: Op0Opc, b&: Op1Opc);
908	std::swap(a&: TargetMask, b&: InsertMask);
909	}
910	}
911
912	unsigned MB, ME;
913	if (isRunOfOnes(Val: InsertMask, MB, ME)) {
914	if ((Op1Opc == ISD::SHL \|\| Op1Opc == ISD::SRL) &&
915	isInt32Immediate(N: Op1.getOperand(i: `1`), Imm&: Value)) {
916	Op1 = Op1.getOperand(i: `0`);
917	SH = (Op1Opc == ISD::SHL) ? Value : `32` - Value;
918	}
919	if (Op1Opc == ISD::AND) {
920	// The AND mask might not be a constant, and we need to make sure that
921	// if we're going to fold the masking with the insert, all bits not
922	// know to be zero in the mask are known to be one.
923	KnownBits MKnown = CurDAG->computeKnownBits(Op: Op1.getOperand(i: `1`));
924	bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
925
926	unsigned SHOpc = Op1.getOperand(i: `0`).getOpcode();
927	if ((SHOpc == ISD::SHL \|\| SHOpc == ISD::SRL) && CanFoldMask &&
928	isInt32Immediate(N: Op1.getOperand(i: `0`).getOperand(i: `1`), Imm&: Value)) {
929	// Note that Value must be in range here (less than 32) because
930	// otherwise there would not be any bits set in InsertMask.
931	Op1 = Op1.getOperand(i: `0`).getOperand(i: `0`);
932	SH = (SHOpc == ISD::SHL) ? Value : `32` - Value;
933	}
934	}
935
936	SH &= `31`;
937	SDValue Ops[] = { Op0, Op1, getI32Imm(Imm: SH, dl), getI32Imm(Imm: MB, dl),
938	getI32Imm(Imm: ME, dl) };
939	ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
940	return true;
941	}
942	}
943	return false;
944	}
945
946	static unsigned allUsesTruncate(SelectionDAG CurDAG, SDNode N) {
947	unsigned MaxTruncation = `0`;
948	// Cannot use range-based for loop here as we need the actual use (i.e. we
949	// need the operand number corresponding to the use). A range-based for
950	// will unbox the use and provide an SDNode.*
951	for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end();
952	Use != UseEnd; ++Use) {
953	unsigned Opc =
954	Use ->isMachineOpcode() ? Use ->getMachineOpcode() : Use ->getOpcode();
955	switch (Opc) {
956	default: return `0`;
957	case ISD::TRUNCATE:
958	if (Use ->isMachineOpcode())
959	return `0`;
960	MaxTruncation =
961	std::max(a: MaxTruncation, b: (unsigned)Use ->getValueType(ResNo: `0`).getSizeInBits());
962	continue;
963	case ISD::STORE: {
964	if (Use ->isMachineOpcode())
965	return `0`;
966	StoreSDNode STN = cast<StoreSDNode>(Val: Use);
967	unsigned MemVTSize = STN->getMemoryVT().getSizeInBits();
968	if (MemVTSize == `64` \|\| Use.getOperandNo() != `0`)
969	return `0`;
970	MaxTruncation = std::max(a: MaxTruncation, b: MemVTSize);
971	continue;
972	}
973	case PPC::STW8:
974	case PPC::STWX8:
975	case PPC::STWU8:
976	case PPC::STWUX8:
977	if (Use.getOperandNo() != `0`)
978	return `0`;
979	MaxTruncation = std::max(a: MaxTruncation, b: `32u`);
980	continue;
981	case PPC::STH8:
982	case PPC::STHX8:
983	case PPC::STHU8:
984	case PPC::STHUX8:
985	if (Use.getOperandNo() != `0`)
986	return `0`;
987	MaxTruncation = std::max(a: MaxTruncation, b: `16u`);
988	continue;
989	case PPC::STB8:
990	case PPC::STBX8:
991	case PPC::STBU8:
992	case PPC::STBUX8:
993	if (Use.getOperandNo() != `0`)
994	return `0`;
995	MaxTruncation = std::max(a: MaxTruncation, b: `8u`);
996	continue;
997	}
998	}
999	return MaxTruncation;
1000	}
1001
1002	// For any 32 < Num < 64, check if the Imm contains at least Num consecutive
1003	// zeros and return the number of bits by the left of these consecutive zeros.
1004	static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) {
1005	unsigned HiTZ = llvm::countr_zero<uint32_t>(Val: Hi_32(Value: Imm));
1006	unsigned LoLZ = llvm::countl_zero<uint32_t>(Val: Lo_32(Value: Imm));
1007	if ((HiTZ + LoLZ) >= Num)
1008	return (`32` + HiTZ);
1009	return `0`;
1010	}
1011
1012	// Direct materialization of 64-bit constants by enumerated patterns.
1013	static SDNode selectI64ImmDirect(SelectionDAG CurDAG, const SDLoc &dl,
1014	uint64_t Imm, unsigned &InstCnt) {
1015	unsigned TZ = llvm::countr_zero<uint64_t>(Val: Imm);
1016	unsigned LZ = llvm::countl_zero<uint64_t>(Val: Imm);
1017	unsigned TO = llvm::countr_one<uint64_t>(Value: Imm);
1018	unsigned LO = llvm::countl_one<uint64_t>(Value: Imm);
1019	unsigned Hi32 = Hi_32(Value: Imm);
1020	unsigned Lo32 = Lo_32(Value: Imm);
1021	SDNode Result = nullptr*;
1022	unsigned Shift = `0`;
1023
1024	auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1025	return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1026	};
1027
1028	// Following patterns use 1 instructions to materialize the Imm.
1029	InstCnt = `1`;
1030	// 1-1) Patterns : {zeros}{15-bit valve}
1031	// {ones}{15-bit valve}
1032	if (isInt<`16`>(x: Imm)) {
1033	SDValue SDImm = CurDAG->getTargetConstant(Imm, dl, MVT::i64);
1034	return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
1035	}
1036	// 1-2) Patterns : {zeros}{15-bit valve}{16 zeros}
1037	// {ones}{15-bit valve}{16 zeros}
1038	if (TZ > `15` && (LZ > `32` \|\| LO > `32`))
1039	return CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
1040	getI32Imm((Imm >> `16`) & `0xffff`));
1041
1042	// Following patterns use 2 instructions to materialize the Imm.
1043	InstCnt = `2`;
1044	assert(LZ < `64` && "Unexpected leading zeros here.");
1045	// Count of ones follwing the leading zeros.
1046	unsigned FO = llvm::countl_one<uint64_t>(Value: Imm << LZ);
1047	// 2-1) Patterns : {zeros}{31-bit value}
1048	// {ones}{31-bit value}
1049	if (isInt<`32`>(x: Imm)) {
1050	uint64_t ImmHi16 = (Imm >> `16`) & `0xffff`;
1051	unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1052	Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1053	return CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, `0`),
1054	getI32Imm(Imm & `0xffff`));
1055	}
1056	// 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros}
1057	// {zeros}{15-bit value}{zeros}
1058	// {zeros}{ones}{15-bit value}
1059	// {ones}{15-bit value}{zeros}
1060	// We can take advantage of LI's sign-extension semantics to generate leading
1061	// ones, and then use RLDIC to mask off the ones in both sides after rotation.
1062	if ((LZ + FO + TZ) > `48`) {
1063	Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1064	getI32Imm((Imm >> TZ) & `0xffff`));
1065	return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, `0`),
1066	getI32Imm(TZ), getI32Imm(LZ));
1067	}
1068	// 2-3) Pattern : {zeros}{15-bit value}{ones}
1069	// Shift right the Imm by (48 - LZ) bits to construct a negtive 16 bits value,
1070	// therefore we can take advantage of LI's sign-extension semantics, and then
1071	// mask them off after rotation.
1072	//
1073	// +--LZ--\|\|-15-bit-\|\|--TO--+ +-------------\|--16-bit--+
1074	// \|00000001bbbbbbbbb1111111\| -> \|00000000000001bbbbbbbbb1\|
1075	// +------------------------+ +------------------------+
1076	// 63 0 63 0
1077	// Imm (Imm >> (48 - LZ) & 0xffff)
1078	// +----sext-----\|--16-bit--+ +clear-\|-----------------+
1079	// \|11111111111111bbbbbbbbb1\| -> \|00000001bbbbbbbbb1111111\|
1080	// +------------------------+ +------------------------+
1081	// 63 0 63 0
1082	// LI8: sext many leading zeros RLDICL: rotate left (48 - LZ), clear left LZ
1083	if ((LZ + TO) > `48`) {
1084	// Since the immediates with (LZ > 32) have been handled by previous
1085	// patterns, here we have (LZ <= 32) to make sure we will not shift right
1086	// the Imm by a negative value.
1087	assert(LZ <= `32` && "Unexpected shift value.");
1088	Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1089	getI32Imm((Imm >> (`48` - LZ) & `0xffff`)));
1090	return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, `0`),
1091	getI32Imm(`48` - LZ), getI32Imm(LZ));
1092	}
1093	// 2-4) Patterns : {zeros}{ones}{15-bit value}{ones}
1094	// {ones}{15-bit value}{ones}
1095	// We can take advantage of LI's sign-extension semantics to generate leading
1096	// ones, and then use RLDICL to mask off the ones in left sides (if required)
1097	// after rotation.
1098	//
1099	// +-LZ-FO\|\|-15-bit-\|\|--TO--+ +-------------\|--16-bit--+
1100	// \|00011110bbbbbbbbb1111111\| -> \|000000000011110bbbbbbbbb\|
1101	// +------------------------+ +------------------------+
1102	// 63 0 63 0
1103	// Imm (Imm >> TO) & 0xffff
1104	// +----sext-----\|--16-bit--+ +LZ\|---------------------+
1105	// \|111111111111110bbbbbbbbb\| -> \|00011110bbbbbbbbb1111111\|
1106	// +------------------------+ +------------------------+
1107	// 63 0 63 0
1108	// LI8: sext many leading zeros RLDICL: rotate left TO, clear left LZ
1109	if ((LZ + FO + TO) > `48`) {
1110	Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1111	getI32Imm((Imm >> TO) & `0xffff`));
1112	return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, `0`),
1113	getI32Imm(TO), getI32Imm(LZ));
1114	}
1115	// 2-5) Pattern : {32 zeros}{**}{0}{15-bit value}
1116	// If Hi32 is zero and the Lo16(in Lo32) can be presented as a positive 16 bit
1117	// value, we can use LI for Lo16 without generating leading ones then add the
1118	// Hi16(in Lo32).
1119	if (LZ == `32` && ((Lo32 & `0x8000`) == `0`)) {
1120	Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1121	getI32Imm(Lo32 & `0xffff`));
1122	return CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, `0`),
1123	getI32Imm(Lo32 >> `16`));
1124	}
1125	// 2-6) Patterns : {***}{49 zeros}{***}
1126	// {***}{49 ones}{***}
1127	// If the Imm contains 49 consecutive zeros/ones, it means that a total of 15
1128	// bits remain on both sides. Rotate right the Imm to construct an int<16>
1129	// value, use LI for int<16> value and then use RLDICL without mask to rotate
1130	// it back.
1131	//
1132	// 1) findContiguousZerosAtLeast(Imm, 49)
1133	// +------\|--zeros-\|------+ +---ones--\|\|---15 bit--+
1134	// \|bbbbbb0000000000aaaaaa\| -> \|0000000000aaaaaabbbbbb\|
1135	// +----------------------+ +----------------------+
1136	// 63 0 63 0
1137	//
1138	// 2) findContiguousZerosAtLeast(~Imm, 49)
1139	// +------\|--ones--\|------+ +---ones--\|\|---15 bit--+
1140	// \|bbbbbb1111111111aaaaaa\| -> \|1111111111aaaaaabbbbbb\|
1141	// +----------------------+ +----------------------+
1142	// 63 0 63 0
1143	if ((Shift = findContiguousZerosAtLeast(Imm, Num: `49`)) \|\|
1144	(Shift = findContiguousZerosAtLeast(Imm: ~Imm, Num: `49`))) {
1145	uint64_t RotImm = APInt (`64`, Imm).rotr(rotateAmt: Shift).getZExtValue();
1146	Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1147	getI32Imm(RotImm & `0xffff`));
1148	return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, `0`),
1149	getI32Imm(Shift), getI32Imm(`0`));
1150	}
1151	// 2-7) Patterns : High word == Low word
1152	// This may require 2 to 3 instructions, depending on whether Lo32 can be
1153	// materialized in 1 instruction.
1154	if (Hi32 == Lo32) {
1155	// Handle the first 32 bits.
1156	uint64_t ImmHi16 = (Lo32 >> `16`) & `0xffff`;
1157	uint64_t ImmLo16 = Lo32 & `0xffff`;
1158	if (isInt<`16`>(x: Lo32))
1159	Result =
1160	CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(ImmLo16));
1161	else if (!ImmLo16)
1162	Result =
1163	CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(ImmHi16));
1164	else {
1165	InstCnt = `3`;
1166	Result =
1167	CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(ImmHi16));
1168	Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
1169	SDValue(Result, `0`), getI32Imm(ImmLo16));
1170	}
1171	// Use rldimi to insert the Low word into High word.
1172	SDValue Ops[] = {SDValue (Result, `0`), SDValue (Result, `0`), getI32Imm(`32`),
1173	getI32Imm(`0`)};
1174	return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1175	}
1176
1177	// Following patterns use 3 instructions to materialize the Imm.
1178	InstCnt = `3`;
1179	// 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros}
1180	// {zeros}{31-bit value}{zeros}
1181	// {zeros}{ones}{31-bit value}
1182	// {ones}{31-bit value}{zeros}
1183	// We can take advantage of LIS's sign-extension semantics to generate leading
1184	// ones, add the remaining bits with ORI, and then use RLDIC to mask off the
1185	// ones in both sides after rotation.
1186	if ((LZ + FO + TZ) > `32`) {
1187	uint64_t ImmHi16 = (Imm >> (TZ + `16`)) & `0xffff`;
1188	unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1189	Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1190	Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, `0`),
1191	getI32Imm((Imm >> TZ) & `0xffff`));
1192	return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, `0`),
1193	getI32Imm(TZ), getI32Imm(LZ));
1194	}
1195	// 3-2) Pattern : {zeros}{31-bit value}{ones}
1196	// Shift right the Imm by (32 - LZ) bits to construct a negative 32 bits
1197	// value, therefore we can take advantage of LIS's sign-extension semantics,
1198	// add the remaining bits with ORI, and then mask them off after rotation.
1199	// This is similar to Pattern 2-3, please refer to the diagram there.
1200	if ((LZ + TO) > `32`) {
1201	// Since the immediates with (LZ > 32) have been handled by previous
1202	// patterns, here we have (LZ <= 32) to make sure we will not shift right
1203	// the Imm by a negative value.
1204	assert(LZ <= `32` && "Unexpected shift value.");
1205	Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
1206	getI32Imm((Imm >> (`48` - LZ)) & `0xffff`));
1207	Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, `0`),
1208	getI32Imm((Imm >> (`32` - LZ)) & `0xffff`));
1209	return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, `0`),
1210	getI32Imm(`32` - LZ), getI32Imm(LZ));
1211	}
1212	// 3-3) Patterns : {zeros}{ones}{31-bit value}{ones}
1213	// {ones}{31-bit value}{ones}
1214	// We can take advantage of LIS's sign-extension semantics to generate leading
1215	// ones, add the remaining bits with ORI, and then use RLDICL to mask off the
1216	// ones in left sides (if required) after rotation.
1217	// This is similar to Pattern 2-4, please refer to the diagram there.
1218	if ((LZ + FO + TO) > `32`) {
1219	Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
1220	getI32Imm((Imm >> (TO + `16`)) & `0xffff`));
1221	Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, `0`),
1222	getI32Imm((Imm >> TO) & `0xffff`));
1223	return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, `0`),
1224	getI32Imm(TO), getI32Imm(LZ));
1225	}
1226	// 3-4) Patterns : {***}{33 zeros}{***}
1227	// {***}{33 ones}{***}
1228	// If the Imm contains 33 consecutive zeros/ones, it means that a total of 31
1229	// bits remain on both sides. Rotate right the Imm to construct an int<32>
1230	// value, use LIS + ORI for int<32> value and then use RLDICL without mask to
1231	// rotate it back.
1232	// This is similar to Pattern 2-6, please refer to the diagram there.
1233	if ((Shift = findContiguousZerosAtLeast(Imm, Num: `33`)) \|\|
1234	(Shift = findContiguousZerosAtLeast(Imm: ~Imm, Num: `33`))) {
1235	uint64_t RotImm = APInt (`64`, Imm).rotr(rotateAmt: Shift).getZExtValue();
1236	uint64_t ImmHi16 = (RotImm >> `16`) & `0xffff`;
1237	unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1238	Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1239	Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, `0`),
1240	getI32Imm(RotImm & `0xffff`));
1241	return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, `0`),
1242	getI32Imm(Shift), getI32Imm(`0`));
1243	}
1244
1245	InstCnt = `0`;
1246	return nullptr;
1247	}
1248
1249	// Try to select instructions to generate a 64 bit immediate using prefix as
1250	// well as non prefix instructions. The function will return the SDNode
1251	// to materialize that constant or it will return nullptr if it does not
1252	// find one. The variable InstCnt is set to the number of instructions that
1253	// were selected.
1254	static SDNode selectI64ImmDirectPrefix(SelectionDAG CurDAG, const SDLoc &dl,
1255	uint64_t Imm, unsigned &InstCnt) {
1256	unsigned TZ = llvm::countr_zero<uint64_t>(Val: Imm);
1257	unsigned LZ = llvm::countl_zero<uint64_t>(Val: Imm);
1258	unsigned TO = llvm::countr_one<uint64_t>(Value: Imm);
1259	unsigned FO = llvm::countl_one<uint64_t>(Value: LZ == `64` ? `0` : (Imm << LZ));
1260	unsigned Hi32 = Hi_32(Value: Imm);
1261	unsigned Lo32 = Lo_32(Value: Imm);
1262
1263	auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1264	return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1265	};
1266
1267	auto getI64Imm = [CurDAG, dl](uint64_t Imm) {
1268	return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
1269	};
1270
1271	// Following patterns use 1 instruction to materialize Imm.
1272	InstCnt = `1`;
1273
1274	// The pli instruction can materialize up to 34 bits directly.
1275	// If a constant fits within 34-bits, emit the pli instruction here directly.
1276	if (isInt<`34`>(Imm))
1277	return CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1278	CurDAG->getTargetConstant(Imm, dl, MVT::i64));
1279
1280	// Require at least two instructions.
1281	InstCnt = `2`;
1282	SDNode Result = nullptr*;
1283	// Patterns : {zeros}{ones}{33-bit value}{zeros}
1284	// {zeros}{33-bit value}{zeros}
1285	// {zeros}{ones}{33-bit value}
1286	// {ones}{33-bit value}{zeros}
1287	// We can take advantage of PLI's sign-extension semantics to generate leading
1288	// ones, and then use RLDIC to mask off the ones on both sides after rotation.
1289	if ((LZ + FO + TZ) > `30`) {
1290	APInt SignedInt34 = APInt (`34`, (Imm >> TZ) & `0x3ffffffff`);
1291	APInt Extended = SignedInt34.sext(width: `64`);
1292	Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1293	getI64Imm(*Extended.getRawData()));
1294	return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, `0`),
1295	getI32Imm(TZ), getI32Imm(LZ));
1296	}
1297	// Pattern : {zeros}{33-bit value}{ones}
1298	// Shift right the Imm by (30 - LZ) bits to construct a negative 34 bit value,
1299	// therefore we can take advantage of PLI's sign-extension semantics, and then
1300	// mask them off after rotation.
1301	//
1302	// +--LZ--\|\|-33-bit-\|\|--TO--+ +-------------\|--34-bit--+
1303	// \|00000001bbbbbbbbb1111111\| -> \|00000000000001bbbbbbbbb1\|
1304	// +------------------------+ +------------------------+
1305	// 63 0 63 0
1306	//
1307	// +----sext-----\|--34-bit--+ +clear-\|-----------------+
1308	// \|11111111111111bbbbbbbbb1\| -> \|00000001bbbbbbbbb1111111\|
1309	// +------------------------+ +------------------------+
1310	// 63 0 63 0
1311	if ((LZ + TO) > `30`) {
1312	APInt SignedInt34 = APInt (`34`, (Imm >> (`30` - LZ)) & `0x3ffffffff`);
1313	APInt Extended = SignedInt34.sext(width: `64`);
1314	Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1315	getI64Imm(*Extended.getRawData()));
1316	return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, `0`),
1317	getI32Imm(`30` - LZ), getI32Imm(LZ));
1318	}
1319	// Patterns : {zeros}{ones}{33-bit value}{ones}
1320	// {ones}{33-bit value}{ones}
1321	// Similar to LI we can take advantage of PLI's sign-extension semantics to
1322	// generate leading ones, and then use RLDICL to mask off the ones in left
1323	// sides (if required) after rotation.
1324	if ((LZ + FO + TO) > `30`) {
1325	APInt SignedInt34 = APInt (`34`, (Imm >> TO) & `0x3ffffffff`);
1326	APInt Extended = SignedInt34.sext(width: `64`);
1327	Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1328	getI64Imm(*Extended.getRawData()));
1329	return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, `0`),
1330	getI32Imm(TO), getI32Imm(LZ));
1331	}
1332	// Patterns : {***}{31 zeros}{***}
1333	// : {***}{31 ones}{***}
1334	// If Imm contains 31 consecutive zeros/ones then the remaining bit count
1335	// is 33. Rotate right the Imm to construct a int<33> value, we can use PLI
1336	// for the int<33> value and then use RLDICL without a mask to rotate it back.
1337	//
1338	// +------\|--ones--\|------+ +---ones--\|\|---33 bit--+
1339	// \|bbbbbb1111111111aaaaaa\| -> \|1111111111aaaaaabbbbbb\|
1340	// +----------------------+ +----------------------+
1341	// 63 0 63 0
1342	for (unsigned Shift = `0`; Shift < `63`; ++Shift) {
1343	uint64_t RotImm = APInt (`64`, Imm).rotr(rotateAmt: Shift).getZExtValue();
1344	if (isInt<`34`>(x: RotImm)) {
1345	Result =
1346	CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(RotImm));
1347	return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
1348	SDValue(Result, `0`), getI32Imm(Shift),
1349	getI32Imm(`0`));
1350	}
1351	}
1352
1353	// Patterns : High word == Low word
1354	// This is basically a splat of a 32 bit immediate.
1355	if (Hi32 == Lo32) {
1356	Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
1357	SDValue Ops[] = {SDValue (Result, `0`), SDValue (Result, `0`), getI32Imm(`32`),
1358	getI32Imm(`0`)};
1359	return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1360	}
1361
1362	InstCnt = `3`;
1363	// Catch-all
1364	// This pattern can form any 64 bit immediate in 3 instructions.
1365	SDNode *ResultHi =
1366	CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
1367	SDNode *ResultLo =
1368	CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Lo32));
1369	SDValue Ops[] = {SDValue (ResultLo, `0`), SDValue (ResultHi, `0`), getI32Imm(`32`),
1370	getI32Imm(`0`)};
1371	return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1372	}
1373
1374	static SDNode selectI64Imm(SelectionDAG CurDAG, const SDLoc &dl, uint64_t Imm,
1375	unsigned InstCnt = nullptr*) {
1376	unsigned InstCntDirect = `0`;
1377	// No more than 3 instructions are used if we can select the i64 immediate
1378	// directly.
1379	SDNode *Result = selectI64ImmDirect(CurDAG, dl, Imm, InstCnt&: InstCntDirect);
1380
1381	const PPCSubtarget &Subtarget =
1382	CurDAG->getMachineFunction().getSubtarget<PPCSubtarget>();
1383
1384	// If we have prefixed instructions and there is a chance we can
1385	// materialize the constant with fewer prefixed instructions than
1386	// non-prefixed, try that.
1387	if (Subtarget.hasPrefixInstrs() && InstCntDirect != `1`) {
1388	unsigned InstCntDirectP = `0`;
1389	SDNode *ResultP = selectI64ImmDirectPrefix(CurDAG, dl, Imm, InstCnt&: InstCntDirectP);
1390	// Use the prefix case in either of two cases:
1391	// 1) We have no result from the non-prefix case to use.
1392	// 2) The non-prefix case uses more instructions than the prefix case.
1393	// If the prefix and non-prefix cases use the same number of instructions
1394	// we will prefer the non-prefix case.
1395	if (ResultP && (!Result \|\| InstCntDirectP < InstCntDirect)) {
1396	if (InstCnt)
1397	*InstCnt = InstCntDirectP;
1398	return ResultP;
1399	}
1400	}
1401
1402	if (Result) {
1403	if (InstCnt)
1404	*InstCnt = InstCntDirect;
1405	return Result;
1406	}
1407	auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1408	return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1409	};
1410
1411	uint32_t Hi16OfLo32 = (Lo_32(Value: Imm) >> `16`) & `0xffff`;
1412	uint32_t Lo16OfLo32 = Lo_32(Value: Imm) & `0xffff`;
1413
1414	// Try to use 4 instructions to materialize the immediate which is "almost" a
1415	// splat of a 32 bit immediate.
1416	if (Hi16OfLo32 && Lo16OfLo32) {
1417	uint32_t Hi16OfHi32 = (Hi_32(Value: Imm) >> `16`) & `0xffff`;
1418	uint32_t Lo16OfHi32 = Hi_32(Value: Imm) & `0xffff`;
1419	bool IsSelected = false;
1420
1421	auto getSplat = [CurDAG, dl, getI32Imm](uint32_t Hi16, uint32_t Lo16) {
1422	SDNode *Result =
1423	CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi16));
1424	Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
1425	SDValue(Result, `0`), getI32Imm(Lo16));
1426	SDValue Ops[] = {SDValue (Result, `0`), SDValue (Result, `0`), getI32Imm(`32`),
1427	getI32Imm(`0`)};
1428	return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1429	};
1430
1431	if (Hi16OfHi32 == Lo16OfHi32 && Lo16OfHi32 == Lo16OfLo32) {
1432	IsSelected = true;
1433	Result = getSplat(Hi16OfLo32, Lo16OfLo32);
1434	// Modify Hi16OfHi32.
1435	SDValue Ops[] = {SDValue (Result, `0`), SDValue (Result, `0`), getI32Imm(`48`),
1436	getI32Imm(`0`)};
1437	Result = CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1438	} else if (Hi16OfHi32 == Hi16OfLo32 && Hi16OfLo32 == Lo16OfLo32) {
1439	IsSelected = true;
1440	Result = getSplat(Hi16OfHi32, Lo16OfHi32);
1441	// Modify Lo16OfLo32.
1442	SDValue Ops[] = {SDValue (Result, `0`), SDValue (Result, `0`), getI32Imm(`16`),
1443	getI32Imm(`16`), getI32Imm(`31`)};
1444	Result = CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops);
1445	} else if (Lo16OfHi32 == Lo16OfLo32 && Hi16OfLo32 == Lo16OfLo32) {
1446	IsSelected = true;
1447	Result = getSplat(Hi16OfHi32, Lo16OfHi32);
1448	// Modify Hi16OfLo32.
1449	SDValue Ops[] = {SDValue (Result, `0`), SDValue (Result, `0`), getI32Imm(`16`),
1450	getI32Imm(`0`), getI32Imm(`15`)};
1451	Result = CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops);
1452	}
1453	if (IsSelected == true) {
1454	if (InstCnt)
1455	*InstCnt = `4`;
1456	return Result;
1457	}
1458	}
1459
1460	// Handle the upper 32 bit value.
1461	Result =
1462	selectI64ImmDirect(CurDAG, dl, Imm: Imm & `0xffffffff00000000`, InstCnt&: InstCntDirect);
1463	// Add in the last bits as required.
1464	if (Hi16OfLo32) {
1465	Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
1466	SDValue(Result, `0`), getI32Imm(Hi16OfLo32));
1467	++InstCntDirect;
1468	}
1469	if (Lo16OfLo32) {
1470	Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, `0`),
1471	getI32Imm(Lo16OfLo32));
1472	++InstCntDirect;
1473	}
1474	if (InstCnt)
1475	*InstCnt = InstCntDirect;
1476	return Result;
1477	}
1478
1479	// Select a 64-bit constant.
1480	static SDNode selectI64Imm(SelectionDAG CurDAG, SDNode *N) {
1481	SDLoc dl(N);
1482
1483	// Get 64 bit value.
1484	int64_t Imm = N->getAsZExtVal();
1485	if (unsigned MinSize = allUsesTruncate(CurDAG, N)) {
1486	uint64_t SextImm = SignExtend64(X: Imm, B: MinSize);
1487	SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
1488	if (isInt<`16`>(SextImm))
1489	return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
1490	}
1491	return selectI64Imm(CurDAG, dl, Imm);
1492	}
1493
1494	namespace {
1495
1496	class BitPermutationSelector {
1497	struct ValueBit {
1498	SDValue V;
1499
1500	// The bit number in the value, using a convention where bit 0 is the
1501	// lowest-order bit.
1502	unsigned Idx;
1503
1504	// ConstZero means a bit we need to mask off.
1505	// Variable is a bit comes from an input variable.
1506	// VariableKnownToBeZero is also a bit comes from an input variable,
1507	// but it is known to be already zero. So we do not need to mask them.
1508	enum Kind {
1509	ConstZero,
1510	Variable,
1511	VariableKnownToBeZero
1512	} K;
1513
1514	ValueBit(SDValue V, unsigned I, Kind K = Variable)
1515	: V (V), Idx(I), K(K) {}
1516	ValueBit(Kind K = Variable) : Idx(UINT32_MAX), K(K) {}
1517
1518	bool isZero() const {
1519	return K == ConstZero \|\| K == VariableKnownToBeZero;
1520	}
1521
1522	bool hasValue() const {
1523	return K == Variable \|\| K == VariableKnownToBeZero;
1524	}
1525
1526	SDValue getValue() const {
1527	assert(hasValue() && "Cannot get the value of a constant bit");
1528	return V;
1529	}
1530
1531	unsigned getValueBitIndex() const {
1532	assert(hasValue() && "Cannot get the value bit index of a constant bit");
1533	return Idx;
1534	}
1535	};
1536
1537	// A bit group has the same underlying value and the same rotate factor.
1538	struct BitGroup {
1539	SDValue V;
1540	unsigned RLAmt;
1541	unsigned StartIdx, EndIdx;
1542
1543	// This rotation amount assumes that the lower 32 bits of the quantity are
1544	// replicated in the high 32 bits by the rotation operator (which is done
1545	// by rlwinm and friends in 64-bit mode).
1546	bool Repl32;
1547	// Did converting to Repl32 == true change the rotation factor? If it did,
1548	// it decreased it by 32.
1549	bool Repl32CR;
1550	// Was this group coalesced after setting Repl32 to true?
1551	bool Repl32Coalesced;
1552
1553	BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
1554	: V (V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
1555	Repl32Coalesced(false) {
1556	LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R
1557	<< " [" << S << ", " << E << "]\n");
1558	}
1559	};
1560
1561	// Information on each (Value, RLAmt) pair (like the number of groups
1562	// associated with each) used to choose the lowering method.
1563	struct ValueRotInfo {
1564	SDValue V;
1565	unsigned RLAmt = std::numeric_limits<unsigned>::max();
1566	unsigned NumGroups = `0`;
1567	unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
1568	bool Repl32 = false;
1569
1570	ValueRotInfo() = default;
1571
1572	// For sorting (in reverse order) by NumGroups, and then by
1573	// FirstGroupStartIdx.
1574	bool operator < (const ValueRotInfo &Other) const {
1575	// We need to sort so that the non-Repl32 come first because, when we're
1576	// doing masking, the Repl32 bit groups might be subsumed into the 64-bit
1577	// masking operation.
1578	if (Repl32 < Other.Repl32)
1579	return true;
1580	else if (Repl32 > Other.Repl32)
1581	return false;
1582	else if (NumGroups > Other.NumGroups)
1583	return true;
1584	else if (NumGroups < Other.NumGroups)
1585	return false;
1586	else if (RLAmt == `0` && Other.RLAmt != `0`)
1587	return true;
1588	else if (RLAmt != `0` && Other.RLAmt == `0`)
1589	return false;
1590	else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
1591	return true;
1592	return false;
1593	}
1594	};
1595
1596	using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, `64`>>;
1597	using ValueBitsMemoizer =
1598	DenseMap<SDValue, std::unique_ptr<ValueBitsMemoizedValue>>;
1599	ValueBitsMemoizer Memoizer;
1600
1601	// Return a pair of bool and a SmallVector pointer to a memoization entry.
1602	// The bool is true if something interesting was deduced, otherwise if we're
1603	// providing only a generic representation of V (or something else likewise
1604	// uninteresting for instruction selection) through the SmallVector.
1605	std::pair<bool, SmallVector<ValueBit, `64`> *> getValueBits(SDValue V,
1606	unsigned NumBits) {
1607	auto &ValueEntry = Memoizer [V];
1608	if (ValueEntry)
1609	return std::make_pair(x&: ValueEntry ->first, y: &ValueEntry ->second);
1610	ValueEntry.reset(p: new ValueBitsMemoizedValue ());
1611	bool &Interesting = ValueEntry ->first;
1612	SmallVector<ValueBit, `64`> &Bits = ValueEntry ->second;
1613	Bits.resize(N: NumBits);
1614
1615	switch (V.getOpcode()) {
1616	default: break;
1617	case ISD::ROTL:
1618	if (isa<ConstantSDNode>(Val: V.getOperand(i: `1`))) {
1619	assert(isPowerOf2_32(NumBits) && "rotl bits should be power of 2!");
1620	unsigned RotAmt = V.getConstantOperandVal(i: `1`) & (NumBits - `1`);
1621
1622	const auto &LHSBits = *getValueBits(V: V.getOperand(i: `0`), NumBits).second;
1623
1624	for (unsigned i = `0`; i < NumBits; ++i)
1625	Bits [i] = LHSBits [i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
1626
1627	return std::make_pair(x&: Interesting = true, y: &Bits);
1628	}
1629	break;
1630	case ISD::SHL:
1631	case PPCISD::SHL:
1632	if (isa<ConstantSDNode>(Val: V.getOperand(i: `1`))) {
1633	// sld takes 7 bits, slw takes 6.
1634	unsigned ShiftAmt = V.getConstantOperandVal(i: `1`) & ((NumBits << `1`) - `1`);
1635
1636	const auto &LHSBits = *getValueBits(V: V.getOperand(i: `0`), NumBits).second;
1637
1638	if (ShiftAmt >= NumBits) {
1639	for (unsigned i = `0`; i < NumBits; ++i)
1640	Bits [i] = ValueBit (ValueBit::ConstZero);
1641	} else {
1642	for (unsigned i = ShiftAmt; i < NumBits; ++i)
1643	Bits [i] = LHSBits [i - ShiftAmt];
1644	for (unsigned i = `0`; i < ShiftAmt; ++i)
1645	Bits [i] = ValueBit (ValueBit::ConstZero);
1646	}
1647
1648	return std::make_pair(x&: Interesting = true, y: &Bits);
1649	}
1650	break;
1651	case ISD::SRL:
1652	case PPCISD::SRL:
1653	if (isa<ConstantSDNode>(Val: V.getOperand(i: `1`))) {
1654	// srd takes lowest 7 bits, srw takes 6.
1655	unsigned ShiftAmt = V.getConstantOperandVal(i: `1`) & ((NumBits << `1`) - `1`);
1656
1657	const auto &LHSBits = *getValueBits(V: V.getOperand(i: `0`), NumBits).second;
1658
1659	if (ShiftAmt >= NumBits) {
1660	for (unsigned i = `0`; i < NumBits; ++i)
1661	Bits [i] = ValueBit (ValueBit::ConstZero);
1662	} else {
1663	for (unsigned i = `0`; i < NumBits - ShiftAmt; ++i)
1664	Bits [i] = LHSBits [i + ShiftAmt];
1665	for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
1666	Bits [i] = ValueBit (ValueBit::ConstZero);
1667	}
1668
1669	return std::make_pair(x&: Interesting = true, y: &Bits);
1670	}
1671	break;
1672	case ISD::AND:
1673	if (isa<ConstantSDNode>(Val: V.getOperand(i: `1`))) {
1674	uint64_t Mask = V.getConstantOperandVal(i: `1`);
1675
1676	const SmallVector<ValueBit, `64`> *LHSBits;
1677	// Mark this as interesting, only if the LHS was also interesting. This
1678	// prevents the overall procedure from matching a single immediate 'and'
1679	// (which is non-optimal because such an and might be folded with other
1680	// things if we don't select it here).
1681	std::tie(args&: Interesting, args&: LHSBits) = getValueBits(V: V.getOperand(i: `0`), NumBits);
1682
1683	for (unsigned i = `0`; i < NumBits; ++i)
1684	if (((Mask >> i) & `1`) == `1`)
1685	Bits [i] = (*LHSBits)[i];
1686	else {
1687	// AND instruction masks this bit. If the input is already zero,
1688	// we have nothing to do here. Otherwise, make the bit ConstZero.
1689	if ((*LHSBits)[i].isZero())
1690	Bits [i] = (*LHSBits)[i];
1691	else
1692	Bits [i] = ValueBit (ValueBit::ConstZero);
1693	}
1694
1695	return std::make_pair(x&: Interesting, y: &Bits);
1696	}
1697	break;
1698	case ISD::OR: {
1699	const auto &LHSBits = *getValueBits(V: V.getOperand(i: `0`), NumBits).second;
1700	const auto &RHSBits = *getValueBits(V: V.getOperand(i: `1`), NumBits).second;
1701
1702	bool AllDisjoint = true;
1703	SDValue LastVal = SDValue ();
1704	unsigned LastIdx = `0`;
1705	for (unsigned i = `0`; i < NumBits; ++i) {
1706	if (LHSBits [i].isZero() && RHSBits [i].isZero()) {
1707	// If both inputs are known to be zero and one is ConstZero and
1708	// another is VariableKnownToBeZero, we can select whichever
1709	// we like. To minimize the number of bit groups, we select
1710	// VariableKnownToBeZero if this bit is the next bit of the same
1711	// input variable from the previous bit. Otherwise, we select
1712	// ConstZero.
1713	if (LHSBits [i].hasValue() && LHSBits [i].getValue() == LastVal &&
1714	LHSBits [i].getValueBitIndex() == LastIdx + `1`)
1715	Bits [i] = LHSBits [i];
1716	else if (RHSBits [i].hasValue() && RHSBits [i].getValue() == LastVal &&
1717	RHSBits [i].getValueBitIndex() == LastIdx + `1`)
1718	Bits [i] = RHSBits [i];
1719	else
1720	Bits [i] = ValueBit (ValueBit::ConstZero);
1721	}
1722	else if (LHSBits [i].isZero())
1723	Bits [i] = RHSBits [i];
1724	else if (RHSBits [i].isZero())
1725	Bits [i] = LHSBits [i];
1726	else {
1727	AllDisjoint = false;
1728	break;
1729	}
1730	// We remember the value and bit index of this bit.
1731	if (Bits [i].hasValue()) {
1732	LastVal = Bits [i].getValue();
1733	LastIdx = Bits [i].getValueBitIndex();
1734	}
1735	else {
1736	if (LastVal) LastVal = SDValue ();
1737	LastIdx = `0`;
1738	}
1739	}
1740
1741	if (!AllDisjoint)
1742	break;
1743
1744	return std::make_pair(x&: Interesting = true, y: &Bits);
1745	}
1746	case ISD::ZERO_EXTEND: {
1747	// We support only the case with zero extension from i32 to i64 so far.
1748	if (V.getValueType() != MVT::i64 \|\|
1749	V.getOperand(`0`).getValueType() != MVT::i32)
1750	break;
1751
1752	const SmallVector<ValueBit, `64`> *LHSBits;
1753	const unsigned NumOperandBits = `32`;
1754	std::tie(args&: Interesting, args&: LHSBits) = getValueBits(V: V.getOperand(i: `0`),
1755	NumBits: NumOperandBits);
1756
1757	for (unsigned i = `0`; i < NumOperandBits; ++i)
1758	Bits [i] = (*LHSBits)[i];
1759
1760	for (unsigned i = NumOperandBits; i < NumBits; ++i)
1761	Bits [i] = ValueBit (ValueBit::ConstZero);
1762
1763	return std::make_pair(x&: Interesting, y: &Bits);
1764	}
1765	case ISD::TRUNCATE: {
1766	EVT FromType = V.getOperand(i: `0`).getValueType();
1767	EVT ToType = V.getValueType();
1768	// We support only the case with truncate from i64 to i32.
1769	if (FromType != MVT::i64 \|\| ToType != MVT::i32)
1770	break;
1771	const unsigned NumAllBits = FromType.getSizeInBits();
1772	SmallVector<ValueBit, `64`> *InBits;
1773	std::tie(args&: Interesting, args&: InBits) = getValueBits(V: V.getOperand(i: `0`),
1774	NumBits: NumAllBits);
1775	const unsigned NumValidBits = ToType.getSizeInBits();
1776
1777	// A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.
1778	// So, we cannot include this truncate.
1779	bool UseUpper32bit = false;
1780	for (unsigned i = `0`; i < NumValidBits; ++i)
1781	if ((InBits)[i].hasValue() && (InBits)[i].getValueBitIndex() >= `32`) {
1782	UseUpper32bit = true;
1783	break;
1784	}
1785	if (UseUpper32bit)
1786	break;
1787
1788	for (unsigned i = `0`; i < NumValidBits; ++i)
1789	Bits [i] = (*InBits)[i];
1790
1791	return std::make_pair(x&: Interesting, y: &Bits);
1792	}
1793	case ISD::AssertZext: {
1794	// For AssertZext, we look through the operand and
1795	// mark the bits known to be zero.
1796	const SmallVector<ValueBit, `64`> *LHSBits;
1797	std::tie(args&: Interesting, args&: LHSBits) = getValueBits(V: V.getOperand(i: `0`),
1798	NumBits);
1799
1800	EVT FromType = cast<VTSDNode>(Val: V.getOperand(i: `1`))->getVT();
1801	const unsigned NumValidBits = FromType.getSizeInBits();
1802	for (unsigned i = `0`; i < NumValidBits; ++i)
1803	Bits [i] = (*LHSBits)[i];
1804
1805	// These bits are known to be zero but the AssertZext may be from a value
1806	// that already has some constant zero bits (i.e. from a masking and).
1807	for (unsigned i = NumValidBits; i < NumBits; ++i)
1808	Bits [i] = (*LHSBits)[i].hasValue()
1809	? ValueBit ((*LHSBits)[i].getValue(),
1810	(*LHSBits)[i].getValueBitIndex(),
1811	ValueBit::VariableKnownToBeZero)
1812	: ValueBit (ValueBit::ConstZero);
1813
1814	return std::make_pair(x&: Interesting, y: &Bits);
1815	}
1816	case ISD::LOAD:
1817	LoadSDNode *LD = cast<LoadSDNode>(Val&: V);
1818	if (ISD::isZEXTLoad(N: V.getNode()) && V.getResNo() == `0`) {
1819	EVT VT = LD->getMemoryVT();
1820	const unsigned NumValidBits = VT.getSizeInBits();
1821
1822	for (unsigned i = `0`; i < NumValidBits; ++i)
1823	Bits [i] = ValueBit (V, i);
1824
1825	// These bits are known to be zero.
1826	for (unsigned i = NumValidBits; i < NumBits; ++i)
1827	Bits [i] = ValueBit (V, i, ValueBit::VariableKnownToBeZero);
1828
1829	// Zero-extending load itself cannot be optimized. So, it is not
1830	// interesting by itself though it gives useful information.
1831	return std::make_pair(x&: Interesting = false, y: &Bits);
1832	}
1833	break;
1834	}
1835
1836	for (unsigned i = `0`; i < NumBits; ++i)
1837	Bits [i] = ValueBit (V, i);
1838
1839	return std::make_pair(x&: Interesting = false, y: &Bits);
1840	}
1841
1842	// For each value (except the constant ones), compute the left-rotate amount
1843	// to get it from its original to final position.
1844	void computeRotationAmounts() {
1845	NeedMask = false;
1846	RLAmt.resize(N: Bits.size());
1847	for (unsigned i = `0`; i < Bits.size(); ++i)
1848	if (Bits [i].hasValue()) {
1849	unsigned VBI = Bits [i].getValueBitIndex();
1850	if (i >= VBI)
1851	RLAmt [i] = i - VBI;
1852	else
1853	RLAmt [i] = Bits.size() - (VBI - i);
1854	} else if (Bits [i].isZero()) {
1855	NeedMask = true;
1856	RLAmt [i] = UINT32_MAX;
1857	} else {
1858	llvm_unreachable("Unknown value bit type");
1859	}
1860	}
1861
1862	// Collect groups of consecutive bits with the same underlying value and
1863	// rotation factor. If we're doing late masking, we ignore zeros, otherwise
1864	// they break up groups.
1865	void collectBitGroups(bool LateMask) {
1866	BitGroups.clear();
1867
1868	unsigned LastRLAmt = RLAmt [`0`];
1869	SDValue LastValue = Bits [`0`].hasValue() ? Bits [`0`].getValue() : SDValue ();
1870	unsigned LastGroupStartIdx = `0`;
1871	bool IsGroupOfZeros = !Bits [LastGroupStartIdx].hasValue();
1872	for (unsigned i = `1`; i < Bits.size(); ++i) {
1873	unsigned ThisRLAmt = RLAmt [i];
1874	SDValue ThisValue = Bits [i].hasValue() ? Bits [i].getValue() : SDValue ();
1875	if (LateMask && !ThisValue) {
1876	ThisValue = LastValue;
1877	ThisRLAmt = LastRLAmt;
1878	// If we're doing late masking, then the first bit group always starts
1879	// at zero (even if the first bits were zero).
1880	if (BitGroups.empty())
1881	LastGroupStartIdx = `0`;
1882	}
1883
1884	// If this bit is known to be zero and the current group is a bit group
1885	// of zeros, we do not need to terminate the current bit group even the
1886	// Value or RLAmt does not match here. Instead, we terminate this group
1887	// when the first non-zero bit appears later.
1888	if (IsGroupOfZeros && Bits [i].isZero())
1889	continue;
1890
1891	// If this bit has the same underlying value and the same rotate factor as
1892	// the last one, then they're part of the same group.
1893	if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
1894	// We cannot continue the current group if this bits is not known to
1895	// be zero in a bit group of zeros.
1896	if (!(IsGroupOfZeros && ThisValue && !Bits [i].isZero()))
1897	continue;
1898
1899	if (LastValue.getNode())
1900	BitGroups.push_back(Elt: BitGroup (LastValue, LastRLAmt, LastGroupStartIdx,
1901	i-`1`));
1902	LastRLAmt = ThisRLAmt;
1903	LastValue = ThisValue;
1904	LastGroupStartIdx = i;
1905	IsGroupOfZeros = !Bits [LastGroupStartIdx].hasValue();
1906	}
1907	if (LastValue.getNode())
1908	BitGroups.push_back(Elt: BitGroup (LastValue, LastRLAmt, LastGroupStartIdx,
1909	Bits.size()-`1`));
1910
1911	if (BitGroups.empty())
1912	return;
1913
1914	// We might be able to combine the first and last groups.
1915	if (BitGroups.size() > `1`) {
1916	// If the first and last groups are the same, then remove the first group
1917	// in favor of the last group, making the ending index of the last group
1918	// equal to the ending index of the to-be-removed first group.
1919	if (BitGroups [`0`].StartIdx == `0` &&
1920	BitGroups [BitGroups.size()-`1`].EndIdx == Bits.size()-`1` &&
1921	BitGroups [`0`].V == BitGroups [BitGroups.size()-`1`].V &&
1922	BitGroups [`0`].RLAmt == BitGroups [BitGroups.size()-`1`].RLAmt) {
1923	LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
1924	BitGroups [BitGroups.size()-`1`].EndIdx = BitGroups [`0`].EndIdx;
1925	BitGroups.erase(CI: BitGroups.begin());
1926	}
1927	}
1928	}
1929
1930	// Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1931	// associated with each. If the number of groups are same, we prefer a group
1932	// which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate
1933	// instruction. If there is a degeneracy, pick the one that occurs
1934	// first (in the final value).
1935	void collectValueRotInfo() {
1936	ValueRots.clear();
1937
1938	for (auto &BG : BitGroups) {
1939	unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? `64` : `0`);
1940	ValueRotInfo &VRI = ValueRots [std::make_pair(x&: BG.V, y&: RLAmtKey)];
1941	VRI.V = BG.V;
1942	VRI.RLAmt = BG.RLAmt;
1943	VRI.Repl32 = BG.Repl32;
1944	VRI.NumGroups += `1`;
1945	VRI.FirstGroupStartIdx = std::min(a: VRI.FirstGroupStartIdx, b: BG.StartIdx);
1946	}
1947
1948	// Now that we've collected the various ValueRotInfo instances, we need to
1949	// sort them.
1950	ValueRotsVec.clear();
1951	for (auto &I : ValueRots) {
1952	ValueRotsVec.push_back(Elt: I.second);
1953	}
1954	llvm::sort(C&: ValueRotsVec);
1955	}
1956
1957	// In 64-bit mode, rlwinm and friends have a rotation operator that
1958	// replicates the low-order 32 bits into the high-order 32-bits. The mask
1959	// indices of these instructions can only be in the lower 32 bits, so they
1960	// can only represent some 64-bit bit groups. However, when they can be used,
1961	// the 32-bit replication can be used to represent, as a single bit group,
1962	// otherwise separate bit groups. We'll convert to replicated-32-bit bit
1963	// groups when possible. Returns true if any of the bit groups were
1964	// converted.
1965	void assignRepl32BitGroups() {
1966	// If we have bits like this:
1967	//
1968	// Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
1969	// V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24
1970	// Groups: \| RLAmt = 8 \| RLAmt = 40 \|
1971	//
1972	// But, making use of a 32-bit operation that replicates the low-order 32
1973	// bits into the high-order 32 bits, this can be one bit group with a RLAmt
1974	// of 8.
1975
1976	auto IsAllLow32 = [this](BitGroup & BG) {
1977	if (BG.StartIdx <= BG.EndIdx) {
1978	for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
1979	if (!Bits [i].hasValue())
1980	continue;
1981	if (Bits [i].getValueBitIndex() >= `32`)
1982	return false;
1983	}
1984	} else {
1985	for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
1986	if (!Bits [i].hasValue())
1987	continue;
1988	if (Bits [i].getValueBitIndex() >= `32`)
1989	return false;
1990	}
1991	for (unsigned i = `0`; i <= BG.EndIdx; ++i) {
1992	if (!Bits [i].hasValue())
1993	continue;
1994	if (Bits [i].getValueBitIndex() >= `32`)
1995	return false;
1996	}
1997	}
1998
1999	return true;
2000	};
2001
2002	for (auto &BG : BitGroups) {
2003	// If this bit group has RLAmt of 0 and will not be merged with
2004	// another bit group, we don't benefit from Repl32. We don't mark
2005	// such group to give more freedom for later instruction selection.
2006	if (BG.RLAmt == `0`) {
2007	auto PotentiallyMerged = [this](BitGroup & BG) {
2008	for (auto &BG2 : BitGroups)
2009	if (&BG != &BG2 && BG.V == BG2.V &&
2010	(BG2.RLAmt == `0` \|\| BG2.RLAmt == `32`))
2011	return true;
2012	return false;
2013	};
2014	if (!PotentiallyMerged(BG))
2015	continue;
2016	}
2017	if (BG.StartIdx < `32` && BG.EndIdx < `32`) {
2018	if (IsAllLow32(BG)) {
2019	if (BG.RLAmt >= `32`) {
2020	BG.RLAmt -= `32`;
2021	BG.Repl32CR = true;
2022	}
2023
2024	BG.Repl32 = true;
2025
2026	LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "
2027	<< BG.V.getNode() << " RLAmt = " << BG.RLAmt << " ["
2028	<< BG.StartIdx << ", " << BG.EndIdx << "]\n");
2029	}
2030	}
2031	}
2032
2033	// Now walk through the bit groups, consolidating where possible.
2034	for (auto I = BitGroups.begin(); I != BitGroups.end();) {
2035	// We might want to remove this bit group by merging it with the previous
2036	// group (which might be the ending group).
2037	auto IP = (I == BitGroups.begin()) ?
2038	std::prev(x: BitGroups.end()) : std::prev(x: I);
2039	if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
2040	I->StartIdx == (IP->EndIdx + `1`) % `64` && I != IP) {
2041
2042	LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "
2043	<< I->V.getNode() << " RLAmt = " << I->RLAmt << " ["
2044	<< I->StartIdx << ", " << I->EndIdx
2045	<< "] with group with range [" << IP->StartIdx << ", "
2046	<< IP->EndIdx << "]\n");
2047
2048	IP->EndIdx = I->EndIdx;
2049	IP->Repl32CR = IP->Repl32CR \|\| I->Repl32CR;
2050	IP->Repl32Coalesced = true;
2051	I = BitGroups.erase(CI: I);
2052	continue;
2053	} else {
2054	// There is a special case worth handling: If there is a single group
2055	// covering the entire upper 32 bits, and it can be merged with both
2056	// the next and previous groups (which might be the same group), then
2057	// do so. If it is the same group (so there will be only one group in
2058	// total), then we need to reverse the order of the range so that it
2059	// covers the entire 64 bits.
2060	if (I->StartIdx == `32` && I->EndIdx == `63`) {
2061	assert(std::next(I) == BitGroups.end() &&
2062	"bit group ends at index 63 but there is another?");
2063	auto IN = BitGroups.begin();
2064
2065	if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
2066	(I->RLAmt % `32`) == IP->RLAmt && (I->RLAmt % `32`) == IN->RLAmt &&
2067	IP->EndIdx == `31` && IN->StartIdx == `0` && I != IP &&
2068	IsAllLow32(*I)) {
2069
2070	LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode()
2071	<< " RLAmt = " << I->RLAmt << " [" << I->StartIdx
2072	<< ", " << I->EndIdx
2073	<< "] with 32-bit replicated groups with ranges ["
2074	<< IP->StartIdx << ", " << IP->EndIdx << "] and ["
2075	<< IN->StartIdx << ", " << IN->EndIdx << "]\n");
2076
2077	if (IP == IN) {
2078	// There is only one other group; change it to cover the whole
2079	// range (backward, so that it can still be Repl32 but cover the
2080	// whole 64-bit range).
2081	IP->StartIdx = `31`;
2082	IP->EndIdx = `30`;
2083	IP->Repl32CR = IP->Repl32CR \|\| I->RLAmt >= `32`;
2084	IP->Repl32Coalesced = true;
2085	I = BitGroups.erase(CI: I);
2086	} else {
2087	// There are two separate groups, one before this group and one
2088	// after us (at the beginning). We're going to remove this group,
2089	// but also the group at the very beginning.
2090	IP->EndIdx = IN->EndIdx;
2091	IP->Repl32CR = IP->Repl32CR \|\| IN->Repl32CR \|\| I->RLAmt >= `32`;
2092	IP->Repl32Coalesced = true;
2093	I = BitGroups.erase(CI: I);
2094	BitGroups.erase(CI: BitGroups.begin());
2095	}
2096
2097	// This must be the last group in the vector (and we might have
2098	// just invalidated the iterator above), so break here.
2099	break;
2100	}
2101	}
2102	}
2103
2104	++I;
2105	}
2106	}
2107
2108	SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
2109	return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
2110	}
2111
2112	uint64_t getZerosMask() {
2113	uint64_t Mask = `0`;
2114	for (unsigned i = `0`; i < Bits.size(); ++i) {
2115	if (Bits [i].hasValue())
2116	continue;
2117	Mask \|= (UINT64_C(`1`) << i);
2118	}
2119
2120	return ~Mask;
2121	}
2122
2123	// This method extends an input value to 64 bit if input is 32-bit integer.
2124	// While selecting instructions in BitPermutationSelector in 64-bit mode,
2125	// an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
2126	// In such case, we extend it to 64 bit to be consistent with other values.
2127	SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {
2128	if (V.getValueSizeInBits() == `64`)
2129	return V;
2130
2131	assert(V.getValueSizeInBits() == `32`);
2132	SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2133	SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
2134	MVT::i64), `0`);
2135	SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
2136	MVT::i64, ImDef, V,
2137	SubRegIdx), `0`);
2138	return ExtVal;
2139	}
2140
2141	SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
2142	if (V.getValueSizeInBits() == `32`)
2143	return V;
2144
2145	assert(V.getValueSizeInBits() == `64`);
2146	SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2147	SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
2148	MVT::i32, V, SubRegIdx), `0`);
2149	return SubVal;
2150	}
2151
2152	// Depending on the number of groups for a particular value, it might be
2153	// better to rotate, mask explicitly (using andi/andis), and then or the
2154	// result. Select this part of the result first.
2155	void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
2156	if (BPermRewriterNoMasking)
2157	return;
2158
2159	for (ValueRotInfo &VRI : ValueRotsVec) {
2160	unsigned Mask = `0`;
2161	for (unsigned i = `0`; i < Bits.size(); ++i) {
2162	if (!Bits [i].hasValue() \|\| Bits [i].getValue() != VRI.V)
2163	continue;
2164	if (RLAmt [i] != VRI.RLAmt)
2165	continue;
2166	Mask \|= (`1u` << i);
2167	}
2168
2169	// Compute the masks for andi/andis that would be necessary.
2170	unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> `16`;
2171	assert((ANDIMask != `0` \|\| ANDISMask != `0`) &&
2172	"No set bits in mask for value bit groups");
2173	bool NeedsRotate = VRI.RLAmt != `0`;
2174
2175	// We're trying to minimize the number of instructions. If we have one
2176	// group, using one of andi/andis can break even. If we have three
2177	// groups, we can use both andi and andis and break even (to use both
2178	// andi and andis we also need to or the results together). We need four
2179	// groups if we also need to rotate. To use andi/andis we need to do more
2180	// than break even because rotate-and-mask instructions tend to be easier
2181	// to schedule.
2182
2183	// FIXME: We've biased here against using andi/andis, which is right for
2184	// POWER cores, but not optimal everywhere. For example, on the A2,
2185	// andi/andis have single-cycle latency whereas the rotate-and-mask
2186	// instructions take two cycles, and it would be better to bias toward
2187	// andi/andis in break-even cases.
2188
2189	unsigned NumAndInsts = (unsigned) NeedsRotate +
2190	(unsigned) (ANDIMask != `0`) +
2191	(unsigned) (ANDISMask != `0`) +
2192	(unsigned) (ANDIMask != `0` && ANDISMask != `0`) +
2193	(unsigned) (bool) Res;
2194
2195	LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
2196	<< " RL: " << VRI.RLAmt << ":"
2197	<< "\n\t\t\tisel using masking: " << NumAndInsts
2198	<< " using rotates: " << VRI.NumGroups << "\n");
2199
2200	if (NumAndInsts >= VRI.NumGroups)
2201	continue;
2202
2203	LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2204
2205	if (InstCnt) *InstCnt += NumAndInsts;
2206
2207	SDValue VRot;
2208	if (VRI.RLAmt) {
2209	SDValue Ops[] =
2210	{ TruncateToInt32(V: VRI.V, dl), getI32Imm(Imm: VRI.RLAmt, dl),
2211	getI32Imm(Imm: `0`, dl), getI32Imm(Imm: `31`, dl) };
2212	VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2213	Ops), `0`);
2214	} else {
2215	VRot = TruncateToInt32(V: VRI.V, dl);
2216	}
2217
2218	SDValue ANDIVal, ANDISVal;
2219	if (ANDIMask != `0`)
2220	ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
2221	VRot, getI32Imm(ANDIMask, dl)),
2222	`0`);
2223	if (ANDISMask != `0`)
2224	ANDISVal =
2225	SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, VRot,
2226	getI32Imm(ANDISMask, dl)),
2227	`0`);
2228
2229	SDValue TotalVal;
2230	if (!ANDIVal)
2231	TotalVal = ANDISVal;
2232	else if (!ANDISVal)
2233	TotalVal = ANDIVal;
2234	else
2235	TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2236	ANDIVal, ANDISVal), `0`);
2237
2238	if (!Res)
2239	Res = TotalVal;
2240	else
2241	Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2242	Res, TotalVal), `0`);
2243
2244	// Now, remove all groups with this underlying value and rotation
2245	// factor.
2246	eraseMatchingBitGroups(F: [VRI](const BitGroup &BG) {
2247	return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
2248	});
2249	}
2250	}
2251
2252	// Instruction selection for the 32-bit case.
2253	SDNode Select32(SDNode N, bool LateMask, unsigned *InstCnt) {
2254	SDLoc dl(N);
2255	SDValue Res;
2256
2257	if (InstCnt) *InstCnt = `0`;
2258
2259	// Take care of cases that should use andi/andis first.
2260	SelectAndParts32(dl, Res, InstCnt);
2261
2262	// If we've not yet selected a 'starting' instruction, and we have no zeros
2263	// to fill in, select the (Value, RLAmt) with the highest priority (largest
2264	// number of groups), and start with this rotated value.
2265	if ((!NeedMask \|\| LateMask) && !Res) {
2266	ValueRotInfo &VRI = ValueRotsVec [`0`];
2267	if (VRI.RLAmt) {
2268	if (InstCnt) *InstCnt += `1`;
2269	SDValue Ops[] =
2270	{ TruncateToInt32(V: VRI.V, dl), getI32Imm(Imm: VRI.RLAmt, dl),
2271	getI32Imm(Imm: `0`, dl), getI32Imm(Imm: `31`, dl) };
2272	Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
2273	`0`);
2274	} else {
2275	Res = TruncateToInt32(V: VRI.V, dl);
2276	}
2277
2278	// Now, remove all groups with this underlying value and rotation factor.
2279	eraseMatchingBitGroups(F: [VRI](const BitGroup &BG) {
2280	return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
2281	});
2282	}
2283
2284	if (InstCnt) *InstCnt += BitGroups.size();
2285
2286	// Insert the other groups (one at a time).
2287	for (auto &BG : BitGroups) {
2288	if (!Res) {
2289	SDValue Ops[] =
2290	{ TruncateToInt32(V: BG.V, dl), getI32Imm(Imm: BG.RLAmt, dl),
2291	getI32Imm(Imm: Bits.size() - BG.EndIdx - `1`, dl),
2292	getI32Imm(Imm: Bits.size() - BG.StartIdx - `1`, dl) };
2293	Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), `0`);
2294	} else {
2295	SDValue Ops[] =
2296	{ Res, TruncateToInt32(V: BG.V, dl), getI32Imm(Imm: BG.RLAmt, dl),
2297	getI32Imm(Imm: Bits.size() - BG.EndIdx - `1`, dl),
2298	getI32Imm(Imm: Bits.size() - BG.StartIdx - `1`, dl) };
2299	Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), `0`);
2300	}
2301	}
2302
2303	if (LateMask) {
2304	unsigned Mask = (unsigned) getZerosMask();
2305
2306	unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> `16`;
2307	assert((ANDIMask != `0` \|\| ANDISMask != `0`) &&
2308	"No set bits in zeros mask?");
2309
2310	if (InstCnt) InstCnt += (unsigned*) (ANDIMask != `0`) +
2311	(unsigned) (ANDISMask != `0`) +
2312	(unsigned) (ANDIMask != `0` && ANDISMask != `0`);
2313
2314	SDValue ANDIVal, ANDISVal;
2315	if (ANDIMask != `0`)
2316	ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
2317	Res, getI32Imm(ANDIMask, dl)),
2318	`0`);
2319	if (ANDISMask != `0`)
2320	ANDISVal =
2321	SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, Res,
2322	getI32Imm(ANDISMask, dl)),
2323	`0`);
2324
2325	if (!ANDIVal)
2326	Res = ANDISVal;
2327	else if (!ANDISVal)
2328	Res = ANDIVal;
2329	else
2330	Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2331	ANDIVal, ANDISVal), `0`);
2332	}
2333
2334	return Res.getNode();
2335	}
2336
// Number of instructions (1 or 2) needed for a 64-bit rotate-and-mask with
// the given parameters.
//
// RLAmt     - left-rotate amount.
// Repl32    - the group uses the 32-bit replicating rotate; always 1.
// MaskStart - lowest bit index of the mask (bit 0 = lowest-order bit).
// MaskEnd   - highest bit index of the mask.
// IsIns     - true when counting for a rotate-and-insert; in that case a
//             mask touching either end of the register does not by itself
//             qualify for the single-instruction form.
unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
                              unsigned MaskStart, unsigned MaskEnd,
                              bool IsIns) {
  // In the notation used by the instructions, 'start' and 'end' are reversed
  // because bits are counted from high to low order.
  unsigned InstMaskStart = 64 - MaskEnd - 1,
           InstMaskEnd   = 64 - MaskStart - 1;

  if (Repl32)
    return 1;

  if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) ||
      InstMaskEnd == 63 - RLAmt)
    return 1;

  return 2;
}
2354
2355	// For 64-bit values, not all combinations of rotates and masks are
2356	// available. Produce one if it is available.
2357	SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
2358	bool Repl32, unsigned MaskStart, unsigned MaskEnd,
2359	unsigned InstCnt = nullptr*) {
2360	// In the notation used by the instructions, 'start' and 'end' are reversed
2361	// because bits are counted from high to low order.
2362	unsigned InstMaskStart = `64` - MaskEnd - `1`,
2363	InstMaskEnd = `64` - MaskStart - `1`;
2364
2365	if (InstCnt) *InstCnt += `1`;
2366
2367	if (Repl32) {
2368	// This rotation amount assumes that the lower 32 bits of the quantity
2369	// are replicated in the high 32 bits by the rotation operator (which is
2370	// done by rlwinm and friends).
2371	assert(InstMaskStart >= `32` && "Mask cannot start out of range");
2372	assert(InstMaskEnd >= `32` && "Mask cannot end out of range");
2373	SDValue Ops[] =
2374	{ ExtendToInt64(V, dl), getI32Imm(Imm: RLAmt, dl),
2375	getI32Imm(Imm: InstMaskStart - `32`, dl), getI32Imm(Imm: InstMaskEnd - `32`, dl) };
2376	return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
2377	Ops), `0`);
2378	}
2379
2380	if (InstMaskEnd == `63`) {
2381	SDValue Ops[] =
2382	{ ExtendToInt64(V, dl), getI32Imm(Imm: RLAmt, dl),
2383	getI32Imm(Imm: InstMaskStart, dl) };
2384	return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), `0`);
2385	}
2386
2387	if (InstMaskStart == `0`) {
2388	SDValue Ops[] =
2389	{ ExtendToInt64(V, dl), getI32Imm(Imm: RLAmt, dl),
2390	getI32Imm(Imm: InstMaskEnd, dl) };
2391	return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), `0`);
2392	}
2393
2394	if (InstMaskEnd == `63` - RLAmt) {
2395	SDValue Ops[] =
2396	{ ExtendToInt64(V, dl), getI32Imm(Imm: RLAmt, dl),
2397	getI32Imm(Imm: InstMaskStart, dl) };
2398	return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), `0`);
2399	}
2400
2401	// We cannot do this with a single instruction, so we'll use two. The
2402	// problem is that we're not free to choose both a rotation amount and mask
2403	// start and end independently. We can choose an arbitrary mask start and
2404	// end, but then the rotation amount is fixed. Rotation, however, can be
2405	// inverted, and so by applying an "inverse" rotation first, we can get the
2406	// desired result.
2407	if (InstCnt) *InstCnt += `1`;
2408
2409	// The rotation mask for the second instruction must be MaskStart.
2410	unsigned RLAmt2 = MaskStart;
2411	// The first instruction must rotate V so that the overall rotation amount
2412	// is RLAmt.
2413	unsigned RLAmt1 = (`64` + RLAmt - RLAmt2) % `64`;
2414	if (RLAmt1)
2415	V = SelectRotMask64(V, dl, RLAmt: RLAmt1, Repl32: false, MaskStart: `0`, MaskEnd: `63`);
2416	return SelectRotMask64(V, dl, RLAmt: RLAmt2, Repl32: false, MaskStart, MaskEnd);
2417	}
2418
2419	// For 64-bit values, not all combinations of rotates and masks are
2420	// available. Produce a rotate-mask-and-insert if one is available.
2421	SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
2422	unsigned RLAmt, bool Repl32, unsigned MaskStart,
2423	unsigned MaskEnd, unsigned InstCnt = nullptr*) {
2424	// In the notation used by the instructions, 'start' and 'end' are reversed
2425	// because bits are counted from high to low order.
2426	unsigned InstMaskStart = `64` - MaskEnd - `1`,
2427	InstMaskEnd = `64` - MaskStart - `1`;
2428
2429	if (InstCnt) *InstCnt += `1`;
2430
2431	if (Repl32) {
2432	// This rotation amount assumes that the lower 32 bits of the quantity
2433	// are replicated in the high 32 bits by the rotation operator (which is
2434	// done by rlwinm and friends).
2435	assert(InstMaskStart >= `32` && "Mask cannot start out of range");
2436	assert(InstMaskEnd >= `32` && "Mask cannot end out of range");
2437	SDValue Ops[] =
2438	{ ExtendToInt64(V: Base, dl), ExtendToInt64(V, dl), getI32Imm(Imm: RLAmt, dl),
2439	getI32Imm(Imm: InstMaskStart - `32`, dl), getI32Imm(Imm: InstMaskEnd - `32`, dl) };
2440	return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
2441	Ops), `0`);
2442	}
2443
2444	if (InstMaskEnd == `63` - RLAmt) {
2445	SDValue Ops[] =
2446	{ ExtendToInt64(V: Base, dl), ExtendToInt64(V, dl), getI32Imm(Imm: RLAmt, dl),
2447	getI32Imm(Imm: InstMaskStart, dl) };
2448	return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), `0`);
2449	}
2450
2451	// We cannot do this with a single instruction, so we'll use two. The
2452	// problem is that we're not free to choose both a rotation amount and mask
2453	// start and end independently. We can choose an arbitrary mask start and
2454	// end, but then the rotation amount is fixed. Rotation, however, can be
2455	// inverted, and so by applying an "inverse" rotation first, we can get the
2456	// desired result.
2457	if (InstCnt) *InstCnt += `1`;
2458
2459	// The rotation mask for the second instruction must be MaskStart.
2460	unsigned RLAmt2 = MaskStart;
2461	// The first instruction must rotate V so that the overall rotation amount
2462	// is RLAmt.
2463	unsigned RLAmt1 = (`64` + RLAmt - RLAmt2) % `64`;
2464	if (RLAmt1)
2465	V = SelectRotMask64(V, dl, RLAmt: RLAmt1, Repl32: false, MaskStart: `0`, MaskEnd: `63`);
2466	return SelectRotMaskIns64(Base, V, dl, RLAmt: RLAmt2, Repl32: false, MaskStart, MaskEnd);
2467	}
2468
2469	void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
2470	if (BPermRewriterNoMasking)
2471	return;
2472
2473	// The idea here is the same as in the 32-bit version, but with additional
2474	// complications from the fact that Repl32 might be true. Because we
2475	// aggressively convert bit groups to Repl32 form (which, for small
2476	// rotation factors, involves no other change), and then coalesce, it might
2477	// be the case that a single 64-bit masking operation could handle both
2478	// some Repl32 groups and some non-Repl32 groups. If converting to Repl32
2479	// form allowed coalescing, then we must use a 32-bit rotaton in order to
2480	// completely capture the new combined bit group.
2481
2482	for (ValueRotInfo &VRI : ValueRotsVec) {
2483	uint64_t Mask = `0`;
2484
2485	// We need to add to the mask all bits from the associated bit groups.
2486	// If Repl32 is false, we need to add bits from bit groups that have
2487	// Repl32 true, but are trivially convertable to Repl32 false. Such a
2488	// group is trivially convertable if it overlaps only with the lower 32
2489	// bits, and the group has not been coalesced.
2490	auto MatchingBG = [VRI](const BitGroup &BG) {
2491	if (VRI.V != BG.V)
2492	return false;
2493
2494	unsigned EffRLAmt = BG.RLAmt;
2495	if (!VRI.Repl32 && BG.Repl32) {
2496	if (BG.StartIdx < `32` && BG.EndIdx < `32` && BG.StartIdx <= BG.EndIdx &&
2497	!BG.Repl32Coalesced) {
2498	if (BG.Repl32CR)
2499	EffRLAmt += `32`;
2500	} else {
2501	return false;
2502	}
2503	} else if (VRI.Repl32 != BG.Repl32) {
2504	return false;
2505	}
2506
2507	return VRI.RLAmt == EffRLAmt;
2508	};
2509
2510	for (auto &BG : BitGroups) {
2511	if (!MatchingBG(BG))
2512	continue;
2513
2514	if (BG.StartIdx <= BG.EndIdx) {
2515	for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
2516	Mask \|= (UINT64_C(`1`) << i);
2517	} else {
2518	for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
2519	Mask \|= (UINT64_C(`1`) << i);
2520	for (unsigned i = `0`; i <= BG.EndIdx; ++i)
2521	Mask \|= (UINT64_C(`1`) << i);
2522	}
2523	}
2524
2525	// We can use the 32-bit andi/andis technique if the mask does not
2526	// require any higher-order bits. This can save an instruction compared
2527	// to always using the general 64-bit technique.
2528	bool Use32BitInsts = isUInt<`32`>(x: Mask);
2529	// Compute the masks for andi/andis that would be necessary.
2530	unsigned ANDIMask = (Mask & UINT16_MAX),
2531	ANDISMask = (Mask >> `16`) & UINT16_MAX;
2532
2533	bool NeedsRotate = VRI.RLAmt \|\| (VRI.Repl32 && !isUInt<`32`>(x: Mask));
2534
2535	unsigned NumAndInsts = (unsigned) NeedsRotate +
2536	(unsigned) (bool) Res;
2537	unsigned NumOfSelectInsts = `0`;
2538	selectI64Imm(CurDAG, dl, Imm: Mask, InstCnt: &NumOfSelectInsts);
2539	assert(NumOfSelectInsts > `0` && "Failed to select an i64 constant.");
2540	if (Use32BitInsts)
2541	NumAndInsts += (unsigned) (ANDIMask != `0`) + (unsigned) (ANDISMask != `0`) +
2542	(unsigned) (ANDIMask != `0` && ANDISMask != `0`);
2543	else
2544	NumAndInsts += NumOfSelectInsts + / and / `1`;
2545
2546	unsigned NumRLInsts = `0`;
2547	bool FirstBG = true;
2548	bool MoreBG = false;
2549	for (auto &BG : BitGroups) {
2550	if (!MatchingBG(BG)) {
2551	MoreBG = true;
2552	continue;
2553	}
2554	NumRLInsts +=
2555	SelectRotMask64Count(RLAmt: BG.RLAmt, Repl32: BG.Repl32, MaskStart: BG.StartIdx, MaskEnd: BG.EndIdx,
2556	IsIns: !FirstBG);
2557	FirstBG = false;
2558	}
2559
2560	LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
2561	<< " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":")
2562	<< "\n\t\t\tisel using masking: " << NumAndInsts
2563	<< " using rotates: " << NumRLInsts << "\n");
2564
2565	// When we'd use andi/andis, we bias toward using the rotates (andi only
2566	// has a record form, and is cracked on POWER cores). However, when using
2567	// general 64-bit constant formation, bias toward the constant form,
2568	// because that exposes more opportunities for CSE.
2569	if (NumAndInsts > NumRLInsts)
2570	continue;
2571	// When merging multiple bit groups, instruction or is used.
2572	// But when rotate is used, rldimi can inert the rotated value into any
2573	// register, so instruction or can be avoided.
2574	if ((Use32BitInsts \|\| MoreBG) && NumAndInsts == NumRLInsts)
2575	continue;
2576
2577	LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2578
2579	if (InstCnt) *InstCnt += NumAndInsts;
2580
2581	SDValue VRot;
2582	// We actually need to generate a rotation if we have a non-zero rotation
2583	// factor or, in the Repl32 case, if we care about any of the
2584	// higher-order replicated bits. In the latter case, we generate a mask
2585	// backward so that it actually includes the entire 64 bits.
2586	if (VRI.RLAmt \|\| (VRI.Repl32 && !isUInt<`32`>(x: Mask)))
2587	VRot = SelectRotMask64(V: VRI.V, dl, RLAmt: VRI.RLAmt, Repl32: VRI.Repl32,
2588	MaskStart: VRI.Repl32 ? `31` : `0`, MaskEnd: VRI.Repl32 ? `30` : `63`);
2589	else
2590	VRot = VRI.V;
2591
2592	SDValue TotalVal;
2593	if (Use32BitInsts) {
2594	assert((ANDIMask != `0` \|\| ANDISMask != `0`) &&
2595	"No set bits in mask when using 32-bit ands for 64-bit value");
2596
2597	SDValue ANDIVal, ANDISVal;
2598	if (ANDIMask != `0`)
2599	ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
2600	ExtendToInt64(VRot, dl),
2601	getI32Imm(ANDIMask, dl)),
2602	`0`);
2603	if (ANDISMask != `0`)
2604	ANDISVal =
2605	SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
2606	ExtendToInt64(VRot, dl),
2607	getI32Imm(ANDISMask, dl)),
2608	`0`);
2609
2610	if (!ANDIVal)
2611	TotalVal = ANDISVal;
2612	else if (!ANDISVal)
2613	TotalVal = ANDIVal;
2614	else
2615	TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2616	ExtendToInt64(ANDIVal, dl), ANDISVal), `0`);
2617	} else {
2618	TotalVal = SDValue (selectI64Imm(CurDAG, dl, Imm: Mask), `0`);
2619	TotalVal =
2620	SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2621	ExtendToInt64(VRot, dl), TotalVal),
2622	`0`);
2623	}
2624
2625	if (!Res)
2626	Res = TotalVal;
2627	else
2628	Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2629	ExtendToInt64(Res, dl), TotalVal),
2630	`0`);
2631
2632	// Now, remove all groups with this underlying value and rotation
2633	// factor.
2634	eraseMatchingBitGroups(F: MatchingBG);
2635	}
2636	}
2637
2638	// Instruction selection for the 64-bit case.
2639	SDNode Select64(SDNode N, bool LateMask, unsigned *InstCnt) {
2640	SDLoc dl(N);
2641	SDValue Res;
2642
2643	if (InstCnt) *InstCnt = `0`;
2644
2645	// Take care of cases that should use andi/andis first.
2646	SelectAndParts64(dl, Res, InstCnt);
2647
2648	// If we've not yet selected a 'starting' instruction, and we have no zeros
2649	// to fill in, select the (Value, RLAmt) with the highest priority (largest
2650	// number of groups), and start with this rotated value.
2651	if ((!NeedMask \|\| LateMask) && !Res) {
2652	// If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
2653	// groups will come first, and so the VRI representing the largest number
2654	// of groups might not be first (it might be the first Repl32 groups).
2655	unsigned MaxGroupsIdx = `0`;
2656	if (!ValueRotsVec [`0`].Repl32) {
2657	for (unsigned i = `0`, ie = ValueRotsVec.size(); i < ie; ++i)
2658	if (ValueRotsVec [i].Repl32) {
2659	if (ValueRotsVec [i].NumGroups > ValueRotsVec [`0`].NumGroups)
2660	MaxGroupsIdx = i;
2661	break;
2662	}
2663	}
2664
2665	ValueRotInfo &VRI = ValueRotsVec [MaxGroupsIdx];
2666	bool NeedsRotate = false;
2667	if (VRI.RLAmt) {
2668	NeedsRotate = true;
2669	} else if (VRI.Repl32) {
2670	for (auto &BG : BitGroups) {
2671	if (BG.V != VRI.V \|\| BG.RLAmt != VRI.RLAmt \|\|
2672	BG.Repl32 != VRI.Repl32)
2673	continue;
2674
2675	// We don't need a rotate if the bit group is confined to the lower
2676	// 32 bits.
2677	if (BG.StartIdx < `32` && BG.EndIdx < `32` && BG.StartIdx < BG.EndIdx)
2678	continue;
2679
2680	NeedsRotate = true;
2681	break;
2682	}
2683	}
2684
2685	if (NeedsRotate)
2686	Res = SelectRotMask64(V: VRI.V, dl, RLAmt: VRI.RLAmt, Repl32: VRI.Repl32,
2687	MaskStart: VRI.Repl32 ? `31` : `0`, MaskEnd: VRI.Repl32 ? `30` : `63`,
2688	InstCnt);
2689	else
2690	Res = VRI.V;
2691
2692	// Now, remove all groups with this underlying value and rotation factor.
2693	if (Res)
2694	eraseMatchingBitGroups(F: [VRI](const BitGroup &BG) {
2695	return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
2696	BG.Repl32 == VRI.Repl32;
2697	});
2698	}
2699
2700	// Because 64-bit rotates are more flexible than inserts, we might have a
2701	// preference regarding which one we do first (to save one instruction).
2702	if (!Res)
2703	for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
2704	if (SelectRotMask64Count(RLAmt: I->RLAmt, Repl32: I->Repl32, MaskStart: I->StartIdx, MaskEnd: I->EndIdx,
2705	IsIns: false) <
2706	SelectRotMask64Count(RLAmt: I->RLAmt, Repl32: I->Repl32, MaskStart: I->StartIdx, MaskEnd: I->EndIdx,
2707	IsIns: true)) {
2708	if (I != BitGroups.begin()) {
2709	BitGroup BG = *I;
2710	BitGroups.erase(CI: I);
2711	BitGroups.insert(I: BitGroups.begin(), Elt: BG);
2712	}
2713
2714	break;
2715	}
2716	}
2717
2718	// Insert the other groups (one at a time).
2719	for (auto &BG : BitGroups) {
2720	if (!Res)
2721	Res = SelectRotMask64(V: BG.V, dl, RLAmt: BG.RLAmt, Repl32: BG.Repl32, MaskStart: BG.StartIdx,
2722	MaskEnd: BG.EndIdx, InstCnt);
2723	else
2724	Res = SelectRotMaskIns64(Base: Res, V: BG.V, dl, RLAmt: BG.RLAmt, Repl32: BG.Repl32,
2725	MaskStart: BG.StartIdx, MaskEnd: BG.EndIdx, InstCnt);
2726	}
2727
2728	if (LateMask) {
2729	uint64_t Mask = getZerosMask();
2730
2731	// We can use the 32-bit andi/andis technique if the mask does not
2732	// require any higher-order bits. This can save an instruction compared
2733	// to always using the general 64-bit technique.
2734	bool Use32BitInsts = isUInt<`32`>(x: Mask);
2735	// Compute the masks for andi/andis that would be necessary.
2736	unsigned ANDIMask = (Mask & UINT16_MAX),
2737	ANDISMask = (Mask >> `16`) & UINT16_MAX;
2738
2739	if (Use32BitInsts) {
2740	assert((ANDIMask != `0` \|\| ANDISMask != `0`) &&
2741	"No set bits in mask when using 32-bit ands for 64-bit value");
2742
2743	if (InstCnt) InstCnt += (unsigned*) (ANDIMask != `0`) +
2744	(unsigned) (ANDISMask != `0`) +
2745	(unsigned) (ANDIMask != `0` && ANDISMask != `0`);
2746
2747	SDValue ANDIVal, ANDISVal;
2748	if (ANDIMask != `0`)
2749	ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
2750	ExtendToInt64(Res, dl),
2751	getI32Imm(ANDIMask, dl)),
2752	`0`);
2753	if (ANDISMask != `0`)
2754	ANDISVal =
2755	SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
2756	ExtendToInt64(Res, dl),
2757	getI32Imm(ANDISMask, dl)),
2758	`0`);
2759
2760	if (!ANDIVal)
2761	Res = ANDISVal;
2762	else if (!ANDISVal)
2763	Res = ANDIVal;
2764	else
2765	Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2766	ExtendToInt64(ANDIVal, dl), ANDISVal), `0`);
2767	} else {
2768	unsigned NumOfSelectInsts = `0`;
2769	SDValue MaskVal =
2770	SDValue (selectI64Imm(CurDAG, dl, Imm: Mask, InstCnt: &NumOfSelectInsts), `0`);
2771	Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2772	ExtendToInt64(Res, dl), MaskVal),
2773	`0`);
2774	if (InstCnt)
2775	InstCnt += NumOfSelectInsts + /* and / `1`;
2776	}
2777	}
2778
2779	return Res.getNode();
2780	}
2781
2782	SDNode Select(SDNode N, bool LateMask, unsigned InstCnt = nullptr*) {
2783	// Fill in BitGroups.
2784	collectBitGroups(LateMask);
2785	if (BitGroups.empty())
2786	return nullptr;
2787
2788	// For 64-bit values, figure out when we can use 32-bit instructions.
2789	if (Bits.size() == `64`)
2790	assignRepl32BitGroups();
2791
2792	// Fill in ValueRotsVec.
2793	collectValueRotInfo();
2794
2795	if (Bits.size() == `32`) {
2796	return Select32(N, LateMask, InstCnt);
2797	} else {
2798	assert(Bits.size() == `64` && "Not 64 bits here?");
2799	return Select64(N, LateMask, InstCnt);
2800	}
2801
2802	return nullptr;
2803	}
2804
2805	void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
2806	erase_if(C&: BitGroups, P: F);
2807	}
2808
2809	SmallVector<ValueBit, `64`> Bits;
2810
2811	bool NeedMask = false;
2812	SmallVector<unsigned, `64`> RLAmt;
2813
2814	SmallVector<BitGroup, `16`> BitGroups;
2815
2816	DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
2817	SmallVector<ValueRotInfo, `16`> ValueRotsVec;
2818
2819	SelectionDAG CurDAG = nullptr*;
2820
2821	public:
2822	BitPermutationSelector(SelectionDAG *DAG)
2823	: CurDAG(DAG) {}
2824
2825	// Here we try to match complex bit permutations into a set of
2826	// rotate-and-shift/shift/and/or instructions, using a set of heuristics
2827	// known to produce optimal code for common cases (like i32 byte swapping).
2828	SDNode Select(SDNode N) {
2829	Memoizer.clear();
2830	auto Result =
2831	getValueBits(V: SDValue (N, `0`), NumBits: N->getValueType(ResNo: `0`).getSizeInBits());
2832	if (!Result.first)
2833	return nullptr;
2834	Bits = std::move(*Result.second);
2835
2836	LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"
2837	" selection for: ");
2838	LLVM_DEBUG(N->dump(CurDAG));
2839
2840	// Fill it RLAmt and set NeedMask.
2841	computeRotationAmounts();
2842
2843	if (!NeedMask)
2844	return Select(N, LateMask: false);
2845
2846	// We currently have two techniques for handling results with zeros: early
2847	// masking (the default) and late masking. Late masking is sometimes more
2848	// efficient, but because the structure of the bit groups is different, it
2849	// is hard to tell without generating both and comparing the results. With
2850	// late masking, we ignore zeros in the resulting value when inserting each
2851	// set of bit groups, and then mask in the zeros at the end. With early
2852	// masking, we only insert the non-zero parts of the result at every step.
2853
2854	unsigned InstCnt = `0`, InstCntLateMask = `0`;
2855	LLVM_DEBUG(dbgs() << "\tEarly masking:\n");
2856	SDNode RN = Select(N, LateMask: false*, InstCnt: &InstCnt);
2857	LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");
2858
2859	LLVM_DEBUG(dbgs() << "\tLate masking:\n");
2860	SDNode RNLM = Select(N, LateMask: true*, InstCnt: &InstCntLateMask);
2861	LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask
2862	<< " instructions\n");
2863
2864	if (InstCnt <= InstCntLateMask) {
2865	LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n");
2866	return RN;
2867	}
2868
2869	LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n");
2870	return RNLM;
2871	}
2872	};
2873
2874	class IntegerCompareEliminator {
2875	SelectionDAG *CurDAG;
2876	PPCDAGToDAGISel *S;
2877	// Conversion type for interpreting results of a 32-bit instruction as
2878	// a 64-bit value or vice versa.
2879	enum ExtOrTruncConversion { Ext, Trunc };
2880
2881	// Modifiers to guide how an ISD::SETCC node's result is to be computed
2882	// in a GPR.
2883	// ZExtOrig - use the original condition code, zero-extend value
2884	// ZExtInvert - invert the condition code, zero-extend value
2885	// SExtOrig - use the original condition code, sign-extend value
2886	// SExtInvert - invert the condition code, sign-extend value
2887	enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };
2888
2889	// Comparisons against zero to emit GPR code sequences for. Each of these
2890	// sequences may need to be emitted for two or more equivalent patterns.
2891	// For example (a >= 0) == (a > -1). The direction of the comparison (</>)
2892	// matters as well as the extension type: sext (-1/0), zext (1/0).
2893	// GEZExt - (zext (LHS >= 0))
2894	// GESExt - (sext (LHS >= 0))
2895	// LEZExt - (zext (LHS <= 0))
2896	// LESExt - (sext (LHS <= 0))
2897	enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };
2898
2899	SDNode tryEXTEND(SDNode N);
2900	SDNode tryLogicOpOfCompares(SDNode N);
2901	SDValue computeLogicOpInGPR(SDValue LogicOp);
2902	SDValue signExtendInputIfNeeded(SDValue Input);
2903	SDValue zeroExtendInputIfNeeded(SDValue Input);
2904	SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
2905	SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
2906	ZeroCompare CmpTy);
2907	SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2908	int64_t RHSValue, SDLoc dl);
2909	SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2910	int64_t RHSValue, SDLoc dl);
2911	SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2912	int64_t RHSValue, SDLoc dl);
2913	SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2914	int64_t RHSValue, SDLoc dl);
2915	SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);
2916
2917	public:
2918	IntegerCompareEliminator(SelectionDAG *DAG,
2919	PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {
2920	assert(CurDAG->getTargetLoweringInfo()
2921	.getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == `64` &&
2922	"Only expecting to use this on 64 bit targets.");
2923	}
2924	SDNode Select(SDNode N) {
2925	if (CmpInGPR == ICGPR_None)
2926	return nullptr;
2927	switch (N->getOpcode()) {
2928	default: break;
2929	case ISD::ZERO_EXTEND:
2930	if (CmpInGPR == ICGPR_Sext \|\| CmpInGPR == ICGPR_SextI32 \|\|
2931	CmpInGPR == ICGPR_SextI64)
2932	return nullptr;
2933	[[fallthrough]];
2934	case ISD::SIGN_EXTEND:
2935	if (CmpInGPR == ICGPR_Zext \|\| CmpInGPR == ICGPR_ZextI32 \|\|
2936	CmpInGPR == ICGPR_ZextI64)
2937	return nullptr;
2938	return tryEXTEND(N);
2939	case ISD::AND:
2940	case ISD::OR:
2941	case ISD::XOR:
2942	return tryLogicOpOfCompares(N);
2943	}
2944	return nullptr;
2945	}
2946	};
2947
2948	// The obvious case for wanting to keep the value in a GPR. Namely, the
2949	// result of the comparison is actually needed in a GPR.
2950	SDNode IntegerCompareEliminator::tryEXTEND(SDNode N) {
2951	assert((N->getOpcode() == ISD::ZERO_EXTEND \|\|
2952	N->getOpcode() == ISD::SIGN_EXTEND) &&
2953	"Expecting a zero/sign extend node!");
2954	SDValue WideRes;
2955	// If we are zero-extending the result of a logical operation on i1
2956	// values, we can keep the values in GPRs.
2957	if (ISD::isBitwiseLogicOp(N->getOperand(`0`).getOpcode()) &&
2958	N->getOperand(`0`).getValueType() == MVT::i1 &&
2959	N->getOpcode() == ISD::ZERO_EXTEND)
2960	WideRes = computeLogicOpInGPR(LogicOp: N->getOperand(Num: `0`));
2961	else if (N->getOperand(Num: `0`).getOpcode() != ISD::SETCC)
2962	return nullptr;
2963	else
2964	WideRes =
2965	getSETCCInGPR(Compare: N->getOperand(Num: `0`),
2966	ConvOpts: N->getOpcode() == ISD::SIGN_EXTEND ?
2967	SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
2968
2969	if (!WideRes)
2970	return nullptr;
2971
2972	SDLoc dl(N);
2973	bool Input32Bit = WideRes.getValueType() == MVT::i32;
2974	bool Output32Bit = N->getValueType(`0`) == MVT::i32;
2975
2976	NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? `1` : `0`;
2977	NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? `0` : `1`;
2978
2979	SDValue ConvOp = WideRes;
2980	if (Input32Bit != Output32Bit)
2981	ConvOp = addExtOrTrunc(NatWidthRes: WideRes, Conv: Input32Bit ? ExtOrTruncConversion::Ext :
2982	ExtOrTruncConversion::Trunc);
2983	return ConvOp.getNode();
2984	}
2985
2986	// Attempt to perform logical operations on the results of comparisons while
2987	// keeping the values in GPRs. Without doing so, these would end up being
2988	// lowered to CR-logical operations which suffer from significant latency and
2989	// low ILP.
2990	SDNode IntegerCompareEliminator::tryLogicOpOfCompares(SDNode N) {
2991	if (N->getValueType(`0`) != MVT::i1)
2992	return nullptr;
2993	assert(ISD::isBitwiseLogicOp(N->getOpcode()) &&
2994	"Expected a logic operation on setcc results.");
2995	SDValue LoweredLogical = computeLogicOpInGPR(LogicOp: SDValue (N, `0`));
2996	if (!LoweredLogical)
2997	return nullptr;
2998
2999	SDLoc dl(N);
3000	bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
3001	unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
3002	SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
3003	SDValue LHS = LoweredLogical.getOperand(i: `0`);
3004	SDValue RHS = LoweredLogical.getOperand(i: `1`);
3005	SDValue WideOp;
3006	SDValue OpToConvToRecForm;
3007
3008	// Look through any 32-bit to 64-bit implicit extend nodes to find the
3009	// opcode that is input to the XORI.
3010	if (IsBitwiseNegate &&
3011	LoweredLogical.getOperand(`0`).getMachineOpcode() == PPC::INSERT_SUBREG)
3012	OpToConvToRecForm = LoweredLogical.getOperand(i: `0`).getOperand(i: `1`);
3013	else if (IsBitwiseNegate)
3014	// If the input to the XORI isn't an extension, that's what we're after.
3015	OpToConvToRecForm = LoweredLogical.getOperand(i: `0`);
3016	else
3017	// If this is not an XORI, it is a reg-reg logical op and we can convert
3018	// it to record-form.
3019	OpToConvToRecForm = LoweredLogical;
3020
3021	// Get the record-form version of the node we're looking to use to get the
3022	// CR result from.
3023	uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
3024	int NewOpc = PPCInstrInfo::getRecordFormOpcode(Opcode: NonRecOpc);
3025
3026	// Convert the right node to record-form. This is either the logical we're
3027	// looking at or it is the input node to the negation (if we're looking at
3028	// a bitwise negation).
3029	if (NewOpc != -`1` && IsBitwiseNegate) {
3030	// The input to the XORI has a record-form. Use it.
3031	assert(LoweredLogical.getConstantOperandVal(`1`) == `1` &&
3032	"Expected a PPC::XORI8 only for bitwise negation.");
3033	// Emit the record-form instruction.
3034	std::vector<SDValue> Ops;
3035	for (int i = `0`, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
3036	Ops.push_back(x: OpToConvToRecForm.getOperand(i));
3037
3038	WideOp =
3039	SDValue(CurDAG->getMachineNode(NewOpc, dl,
3040	OpToConvToRecForm.getValueType(),
3041	MVT::Glue, Ops), `0`);
3042	} else {
3043	assert((NewOpc != -`1` \|\| !IsBitwiseNegate) &&
3044	"No record form available for AND8/OR8/XOR8?");
3045	WideOp =
3046	SDValue(CurDAG->getMachineNode(NewOpc == -`1` ? PPC::ANDI8_rec : NewOpc,
3047	dl, MVT::i64, MVT::Glue, LHS, RHS),
3048	`0`);
3049	}
3050
3051	// Select this node to a single bit from CR0 set by the record-form node
3052	// just created. For bitwise negation, use the EQ bit which is the equivalent
3053	// of negating the result (i.e. it is a bit set when the result of the
3054	// operation is zero).
3055	SDValue SRIdxVal =
3056	CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);
3057	SDValue CRBit =
3058	SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
3059	MVT::i1, CR0Reg, SRIdxVal,
3060	WideOp.getValue(`1`)), `0`);
3061	return CRBit.getNode();
3062	}
3063
3064	// Lower a logical operation on i1 values into a GPR sequence if possible.
3065	// The result can be kept in a GPR if requested.
3066	// Three types of inputs can be handled:
3067	// - SETCC
3068	// - TRUNCATE
3069	// - Logical operation (AND/OR/XOR)
3070	// There is also a special case that is handled (namely a complement operation
3071	// achieved with xor %a, -1).
3072	SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
3073	assert(ISD::isBitwiseLogicOp(LogicOp.getOpcode()) &&
3074	"Can only handle logic operations here.");
3075	assert(LogicOp.getValueType() == MVT::i1 &&
3076	"Can only handle logic operations on i1 values here.");
3077	SDLoc dl(LogicOp);
3078	SDValue LHS, RHS;
3079
3080	// Special case: xor %a, -1
3081	bool IsBitwiseNegation = isBitwiseNot(V: LogicOp);
3082
3083	// Produces a GPR sequence for each operand of the binary logic operation.
3084	// For SETCC, it produces the respective comparison, for TRUNCATE it truncates
3085	// the value in a GPR and for logic operations, it will recursively produce
3086	// a GPR sequence for the operation.
3087	auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
3088	unsigned OperandOpcode = Operand.getOpcode();
3089	if (OperandOpcode == ISD::SETCC)
3090	return getSETCCInGPR(Compare: Operand, ConvOpts: SetccInGPROpts::ZExtOrig);
3091	else if (OperandOpcode == ISD::TRUNCATE) {
3092	SDValue InputOp = Operand.getOperand(i: `0`);
3093	EVT InVT = InputOp.getValueType();
3094	return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :
3095	PPC::RLDICL, dl, InVT, InputOp,
3096	S->getI64Imm(`0`, dl),
3097	S->getI64Imm(`63`, dl)), `0`);
3098	} else if (ISD::isBitwiseLogicOp(Opcode: OperandOpcode))
3099	return computeLogicOpInGPR(LogicOp: Operand);
3100	return SDValue ();
3101	};
3102	LHS = getLogicOperand (LogicOp.getOperand(i: `0`));
3103	RHS = getLogicOperand (LogicOp.getOperand(i: `1`));
3104
3105	// If a GPR sequence can't be produced for the LHS we can't proceed.
3106	// Not producing a GPR sequence for the RHS is only a problem if this isn't
3107	// a bitwise negation operation.
3108	if (!LHS \|\| (!RHS && !IsBitwiseNegation))
3109	return SDValue ();
3110
3111	NumLogicOpsOnComparison ++;
3112
3113	// We will use the inputs as 64-bit values.
3114	if (LHS.getValueType() == MVT::i32)
3115	LHS = addExtOrTrunc(NatWidthRes: LHS, Conv: ExtOrTruncConversion::Ext);
3116	if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
3117	RHS = addExtOrTrunc(NatWidthRes: RHS, Conv: ExtOrTruncConversion::Ext);
3118
3119	unsigned NewOpc;
3120	switch (LogicOp.getOpcode()) {
3121	default: llvm_unreachable("Unknown logic operation.");
3122	case ISD::AND: NewOpc = PPC::AND8; break;
3123	case ISD::OR: NewOpc = PPC::OR8; break;
3124	case ISD::XOR: NewOpc = PPC::XOR8; break;
3125	}
3126
3127	if (IsBitwiseNegation) {
3128	RHS = S->getI64Imm(Imm: `1`, dl);
3129	NewOpc = PPC::XORI8;
3130	}
3131
3132	return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), `0`);
3133
3134	}
3135
3136	/// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
3137	/// Otherwise just reinterpret it as a 64-bit value.
3138	/// Useful when emitting comparison code for 32-bit values without using
3139	/// the compare instruction (which only considers the lower 32-bits).
3140	SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {
3141	assert(Input.getValueType() == MVT::i32 &&
3142	"Can only sign-extend 32-bit values here.");
3143	unsigned Opc = Input.getOpcode();
3144
3145	// The value was sign extended and then truncated to 32-bits. No need to
3146	// sign extend it again.
3147	if (Opc == ISD::TRUNCATE &&
3148	(Input.getOperand(i: `0`).getOpcode() == ISD::AssertSext \|\|
3149	Input.getOperand(i: `0`).getOpcode() == ISD::SIGN_EXTEND))
3150	return addExtOrTrunc(NatWidthRes: Input, Conv: ExtOrTruncConversion::Ext);
3151
3152	LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Val&: Input);
3153	// The input is a sign-extending load. All ppc sign-extending loads
3154	// sign-extend to the full 64-bits.
3155	if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)
3156	return addExtOrTrunc(NatWidthRes: Input, Conv: ExtOrTruncConversion::Ext);
3157
3158	ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Val&: Input);
3159	// We don't sign-extend constants.
3160	if (InputConst)
3161	return addExtOrTrunc(NatWidthRes: Input, Conv: ExtOrTruncConversion::Ext);
3162
3163	SDLoc dl(Input);
3164	SignExtensionsAdded ++;
3165	return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl,
3166	MVT::i64, Input), `0`);
3167	}
3168
3169	/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
3170	/// Otherwise just reinterpret it as a 64-bit value.
3171	/// Useful when emitting comparison code for 32-bit values without using
3172	/// the compare instruction (which only considers the lower 32-bits).
3173	SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {
3174	assert(Input.getValueType() == MVT::i32 &&
3175	"Can only zero-extend 32-bit values here.");
3176	unsigned Opc = Input.getOpcode();
3177
3178	// The only condition under which we can omit the actual extend instruction:
3179	// - The value is a positive constant
3180	// - The value comes from a load that isn't a sign-extending load
3181	// An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
3182	bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&
3183	(Input.getOperand(i: `0`).getOpcode() == ISD::AssertZext \|\|
3184	Input.getOperand(i: `0`).getOpcode() == ISD::ZERO_EXTEND);
3185	if (IsTruncateOfZExt)
3186	return addExtOrTrunc(NatWidthRes: Input, Conv: ExtOrTruncConversion::Ext);
3187
3188	ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Val&: Input);
3189	if (InputConst && InputConst->getSExtValue() >= `0`)
3190	return addExtOrTrunc(NatWidthRes: Input, Conv: ExtOrTruncConversion::Ext);
3191
3192	LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Val&: Input);
3193	// The input is a load that doesn't sign-extend (it will be zero-extended).
3194	if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
3195	return addExtOrTrunc(NatWidthRes: Input, Conv: ExtOrTruncConversion::Ext);
3196
3197	// None of the above, need to zero-extend.
3198	SDLoc dl(Input);
3199	ZeroExtensionsAdded ++;
3200	return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,
3201	S->getI64Imm(`0`, dl),
3202	S->getI64Imm(`32`, dl)), `0`);
3203	}
3204
3205	// Handle a 32-bit value in a 64-bit register and vice-versa. These are of
3206	// course not actual zero/sign extensions that will generate machine code,
3207	// they're just a way to reinterpret a 32 bit value in a register as a
3208	// 64 bit value and vice-versa.
3209	SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,
3210	ExtOrTruncConversion Conv) {
3211	SDLoc dl(NatWidthRes);
3212
3213	// For reinterpreting 32-bit values as 64 bit values, we generate
3214	// INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
3215	if (Conv == ExtOrTruncConversion::Ext) {
3216	SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), `0`);
3217	SDValue SubRegIdx =
3218	CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
3219	return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
3220	ImDef, NatWidthRes, SubRegIdx), `0`);
3221	}
3222
3223	assert(Conv == ExtOrTruncConversion::Trunc &&
3224	"Unknown convertion between 32 and 64 bit values.");
3225	// For reinterpreting 64-bit values as 32-bit values, we just need to
3226	// EXTRACT_SUBREG (i.e. extract the low word).
3227	SDValue SubRegIdx =
3228	CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
3229	return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
3230	NatWidthRes, SubRegIdx), `0`);
3231	}
3232
3233	// Produce a GPR sequence for compound comparisons (<=, >=) against zero.
3234	// Handle both zero-extensions and sign-extensions.
3235	SDValue
3236	IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
3237	ZeroCompare CmpTy) {
3238	EVT InVT = LHS.getValueType();
3239	bool Is32Bit = InVT == MVT::i32;
3240	SDValue ToExtend;
3241
3242	// Produce the value that needs to be either zero or sign extended.
3243	switch (CmpTy) {
3244	case ZeroCompare::GEZExt:
3245	case ZeroCompare::GESExt:
3246	ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8,
3247	dl, InVT, LHS, LHS), `0`);
3248	break;
3249	case ZeroCompare::LEZExt:
3250	case ZeroCompare::LESExt: {
3251	if (Is32Bit) {
3252	// Upper 32 bits cannot be undefined for this sequence.
3253	LHS = signExtendInputIfNeeded(Input: LHS);
3254	SDValue Neg =
3255	SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), `0`);
3256	ToExtend =
3257	SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3258	Neg, S->getI64Imm(`1`, dl),
3259	S->getI64Imm(`63`, dl)), `0`);
3260	} else {
3261	SDValue Addi =
3262	SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3263	S->getI64Imm(~`0ULL`, dl)), `0`);
3264	ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
3265	Addi, LHS), `0`);
3266	}
3267	break;
3268	}
3269	}
3270
3271	// For 64-bit sequences, the extensions are the same for the GE/LE cases.
3272	if (!Is32Bit &&
3273	(CmpTy == ZeroCompare::GEZExt \|\| CmpTy == ZeroCompare::LEZExt))
3274	return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3275	ToExtend, S->getI64Imm(`1`, dl),
3276	S->getI64Imm(`63`, dl)), `0`);
3277	if (!Is32Bit &&
3278	(CmpTy == ZeroCompare::GESExt \|\| CmpTy == ZeroCompare::LESExt))
3279	return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend,
3280	S->getI64Imm(`63`, dl)), `0`);
3281
3282	assert(Is32Bit && "Should have handled the 32-bit sequences above.");
3283	// For 32-bit sequences, the extensions differ between GE/LE cases.
3284	switch (CmpTy) {
3285	case ZeroCompare::GEZExt: {
3286	SDValue ShiftOps[] = { ToExtend, S->getI32Imm(Imm: `1`, dl), S->getI32Imm(Imm: `31`, dl),
3287	S->getI32Imm(Imm: `31`, dl) };
3288	return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3289	ShiftOps), `0`);
3290	}
3291	case ZeroCompare::GESExt:
3292	return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend,
3293	S->getI32Imm(`31`, dl)), `0`);
3294	case ZeroCompare::LEZExt:
3295	return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend,
3296	S->getI32Imm(`1`, dl)), `0`);
3297	case ZeroCompare::LESExt:
3298	return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend,
3299	S->getI32Imm(-`1`, dl)), `0`);
3300	}
3301
3302	// The above case covers all the enumerators so it can't have a default clause
3303	// to avoid compiler warnings.
3304	llvm_unreachable("Unknown zero-comparison type.");
3305	}
3306
3307	/// Produces a zero-extended result of comparing two 32-bit values according to
3308	/// the passed condition code.
3309	SDValue
3310	IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
3311	ISD::CondCode CC,
3312	int64_t RHSValue, SDLoc dl) {
3313	if (CmpInGPR == ICGPR_I64 \|\| CmpInGPR == ICGPR_SextI64 \|\|
3314	CmpInGPR == ICGPR_ZextI64 \|\| CmpInGPR == ICGPR_Sext)
3315	return SDValue ();
3316	bool IsRHSZero = RHSValue == `0`;
3317	bool IsRHSOne = RHSValue == `1`;
3318	bool IsRHSNegOne = RHSValue == -`1LL`;
3319	switch (CC) {
3320	default: return SDValue ();
3321	case ISD::SETEQ: {
3322	// (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
3323	// (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)
3324	SDValue Xor = IsRHSZero ? LHS :
3325	SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), `0`);
3326	SDValue Clz =
3327	SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), `0`);
3328	SDValue ShiftOps[] = { Clz, S->getI32Imm(Imm: `27`, dl), S->getI32Imm(Imm: `5`, dl),
3329	S->getI32Imm(Imm: `31`, dl) };
3330	return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3331	ShiftOps), `0`);
3332	}
3333	case ISD::SETNE: {
3334	// (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
3335	// (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)
3336	SDValue Xor = IsRHSZero ? LHS :
3337	SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), `0`);
3338	SDValue Clz =
3339	SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), `0`);
3340	SDValue ShiftOps[] = { Clz, S->getI32Imm(Imm: `27`, dl), S->getI32Imm(Imm: `5`, dl),
3341	S->getI32Imm(Imm: `31`, dl) };
3342	SDValue Shift =
3343	SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), `0`);
3344	return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
3345	S->getI32Imm(`1`, dl)), `0`);
3346	}
3347	case ISD::SETGE: {
3348	// (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
3349	// (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31)
3350	if(IsRHSZero)
3351	return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::GEZExt);
3352
3353	// Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3354	// by swapping inputs and falling through.
3355	std::swap(a&: LHS, b&: RHS);
3356	ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(Val&: RHS);
3357	IsRHSZero = RHSConst && RHSConst->isZero();
3358	[[fallthrough]];
3359	}
3360	case ISD::SETLE: {
3361	if (CmpInGPR == ICGPR_NonExtIn)
3362	return SDValue ();
3363	// (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
3364	// (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1)
3365	if(IsRHSZero) {
3366	if (CmpInGPR == ICGPR_NonExtIn)
3367	return SDValue ();
3368	return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::LEZExt);
3369	}
3370
3371	// The upper 32-bits of the register can't be undefined for this sequence.
3372	LHS = signExtendInputIfNeeded(Input: LHS);
3373	RHS = signExtendInputIfNeeded(Input: RHS);
3374	SDValue Sub =
3375	SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), `0`);
3376	SDValue Shift =
3377	SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub,
3378	S->getI64Imm(`1`, dl), S->getI64Imm(`63`, dl)),
3379	`0`);
3380	return
3381	SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,
3382	MVT::i64, Shift, S->getI32Imm(`1`, dl)), `0`);
3383	}
3384	case ISD::SETGT: {
3385	// (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
3386	// (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
3387	// (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63)
3388	// Handle SETLT -1 (which is equivalent to SETGE 0).
3389	if (IsRHSNegOne)
3390	return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::GEZExt);
3391
3392	if (IsRHSZero) {
3393	if (CmpInGPR == ICGPR_NonExtIn)
3394	return SDValue ();
3395	// The upper 32-bits of the register can't be undefined for this sequence.
3396	LHS = signExtendInputIfNeeded(Input: LHS);
3397	RHS = signExtendInputIfNeeded(Input: RHS);
3398	SDValue Neg =
3399	SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), `0`);
3400	return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3401	Neg, S->getI32Imm(`1`, dl), S->getI32Imm(`63`, dl)), `0`);
3402	}
3403	// Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3404	// (%b < %a) by swapping inputs and falling through.
3405	std::swap(a&: LHS, b&: RHS);
3406	ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(Val&: RHS);
3407	IsRHSZero = RHSConst && RHSConst->isZero();
3408	IsRHSOne = RHSConst && RHSConst->getSExtValue() == `1`;
3409	[[fallthrough]];
3410	}
3411	case ISD::SETLT: {
3412	// (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
3413	// (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1)
3414	// (zext (setcc %a, 0, setlt)) -> (lshr %a, 31)
3415	// Handle SETLT 1 (which is equivalent to SETLE 0).
3416	if (IsRHSOne) {
3417	if (CmpInGPR == ICGPR_NonExtIn)
3418	return SDValue ();
3419	return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::LEZExt);
3420	}
3421
3422	if (IsRHSZero) {
3423	SDValue ShiftOps[] = { LHS, S->getI32Imm(Imm: `1`, dl), S->getI32Imm(Imm: `31`, dl),
3424	S->getI32Imm(Imm: `31`, dl) };
3425	return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3426	ShiftOps), `0`);
3427	}
3428
3429	if (CmpInGPR == ICGPR_NonExtIn)
3430	return SDValue ();
3431	// The upper 32-bits of the register can't be undefined for this sequence.
3432	LHS = signExtendInputIfNeeded(Input: LHS);
3433	RHS = signExtendInputIfNeeded(Input: RHS);
3434	SDValue SUBFNode =
3435	SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), `0`);
3436	return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3437	SUBFNode, S->getI64Imm(`1`, dl),
3438	S->getI64Imm(`63`, dl)), `0`);
3439	}
3440	case ISD::SETUGE:
3441	// (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1)
3442	// (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1)
3443	std::swap(a&: LHS, b&: RHS);
3444	[[fallthrough]];
3445	case ISD::SETULE: {
3446	if (CmpInGPR == ICGPR_NonExtIn)
3447	return SDValue ();
3448	// The upper 32-bits of the register can't be undefined for this sequence.
3449	LHS = zeroExtendInputIfNeeded(Input: LHS);
3450	RHS = zeroExtendInputIfNeeded(Input: RHS);
3451	SDValue Subtract =
3452	SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), `0`);
3453	SDValue SrdiNode =
3454	SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3455	Subtract, S->getI64Imm(`1`, dl),
3456	S->getI64Imm(`63`, dl)), `0`);
3457	return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,
3458	S->getI32Imm(`1`, dl)), `0`);
3459	}
3460	case ISD::SETUGT:
3461	// (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
3462	// (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
3463	std::swap(a&: LHS, b&: RHS);
3464	[[fallthrough]];
3465	case ISD::SETULT: {
3466	if (CmpInGPR == ICGPR_NonExtIn)
3467	return SDValue ();
3468	// The upper 32-bits of the register can't be undefined for this sequence.
3469	LHS = zeroExtendInputIfNeeded(Input: LHS);
3470	RHS = zeroExtendInputIfNeeded(Input: RHS);
3471	SDValue Subtract =
3472	SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), `0`);
3473	return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3474	Subtract, S->getI64Imm(`1`, dl),
3475	S->getI64Imm(`63`, dl)), `0`);
3476	}
3477	}
3478	}
3479
3480	/// Produces a sign-extended result of comparing two 32-bit values according to
3481	/// the passed condition code.
3482	SDValue
3483	IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
3484	ISD::CondCode CC,
3485	int64_t RHSValue, SDLoc dl) {
3486	if (CmpInGPR == ICGPR_I64 \|\| CmpInGPR == ICGPR_SextI64 \|\|
3487	CmpInGPR == ICGPR_ZextI64 \|\| CmpInGPR == ICGPR_Zext)
3488	return SDValue ();
3489	bool IsRHSZero = RHSValue == `0`;
3490	bool IsRHSOne = RHSValue == `1`;
3491	bool IsRHSNegOne = RHSValue == -`1LL`;
3492
3493	switch (CC) {
3494	default: return SDValue ();
3495	case ISD::SETEQ: {
3496	// (sext (setcc %a, %b, seteq)) ->
3497	// (ashr (shl (ctlz (xor %a, %b)), 58), 63)
3498	// (sext (setcc %a, 0, seteq)) ->
3499	// (ashr (shl (ctlz %a), 58), 63)
3500	SDValue CountInput = IsRHSZero ? LHS :
3501	SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), `0`);
3502	SDValue Cntlzw =
3503	SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), `0`);
3504	SDValue SHLOps[] = { Cntlzw, S->getI32Imm(Imm: `27`, dl),
3505	S->getI32Imm(Imm: `5`, dl), S->getI32Imm(Imm: `31`, dl) };
3506	SDValue Slwi =
3507	SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), `0`);
3508	return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), `0`);
3509	}
3510	case ISD::SETNE: {
3511	// Bitwise xor the operands, count leading zeros, shift right by 5 bits and
3512	// flip the bit, finally take 2's complement.
3513	// (sext (setcc %a, %b, setne)) ->
3514	// (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
3515	// Same as above, but the first xor is not needed.
3516	// (sext (setcc %a, 0, setne)) ->
3517	// (neg (xor (lshr (ctlz %a), 5), 1))
3518	SDValue Xor = IsRHSZero ? LHS :
3519	SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), `0`);
3520	SDValue Clz =
3521	SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), `0`);
3522	SDValue ShiftOps[] =
3523	{ Clz, S->getI32Imm(Imm: `27`, dl), S->getI32Imm(Imm: `5`, dl), S->getI32Imm(Imm: `31`, dl) };
3524	SDValue Shift =
3525	SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), `0`);
3526	SDValue Xori =
3527	SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
3528	S->getI32Imm(`1`, dl)), `0`);
3529	return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), `0`);
3530	}
3531	case ISD::SETGE: {
3532	// (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
3533	// (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31)
3534	if (IsRHSZero)
3535	return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::GESExt);
3536
3537	// Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3538	// by swapping inputs and falling through.
3539	std::swap(a&: LHS, b&: RHS);
3540	ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(Val&: RHS);
3541	IsRHSZero = RHSConst && RHSConst->isZero();
3542	[[fallthrough]];
3543	}
3544	case ISD::SETLE: {
3545	if (CmpInGPR == ICGPR_NonExtIn)
3546	return SDValue ();
3547	// (sext (setcc %a, %b, setge)) -> (add (lshr (sub %b, %a), 63), -1)
3548	// (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1)
3549	if (IsRHSZero)
3550	return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::LESExt);
3551
3552	// The upper 32-bits of the register can't be undefined for this sequence.
3553	LHS = signExtendInputIfNeeded(Input: LHS);
3554	RHS = signExtendInputIfNeeded(Input: RHS);
3555	SDValue SUBFNode =
3556	SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,
3557	LHS, RHS), `0`);
3558	SDValue Srdi =
3559	SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3560	SUBFNode, S->getI64Imm(`1`, dl),
3561	S->getI64Imm(`63`, dl)), `0`);
3562	return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,
3563	S->getI32Imm(-`1`, dl)), `0`);
3564	}
3565	case ISD::SETGT: {
3566	// (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
3567	// (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
3568	// (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63)
3569	if (IsRHSNegOne)
3570	return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::GESExt);
3571	if (IsRHSZero) {
3572	if (CmpInGPR == ICGPR_NonExtIn)
3573	return SDValue ();
3574	// The upper 32-bits of the register can't be undefined for this sequence.
3575	LHS = signExtendInputIfNeeded(Input: LHS);
3576	RHS = signExtendInputIfNeeded(Input: RHS);
3577	SDValue Neg =
3578	SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), `0`);
3579	return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,
3580	S->getI64Imm(`63`, dl)), `0`);
3581	}
3582	// Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3583	// (%b < %a) by swapping inputs and falling through.
3584	std::swap(a&: LHS, b&: RHS);
3585	ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(Val&: RHS);
3586	IsRHSZero = RHSConst && RHSConst->isZero();
3587	IsRHSOne = RHSConst && RHSConst->getSExtValue() == `1`;
3588	[[fallthrough]];
3589	}
3590	case ISD::SETLT: {
3591	// (sext (setcc %a, %b, setgt)) -> (ashr (sub %a, %b), 63)
3592	// (sext (setcc %a, 1, setgt)) -> (add (lshr (- %a), 63), -1)
3593	// (sext (setcc %a, 0, setgt)) -> (ashr %a, 31)
3594	if (IsRHSOne) {
3595	if (CmpInGPR == ICGPR_NonExtIn)
3596	return SDValue ();
3597	return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::LESExt);
3598	}
3599	if (IsRHSZero)
3600	return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,
3601	S->getI32Imm(`31`, dl)), `0`);
3602
3603	if (CmpInGPR == ICGPR_NonExtIn)
3604	return SDValue ();
3605	// The upper 32-bits of the register can't be undefined for this sequence.
3606	LHS = signExtendInputIfNeeded(Input: LHS);
3607	RHS = signExtendInputIfNeeded(Input: RHS);
3608	SDValue SUBFNode =
3609	SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), `0`);
3610	return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3611	SUBFNode, S->getI64Imm(`63`, dl)), `0`);
3612	}
3613	case ISD::SETUGE:
3614	// (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
3615	// (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
3616	std::swap(a&: LHS, b&: RHS);
3617	[[fallthrough]];
3618	case ISD::SETULE: {
3619	if (CmpInGPR == ICGPR_NonExtIn)
3620	return SDValue ();
3621	// The upper 32-bits of the register can't be undefined for this sequence.
3622	LHS = zeroExtendInputIfNeeded(Input: LHS);
3623	RHS = zeroExtendInputIfNeeded(Input: RHS);
3624	SDValue Subtract =
3625	SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), `0`);
3626	SDValue Shift =
3627	SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,
3628	S->getI32Imm(`1`, dl), S->getI32Imm(`63`,dl)),
3629	`0`);
3630	return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
3631	S->getI32Imm(-`1`, dl)), `0`);
3632	}
3633	case ISD::SETUGT:
3634	// (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
3635	// (sext (setcc %a, %b, setugt)) -> (ashr (sub %a, %b), 63)
3636	std::swap(a&: LHS, b&: RHS);
3637	[[fallthrough]];
3638	case ISD::SETULT: {
3639	if (CmpInGPR == ICGPR_NonExtIn)
3640	return SDValue ();
3641	// The upper 32-bits of the register can't be undefined for this sequence.
3642	LHS = zeroExtendInputIfNeeded(Input: LHS);
3643	RHS = zeroExtendInputIfNeeded(Input: RHS);
3644	SDValue Subtract =
3645	SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), `0`);
3646	return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3647	Subtract, S->getI64Imm(`63`, dl)), `0`);
3648	}
3649	}
3650	}
3651
3652	/// Produces a zero-extended result of comparing two 64-bit values according to
3653	/// the passed condition code.
3654	SDValue
3655	IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
3656	ISD::CondCode CC,
3657	int64_t RHSValue, SDLoc dl) {
3658	if (CmpInGPR == ICGPR_I32 \|\| CmpInGPR == ICGPR_SextI32 \|\|
3659	CmpInGPR == ICGPR_ZextI32 \|\| CmpInGPR == ICGPR_Sext)
3660	return SDValue ();
3661	bool IsRHSZero = RHSValue == `0`;
3662	bool IsRHSOne = RHSValue == `1`;
3663	bool IsRHSNegOne = RHSValue == -`1LL`;
3664	switch (CC) {
3665	default: return SDValue ();
3666	case ISD::SETEQ: {
3667	// (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
3668	// (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)
3669	SDValue Xor = IsRHSZero ? LHS :
3670	SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), `0`);
3671	SDValue Clz =
3672	SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), `0`);
3673	return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
3674	S->getI64Imm(`58`, dl),
3675	S->getI64Imm(`63`, dl)), `0`);
3676	}
3677	case ISD::SETNE: {
3678	// {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3679	// (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
3680	// {addcz.reg, addcz.CA} = (addcarry %a, -1)
3681	// (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3682	SDValue Xor = IsRHSZero ? LHS :
3683	SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), `0`);
3684	SDValue AC =
3685	SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3686	Xor, S->getI32Imm(~`0U`, dl)), `0`);
3687	return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
3688	Xor, AC.getValue(`1`)), `0`);
3689	}
3690	case ISD::SETGE: {
3691	// {subc.reg, subc.CA} = (subcarry %a, %b)
3692	// (zext (setcc %a, %b, setge)) ->
3693	// (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
3694	// (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
3695	if (IsRHSZero)
3696	return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::GEZExt);
3697	std::swap(a&: LHS, b&: RHS);
3698	ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(Val&: RHS);
3699	IsRHSZero = RHSConst && RHSConst->isZero();
3700	[[fallthrough]];
3701	}
3702	case ISD::SETLE: {
3703	// {subc.reg, subc.CA} = (subcarry %b, %a)
3704	// (zext (setcc %a, %b, setge)) ->
3705	// (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
3706	// (zext (setcc %a, 0, setge)) -> (lshr (or %a, (add %a, -1)), 63)
3707	if (IsRHSZero)
3708	return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::LEZExt);
3709	SDValue ShiftL =
3710	SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3711	S->getI64Imm(`1`, dl),
3712	S->getI64Imm(`63`, dl)), `0`);
3713	SDValue ShiftR =
3714	SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3715	S->getI64Imm(`63`, dl)), `0`);
3716	SDValue SubtractCarry =
3717	SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3718	LHS, RHS), `1`);
3719	return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3720	ShiftR, ShiftL, SubtractCarry), `0`);
3721	}
3722	case ISD::SETGT: {
3723	// {subc.reg, subc.CA} = (subcarry %b, %a)
3724	// (zext (setcc %a, %b, setgt)) ->
3725	// (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3726	// (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
3727	if (IsRHSNegOne)
3728	return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::GEZExt);
3729	if (IsRHSZero) {
3730	SDValue Addi =
3731	SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3732	S->getI64Imm(~`0ULL`, dl)), `0`);
3733	SDValue Nor =
3734	SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), `0`);
3735	return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor,
3736	S->getI64Imm(`1`, dl),
3737	S->getI64Imm(`63`, dl)), `0`);
3738	}
3739	std::swap(a&: LHS, b&: RHS);
3740	ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(Val&: RHS);
3741	IsRHSZero = RHSConst && RHSConst->isZero();
3742	IsRHSOne = RHSConst && RHSConst->getSExtValue() == `1`;
3743	[[fallthrough]];
3744	}
3745	case ISD::SETLT: {
3746	// {subc.reg, subc.CA} = (subcarry %a, %b)
3747	// (zext (setcc %a, %b, setlt)) ->
3748	// (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3749	// (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
3750	if (IsRHSOne)
3751	return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::LEZExt);
3752	if (IsRHSZero)
3753	return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3754	S->getI64Imm(`1`, dl),
3755	S->getI64Imm(`63`, dl)), `0`);
3756	SDValue SRADINode =
3757	SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3758	LHS, S->getI64Imm(`63`, dl)), `0`);
3759	SDValue SRDINode =
3760	SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3761	RHS, S->getI64Imm(`1`, dl),
3762	S->getI64Imm(`63`, dl)), `0`);
3763	SDValue SUBFC8Carry =
3764	SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3765	RHS, LHS), `1`);
3766	SDValue ADDE8Node =
3767	SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3768	SRDINode, SRADINode, SUBFC8Carry), `0`);
3769	return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3770	ADDE8Node, S->getI64Imm(`1`, dl)), `0`);
3771	}
3772	case ISD::SETUGE:
3773	// {subc.reg, subc.CA} = (subcarry %a, %b)
3774	// (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
3775	std::swap(a&: LHS, b&: RHS);
3776	[[fallthrough]];
3777	case ISD::SETULE: {
3778	// {subc.reg, subc.CA} = (subcarry %b, %a)
3779	// (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
3780	SDValue SUBFC8Carry =
3781	SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3782	LHS, RHS), `1`);
3783	SDValue SUBFE8Node =
3784	SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue,
3785	LHS, LHS, SUBFC8Carry), `0`);
3786	return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64,
3787	SUBFE8Node, S->getI64Imm(`1`, dl)), `0`);
3788	}
3789	case ISD::SETUGT:
3790	// {subc.reg, subc.CA} = (subcarry %b, %a)
3791	// (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
3792	std::swap(a&: LHS, b&: RHS);
3793	[[fallthrough]];
3794	case ISD::SETULT: {
3795	// {subc.reg, subc.CA} = (subcarry %a, %b)
3796	// (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
3797	SDValue SubtractCarry =
3798	SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3799	RHS, LHS), `1`);
3800	SDValue ExtSub =
3801	SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3802	LHS, LHS, SubtractCarry), `0`);
3803	return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3804	ExtSub), `0`);
3805	}
3806	}
3807	}
3808
3809	/// Produces a sign-extended result of comparing two 64-bit values according to
3810	/// the passed condition code.
3811	SDValue
3812	IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
3813	ISD::CondCode CC,
3814	int64_t RHSValue, SDLoc dl) {
3815	if (CmpInGPR == ICGPR_I32 \|\| CmpInGPR == ICGPR_SextI32 \|\|
3816	CmpInGPR == ICGPR_ZextI32 \|\| CmpInGPR == ICGPR_Zext)
3817	return SDValue ();
3818	bool IsRHSZero = RHSValue == `0`;
3819	bool IsRHSOne = RHSValue == `1`;
3820	bool IsRHSNegOne = RHSValue == -`1LL`;
3821	switch (CC) {
3822	default: return SDValue ();
3823	case ISD::SETEQ: {
3824	// {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3825	// (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
3826	// {addcz.reg, addcz.CA} = (addcarry %a, -1)
3827	// (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3828	SDValue AddInput = IsRHSZero ? LHS :
3829	SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), `0`);
3830	SDValue Addic =
3831	SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3832	AddInput, S->getI32Imm(~`0U`, dl)), `0`);
3833	return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
3834	Addic, Addic.getValue(`1`)), `0`);
3835	}
3836	case ISD::SETNE: {
3837	// {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
3838	// (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
3839	// {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
3840	// (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
3841	SDValue Xor = IsRHSZero ? LHS :
3842	SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), `0`);
3843	SDValue SC =
3844	SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
3845	Xor, S->getI32Imm(`0`, dl)), `0`);
3846	return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
3847	SC, SC.getValue(`1`)), `0`);
3848	}
3849	case ISD::SETGE: {
3850	// {subc.reg, subc.CA} = (subcarry %a, %b)
3851	// (zext (setcc %a, %b, setge)) ->
3852	// (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
3853	// (zext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
3854	if (IsRHSZero)
3855	return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::GESExt);
3856	std::swap(a&: LHS, b&: RHS);
3857	ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(Val&: RHS);
3858	IsRHSZero = RHSConst && RHSConst->isZero();
3859	[[fallthrough]];
3860	}
3861	case ISD::SETLE: {
3862	// {subc.reg, subc.CA} = (subcarry %b, %a)
3863	// (zext (setcc %a, %b, setge)) ->
3864	// (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
3865	// (zext (setcc %a, 0, setge)) -> (ashr (or %a, (add %a, -1)), 63)
3866	if (IsRHSZero)
3867	return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::LESExt);
3868	SDValue ShiftR =
3869	SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3870	S->getI64Imm(`63`, dl)), `0`);
3871	SDValue ShiftL =
3872	SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3873	S->getI64Imm(`1`, dl),
3874	S->getI64Imm(`63`, dl)), `0`);
3875	SDValue SubtractCarry =
3876	SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3877	LHS, RHS), `1`);
3878	SDValue Adde =
3879	SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3880	ShiftR, ShiftL, SubtractCarry), `0`);
3881	return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), `0`);
3882	}
3883	case ISD::SETGT: {
3884	// {subc.reg, subc.CA} = (subcarry %b, %a)
3885	// (zext (setcc %a, %b, setgt)) ->
3886	// -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3887	// (zext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
3888	if (IsRHSNegOne)
3889	return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::GESExt);
3890	if (IsRHSZero) {
3891	SDValue Add =
3892	SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3893	S->getI64Imm(-`1`, dl)), `0`);
3894	SDValue Nor =
3895	SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), `0`);
3896	return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,
3897	S->getI64Imm(`63`, dl)), `0`);
3898	}
3899	std::swap(a&: LHS, b&: RHS);
3900	ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(Val&: RHS);
3901	IsRHSZero = RHSConst && RHSConst->isZero();
3902	IsRHSOne = RHSConst && RHSConst->getSExtValue() == `1`;
3903	[[fallthrough]];
3904	}
3905	case ISD::SETLT: {
3906	// {subc.reg, subc.CA} = (subcarry %a, %b)
3907	// (zext (setcc %a, %b, setlt)) ->
3908	// -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3909	// (zext (setcc %a, 0, setlt)) -> (ashr %a, 63)
3910	if (IsRHSOne)
3911	return getCompoundZeroComparisonInGPR(LHS, dl, CmpTy: ZeroCompare::LESExt);
3912	if (IsRHSZero) {
3913	return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,
3914	S->getI64Imm(`63`, dl)), `0`);
3915	}
3916	SDValue SRADINode =
3917	SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3918	LHS, S->getI64Imm(`63`, dl)), `0`);
3919	SDValue SRDINode =
3920	SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3921	RHS, S->getI64Imm(`1`, dl),
3922	S->getI64Imm(`63`, dl)), `0`);
3923	SDValue SUBFC8Carry =
3924	SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3925	RHS, LHS), `1`);
3926	SDValue ADDE8Node =
3927	SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,
3928	SRDINode, SRADINode, SUBFC8Carry), `0`);
3929	SDValue XORI8Node =
3930	SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3931	ADDE8Node, S->getI64Imm(`1`, dl)), `0`);
3932	return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3933	XORI8Node), `0`);
3934	}
3935	case ISD::SETUGE:
3936	// {subc.reg, subc.CA} = (subcarry %a, %b)
3937	// (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
3938	std::swap(a&: LHS, b&: RHS);
3939	[[fallthrough]];
3940	case ISD::SETULE: {
3941	// {subc.reg, subc.CA} = (subcarry %b, %a)
3942	// (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
3943	SDValue SubtractCarry =
3944	SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3945	LHS, RHS), `1`);
3946	SDValue ExtSub =
3947	SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,
3948	LHS, SubtractCarry), `0`);
3949	return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,
3950	ExtSub, ExtSub), `0`);
3951	}
3952	case ISD::SETUGT:
3953	// {subc.reg, subc.CA} = (subcarry %b, %a)
3954	// (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
3955	std::swap(a&: LHS, b&: RHS);
3956	[[fallthrough]];
3957	case ISD::SETULT: {
3958	// {subc.reg, subc.CA} = (subcarry %a, %b)
3959	// (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
3960	SDValue SubCarry =
3961	SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3962	RHS, LHS), `1`);
3963	return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3964	LHS, LHS, SubCarry), `0`);
3965	}
3966	}
3967	}
3968
3969	/// Do all uses of this SDValue need the result in a GPR?
3970	/// This is meant to be used on values that have type i1 since
3971	/// it is somewhat meaningless to ask if values of other types
3972	/// should be kept in GPR's.
3973	static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
3974	assert(Compare.getOpcode() == ISD::SETCC &&
3975	"An ISD::SETCC node required here.");
3976
3977	// For values that have a single use, the caller should obviously already have
3978	// checked if that use is an extending use. We check the other uses here.
3979	if (Compare.hasOneUse())
3980	return true;
3981	// We want the value in a GPR if it is being extended, used for a select, or
3982	// used in logical operations.
3983	for (auto *CompareUse : Compare.getNode()->uses())
3984	if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
3985	CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
3986	CompareUse->getOpcode() != ISD::SELECT &&
3987	!ISD::isBitwiseLogicOp(Opcode: CompareUse->getOpcode())) {
3988	OmittedForNonExtendUses ++;
3989	return false;
3990	}
3991	return true;
3992	}
3993
3994	/// Returns an equivalent of a SETCC node but with the result the same width as
3995	/// the inputs. This can also be used for SELECT_CC if either the true or false
3996	/// values is a power of two while the other is zero.
3997	SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
3998	SetccInGPROpts ConvOpts) {
3999	assert((Compare.getOpcode() == ISD::SETCC \|\|
4000	Compare.getOpcode() == ISD::SELECT_CC) &&
4001	"An ISD::SETCC node required here.");
4002
4003	// Don't convert this comparison to a GPR sequence because there are uses
4004	// of the i1 result (i.e. uses that require the result in the CR).
4005	if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))
4006	return SDValue ();
4007
4008	SDValue LHS = Compare.getOperand(i: `0`);
4009	SDValue RHS = Compare.getOperand(i: `1`);
4010
4011	// The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
4012	int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? `4` : `2`;
4013	ISD::CondCode CC =
4014	cast<CondCodeSDNode>(Val: Compare.getOperand(i: CCOpNum))->get();
4015	EVT InputVT = LHS.getValueType();
4016	if (InputVT != MVT::i32 && InputVT != MVT::i64)
4017	return SDValue ();
4018
4019	if (ConvOpts == SetccInGPROpts::ZExtInvert \|\|
4020	ConvOpts == SetccInGPROpts::SExtInvert)
4021	CC = ISD::getSetCCInverse(Operation: CC, Type: InputVT);
4022
4023	bool Inputs32Bit = InputVT == MVT::i32;
4024
4025	SDLoc dl(Compare);
4026	ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(Val&: RHS);
4027	int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX;
4028	bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig \|\|
4029	ConvOpts == SetccInGPROpts::SExtInvert;
4030
4031	if (IsSext && Inputs32Bit)
4032	return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
4033	else if (Inputs32Bit)
4034	return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
4035	else if (IsSext)
4036	return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
4037	return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
4038	}
4039
4040	} // end anonymous namespace
4041
4042	bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
4043	if (N->getValueType(`0`) != MVT::i32 &&
4044	N->getValueType(`0`) != MVT::i64)
4045	return false;
4046
4047	// This optimization will emit code that assumes 64-bit registers
4048	// so we don't want to run it in 32-bit mode. Also don't run it
4049	// on functions that are not to be optimized.
4050	if (TM.getOptLevel() == CodeGenOptLevel::None \|\| !TM.isPPC64())
4051	return false;
4052
4053	// For POWER10, it is more profitable to use the set boolean extension
4054	// instructions rather than the integer compare elimination codegen.
4055	// Users can override this via the command line option, `--ppc-gpr-icmps`.
4056	if (!(CmpInGPR.getNumOccurrences() > `0`) && Subtarget->isISA3_1())
4057	return false;
4058
4059	switch (N->getOpcode()) {
4060	default: break;
4061	case ISD::ZERO_EXTEND:
4062	case ISD::SIGN_EXTEND:
4063	case ISD::AND:
4064	case ISD::OR:
4065	case ISD::XOR: {
4066	IntegerCompareEliminator ICmpElim(CurDAG, this);
4067	if (SDNode *New = ICmpElim.Select(N)) {
4068	ReplaceNode(F: N, T: New);
4069	return true;
4070	}
4071	}
4072	}
4073	return false;
4074	}
4075
4076	bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
4077	if (N->getValueType(`0`) != MVT::i32 &&
4078	N->getValueType(`0`) != MVT::i64)
4079	return false;
4080
4081	if (!UseBitPermRewriter)
4082	return false;
4083
4084	switch (N->getOpcode()) {
4085	default: break;
4086	case ISD::SRL:
4087	// If we are on P10, we have a pattern for 32-bit (srl (bswap r), 16) that
4088	// uses the BRH instruction.
4089	if (Subtarget->isISA3_1() && N->getValueType(`0`) == MVT::i32 &&
4090	N->getOperand(`0`).getOpcode() == ISD::BSWAP) {
4091	auto &OpRight = N->getOperand(Num: `1`);
4092	ConstantSDNode *SRLConst = dyn_cast<ConstantSDNode>(Val: OpRight);
4093	if (SRLConst && SRLConst->getSExtValue() == `16`)
4094	return false;
4095	}
4096	[[fallthrough]];
4097	case ISD::ROTL:
4098	case ISD::SHL:
4099	case ISD::AND:
4100	case ISD::OR: {
4101	BitPermutationSelector BPS(CurDAG);
4102	if (SDNode *New = BPS.Select(N)) {
4103	ReplaceNode(F: N, T: New);
4104	return true;
4105	}
4106	return false;
4107	}
4108	}
4109
4110	return false;
4111	}
4112
4113	/// SelectCC - Select a comparison of the specified values with the specified
4114	/// condition code, returning the CR# of the expression.
4115	SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4116	const SDLoc &dl, SDValue Chain) {
4117	// Always select the LHS.
4118	unsigned Opc;
4119
4120	if (LHS.getValueType() == MVT::i32) {
4121	unsigned Imm;
4122	if (CC == ISD::SETEQ \|\| CC == ISD::SETNE) {
4123	if (isInt32Immediate(N: RHS, Imm)) {
4124	// SETEQ/SETNE comparison with 16-bit immediate, fold it.
4125	if (isUInt<`16`>(Imm))
4126	return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
4127	getI32Imm(Imm & `0xFFFF`, dl)),
4128	`0`);
4129	// If this is a 16-bit signed immediate, fold it.
4130	if (isInt<`16`>((int)Imm))
4131	return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
4132	getI32Imm(Imm & `0xFFFF`, dl)),
4133	`0`);
4134
4135	// For non-equality comparisons, the default code would materialize the
4136	// constant, then compare against it, like this:
4137	// lis r2, 4660
4138	// ori r2, r2, 22136
4139	// cmpw cr0, r3, r2
4140	// Since we are just comparing for equality, we can emit this instead:
4141	// xoris r0,r3,0x1234
4142	// cmplwi cr0,r0,0x5678
4143	// beq cr0,L6
4144	SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
4145	getI32Imm(Imm >> `16`, dl)), `0`);
4146	return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
4147	getI32Imm(Imm & `0xFFFF`, dl)), `0`);
4148	}
4149	Opc = PPC::CMPLW;
4150	} else if (ISD::isUnsignedIntSetCC(Code: CC)) {
4151	if (isInt32Immediate(RHS, Imm) && isUInt<`16`>(Imm))
4152	return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
4153	getI32Imm(Imm & `0xFFFF`, dl)), `0`);
4154	Opc = PPC::CMPLW;
4155	} else {
4156	int16_t SImm;
4157	if (isIntS16Immediate(RHS, SImm))
4158	return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
4159	getI32Imm((int)SImm & `0xFFFF`,
4160	dl)),
4161	`0`);
4162	Opc = PPC::CMPW;
4163	}
4164	} else if (LHS.getValueType() == MVT::i64) {
4165	uint64_t Imm;
4166	if (CC == ISD::SETEQ \|\| CC == ISD::SETNE) {
4167	if (isInt64Immediate(N: RHS.getNode(), Imm)) {
4168	// SETEQ/SETNE comparison with 16-bit immediate, fold it.
4169	if (isUInt<`16`>(Imm))
4170	return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
4171	getI32Imm(Imm & `0xFFFF`, dl)),
4172	`0`);
4173	// If this is a 16-bit signed immediate, fold it.
4174	if (isInt<`16`>(Imm))
4175	return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
4176	getI32Imm(Imm & `0xFFFF`, dl)),
4177	`0`);
4178
4179	// For non-equality comparisons, the default code would materialize the
4180	// constant, then compare against it, like this:
4181	// lis r2, 4660
4182	// ori r2, r2, 22136
4183	// cmpd cr0, r3, r2
4184	// Since we are just comparing for equality, we can emit this instead:
4185	// xoris r0,r3,0x1234
4186	// cmpldi cr0,r0,0x5678
4187	// beq cr0,L6
4188	if (isUInt<`32`>(x: Imm)) {
4189	SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
4190	getI64Imm(Imm >> `16`, dl)), `0`);
4191	return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
4192	getI64Imm(Imm & `0xFFFF`, dl)),
4193	`0`);
4194	}
4195	}
4196	Opc = PPC::CMPLD;
4197	} else if (ISD::isUnsignedIntSetCC(Code: CC)) {
4198	if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<`16`>(Imm))
4199	return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
4200	getI64Imm(Imm & `0xFFFF`, dl)), `0`);
4201	Opc = PPC::CMPLD;
4202	} else {
4203	int16_t SImm;
4204	if (isIntS16Immediate(RHS, SImm))
4205	return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
4206	getI64Imm(SImm & `0xFFFF`, dl)),
4207	`0`);
4208	Opc = PPC::CMPD;
4209	}
4210	} else if (LHS.getValueType() == MVT::f32) {
4211	if (Subtarget->hasSPE()) {
4212	switch (CC) {
4213	default:
4214	case ISD::SETEQ:
4215	case ISD::SETNE:
4216	Opc = PPC::EFSCMPEQ;
4217	break;
4218	case ISD::SETLT:
4219	case ISD::SETGE:
4220	case ISD::SETOLT:
4221	case ISD::SETOGE:
4222	case ISD::SETULT:
4223	case ISD::SETUGE:
4224	Opc = PPC::EFSCMPLT;
4225	break;
4226	case ISD::SETGT:
4227	case ISD::SETLE:
4228	case ISD::SETOGT:
4229	case ISD::SETOLE:
4230	case ISD::SETUGT:
4231	case ISD::SETULE:
4232	Opc = PPC::EFSCMPGT;
4233	break;
4234	}
4235	} else
4236	Opc = PPC::FCMPUS;
4237	} else if (LHS.getValueType() == MVT::f64) {
4238	if (Subtarget->hasSPE()) {
4239	switch (CC) {
4240	default:
4241	case ISD::SETEQ:
4242	case ISD::SETNE:
4243	Opc = PPC::EFDCMPEQ;
4244	break;
4245	case ISD::SETLT:
4246	case ISD::SETGE:
4247	case ISD::SETOLT:
4248	case ISD::SETOGE:
4249	case ISD::SETULT:
4250	case ISD::SETUGE:
4251	Opc = PPC::EFDCMPLT;
4252	break;
4253	case ISD::SETGT:
4254	case ISD::SETLE:
4255	case ISD::SETOGT:
4256	case ISD::SETOLE:
4257	case ISD::SETUGT:
4258	case ISD::SETULE:
4259	Opc = PPC::EFDCMPGT;
4260	break;
4261	}
4262	} else
4263	Opc = Subtarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
4264	} else {
4265	assert(LHS.getValueType() == MVT::f128 && "Unknown vt!");
4266	assert(Subtarget->hasP9Vector() && "XSCMPUQP requires Power9 Vector");
4267	Opc = PPC::XSCMPUQP;
4268	}
4269	if (Chain)
4270	return SDValue(
4271	CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Other, LHS, RHS, Chain),
4272	`0`);
4273	else
4274	return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), `0`);
4275	}
4276
4277	static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT,
4278	const PPCSubtarget *Subtarget) {
4279	// For SPE instructions, the result is in GT bit of the CR
4280	bool UseSPE = Subtarget->hasSPE() && VT.isFloatingPoint();
4281
4282	switch (CC) {
4283	case ISD::SETUEQ:
4284	case ISD::SETONE:
4285	case ISD::SETOLE:
4286	case ISD::SETOGE:
4287	llvm_unreachable("Should be lowered by legalize!");
4288	default: llvm_unreachable("Unknown condition!");
4289	case ISD::SETOEQ:
4290	case ISD::SETEQ:
4291	return UseSPE ? PPC::PRED_GT : PPC::PRED_EQ;
4292	case ISD::SETUNE:
4293	case ISD::SETNE:
4294	return UseSPE ? PPC::PRED_LE : PPC::PRED_NE;
4295	case ISD::SETOLT:
4296	case ISD::SETLT:
4297	return UseSPE ? PPC::PRED_GT : PPC::PRED_LT;
4298	case ISD::SETULE:
4299	case ISD::SETLE:
4300	return PPC::PRED_LE;
4301	case ISD::SETOGT:
4302	case ISD::SETGT:
4303	return PPC::PRED_GT;
4304	case ISD::SETUGE:
4305	case ISD::SETGE:
4306	return UseSPE ? PPC::PRED_LE : PPC::PRED_GE;
4307	case ISD::SETO: return PPC::PRED_NU;
4308	case ISD::SETUO: return PPC::PRED_UN;
4309	// These two are invalid for floating point. Assume we have int.
4310	case ISD::SETULT: return PPC::PRED_LT;
4311	case ISD::SETUGT: return PPC::PRED_GT;
4312	}
4313	}
4314
4315	/// getCRIdxForSetCC - Return the index of the condition register field
4316	/// associated with the SetCC condition, and whether or not the field is
4317	/// treated as inverted. That is, lt = 0; ge = 0 inverted.
4318	static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
4319	Invert = false;
4320	switch (CC) {
4321	default: llvm_unreachable("Unknown condition!");
4322	case ISD::SETOLT:
4323	case ISD::SETLT: return `0`; // Bit #0 = SETOLT
4324	case ISD::SETOGT:
4325	case ISD::SETGT: return `1`; // Bit #1 = SETOGT
4326	case ISD::SETOEQ:
4327	case ISD::SETEQ: return `2`; // Bit #2 = SETOEQ
4328	case ISD::SETUO: return `3`; // Bit #3 = SETUO
4329	case ISD::SETUGE:
4330	case ISD::SETGE: Invert = true; return `0`; // !Bit #0 = SETUGE
4331	case ISD::SETULE:
4332	case ISD::SETLE: Invert = true; return `1`; // !Bit #1 = SETULE
4333	case ISD::SETUNE:
4334	case ISD::SETNE: Invert = true; return `2`; // !Bit #2 = SETUNE
4335	case ISD::SETO: Invert = true; return `3`; // !Bit #3 = SETO
4336	case ISD::SETUEQ:
4337	case ISD::SETOGE:
4338	case ISD::SETOLE:
4339	case ISD::SETONE:
4340	llvm_unreachable("Invalid branch code: should be expanded by legalize");
4341	// These are invalid for floating point. Assume integer.
4342	case ISD::SETULT: return `0`;
4343	case ISD::SETUGT: return `1`;
4344	}
4345	}
4346
4347	// getVCmpInst: return the vector compare instruction for the specified
4348	// vector type and condition code. Since this is for altivec specific code,
4349	// only support the altivec types (v16i8, v8i16, v4i32, v2i64, v1i128,
4350	// and v4f32).
4351	static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
4352	bool HasVSX, bool &Swap, bool &Negate) {
4353	Swap = false;
4354	Negate = false;
4355
4356	if (VecVT.isFloatingPoint()) {
4357	/ Handle some cases by swapping input operands. /
4358	switch (CC) {
4359	case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
4360	case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
4361	case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
4362	case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
4363	case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
4364	case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
4365	default: break;
4366	}
4367	/ Handle some cases by negating the result. /
4368	switch (CC) {
4369	case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
4370	case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
4371	case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
4372	case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
4373	default: break;
4374	}
4375	/ We have instructions implementing the remaining cases. /
4376	switch (CC) {
4377	case ISD::SETEQ:
4378	case ISD::SETOEQ:
4379	if (VecVT == MVT::v4f32)
4380	return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
4381	else if (VecVT == MVT::v2f64)
4382	return PPC::XVCMPEQDP;
4383	break;
4384	case ISD::SETGT:
4385	case ISD::SETOGT:
4386	if (VecVT == MVT::v4f32)
4387	return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
4388	else if (VecVT == MVT::v2f64)
4389	return PPC::XVCMPGTDP;
4390	break;
4391	case ISD::SETGE:
4392	case ISD::SETOGE:
4393	if (VecVT == MVT::v4f32)
4394	return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
4395	else if (VecVT == MVT::v2f64)
4396	return PPC::XVCMPGEDP;
4397	break;
4398	default:
4399	break;
4400	}
4401	llvm_unreachable("Invalid floating-point vector compare condition");
4402	} else {
4403	/ Handle some cases by swapping input operands. /
4404	switch (CC) {
4405	case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
4406	case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
4407	case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
4408	case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
4409	default: break;
4410	}
4411	/ Handle some cases by negating the result. /
4412	switch (CC) {
4413	case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
4414	case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
4415	case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
4416	case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
4417	default: break;
4418	}
4419	/ We have instructions implementing the remaining cases. /
4420	switch (CC) {
4421	case ISD::SETEQ:
4422	case ISD::SETUEQ:
4423	if (VecVT == MVT::v16i8)
4424	return PPC::VCMPEQUB;
4425	else if (VecVT == MVT::v8i16)
4426	return PPC::VCMPEQUH;
4427	else if (VecVT == MVT::v4i32)
4428	return PPC::VCMPEQUW;
4429	else if (VecVT == MVT::v2i64)
4430	return PPC::VCMPEQUD;
4431	else if (VecVT == MVT::v1i128)
4432	return PPC::VCMPEQUQ;
4433	break;
4434	case ISD::SETGT:
4435	if (VecVT == MVT::v16i8)
4436	return PPC::VCMPGTSB;
4437	else if (VecVT == MVT::v8i16)
4438	return PPC::VCMPGTSH;
4439	else if (VecVT == MVT::v4i32)
4440	return PPC::VCMPGTSW;
4441	else if (VecVT == MVT::v2i64)
4442	return PPC::VCMPGTSD;
4443	else if (VecVT == MVT::v1i128)
4444	return PPC::VCMPGTSQ;
4445	break;
4446	case ISD::SETUGT:
4447	if (VecVT == MVT::v16i8)
4448	return PPC::VCMPGTUB;
4449	else if (VecVT == MVT::v8i16)
4450	return PPC::VCMPGTUH;
4451	else if (VecVT == MVT::v4i32)
4452	return PPC::VCMPGTUW;
4453	else if (VecVT == MVT::v2i64)
4454	return PPC::VCMPGTUD;
4455	else if (VecVT == MVT::v1i128)
4456	return PPC::VCMPGTUQ;
4457	break;
4458	default:
4459	break;
4460	}
4461	llvm_unreachable("Invalid integer vector compare condition");
4462	}
4463	}
4464
4465	bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
4466	SDLoc dl(N);
4467	unsigned Imm;
4468	bool IsStrict = N->isStrictFPOpcode();
4469	ISD::CondCode CC =
4470	cast<CondCodeSDNode>(Val: N->getOperand(Num: IsStrict ? `3` : `2`))->get();
4471	EVT PtrVT =
4472	CurDAG->getTargetLoweringInfo().getPointerTy(DL: CurDAG->getDataLayout());
4473	bool isPPC64 = (PtrVT == MVT::i64);
4474	SDValue Chain = IsStrict ? N->getOperand(Num: `0`) : SDValue ();
4475
4476	SDValue LHS = N->getOperand(Num: IsStrict ? `1` : `0`);
4477	SDValue RHS = N->getOperand(Num: IsStrict ? `2` : `1`);
4478
4479	if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(N: RHS, Imm)) {
4480	// We can codegen setcc op, imm very efficiently compared to a brcond.
4481	// Check for those cases here.
4482	// setcc op, 0
4483	if (Imm == `0`) {
4484	SDValue Op = LHS;
4485	switch (CC) {
4486	default: break;
4487	case ISD::SETEQ: {
4488	Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), `0`);
4489	SDValue Ops[] = { Op, getI32Imm(Imm: `27`, dl), getI32Imm(Imm: `5`, dl),
4490	getI32Imm(Imm: `31`, dl) };
4491	CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4492	return true;
4493	}
4494	case ISD::SETNE: {
4495	if (isPPC64) break;
4496	SDValue AD =
4497	SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4498	Op, getI32Imm(~`0U`, dl)), `0`);
4499	CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(`1`));
4500	return true;
4501	}
4502	case ISD::SETLT: {
4503	SDValue Ops[] = { Op, getI32Imm(Imm: `1`, dl), getI32Imm(Imm: `31`, dl),
4504	getI32Imm(Imm: `31`, dl) };
4505	CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4506	return true;
4507	}
4508	case ISD::SETGT: {
4509	SDValue T =
4510	SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), `0`);
4511	T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), `0`);
4512	SDValue Ops[] = { T, getI32Imm(Imm: `1`, dl), getI32Imm(Imm: `31`, dl),
4513	getI32Imm(Imm: `31`, dl) };
4514	CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4515	return true;
4516	}
4517	}
4518	} else if (Imm == ~`0U`) { // setcc op, -1
4519	SDValue Op = LHS;
4520	switch (CC) {
4521	default: break;
4522	case ISD::SETEQ:
4523	if (isPPC64) break;
4524	Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4525	Op, getI32Imm(`1`, dl)), `0`);
4526	CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
4527	SDValue(CurDAG->getMachineNode(PPC::LI, dl,
4528	MVT::i32,
4529	getI32Imm(`0`, dl)),
4530	`0`), Op.getValue(`1`));
4531	return true;
4532	case ISD::SETNE: {
4533	if (isPPC64) break;
4534	Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), `0`);
4535	SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4536	Op, getI32Imm(~`0U`, dl));
4537	CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, `0`), Op,
4538	SDValue(AD, `1`));
4539	return true;
4540	}
4541	case ISD::SETLT: {
4542	SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
4543	getI32Imm(`1`, dl)), `0`);
4544	SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
4545	Op), `0`);
4546	SDValue Ops[] = { AN, getI32Imm(Imm: `1`, dl), getI32Imm(Imm: `31`, dl),
4547	getI32Imm(Imm: `31`, dl) };
4548	CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4549	return true;
4550	}
4551	case ISD::SETGT: {
4552	SDValue Ops[] = { Op, getI32Imm(Imm: `1`, dl), getI32Imm(Imm: `31`, dl),
4553	getI32Imm(Imm: `31`, dl) };
4554	Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), `0`);
4555	CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(`1`, dl));
4556	return true;
4557	}
4558	}
4559	}
4560	}
4561
4562	// Altivec Vector compare instructions do not set any CR register by default and
4563	// vector compare operations return the same type as the operands.
4564	if (!IsStrict && LHS.getValueType().isVector()) {
4565	if (Subtarget->hasSPE())
4566	return false;
4567
4568	EVT VecVT = LHS.getValueType();
4569	bool Swap, Negate;
4570	unsigned int VCmpInst =
4571	getVCmpInst(VecVT.getSimpleVT(), CC, Subtarget->hasVSX(), Swap, Negate);
4572	if (Swap)
4573	std::swap(a&: LHS, b&: RHS);
4574
4575	EVT ResVT = VecVT.changeVectorElementTypeToInteger();
4576	if (Negate) {
4577	SDValue VCmp(CurDAG->getMachineNode(Opcode: VCmpInst, dl, VT: ResVT, Op1: LHS, Op2: RHS), `0`);
4578	CurDAG->SelectNodeTo(N, Subtarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
4579	ResVT, VCmp, VCmp);
4580	return true;
4581	}
4582
4583	CurDAG->SelectNodeTo(N, MachineOpc: VCmpInst, VT: ResVT, Op1: LHS, Op2: RHS);
4584	return true;
4585	}
4586
4587	if (Subtarget->useCRBits())
4588	return false;
4589
4590	bool Inv;
4591	unsigned Idx = getCRIdxForSetCC(CC, Invert&: Inv);
4592	SDValue CCReg = SelectCC(LHS, RHS, CC, dl, Chain);
4593	if (IsStrict)
4594	CurDAG->ReplaceAllUsesOfValueWith(From: SDValue (N, `1`), To: CCReg.getValue(R: `1`));
4595	SDValue IntCR;
4596
4597	// SPE ecmp* instructions only set the 'gt' bit, so hard-code that*
4598	// The correct compare instruction is already set by SelectCC()
4599	if (Subtarget->hasSPE() && LHS.getValueType().isFloatingPoint()) {
4600	Idx = `1`;
4601	}
4602
4603	// Force the ccreg into CR7.
4604	SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
4605
4606	SDValue InGlue; // Null incoming flag value.
4607	CCReg = CurDAG->getCopyToReg(Chain: CurDAG->getEntryNode(), dl, Reg: CR7Reg, N: CCReg,
4608	Glue: InGlue).getValue(R: `1`);
4609
4610	IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
4611	CCReg), `0`);
4612
4613	SDValue Ops[] = { IntCR, getI32Imm(Imm: (`32` - (`3` - Idx)) & `31`, dl),
4614	getI32Imm(Imm: `31`, dl), getI32Imm(Imm: `31`, dl) };
4615	if (!Inv) {
4616	CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4617	return true;
4618	}
4619
4620	// Get the specified bit.
4621	SDValue Tmp =
4622	SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), `0`);
4623	CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(`1`, dl));
4624	return true;
4625	}
4626
4627	/// Does this node represent a load/store node whose address can be represented
4628	/// with a register plus an immediate that's a multiple of \p Val:
4629	bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode N, unsigned* Val) const {
4630	LoadSDNode *LDN = dyn_cast<LoadSDNode>(Val: N);
4631	StoreSDNode *STN = dyn_cast<StoreSDNode>(Val: N);
4632	MemIntrinsicSDNode *MIN = dyn_cast<MemIntrinsicSDNode>(Val: N);
4633	SDValue AddrOp;
4634	if (LDN \|\| (MIN && MIN->getOpcode() == PPCISD::LD_SPLAT))
4635	AddrOp = N->getOperand(Num: `1`);
4636	else if (STN)
4637	AddrOp = STN->getOperand(Num: `2`);
4638
4639	// If the address points a frame object or a frame object with an offset,
4640	// we need to check the object alignment.
4641	short Imm = `0`;
4642	if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(
4643	Val: AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(i: `0`) :
4644	AddrOp)) {
4645	// If op0 is a frame index that is under aligned, we can't do it either,
4646	// because it is translated to r31 or r1 + slot + offset. We won't know the
4647	// slot number until the stack frame is finalized.
4648	const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
4649	unsigned SlotAlign = MFI.getObjectAlign(ObjectIdx: FI->getIndex()).value();
4650	if ((SlotAlign % Val) != `0`)
4651	return false;
4652
4653	// If we have an offset, we need further check on the offset.
4654	if (AddrOp.getOpcode() != ISD::ADD)
4655	return true;
4656	}
4657
4658	if (AddrOp.getOpcode() == ISD::ADD)
4659	return isIntS16Immediate(Op: AddrOp.getOperand(i: `1`), Imm) && !(Imm % Val);
4660
4661	// If the address comes from the outside, the offset will be zero.
4662	return AddrOp.getOpcode() == ISD::CopyFromReg;
4663	}
4664
4665	void PPCDAGToDAGISel::transferMemOperands(SDNode N, SDNode Result) {
4666	// Transfer memoperands.
4667	MachineMemOperand *MemOp = cast<MemSDNode>(Val: N)->getMemOperand();
4668	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: Result), NewMemRefs: {MemOp});
4669	}
4670
4671	static bool mayUseP9Setb(SDNode N, const* ISD::CondCode &CC, SelectionDAG *DAG,
4672	bool &NeedSwapOps, bool &IsUnCmp) {
4673
4674	assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");
4675
4676	SDValue LHS = N->getOperand(Num: `0`);
4677	SDValue RHS = N->getOperand(Num: `1`);
4678	SDValue TrueRes = N->getOperand(Num: `2`);
4679	SDValue FalseRes = N->getOperand(Num: `3`);
4680	ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(Val&: TrueRes);
4681	if (!TrueConst \|\| (N->getSimpleValueType(`0`) != MVT::i64 &&
4682	N->getSimpleValueType(`0`) != MVT::i32))
4683	return false;
4684
4685	// We are looking for any of:
4686	// (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4687	// (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4688	// (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)
4689	// (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)
4690	int64_t TrueResVal = TrueConst->getSExtValue();
4691	if ((TrueResVal < -`1` \|\| TrueResVal > `1`) \|\|
4692	(TrueResVal == -`1` && FalseRes.getOpcode() != ISD::ZERO_EXTEND) \|\|
4693	(TrueResVal == `1` && FalseRes.getOpcode() != ISD::SIGN_EXTEND) \|\|
4694	(TrueResVal == `0` &&
4695	(FalseRes.getOpcode() != ISD::SELECT_CC \|\| CC != ISD::SETEQ)))
4696	return false;
4697
4698	SDValue SetOrSelCC = FalseRes.getOpcode() == ISD::SELECT_CC
4699	? FalseRes
4700	: FalseRes.getOperand(i: `0`);
4701	bool InnerIsSel = SetOrSelCC.getOpcode() == ISD::SELECT_CC;
4702	if (SetOrSelCC.getOpcode() != ISD::SETCC &&
4703	SetOrSelCC.getOpcode() != ISD::SELECT_CC)
4704	return false;
4705
4706	// Without this setb optimization, the outer SELECT_CC will be manually
4707	// selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass
4708	// transforms pseudo instruction to isel instruction. When there are more than
4709	// one use for result like zext/sext, with current optimization we only see
4710	// isel is replaced by setb but can't see any significant gain. Since
4711	// setb has longer latency than original isel, we should avoid this. Another
4712	// point is that setb requires comparison always kept, it can break the
4713	// opportunity to get the comparison away if we have in future.
4714	if (!SetOrSelCC.hasOneUse() \|\| (!InnerIsSel && !FalseRes.hasOneUse()))
4715	return false;
4716
4717	SDValue InnerLHS = SetOrSelCC.getOperand(i: `0`);
4718	SDValue InnerRHS = SetOrSelCC.getOperand(i: `1`);
4719	ISD::CondCode InnerCC =
4720	cast<CondCodeSDNode>(Val: SetOrSelCC.getOperand(i: InnerIsSel ? `4` : `2`))->get();
4721	// If the inner comparison is a select_cc, make sure the true/false values are
4722	// 1/-1 and canonicalize it if needed.
4723	if (InnerIsSel) {
4724	ConstantSDNode *SelCCTrueConst =
4725	dyn_cast<ConstantSDNode>(Val: SetOrSelCC.getOperand(i: `2`));
4726	ConstantSDNode *SelCCFalseConst =
4727	dyn_cast<ConstantSDNode>(Val: SetOrSelCC.getOperand(i: `3`));
4728	if (!SelCCTrueConst \|\| !SelCCFalseConst)
4729	return false;
4730	int64_t SelCCTVal = SelCCTrueConst->getSExtValue();
4731	int64_t SelCCFVal = SelCCFalseConst->getSExtValue();
4732	// The values must be -1/1 (requiring a swap) or 1/-1.
4733	if (SelCCTVal == -`1` && SelCCFVal == `1`) {
4734	std::swap(a&: InnerLHS, b&: InnerRHS);
4735	} else if (SelCCTVal != `1` \|\| SelCCFVal != -`1`)
4736	return false;
4737	}
4738
4739	// Canonicalize unsigned case
4740	if (InnerCC == ISD::SETULT \|\| InnerCC == ISD::SETUGT) {
4741	IsUnCmp = true;
4742	InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;
4743	}
4744
4745	bool InnerSwapped = false;
4746	if (LHS == InnerRHS && RHS == InnerLHS)
4747	InnerSwapped = true;
4748	else if (LHS != InnerLHS \|\| RHS != InnerRHS)
4749	return false;
4750
4751	switch (CC) {
4752	// (select_cc lhs, rhs, 0, \
4753	// (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
4754	case ISD::SETEQ:
4755	if (!InnerIsSel)
4756	return false;
4757	if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
4758	return false;
4759	NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;
4760	break;
4761
4762	// (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4763	// (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
4764	// (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
4765	// (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4766	// (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
4767	// (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
4768	case ISD::SETULT:
4769	if (!IsUnCmp && InnerCC != ISD::SETNE)
4770	return false;
4771	IsUnCmp = true;
4772	[[fallthrough]];
4773	case ISD::SETLT:
4774	if (InnerCC == ISD::SETNE \|\| (InnerCC == ISD::SETGT && !InnerSwapped) \|\|
4775	(InnerCC == ISD::SETLT && InnerSwapped))
4776	NeedSwapOps = (TrueResVal == `1`);
4777	else
4778	return false;
4779	break;
4780
4781	// (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4782	// (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
4783	// (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
4784	// (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4785	// (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
4786	// (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
4787	case ISD::SETUGT:
4788	if (!IsUnCmp && InnerCC != ISD::SETNE)
4789	return false;
4790	IsUnCmp = true;
4791	[[fallthrough]];
4792	case ISD::SETGT:
4793	if (InnerCC == ISD::SETNE \|\| (InnerCC == ISD::SETLT && !InnerSwapped) \|\|
4794	(InnerCC == ISD::SETGT && InnerSwapped))
4795	NeedSwapOps = (TrueResVal == -`1`);
4796	else
4797	return false;
4798	break;
4799
4800	default:
4801	return false;
4802	}
4803
4804	LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
4805	LLVM_DEBUG(N->dump());
4806
4807	return true;
4808	}
4809
4810	// Return true if it's a software square-root/divide operand.
4811	static bool isSWTestOp(SDValue N) {
4812	if (N.getOpcode() == PPCISD::FTSQRT)
4813	return true;
4814	if (N.getNumOperands() < `1` \|\| !isa<ConstantSDNode>(Val: N.getOperand(i: `0`)) \|\|
4815	N.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
4816	return false;
4817	switch (N.getConstantOperandVal(i: `0`)) {
4818	case Intrinsic::ppc_vsx_xvtdivdp:
4819	case Intrinsic::ppc_vsx_xvtdivsp:
4820	case Intrinsic::ppc_vsx_xvtsqrtdp:
4821	case Intrinsic::ppc_vsx_xvtsqrtsp:
4822	return true;
4823	}
4824	return false;
4825	}
4826
4827	bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) {
4828	assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.");
4829	// We are looking for following patterns, where `truncate to i1` actually has
4830	// the same semantic with `and 1`.
4831	// (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
4832	// (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
4833	// (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
4834	// (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
4835	// (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
4836	// (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
4837	// (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
4838	// (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)
4839	ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: `1`))->get();
4840	if (CC != ISD::SETEQ && CC != ISD::SETNE)
4841	return false;
4842
4843	SDValue CmpRHS = N->getOperand(Num: `3`);
4844	if (!isNullConstant(V: CmpRHS))
4845	return false;
4846
4847	SDValue CmpLHS = N->getOperand(Num: `2`);
4848	if (CmpLHS.getNumOperands() < `1` \|\| !isSWTestOp(N: CmpLHS.getOperand(i: `0`)))
4849	return false;
4850
4851	unsigned PCC = `0`;
4852	bool IsCCNE = CC == ISD::SETNE;
4853	if (CmpLHS.getOpcode() == ISD::AND &&
4854	isa<ConstantSDNode>(Val: CmpLHS.getOperand(i: `1`)))
4855	switch (CmpLHS.getConstantOperandVal(i: `1`)) {
4856	case `1`:
4857	PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
4858	break;
4859	case `2`:
4860	PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE;
4861	break;
4862	case `4`:
4863	PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE;
4864	break;
4865	case `8`:
4866	PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE;
4867	break;
4868	default:
4869	return false;
4870	}
4871	else if (CmpLHS.getOpcode() == ISD::TRUNCATE &&
4872	CmpLHS.getValueType() == MVT::i1)
4873	PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
4874
4875	if (PCC) {
4876	SDLoc dl(N);
4877	SDValue Ops[] = {getI32Imm(Imm: PCC, dl), CmpLHS.getOperand(i: `0`), N->getOperand(Num: `4`),
4878	N->getOperand(Num: `0`)};
4879	CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
4880	return true;
4881	}
4882	return false;
4883	}
4884
4885	bool PPCDAGToDAGISel::trySelectLoopCountIntrinsic(SDNode *N) {
4886	// Sometimes the promoted value of the intrinsic is ANDed by some non-zero
4887	// value, for example when crbits is disabled. If so, select the
4888	// loop_decrement intrinsics now.
4889	ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: `1`))->get();
4890	SDValue LHS = N->getOperand(Num: `2`), RHS = N->getOperand(Num: `3`);
4891
4892	if (LHS.getOpcode() != ISD::AND \|\| !isa<ConstantSDNode>(Val: LHS.getOperand(i: `1`)) \|\|
4893	isNullConstant(V: LHS.getOperand(i: `1`)))
4894	return false;
4895
4896	if (LHS.getOperand(`0`).getOpcode() != ISD::INTRINSIC_W_CHAIN \|\|
4897	LHS.getOperand(`0`).getConstantOperandVal(`1`) != Intrinsic::loop_decrement)
4898	return false;
4899
4900	if (!isa<ConstantSDNode>(Val: RHS))
4901	return false;
4902
4903	assert((CC == ISD::SETEQ \|\| CC == ISD::SETNE) &&
4904	"Counter decrement comparison is not EQ or NE");
4905
4906	SDValue OldDecrement = LHS.getOperand(i: `0`);
4907	assert(OldDecrement.hasOneUse() && "loop decrement has more than one use!");
4908
4909	SDLoc DecrementLoc(OldDecrement);
4910	SDValue ChainInput = OldDecrement.getOperand(i: `0`);
4911	SDValue DecrementOps[] = {Subtarget->isPPC64() ? getI64Imm(Imm: `1`, dl: DecrementLoc)
4912	: getI32Imm(Imm: `1`, dl: DecrementLoc)};
4913	unsigned DecrementOpcode =
4914	Subtarget->isPPC64() ? PPC::DecreaseCTR8loop : PPC::DecreaseCTRloop;
4915	SDNode *NewDecrement = CurDAG->getMachineNode(DecrementOpcode, DecrementLoc,
4916	MVT::i1, DecrementOps);
4917
4918	unsigned Val = RHS ->getAsZExtVal();
4919	bool IsBranchOnTrue = (CC == ISD::SETEQ && Val) \|\| (CC == ISD::SETNE && !Val);
4920	unsigned Opcode = IsBranchOnTrue ? PPC::BC : PPC::BCn;
4921
4922	ReplaceUses(F: LHS.getValue(R: `0`), T: LHS.getOperand(i: `1`));
4923	CurDAG->RemoveDeadNode(N: LHS.getNode());
4924
4925	// Mark the old loop_decrement intrinsic as dead.
4926	ReplaceUses(F: OldDecrement.getValue(R: `1`), T: ChainInput);
4927	CurDAG->RemoveDeadNode(N: OldDecrement.getNode());
4928
4929	SDValue Chain = CurDAG->getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
4930	ChainInput, N->getOperand(`0`));
4931
4932	CurDAG->SelectNodeTo(N, Opcode, MVT::Other, SDValue(NewDecrement, `0`),
4933	N->getOperand(`4`), Chain);
4934	return true;
4935	}
4936
4937	bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) {
4938	assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
4939	unsigned Imm;
4940	if (!isInt32Immediate(N: N->getOperand(Num: `1`), Imm))
4941	return false;
4942
4943	SDLoc dl(N);
4944	SDValue Val = N->getOperand(Num: `0`);
4945	unsigned SH, MB, ME;
4946	// If this is an and of a value rotated between 0 and 31 bits and then and'd
4947	// with a mask, emit rlwinm
4948	if (isRotateAndMask(N: Val.getNode(), Mask: Imm, isShiftMask: false, SH, MB, ME)) {
4949	Val = Val.getOperand(i: `0`);
4950	SDValue Ops[] = {Val, getI32Imm(Imm: SH, dl), getI32Imm(Imm: MB, dl),
4951	getI32Imm(Imm: ME, dl)};
4952	CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4953	return true;
4954	}
4955
4956	// If this is just a masked value where the input is not handled, and
4957	// is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
4958	if (isRunOfOnes(Val: Imm, MB, ME) && Val.getOpcode() != ISD::ROTL) {
4959	SDValue Ops[] = {Val, getI32Imm(Imm: `0`, dl), getI32Imm(Imm: MB, dl),
4960	getI32Imm(Imm: ME, dl)};
4961	CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4962	return true;
4963	}
4964
4965	// AND X, 0 -> 0, not "rlwinm 32".
4966	if (Imm == `0`) {
4967	ReplaceUses(F: SDValue (N, `0`), T: N->getOperand(Num: `1`));
4968	return true;
4969	}
4970
4971	return false;
4972	}
4973
4974	bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) {
4975	assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
4976	uint64_t Imm64;
4977	if (!isInt64Immediate(N: N->getOperand(Num: `1`).getNode(), Imm&: Imm64))
4978	return false;
4979
4980	unsigned MB, ME;
4981	if (isRunOfOnes64(Val: Imm64, MB, ME) && MB >= `32` && MB <= ME) {
4982	// MB ME
4983	// +----------------------+
4984	// \|xxxxxxxxxxx00011111000\|
4985	// +----------------------+
4986	// 0 32 64
4987	// We can only do it if the MB is larger than 32 and MB <= ME
4988	// as RLWINM will replace the contents of [0 - 32) with [32 - 64) even
4989	// we didn't rotate it.
4990	SDLoc dl(N);
4991	SDValue Ops[] = {N->getOperand(Num: `0`), getI64Imm(Imm: `0`, dl), getI64Imm(Imm: MB - `32`, dl),
4992	getI64Imm(Imm: ME - `32`, dl)};
4993	CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops);
4994	return true;
4995	}
4996
4997	return false;
4998	}
4999
5000	bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) {
5001	assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5002	uint64_t Imm64;
5003	if (!isInt64Immediate(N: N->getOperand(Num: `1`).getNode(), Imm&: Imm64))
5004	return false;
5005
5006	// Do nothing if it is 16-bit imm as the pattern in the .td file handle
5007	// it well with "andi.".
5008	if (isUInt<`16`>(x: Imm64))
5009	return false;
5010
5011	SDLoc Loc(N);
5012	SDValue Val = N->getOperand(Num: `0`);
5013
5014	// Optimized with two rldicl's as follows:
5015	// Add missing bits on left to the mask and check that the mask is a
5016	// wrapped run of ones, i.e.
5017	// Change pattern \|0001111100000011111111\|
5018	// to \|1111111100000011111111\|.
5019	unsigned NumOfLeadingZeros = llvm::countl_zero(Val: Imm64);
5020	if (NumOfLeadingZeros != `0`)
5021	Imm64 \|= maskLeadingOnes<uint64_t>(N: NumOfLeadingZeros);
5022
5023	unsigned MB, ME;
5024	if (!isRunOfOnes64(Val: Imm64, MB, ME))
5025	return false;
5026
5027	// ME MB MB-ME+63
5028	// +----------------------+ +----------------------+
5029	// \|1111111100000011111111\| -> \|0000001111111111111111\|
5030	// +----------------------+ +----------------------+
5031	// 0 63 0 63
5032	// There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between.
5033	unsigned OnesOnLeft = ME + `1`;
5034	unsigned ZerosInBetween = (MB - ME + `63`) & `63`;
5035	// Rotate left by OnesOnLeft (so leading ones are now trailing ones) and clear
5036	// on the left the bits that are already zeros in the mask.
5037	Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val,
5038	getI64Imm(OnesOnLeft, Loc),
5039	getI64Imm(ZerosInBetween, Loc)),
5040	`0`);
5041	// MB-ME+63 ME MB
5042	// +----------------------+ +----------------------+
5043	// \|0000001111111111111111\| -> \|0001111100000011111111\|
5044	// +----------------------+ +----------------------+
5045	// 0 63 0 63
5046	// Rotate back by 64 - OnesOnLeft to undo previous rotate. Then clear on the
5047	// left the number of ones we previously added.
5048	SDValue Ops[] = {Val, getI64Imm(Imm: `64` - OnesOnLeft, dl: Loc),
5049	getI64Imm(Imm: NumOfLeadingZeros, dl: Loc)};
5050	CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
5051	return true;
5052	}
5053
5054	bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
5055	assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5056	unsigned Imm;
5057	if (!isInt32Immediate(N: N->getOperand(Num: `1`), Imm))
5058	return false;
5059
5060	SDValue Val = N->getOperand(Num: `0`);
5061	unsigned Imm2;
5062	// ISD::OR doesn't get all the bitfield insertion fun.
5063	// (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
5064	// bitfield insert.
5065	if (Val.getOpcode() != ISD::OR \|\| !isInt32Immediate(N: Val.getOperand(i: `1`), Imm&: Imm2))
5066	return false;
5067
5068	// The idea here is to check whether this is equivalent to:
5069	// (c1 & m) \| (x & ~m)
5070	// where m is a run-of-ones mask. The logic here is that, for each bit in
5071	// c1 and c2:
5072	// - if both are 1, then the output will be 1.
5073	// - if both are 0, then the output will be 0.
5074	// - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
5075	// come from x.
5076	// - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
5077	// be 0.
5078	// If that last condition is never the case, then we can form m from the
5079	// bits that are the same between c1 and c2.
5080	unsigned MB, ME;
5081	if (isRunOfOnes(Val: ~(Imm ^ Imm2), MB, ME) && !(~Imm & Imm2)) {
5082	SDLoc dl(N);
5083	SDValue Ops[] = {Val.getOperand(i: `0`), Val.getOperand(i: `1`), getI32Imm(Imm: `0`, dl),
5084	getI32Imm(Imm: MB, dl), getI32Imm(Imm: ME, dl)};
5085	ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
5086	return true;
5087	}
5088
5089	return false;
5090	}
5091
5092	bool PPCDAGToDAGISel::tryAsSingleRLDCL(SDNode *N) {
5093	assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5094
5095	uint64_t Imm64;
5096	if (!isInt64Immediate(N: N->getOperand(Num: `1`).getNode(), Imm&: Imm64) \|\| !isMask_64(Value: Imm64))
5097	return false;
5098
5099	SDValue Val = N->getOperand(Num: `0`);
5100
5101	if (Val.getOpcode() != ISD::ROTL)
5102	return false;
5103
5104	// Looking to try to avoid a situation like this one:
5105	// %2 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 23)
5106	// %and1 = and i64 %2, 9223372036854775807
5107	// In this function we are looking to try to match RLDCL. However, the above
5108	// DAG would better match RLDICL instead which is not what we are looking
5109	// for here.
5110	SDValue RotateAmt = Val.getOperand(i: `1`);
5111	if (RotateAmt.getOpcode() == ISD::Constant)
5112	return false;
5113
5114	unsigned MB = `64` - llvm::countr_one(Value: Imm64);
5115	SDLoc dl(N);
5116	SDValue Ops[] = {Val.getOperand(i: `0`), RotateAmt, getI32Imm(Imm: MB, dl)};
5117	CurDAG->SelectNodeTo(N, PPC::RLDCL, MVT::i64, Ops);
5118	return true;
5119	}
5120
5121	bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) {
5122	assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5123	uint64_t Imm64;
5124	if (!isInt64Immediate(N: N->getOperand(Num: `1`).getNode(), Imm&: Imm64) \|\| !isMask_64(Value: Imm64))
5125	return false;
5126
5127	// If this is a 64-bit zero-extension mask, emit rldicl.
5128	unsigned MB = `64` - llvm::countr_one(Value: Imm64);
5129	unsigned SH = `0`;
5130	unsigned Imm;
5131	SDValue Val = N->getOperand(Num: `0`);
5132	SDLoc dl(N);
5133
5134	if (Val.getOpcode() == ISD::ANY_EXTEND) {
5135	auto Op0 = Val.getOperand(i: `0`);
5136	if (Op0.getOpcode() == ISD::SRL &&
5137	isInt32Immediate(N: Op0.getOperand(i: `1`).getNode(), Imm) && Imm <= MB) {
5138
5139	auto ResultType = Val.getNode()->getValueType(ResNo: `0`);
5140	auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, ResultType);
5141	SDValue IDVal(ImDef, `0`);
5142
5143	Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, ResultType,
5144	IDVal, Op0.getOperand(`0`),
5145	getI32Imm(`1`, dl)),
5146	`0`);
5147	SH = `64` - Imm;
5148	}
5149	}
5150
5151	// If the operand is a logical right shift, we can fold it into this
5152	// instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
5153	// for n <= mb. The right shift is really a left rotate followed by a
5154	// mask, and this mask is a more-restrictive sub-mask of the mask implied
5155	// by the shift.
5156	if (Val.getOpcode() == ISD::SRL &&
5157	isInt32Immediate(N: Val.getOperand(i: `1`).getNode(), Imm) && Imm <= MB) {
5158	assert(Imm < `64` && "Illegal shift amount");
5159	Val = Val.getOperand(i: `0`);
5160	SH = `64` - Imm;
5161	}
5162
5163	SDValue Ops[] = {Val, getI32Imm(Imm: SH, dl), getI32Imm(Imm: MB, dl)};
5164	CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
5165	return true;
5166	}
5167
5168	bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) {
5169	assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5170	uint64_t Imm64;
5171	if (!isInt64Immediate(N: N->getOperand(Num: `1`).getNode(), Imm&: Imm64) \|\|
5172	!isMask_64(Value: ~Imm64))
5173	return false;
5174
5175	// If this is a negated 64-bit zero-extension mask,
5176	// i.e. the immediate is a sequence of ones from most significant side
5177	// and all zero for reminder, we should use rldicr.
5178	unsigned MB = `63` - llvm::countr_one(Value: ~Imm64);
5179	unsigned SH = `0`;
5180	SDLoc dl(N);
5181	SDValue Ops[] = {N->getOperand(Num: `0`), getI32Imm(Imm: SH, dl), getI32Imm(Imm: MB, dl)};
5182	CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
5183	return true;
5184	}
5185
5186	bool PPCDAGToDAGISel::tryAsSingleRLDIMI(SDNode *N) {
5187	assert(N->getOpcode() == ISD::OR && "ISD::OR SDNode expected");
5188	uint64_t Imm64;
5189	unsigned MB, ME;
5190	SDValue N0 = N->getOperand(Num: `0`);
5191
5192	// We won't get fewer instructions if the imm is 32-bit integer.
5193	// rldimi requires the imm to have consecutive ones with both sides zero.
5194	// Also, make sure the first Op has only one use, otherwise this may increase
5195	// register pressure since rldimi is destructive.
5196	if (!isInt64Immediate(N: N->getOperand(Num: `1`).getNode(), Imm&: Imm64) \|\|
5197	isUInt<`32`>(x: Imm64) \|\| !isRunOfOnes64(Val: Imm64, MB, ME) \|\| !N0.hasOneUse())
5198	return false;
5199
5200	unsigned SH = `63` - ME;
5201	SDLoc Dl(N);
5202	// Use select64Imm for making LI instr instead of directly putting Imm64
5203	SDValue Ops[] = {
5204	N->getOperand(Num: `0`),
5205	SDValue (selectI64Imm(CurDAG, N: getI64Imm(Imm: -`1`, dl: Dl).getNode()), `0`),
5206	getI32Imm(Imm: SH, dl: Dl), getI32Imm(Imm: MB, dl: Dl)};
5207	CurDAG->SelectNodeTo(N, PPC::RLDIMI, MVT::i64, Ops);
5208	return true;
5209	}
5210
5211	// Select - Convert the specified operand from a target-independent to a
5212	// target-specific node if it hasn't already been changed.
5213	void PPCDAGToDAGISel::Select(SDNode *N) {
5214	SDLoc dl(N);
5215	if (N->isMachineOpcode()) {
5216	N->setNodeId(-`1`);
5217	return; // Already selected.
5218	}
5219
5220	// In case any misguided DAG-level optimizations form an ADD with a
5221	// TargetConstant operand, crash here instead of miscompiling (by selecting
5222	// an r+r add instead of some kind of r+i add).
5223	if (N->getOpcode() == ISD::ADD &&
5224	N->getOperand(Num: `1`).getOpcode() == ISD::TargetConstant)
5225	llvm_unreachable("Invalid ADD with TargetConstant operand");
5226
5227	// Try matching complex bit permutations before doing anything else.
5228	if (tryBitPermutation(N))
5229	return;
5230
5231	// Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
5232	if (tryIntCompareInGPR(N))
5233	return;
5234
5235	switch (N->getOpcode()) {
5236	default: break;
5237
5238	case ISD::Constant:
5239	if (N->getValueType(`0`) == MVT::i64) {
5240	ReplaceNode(F: N, T: selectI64Imm(CurDAG, N));
5241	return;
5242	}
5243	break;
5244
5245	case ISD::INTRINSIC_VOID: {
5246	auto IntrinsicID = N->getConstantOperandVal(Num: `1`);
5247	if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
5248	IntrinsicID != Intrinsic::ppc_trapd &&
5249	IntrinsicID != Intrinsic::ppc_trap)
5250	break;
5251	unsigned Opcode = (IntrinsicID == Intrinsic::ppc_tdw \|\|
5252	IntrinsicID == Intrinsic::ppc_trapd)
5253	? PPC::TDI
5254	: PPC::TWI;
5255	SmallVector<SDValue, `4`> OpsWithMD;
5256	unsigned MDIndex;
5257	if (IntrinsicID == Intrinsic::ppc_tdw \|\|
5258	IntrinsicID == Intrinsic::ppc_tw) {
5259	SDValue Ops[] = {N->getOperand(Num: `4`), N->getOperand(Num: `2`), N->getOperand(Num: `3`)};
5260	int16_t SImmOperand2;
5261	int16_t SImmOperand3;
5262	int16_t SImmOperand4;
5263	bool isOperand2IntS16Immediate =
5264	isIntS16Immediate(Op: N->getOperand(Num: `2`), Imm&: SImmOperand2);
5265	bool isOperand3IntS16Immediate =
5266	isIntS16Immediate(Op: N->getOperand(Num: `3`), Imm&: SImmOperand3);
5267	// We will emit PPC::TD or PPC::TW if the 2nd and 3rd operands are reg +
5268	// reg or imm + imm. The imm + imm form will be optimized to either an
5269	// unconditional trap or a nop in a later pass.
5270	if (isOperand2IntS16Immediate == isOperand3IntS16Immediate)
5271	Opcode = IntrinsicID == Intrinsic::ppc_tdw ? PPC::TD : PPC::TW;
5272	else if (isOperand3IntS16Immediate)
5273	// The 2nd and 3rd operands are reg + imm.
5274	Ops[`2`] = getI32Imm(Imm: int(SImmOperand3) & `0xFFFF`, dl);
5275	else {
5276	// The 2nd and 3rd operands are imm + reg.
5277	bool isOperand4IntS16Immediate =
5278	isIntS16Immediate(Op: N->getOperand(Num: `4`), Imm&: SImmOperand4);
5279	(void)isOperand4IntS16Immediate;
5280	assert(isOperand4IntS16Immediate &&
5281	"The 4th operand is not an Immediate");
5282	// We need to flip the condition immediate TO.
5283	int16_t TO = int(SImmOperand4) & `0x1F`;
5284	// We swap the first and second bit of TO if they are not same.
5285	if ((TO & `0x1`) != ((TO & `0x2`) >> `1`))
5286	TO = (TO & `0x1`) ? TO + `1` : TO - `1`;
5287	// We swap the fourth and fifth bit of TO if they are not same.
5288	if ((TO & `0x8`) != ((TO & `0x10`) >> `1`))
5289	TO = (TO & `0x8`) ? TO + `8` : TO - `8`;
5290	Ops[`0`] = getI32Imm(Imm: TO, dl);
5291	Ops[`1`] = N->getOperand(Num: `3`);
5292	Ops[`2`] = getI32Imm(Imm: int(SImmOperand2) & `0xFFFF`, dl);
5293	}
5294	OpsWithMD = {Ops[`0`], Ops[`1`], Ops[`2`]};
5295	MDIndex = `5`;
5296	} else {
5297	OpsWithMD = {getI32Imm(Imm: `24`, dl), N->getOperand(Num: `2`), getI32Imm(Imm: `0`, dl)};
5298	MDIndex = `3`;
5299	}
5300
5301	if (N->getNumOperands() > MDIndex) {
5302	SDValue MDV = N->getOperand(Num: MDIndex);
5303	const MDNode *MD = cast<MDNodeSDNode>(Val&: MDV)->getMD();
5304	assert(MD->getNumOperands() != `0` && "Empty MDNode in operands!");
5305	assert((isa<MDString>(MD->getOperand(`0`)) && cast<MDString>(
5306	MD->getOperand(`0`))->getString().equals("ppc-trap-reason"))
5307	&& "Unsupported annotation data type!");
5308	for (unsigned i = `1`; i < MD->getNumOperands(); i++) {
5309	assert(isa<MDString>(MD->getOperand(i)) &&
5310	"Invalid data type for annotation ppc-trap-reason!");
5311	OpsWithMD.push_back(
5312	Elt: getI32Imm(Imm: std::stoi(str: cast<MDString>(
5313	Val: MD->getOperand(I: i))->getString().str()), dl));
5314	}
5315	}
5316	OpsWithMD.push_back(Elt: N->getOperand(Num: `0`)); // chain
5317	CurDAG->SelectNodeTo(N, Opcode, MVT::Other, OpsWithMD);
5318	return;
5319	}
5320
5321	case ISD::INTRINSIC_WO_CHAIN: {
5322	// We emit the PPC::FSELS instruction here because of type conflicts with
5323	// the comparison operand. The FSELS instruction is defined to use an 8-byte
5324	// comparison like the FSELD version. The fsels intrinsic takes a 4-byte
5325	// value for the comparison. When selecting through a .td file, a type
5326	// error is raised. Must check this first so we never break on the
5327	// !Subtarget->isISA3_1() check.
5328	auto IntID = N->getConstantOperandVal(Num: `0`);
5329	if (IntID == Intrinsic::ppc_fsels) {
5330	SDValue Ops[] = {N->getOperand(Num: `1`), N->getOperand(Num: `2`), N->getOperand(Num: `3`)};
5331	CurDAG->SelectNodeTo(N, PPC::FSELS, MVT::f32, Ops);
5332	return;
5333	}
5334
5335	if (IntID == Intrinsic::ppc_bcdadd_p \|\| IntID == Intrinsic::ppc_bcdsub_p) {
5336	auto Pred = N->getConstantOperandVal(Num: `1`);
5337	unsigned Opcode =
5338	IntID == Intrinsic::ppc_bcdadd_p ? PPC::BCDADD_rec : PPC::BCDSUB_rec;
5339	unsigned SubReg = `0`;
5340	unsigned ShiftVal = `0`;
5341	bool Reverse = false;
5342	switch (Pred) {
5343	case `0`:
5344	SubReg = PPC::sub_eq;
5345	ShiftVal = `1`;
5346	break;
5347	case `1`:
5348	SubReg = PPC::sub_eq;
5349	ShiftVal = `1`;
5350	Reverse = true;
5351	break;
5352	case `2`:
5353	SubReg = PPC::sub_lt;
5354	ShiftVal = `3`;
5355	break;
5356	case `3`:
5357	SubReg = PPC::sub_lt;
5358	ShiftVal = `3`;
5359	Reverse = true;
5360	break;
5361	case `4`:
5362	SubReg = PPC::sub_gt;
5363	ShiftVal = `2`;
5364	break;
5365	case `5`:
5366	SubReg = PPC::sub_gt;
5367	ShiftVal = `2`;
5368	Reverse = true;
5369	break;
5370	case `6`:
5371	SubReg = PPC::sub_un;
5372	break;
5373	case `7`:
5374	SubReg = PPC::sub_un;
5375	Reverse = true;
5376	break;
5377	}
5378
5379	EVT VTs[] = {MVT::v16i8, MVT::Glue};
5380	SDValue Ops[] = {N->getOperand(`2`), N->getOperand(`3`),
5381	CurDAG->getTargetConstant(`0`, dl, MVT::i32)};
5382	SDValue BCDOp = SDValue (CurDAG->getMachineNode(Opcode, dl, VTs, Ops), `0`);
5383	SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
5384	// On Power10, we can use SETBC[R]. On prior architectures, we have to use
5385	// MFOCRF and shift/negate the value.
5386	if (Subtarget->isISA3_1()) {
5387	SDValue SubRegIdx = CurDAG->getTargetConstant(SubReg, dl, MVT::i32);
5388	SDValue CRBit = SDValue(
5389	CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
5390	CR6Reg, SubRegIdx, BCDOp.getValue(`1`)),
5391	`0`);
5392	CurDAG->SelectNodeTo(N, Reverse ? PPC::SETBCR : PPC::SETBC, MVT::i32,
5393	CRBit);
5394	} else {
5395	SDValue Move =
5396	SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR6Reg,
5397	BCDOp.getValue(`1`)),
5398	`0`);
5399	SDValue Ops[] = {Move, getI32Imm(Imm: (`32` - (`4` + ShiftVal)) & `31`, dl),
5400	getI32Imm(Imm: `31`, dl), getI32Imm(Imm: `31`, dl)};
5401	if (!Reverse)
5402	CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5403	else {
5404	SDValue Shift = SDValue(
5405	CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), `0`);
5406	CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Shift, getI32Imm(`1`, dl));
5407	}
5408	}
5409	return;
5410	}
5411
5412	if (!Subtarget->isISA3_1())
5413	break;
5414	unsigned Opcode = `0`;
5415	switch (IntID) {
5416	default:
5417	break;
5418	case Intrinsic::ppc_altivec_vstribr_p:
5419	Opcode = PPC::VSTRIBR_rec;
5420	break;
5421	case Intrinsic::ppc_altivec_vstribl_p:
5422	Opcode = PPC::VSTRIBL_rec;
5423	break;
5424	case Intrinsic::ppc_altivec_vstrihr_p:
5425	Opcode = PPC::VSTRIHR_rec;
5426	break;
5427	case Intrinsic::ppc_altivec_vstrihl_p:
5428	Opcode = PPC::VSTRIHL_rec;
5429	break;
5430	}
5431	if (!Opcode)
5432	break;
5433
5434	// Generate the appropriate vector string isolate intrinsic to match.
5435	EVT VTs[] = {MVT::v16i8, MVT::Glue};
5436	SDValue VecStrOp =
5437	SDValue (CurDAG->getMachineNode(Opcode, dl, VTs, N->getOperand(Num: `2`)), `0`);
5438	// Vector string isolate instructions update the EQ bit of CR6.
5439	// Generate a SETBC instruction to extract the bit and place it in a GPR.
5440	SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_eq, dl, MVT::i32);
5441	SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
5442	SDValue CRBit = SDValue(
5443	CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
5444	CR6Reg, SubRegIdx, VecStrOp.getValue(`1`)),
5445	`0`);
5446	CurDAG->SelectNodeTo(N, PPC::SETBC, MVT::i32, CRBit);
5447	return;
5448	}
5449
5450	case ISD::SETCC:
5451	case ISD::STRICT_FSETCC:
5452	case ISD::STRICT_FSETCCS:
5453	if (trySETCC(N))
5454	return;
5455	break;
5456	// These nodes will be transformed into GETtlsADDR32 node, which
5457	// later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT
5458	case PPCISD::ADDI_TLSLD_L_ADDR:
5459	case PPCISD::ADDI_TLSGD_L_ADDR: {
5460	const Module *Mod = MF->getFunction().getParent();
5461	if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 \|\|
5462	!Subtarget->isSecurePlt() \|\| !Subtarget->isTargetELF() \|\|
5463	Mod->getPICLevel() == PICLevel::SmallPIC)
5464	break;
5465	// Attach global base pointer on GETtlsADDR32 node in order to
5466	// generate secure plt code for TLS symbols.
5467	getGlobalBaseReg();
5468	} break;
5469	case PPCISD::CALL: {
5470	if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 \|\|
5471	!TM.isPositionIndependent() \|\| !Subtarget->isSecurePlt() \|\|
5472	!Subtarget->isTargetELF())
5473	break;
5474
5475	SDValue Op = N->getOperand(Num: `1`);
5476
5477	if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val&: Op)) {
5478	if (GA->getTargetFlags() == PPCII::MO_PLT)
5479	getGlobalBaseReg();
5480	}
5481	else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Val&: Op)) {
5482	if (ES->getTargetFlags() == PPCII::MO_PLT)
5483	getGlobalBaseReg();
5484	}
5485	}
5486	break;
5487
5488	case PPCISD::GlobalBaseReg:
5489	ReplaceNode(F: N, T: getGlobalBaseReg());
5490	return;
5491
5492	case ISD::FrameIndex:
5493	selectFrameIndex(SN: N, N);
5494	return;
5495
5496	case PPCISD::MFOCRF: {
5497	SDValue InGlue = N->getOperand(Num: `1`);
5498	ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
5499	N->getOperand(`0`), InGlue));
5500	return;
5501	}
5502
5503	case PPCISD::READ_TIME_BASE:
5504	ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
5505	MVT::Other, N->getOperand(`0`)));
5506	return;
5507
5508	case PPCISD::SRA_ADDZE: {
5509	SDValue N0 = N->getOperand(Num: `0`);
5510	SDValue ShiftAmt =
5511	CurDAG->getTargetConstant(Val: *cast<ConstantSDNode>(Val: N->getOperand(Num: `1`))->
5512	getConstantIntValue(), DL: dl,
5513	VT: N->getValueType(ResNo: `0`));
5514	if (N->getValueType(`0`) == MVT::i64) {
5515	SDNode *Op =
5516	CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
5517	N0, ShiftAmt);
5518	CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, `0`),
5519	SDValue(Op, `1`));
5520	return;
5521	} else {
5522	assert(N->getValueType(`0`) == MVT::i32 &&
5523	"Expecting i64 or i32 in PPCISD::SRA_ADDZE");
5524	SDNode *Op =
5525	CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
5526	N0, ShiftAmt);
5527	CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, `0`),
5528	SDValue(Op, `1`));
5529	return;
5530	}
5531	}
5532
5533	case ISD::STORE: {
5534	// Change TLS initial-exec (or TLS local-exec on AIX) D-form stores to
5535	// X-form stores.
5536	StoreSDNode *ST = cast<StoreSDNode>(Val: N);
5537	if (EnableTLSOpt && (Subtarget->isELFv2ABI() \|\| Subtarget->isAIXABI()) &&
5538	ST->getAddressingMode() != ISD::PRE_INC)
5539	if (tryTLSXFormStore(ST))
5540	return;
5541	break;
5542	}
5543	case ISD::LOAD: {
5544	// Handle preincrement loads.
5545	LoadSDNode *LD = cast<LoadSDNode>(Val: N);
5546	EVT LoadedVT = LD->getMemoryVT();
5547
5548	// Normal loads are handled by code generated from the .td file.
5549	if (LD->getAddressingMode() != ISD::PRE_INC) {
5550	// Change TLS initial-exec (or TLS local-exec on AIX) D-form loads to
5551	// X-form loads.
5552	if (EnableTLSOpt && (Subtarget->isELFv2ABI() \|\| Subtarget->isAIXABI()))
5553	if (tryTLSXFormLoad(LD))
5554	return;
5555	break;
5556	}
5557
5558	SDValue Offset = LD->getOffset();
5559	if (Offset.getOpcode() == ISD::TargetConstant \|\|
5560	Offset.getOpcode() == ISD::TargetGlobalAddress) {
5561
5562	unsigned Opcode;
5563	bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
5564	if (LD->getValueType(`0`) != MVT::i64) {
5565	// Handle PPC32 integer and normal FP loads.
5566	assert((!isSExt \|\| LoadedVT == MVT::i16) && "Invalid sext update load");
5567	switch (LoadedVT.getSimpleVT().SimpleTy) {
5568	default: llvm_unreachable("Invalid PPC load type!");
5569	case MVT::f64: Opcode = PPC::LFDU; break;
5570	case MVT::f32: Opcode = PPC::LFSU; break;
5571	case MVT::i32: Opcode = PPC::LWZU; break;
5572	case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
5573	case MVT::i1:
5574	case MVT::i8: Opcode = PPC::LBZU; break;
5575	}
5576	} else {
5577	assert(LD->getValueType(`0`) == MVT::i64 && "Unknown load result type!");
5578	assert((!isSExt \|\| LoadedVT == MVT::i16) && "Invalid sext update load");
5579	switch (LoadedVT.getSimpleVT().SimpleTy) {
5580	default: llvm_unreachable("Invalid PPC load type!");
5581	case MVT::i64: Opcode = PPC::LDU; break;
5582	case MVT::i32: Opcode = PPC::LWZU8; break;
5583	case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
5584	case MVT::i1:
5585	case MVT::i8: Opcode = PPC::LBZU8; break;
5586	}
5587	}
5588
5589	SDValue Chain = LD->getChain();
5590	SDValue Base = LD->getBasePtr();
5591	SDValue Ops[] = { Offset, Base, Chain };
5592	SDNode *MN = CurDAG->getMachineNode(
5593	Opcode, dl, LD->getValueType(`0`),
5594	PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
5595	transferMemOperands(N, Result: MN);
5596	ReplaceNode(F: N, T: MN);
5597	return;
5598	} else {
5599	unsigned Opcode;
5600	bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
5601	if (LD->getValueType(`0`) != MVT::i64) {
5602	// Handle PPC32 integer and normal FP loads.
5603	assert((!isSExt \|\| LoadedVT == MVT::i16) && "Invalid sext update load");
5604	switch (LoadedVT.getSimpleVT().SimpleTy) {
5605	default: llvm_unreachable("Invalid PPC load type!");
5606	case MVT::f64: Opcode = PPC::LFDUX; break;
5607	case MVT::f32: Opcode = PPC::LFSUX; break;
5608	case MVT::i32: Opcode = PPC::LWZUX; break;
5609	case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
5610	case MVT::i1:
5611	case MVT::i8: Opcode = PPC::LBZUX; break;
5612	}
5613	} else {
5614	assert(LD->getValueType(`0`) == MVT::i64 && "Unknown load result type!");
5615	assert((!isSExt \|\| LoadedVT == MVT::i16 \|\| LoadedVT == MVT::i32) &&
5616	"Invalid sext update load");
5617	switch (LoadedVT.getSimpleVT().SimpleTy) {
5618	default: llvm_unreachable("Invalid PPC load type!");
5619	case MVT::i64: Opcode = PPC::LDUX; break;
5620	case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break;
5621	case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
5622	case MVT::i1:
5623	case MVT::i8: Opcode = PPC::LBZUX8; break;
5624	}
5625	}
5626
5627	SDValue Chain = LD->getChain();
5628	SDValue Base = LD->getBasePtr();
5629	SDValue Ops[] = { Base, Offset, Chain };
5630	SDNode *MN = CurDAG->getMachineNode(
5631	Opcode, dl, LD->getValueType(`0`),
5632	PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
5633	transferMemOperands(N, Result: MN);
5634	ReplaceNode(F: N, T: MN);
5635	return;
5636	}
5637	}
5638
5639	case ISD::AND:
5640	// If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
5641	if (tryAsSingleRLWINM(N) \|\| tryAsSingleRLWIMI(N) \|\| tryAsSingleRLDCL(N) \|\|
5642	tryAsSingleRLDICL(N) \|\| tryAsSingleRLDICR(N) \|\| tryAsSingleRLWINM8(N) \|\|
5643	tryAsPairOfRLDICL(N))
5644	return;
5645
5646	// Other cases are autogenerated.
5647	break;
5648	case ISD::OR: {
5649	if (N->getValueType(`0`) == MVT::i32)
5650	if (tryBitfieldInsert(N))
5651	return;
5652
5653	int16_t Imm;
5654	if (N->getOperand(Num: `0`)->getOpcode() == ISD::FrameIndex &&
5655	isIntS16Immediate(Op: N->getOperand(Num: `1`), Imm)) {
5656	KnownBits LHSKnown = CurDAG->computeKnownBits(Op: N->getOperand(Num: `0`));
5657
5658	// If this is equivalent to an add, then we can fold it with the
5659	// FrameIndex calculation.
5660	if ((LHSKnown.Zero.getZExtValue()\|~(uint64_t)Imm) == ~`0ULL`) {
5661	selectFrameIndex(SN: N, N: N->getOperand(Num: `0`).getNode(), Offset: (int64_t)Imm);
5662	return;
5663	}
5664	}
5665
5666	// If this is 'or' against an imm with consecutive ones and both sides zero,
5667	// try to emit rldimi
5668	if (tryAsSingleRLDIMI(N))
5669	return;
5670
5671	// OR with a 32-bit immediate can be handled by ori + oris
5672	// without creating an immediate in a GPR.
5673	uint64_t Imm64 = `0`;
5674	bool IsPPC64 = Subtarget->isPPC64();
5675	if (IsPPC64 && isInt64Immediate(N: N->getOperand(Num: `1`), Imm&: Imm64) &&
5676	(Imm64 & ~`0xFFFFFFFFuLL`) == `0`) {
5677	// If ImmHi (ImmLo) is zero, only one ori (oris) is generated later.
5678	uint64_t ImmHi = Imm64 >> `16`;
5679	uint64_t ImmLo = Imm64 & `0xFFFF`;
5680	if (ImmHi != `0` && ImmLo != `0`) {
5681	SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
5682	N->getOperand(`0`),
5683	getI16Imm(ImmLo, dl));
5684	SDValue Ops1[] = { SDValue (Lo, `0`), getI16Imm(Imm: ImmHi, dl)};
5685	CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1);
5686	return;
5687	}
5688	}
5689
5690	// Other cases are autogenerated.
5691	break;
5692	}
5693	case ISD::XOR: {
5694	// XOR with a 32-bit immediate can be handled by xori + xoris
5695	// without creating an immediate in a GPR.
5696	uint64_t Imm64 = `0`;
5697	bool IsPPC64 = Subtarget->isPPC64();
5698	if (IsPPC64 && isInt64Immediate(N: N->getOperand(Num: `1`), Imm&: Imm64) &&
5699	(Imm64 & ~`0xFFFFFFFFuLL`) == `0`) {
5700	// If ImmHi (ImmLo) is zero, only one xori (xoris) is generated later.
5701	uint64_t ImmHi = Imm64 >> `16`;
5702	uint64_t ImmLo = Imm64 & `0xFFFF`;
5703	if (ImmHi != `0` && ImmLo != `0`) {
5704	SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
5705	N->getOperand(`0`),
5706	getI16Imm(ImmLo, dl));
5707	SDValue Ops1[] = { SDValue (Lo, `0`), getI16Imm(Imm: ImmHi, dl)};
5708	CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1);
5709	return;
5710	}
5711	}
5712
5713	break;
5714	}
5715	case ISD::ADD: {
5716	int16_t Imm;
5717	if (N->getOperand(Num: `0`)->getOpcode() == ISD::FrameIndex &&
5718	isIntS16Immediate(Op: N->getOperand(Num: `1`), Imm)) {
5719	selectFrameIndex(SN: N, N: N->getOperand(Num: `0`).getNode(), Offset: (int64_t)Imm);
5720	return;
5721	}
5722
5723	break;
5724	}
5725	case ISD::SHL: {
5726	unsigned Imm, SH, MB, ME;
5727	if (isOpcWithIntImmediate(N: N->getOperand(Num: `0`).getNode(), Opc: ISD::AND, Imm) &&
5728	isRotateAndMask(N, Mask: Imm, isShiftMask: true, SH, MB, ME)) {
5729	SDValue Ops[] = { N->getOperand(Num: `0`).getOperand(i: `0`),
5730	getI32Imm(Imm: SH, dl), getI32Imm(Imm: MB, dl),
5731	getI32Imm(Imm: ME, dl) };
5732	CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5733	return;
5734	}
5735
5736	// Other cases are autogenerated.
5737	break;
5738	}
5739	case ISD::SRL: {
5740	unsigned Imm, SH, MB, ME;
5741	if (isOpcWithIntImmediate(N: N->getOperand(Num: `0`).getNode(), Opc: ISD::AND, Imm) &&
5742	isRotateAndMask(N, Mask: Imm, isShiftMask: true, SH, MB, ME)) {
5743	SDValue Ops[] = { N->getOperand(Num: `0`).getOperand(i: `0`),
5744	getI32Imm(Imm: SH, dl), getI32Imm(Imm: MB, dl),
5745	getI32Imm(Imm: ME, dl) };
5746	CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5747	return;
5748	}
5749
5750	// Other cases are autogenerated.
5751	break;
5752	}
5753	case ISD::MUL: {
5754	SDValue Op1 = N->getOperand(Num: `1`);
5755	if (Op1.getOpcode() != ISD::Constant \|\|
5756	(Op1.getValueType() != MVT::i64 && Op1.getValueType() != MVT::i32))
5757	break;
5758
5759	// If the multiplier fits int16, we can handle it with mulli.
5760	int64_t Imm = Op1 ->getAsZExtVal();
5761	unsigned Shift = llvm::countr_zero<uint64_t>(Val: Imm);
5762	if (isInt<`16`>(x: Imm) \|\| !Shift)
5763	break;
5764
5765	// If the shifted value fits int16, we can do this transformation:
5766	// (mul X, c1 << c2) -> (rldicr (mulli X, c1) c2). We do this in ISEL because
5767	// DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2).
5768	uint64_t ImmSh = Imm >> Shift;
5769	if (!isInt<`16`>(x: ImmSh))
5770	break;
5771
5772	uint64_t SextImm = SignExtend64(X: ImmSh & `0xFFFF`, B: `16`);
5773	if (Op1.getValueType() == MVT::i64) {
5774	SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
5775	SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI8, dl, MVT::i64,
5776	N->getOperand(`0`), SDImm);
5777
5778	SDValue Ops[] = {SDValue (MulNode, `0`), getI32Imm(Imm: Shift, dl),
5779	getI32Imm(Imm: `63` - Shift, dl)};
5780	CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
5781	return;
5782	} else {
5783	SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i32);
5784	SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI, dl, MVT::i32,
5785	N->getOperand(`0`), SDImm);
5786
5787	SDValue Ops[] = {SDValue (MulNode, `0`), getI32Imm(Imm: Shift, dl),
5788	getI32Imm(Imm: `0`, dl), getI32Imm(Imm: `31` - Shift, dl)};
5789	CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5790	return;
5791	}
5792	break;
5793	}
5794	// FIXME: Remove this once the ANDI glue bug is fixed:
5795	case PPCISD::ANDI_rec_1_EQ_BIT:
5796	case PPCISD::ANDI_rec_1_GT_BIT: {
5797	if (!ANDIGlueBug)
5798	break;
5799
5800	EVT InVT = N->getOperand(Num: `0`).getValueType();
5801	assert((InVT == MVT::i64 \|\| InVT == MVT::i32) &&
5802	"Invalid input type for ANDI_rec_1_EQ_BIT");
5803
5804	unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDI8_rec : PPC::ANDI_rec;
5805	SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
5806	N->getOperand(`0`),
5807	CurDAG->getTargetConstant(`1`, dl, InVT)),
5808	`0`);
5809	SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
5810	SDValue SRIdxVal = CurDAG->getTargetConstant(
5811	N->getOpcode() == PPCISD::ANDI_rec_1_EQ_BIT ? PPC::sub_eq : PPC::sub_gt,
5812	dl, MVT::i32);
5813
5814	CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
5815	SRIdxVal, SDValue(AndI.getNode(), `1`) /* glue */);
5816	return;
5817	}
5818	case ISD::SELECT_CC: {
5819	ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: `4`))->get();
5820	EVT PtrVT =
5821	CurDAG->getTargetLoweringInfo().getPointerTy(DL: CurDAG->getDataLayout());
5822	bool isPPC64 = (PtrVT == MVT::i64);
5823
5824	// If this is a select of i1 operands, we'll pattern match it.
5825	if (Subtarget->useCRBits() && N->getOperand(`0`).getValueType() == MVT::i1)
5826	break;
5827
5828	if (Subtarget->isISA3_0() && Subtarget->isPPC64()) {
5829	bool NeedSwapOps = false;
5830	bool IsUnCmp = false;
5831	if (mayUseP9Setb(N, CC, DAG: CurDAG, NeedSwapOps, IsUnCmp)) {
5832	SDValue LHS = N->getOperand(Num: `0`);
5833	SDValue RHS = N->getOperand(Num: `1`);
5834	if (NeedSwapOps)
5835	std::swap(a&: LHS, b&: RHS);
5836
5837	// Make use of SelectCC to generate the comparison that sets the CR bits.
5838	// For equality comparisons with one literal operand, SelectCC may avoid
5839	// materializing the whole literal and instead use xoris to check part of
5840	// it first, in which case the resulting comparison cannot exactly
5841	// represent the GT/LT relationship. To avoid this we specify
5842	// SETGT/SETUGT here instead of SETEQ.
5843	SDValue GenCC =
5844	SelectCC(LHS, RHS, CC: IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);
5845	CurDAG->SelectNodeTo(
5846	N, N->getSimpleValueType(`0`) == MVT::i64 ? PPC::SETB8 : PPC::SETB,
5847	N->getValueType(`0`), GenCC);
5848	NumP9Setb ++;
5849	return;
5850	}
5851	}
5852
5853	// Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
5854	if (!isPPC64 && isNullConstant(N->getOperand(`1`)) &&
5855	isOneConstant(N->getOperand(`2`)) && isNullConstant(N->getOperand(`3`)) &&
5856	CC == ISD::SETNE &&
5857	// FIXME: Implement this optzn for PPC64.
5858	N->getValueType(`0`) == MVT::i32) {
5859	SDNode *Tmp =
5860	CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
5861	N->getOperand(`0`), getI32Imm(~`0U`, dl));
5862	CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, `0`),
5863	N->getOperand(`0`), SDValue(Tmp, `1`));
5864	return;
5865	}
5866
5867	SDValue CCReg = SelectCC(LHS: N->getOperand(Num: `0`), RHS: N->getOperand(Num: `1`), CC, dl);
5868
5869	if (N->getValueType(`0`) == MVT::i1) {
5870	// An i1 select is: (c & t) \| (!c & f).
5871	bool Inv;
5872	unsigned Idx = getCRIdxForSetCC(CC, Invert&: Inv);
5873
5874	unsigned SRI;
5875	switch (Idx) {
5876	default: llvm_unreachable("Invalid CC index");
5877	case `0`: SRI = PPC::sub_lt; break;
5878	case `1`: SRI = PPC::sub_gt; break;
5879	case `2`: SRI = PPC::sub_eq; break;
5880	case `3`: SRI = PPC::sub_un; break;
5881	}
5882
5883	SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
5884
5885	SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
5886	CCBit, CCBit), `0`);
5887	SDValue C = Inv ? NotCCBit : CCBit,
5888	NotC = Inv ? CCBit : NotCCBit;
5889
5890	SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
5891	C, N->getOperand(`2`)), `0`);
5892	SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
5893	NotC, N->getOperand(`3`)), `0`);
5894
5895	CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
5896	return;
5897	}
5898
5899	unsigned BROpc =
5900	getPredicateForSetCC(CC, VT: N->getOperand(Num: `0`).getValueType(), Subtarget);
5901
5902	unsigned SelectCCOp;
5903	if (N->getValueType(`0`) == MVT::i32)
5904	SelectCCOp = PPC::SELECT_CC_I4;
5905	else if (N->getValueType(`0`) == MVT::i64)
5906	SelectCCOp = PPC::SELECT_CC_I8;
5907	else if (N->getValueType(`0`) == MVT::f32) {
5908	if (Subtarget->hasP8Vector())
5909	SelectCCOp = PPC::SELECT_CC_VSSRC;
5910	else if (Subtarget->hasSPE())
5911	SelectCCOp = PPC::SELECT_CC_SPE4;
5912	else
5913	SelectCCOp = PPC::SELECT_CC_F4;
5914	} else if (N->getValueType(`0`) == MVT::f64) {
5915	if (Subtarget->hasVSX())
5916	SelectCCOp = PPC::SELECT_CC_VSFRC;
5917	else if (Subtarget->hasSPE())
5918	SelectCCOp = PPC::SELECT_CC_SPE;
5919	else
5920	SelectCCOp = PPC::SELECT_CC_F8;
5921	} else if (N->getValueType(`0`) == MVT::f128)
5922	SelectCCOp = PPC::SELECT_CC_F16;
5923	else if (Subtarget->hasSPE())
5924	SelectCCOp = PPC::SELECT_CC_SPE;
5925	else if (N->getValueType(`0`) == MVT::v2f64 \|\|
5926	N->getValueType(`0`) == MVT::v2i64)
5927	SelectCCOp = PPC::SELECT_CC_VSRC;
5928	else
5929	SelectCCOp = PPC::SELECT_CC_VRRC;
5930
5931	SDValue Ops[] = { CCReg, N->getOperand(Num: `2`), N->getOperand(Num: `3`),
5932	getI32Imm(Imm: BROpc, dl) };
5933	CurDAG->SelectNodeTo(N, MachineOpc: SelectCCOp, VT: N->getValueType(ResNo: `0`), Ops);
5934	return;
5935	}
5936	case ISD::VECTOR_SHUFFLE:
5937	if (Subtarget->hasVSX() && (N->getValueType(`0`) == MVT::v2f64 \|\|
5938	N->getValueType(`0`) == MVT::v2i64)) {
5939	ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Val: N);
5940
5941	SDValue Op1 = N->getOperand(Num: SVN->getMaskElt(Idx: `0`) < `2` ? `0` : `1`),
5942	Op2 = N->getOperand(Num: SVN->getMaskElt(Idx: `1`) < `2` ? `0` : `1`);
5943	unsigned DM[`2`];
5944
5945	for (int i = `0`; i < `2`; ++i)
5946	if (SVN->getMaskElt(Idx: i) <= `0` \|\| SVN->getMaskElt(Idx: i) == `2`)
5947	DM[i] = `0`;
5948	else
5949	DM[i] = `1`;
5950
5951	if (Op1 == Op2 && DM[`0`] == `0` && DM[`1`] == `0` &&
5952	Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
5953	isa<LoadSDNode>(Val: Op1.getOperand(i: `0`))) {
5954	LoadSDNode *LD = cast<LoadSDNode>(Val: Op1.getOperand(i: `0`));
5955	SDValue Base, Offset;
5956
5957	if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
5958	(LD->getMemoryVT() == MVT::f64 \|\|
5959	LD->getMemoryVT() == MVT::i64) &&
5960	SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
5961	SDValue Chain = LD->getChain();
5962	SDValue Ops[] = { Base, Offset, Chain };
5963	MachineMemOperand *MemOp = LD->getMemOperand();
5964	SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
5965	N->getValueType(`0`), Ops);
5966	CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: NewN), NewMemRefs: {MemOp});
5967	return;
5968	}
5969	}
5970
5971	// For little endian, we must swap the input operands and adjust
5972	// the mask elements (reverse and invert them).
5973	if (Subtarget->isLittleEndian()) {
5974	std::swap(a&: Op1, b&: Op2);
5975	unsigned tmp = DM[`0`];
5976	DM[`0`] = `1` - DM[`1`];
5977	DM[`1`] = `1` - tmp;
5978	}
5979
5980	SDValue DMV = CurDAG->getTargetConstant(DM[`1`] \| (DM[`0`] << `1`), dl,
5981	MVT::i32);
5982	SDValue Ops[] = { Op1, Op2, DMV };
5983	CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(`0`), Ops);
5984	return;
5985	}
5986
5987	break;
5988	case PPCISD::BDNZ:
5989	case PPCISD::BDZ: {
5990	bool IsPPC64 = Subtarget->isPPC64();
5991	SDValue Ops[] = { N->getOperand(Num: `1`), N->getOperand(Num: `0`) };
5992	CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ
5993	? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
5994	: (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
5995	MVT::Other, Ops);
5996	return;
5997	}
5998	case PPCISD::COND_BRANCH: {
5999	// Op #0 is the Chain.
6000	// Op #1 is the PPC::PRED_* number.
6001	// Op #2 is the CR#
6002	// Op #3 is the Dest MBB
6003	// Op #4 is the Flag.
6004	// Prevent PPC::PRED_* from being selected into LI.
6005	unsigned PCC = N->getConstantOperandVal(Num: `1`);
6006	if (EnableBranchHint)
6007	PCC \|= getBranchHint(PCC, FuncInfo: *FuncInfo, DestMBB: N->getOperand(Num: `3`));
6008
6009	SDValue Pred = getI32Imm(Imm: PCC, dl);
6010	SDValue Ops[] = { Pred, N->getOperand(Num: `2`), N->getOperand(Num: `3`),
6011	N->getOperand(Num: `0`), N->getOperand(Num: `4`) };
6012	CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
6013	return;
6014	}
6015	case ISD::BR_CC: {
6016	if (tryFoldSWTestBRCC(N))
6017	return;
6018	if (trySelectLoopCountIntrinsic(N))
6019	return;
6020	ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: `1`))->get();
6021	unsigned PCC =
6022	getPredicateForSetCC(CC, VT: N->getOperand(Num: `2`).getValueType(), Subtarget);
6023
6024	if (N->getOperand(`2`).getValueType() == MVT::i1) {
6025	unsigned Opc;
6026	bool Swap;
6027	switch (PCC) {
6028	default: llvm_unreachable("Unexpected Boolean-operand predicate");
6029	case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break;
6030	case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break;
6031	case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break;
6032	case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break;
6033	case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
6034	case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;
6035	}
6036
6037	// A signed comparison of i1 values produces the opposite result to an
6038	// unsigned one if the condition code includes less-than or greater-than.
6039	// This is because 1 is the most negative signed i1 number and the most
6040	// positive unsigned i1 number. The CR-logical operations used for such
6041	// comparisons are non-commutative so for signed comparisons vs. unsigned
6042	// ones, the input operands just need to be swapped.
6043	if (ISD::isSignedIntSetCC(Code: CC))
6044	Swap = !Swap;
6045
6046	SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
6047	N->getOperand(Swap ? `3` : `2`),
6048	N->getOperand(Swap ? `2` : `3`)), `0`);
6049	CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(`4`),
6050	N->getOperand(`0`));
6051	return;
6052	}
6053
6054	if (EnableBranchHint)
6055	PCC \|= getBranchHint(PCC, FuncInfo: *FuncInfo, DestMBB: N->getOperand(Num: `4`));
6056
6057	SDValue CondCode = SelectCC(LHS: N->getOperand(Num: `2`), RHS: N->getOperand(Num: `3`), CC, dl);
6058	SDValue Ops[] = { getI32Imm(Imm: PCC, dl), CondCode,
6059	N->getOperand(Num: `4`), N->getOperand(Num: `0`) };
6060	CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
6061	return;
6062	}
6063	case ISD::BRIND: {
6064	// FIXME: Should custom lower this.
6065	SDValue Chain = N->getOperand(Num: `0`);
6066	SDValue Target = N->getOperand(Num: `1`);
6067	unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
6068	unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
6069	Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
6070	Chain), `0`);
6071	CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
6072	return;
6073	}
6074	case PPCISD::TOC_ENTRY: {
6075	const bool isPPC64 = Subtarget->isPPC64();
6076	const bool isELFABI = Subtarget->isSVR4ABI();
6077	const bool isAIXABI = Subtarget->isAIXABI();
6078
6079	// PowerPC only supports the small, medium and large code models.
6080	const CodeModel::Model CModel = getCodeModel(Subtarget: *Subtarget, TM, Node: N);
6081
6082	assert(!(CModel == CodeModel::Tiny \|\| CModel == CodeModel::Kernel) &&
6083	"PowerPC doesn't support tiny or kernel code models.");
6084
6085	if (isAIXABI && CModel == CodeModel::Medium)
6086	report_fatal_error(reason: "Medium code model is not supported on AIX.");
6087
6088	// For 64-bit ELF small code model, we allow SelectCodeCommon to handle
6089	// this, selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. For AIX
6090	// small code model, we need to check for a toc-data attribute.
6091	if (isPPC64 && !isAIXABI && CModel == CodeModel::Small)
6092	break;
6093
6094	auto replaceWith = [this, &dl](unsigned OpCode, SDNode *TocEntry,
6095	EVT OperandTy) {
6096	SDValue GA = TocEntry->getOperand(Num: `0`);
6097	SDValue TocBase = TocEntry->getOperand(Num: `1`);
6098	SDNode *MN = CurDAG->getMachineNode(Opcode: OpCode, dl, VT: OperandTy, Op1: GA, Op2: TocBase);
6099	transferMemOperands(N: TocEntry, Result: MN);
6100	ReplaceNode(F: TocEntry, T: MN);
6101	};
6102
6103	// Handle 32-bit small code model.
6104	if (!isPPC64 && CModel == CodeModel::Small) {
6105	// Transform the PPCISD::TOC_ENTRY node into the passed-in opcode, either
6106	// PPC::ADDItoc or PPC::LWZtoc.
6107	if (isELFABI) {
6108	assert(TM.isPositionIndependent() &&
6109	"32-bit ELF can only have TOC entries in position independent"
6110	" code.");
6111	// 32-bit ELF always uses a small code model toc access.
6112	replaceWith(PPC::LWZtoc, N, MVT::i32);
6113	return;
6114	}
6115
6116	assert(isAIXABI && "ELF ABI already handled");
6117
6118	if (hasTocDataAttr(Val: N->getOperand(Num: `0`))) {
6119	replaceWith(PPC::ADDItoc, N, MVT::i32);
6120	return;
6121	}
6122
6123	replaceWith(PPC::LWZtoc, N, MVT::i32);
6124	return;
6125	}
6126
6127	if (isPPC64 && CModel == CodeModel::Small) {
6128	assert(isAIXABI && "ELF ABI handled in common SelectCode");
6129
6130	if (hasTocDataAttr(Val: N->getOperand(Num: `0`))) {
6131	replaceWith(PPC::ADDItoc8, N, MVT::i64);
6132	return;
6133	}
6134	// Break if it doesn't have toc data attribute. Proceed with common
6135	// SelectCode.
6136	break;
6137	}
6138
6139	assert(CModel != CodeModel::Small && "All small code models handled.");
6140
6141	assert((isPPC64 \|\| (isAIXABI && !isPPC64)) && "We are dealing with 64-bit"
6142	" ELF/AIX or 32-bit AIX in the following.");
6143
6144	// Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode
6145	// or 64-bit medium (ELF-only) or large (ELF and AIX) code model code non
6146	// toc-data symbols.
6147	// We generate two instructions as described below. The first source
6148	// operand is a symbol reference. If it must be toc-referenced according to
6149	// Subtarget, we generate:
6150	// [32-bit AIX]
6151	// LWZtocL(@sym, ADDIStocHA(%r2, @sym))
6152	// [64-bit ELF/AIX]
6153	// LDtocL(@sym, ADDIStocHA8(%x2, @sym))
6154	// Otherwise we generate:
6155	// ADDItocL8(ADDIStocHA8(%x2, @sym), @sym)
6156
6157	// For large code model toc-data symbols we generate:
6158	// [32-bit AIX]
6159	// ADDItocL(ADDIStocHA(%r2, @sym), @sym)
6160	// [64-bit AIX]
6161	// Currently not supported.
6162
6163	SDValue GA = N->getOperand(Num: `0`);
6164	SDValue TOCbase = N->getOperand(Num: `1`);
6165
6166	EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
6167	SDNode *Tmp = CurDAG->getMachineNode(
6168	isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA);
6169
6170	// On AIX if the symbol has the toc-data attribute it will be defined
6171	// in the TOC entry, so we use an ADDItocL similar to the medium code
6172	// model ELF abi.
6173	if (isAIXABI && hasTocDataAttr(Val: GA)) {
6174	if (isPPC64)
6175	report_fatal_error(
6176	reason: "64-bit large code model toc-data not yet supported");
6177
6178	ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, VT,
6179	SDValue(Tmp, `0`), GA));
6180	return;
6181	}
6182
6183	if (PPCLowering->isAccessedAsGotIndirect(N: GA)) {
6184	// If it is accessed as got-indirect, we need an extra LWZ/LD to load
6185	// the address.
6186	SDNode *MN = CurDAG->getMachineNode(
6187	isPPC64 ? PPC::LDtocL : PPC::LWZtocL, dl, VT, GA, SDValue(Tmp, `0`));
6188
6189	transferMemOperands(N, Result: MN);
6190	ReplaceNode(F: N, T: MN);
6191	return;
6192	}
6193
6194	// Build the address relative to the TOC-pointer.
6195	ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL8, dl, MVT::i64,
6196	SDValue(Tmp, `0`), GA));
6197	return;
6198	}
6199	case PPCISD::PPC32_PICGOT:
6200	// Generate a PIC-safe GOT reference.
6201	assert(Subtarget->is32BitELFABI() &&
6202	"PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
6203	CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,
6204	PPCLowering->getPointerTy(CurDAG->getDataLayout()),
6205	MVT::i32);
6206	return;
6207
6208	case PPCISD::VADD_SPLAT: {
6209	// This expands into one of three sequences, depending on whether
6210	// the first operand is odd or even, positive or negative.
6211	assert(isa<ConstantSDNode>(N->getOperand(`0`)) &&
6212	isa<ConstantSDNode>(N->getOperand(`1`)) &&
6213	"Invalid operand on VADD_SPLAT!");
6214
6215	int Elt = N->getConstantOperandVal(Num: `0`);
6216	int EltSize = N->getConstantOperandVal(Num: `1`);
6217	unsigned Opc1, Opc2, Opc3;
6218	EVT VT;
6219
6220	if (EltSize == `1`) {
6221	Opc1 = PPC::VSPLTISB;
6222	Opc2 = PPC::VADDUBM;
6223	Opc3 = PPC::VSUBUBM;
6224	VT = MVT::v16i8;
6225	} else if (EltSize == `2`) {
6226	Opc1 = PPC::VSPLTISH;
6227	Opc2 = PPC::VADDUHM;
6228	Opc3 = PPC::VSUBUHM;
6229	VT = MVT::v8i16;
6230	} else {
6231	assert(EltSize == `4` && "Invalid element size on VADD_SPLAT!");
6232	Opc1 = PPC::VSPLTISW;
6233	Opc2 = PPC::VADDUWM;
6234	Opc3 = PPC::VSUBUWM;
6235	VT = MVT::v4i32;
6236	}
6237
6238	if ((Elt & `1`) == `0`) {
6239	// Elt is even, in the range [-32,-18] + [16,30].
6240	//
6241	// Convert: VADD_SPLAT elt, size
6242	// Into: tmp = VSPLTIS[BHW] elt
6243	// VADDU[BHW]M tmp, tmp
6244	// Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
6245	SDValue EltVal = getI32Imm(Imm: Elt >> `1`, dl);
6246	SDNode *Tmp = CurDAG->getMachineNode(Opcode: Opc1, dl, VT, Op1: EltVal);
6247	SDValue TmpVal = SDValue (Tmp, `0`);
6248	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc2, dl, VT, Op1: TmpVal, Op2: TmpVal));
6249	return;
6250	} else if (Elt > `0`) {
6251	// Elt is odd and positive, in the range [17,31].
6252	//
6253	// Convert: VADD_SPLAT elt, size
6254	// Into: tmp1 = VSPLTIS[BHW] elt-16
6255	// tmp2 = VSPLTIS[BHW] -16
6256	// VSUBU[BHW]M tmp1, tmp2
6257	SDValue EltVal = getI32Imm(Imm: Elt - `16`, dl);
6258	SDNode *Tmp1 = CurDAG->getMachineNode(Opcode: Opc1, dl, VT, Op1: EltVal);
6259	EltVal = getI32Imm(Imm: -`16`, dl);
6260	SDNode *Tmp2 = CurDAG->getMachineNode(Opcode: Opc1, dl, VT, Op1: EltVal);
6261	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc3, dl, VT, Op1: SDValue (Tmp1, `0`),
6262	Op2: SDValue (Tmp2, `0`)));
6263	return;
6264	} else {
6265	// Elt is odd and negative, in the range [-31,-17].
6266	//
6267	// Convert: VADD_SPLAT elt, size
6268	// Into: tmp1 = VSPLTIS[BHW] elt+16
6269	// tmp2 = VSPLTIS[BHW] -16
6270	// VADDU[BHW]M tmp1, tmp2
6271	SDValue EltVal = getI32Imm(Imm: Elt + `16`, dl);
6272	SDNode *Tmp1 = CurDAG->getMachineNode(Opcode: Opc1, dl, VT, Op1: EltVal);
6273	EltVal = getI32Imm(Imm: -`16`, dl);
6274	SDNode *Tmp2 = CurDAG->getMachineNode(Opcode: Opc1, dl, VT, Op1: EltVal);
6275	ReplaceNode(F: N, T: CurDAG->getMachineNode(Opcode: Opc2, dl, VT, Op1: SDValue (Tmp1, `0`),
6276	Op2: SDValue (Tmp2, `0`)));
6277	return;
6278	}
6279	}
6280	case PPCISD::LD_SPLAT: {
6281	// Here we want to handle splat load for type v16i8 and v8i16 when there is
6282	// no direct move, we don't need to use stack for this case. If target has
6283	// direct move, we should be able to get the best selection in the .td file.
6284	if (!Subtarget->hasAltivec() \|\| Subtarget->hasDirectMove())
6285	break;
6286
6287	EVT Type = N->getValueType(ResNo: `0`);
6288	if (Type != MVT::v16i8 && Type != MVT::v8i16)
6289	break;
6290
6291	// If the alignment for the load is 16 or bigger, we don't need the
6292	// permutated mask to get the required value. The value must be the 0
6293	// element in big endian target or 7/15 in little endian target in the
6294	// result vsx register of lvx instruction.
6295	// Select the instruction in the .td file.
6296	if (cast<MemIntrinsicSDNode>(Val: N)->getAlign() >= Align (`16`) &&
6297	isOffsetMultipleOf(N, Val: `16`))
6298	break;
6299
6300	SDValue ZeroReg =
6301	CurDAG->getRegister(Subtarget->isPPC64() ? PPC::ZERO8 : PPC::ZERO,
6302	Subtarget->isPPC64() ? MVT::i64 : MVT::i32);
6303	unsigned LIOpcode = Subtarget->isPPC64() ? PPC::LI8 : PPC::LI;
6304	// v16i8 LD_SPLAT addr
6305	// ======>
6306	// Mask = LVSR/LVSL 0, addr
6307	// LoadLow = LVX 0, addr
6308	// Perm = VPERM LoadLow, LoadLow, Mask
6309	// Splat = VSPLTB 15/0, Perm
6310	//
6311	// v8i16 LD_SPLAT addr
6312	// ======>
6313	// Mask = LVSR/LVSL 0, addr
6314	// LoadLow = LVX 0, addr
6315	// LoadHigh = LVX (LI, 1), addr
6316	// Perm = VPERM LoadLow, LoadHigh, Mask
6317	// Splat = VSPLTH 7/0, Perm
6318	unsigned SplatOp = (Type == MVT::v16i8) ? PPC::VSPLTB : PPC::VSPLTH;
6319	unsigned SplatElemIndex =
6320	Subtarget->isLittleEndian() ? ((Type == MVT::v16i8) ? `15` : `7`) : `0`;
6321
6322	SDNode *Mask = CurDAG->getMachineNode(
6323	Subtarget->isLittleEndian() ? PPC::LVSR : PPC::LVSL, dl, Type, ZeroReg,
6324	N->getOperand(`1`));
6325
6326	SDNode *LoadLow =
6327	CurDAG->getMachineNode(PPC::LVX, dl, MVT::v16i8, MVT::Other,
6328	{ZeroReg, N->getOperand(`1`), N->getOperand(`0`)});
6329
6330	SDNode *LoadHigh = LoadLow;
6331	if (Type == MVT::v8i16) {
6332	LoadHigh = CurDAG->getMachineNode(
6333	PPC::LVX, dl, MVT::v16i8, MVT::Other,
6334	{SDValue(CurDAG->getMachineNode(
6335	LIOpcode, dl, MVT::i32,
6336	CurDAG->getTargetConstant(`1`, dl, MVT::i8)),
6337	`0`),
6338	N->getOperand(`1`), SDValue(LoadLow, `1`)});
6339	}
6340
6341	CurDAG->ReplaceAllUsesOfValueWith(From: SDValue (N, `1`), To: SDValue (LoadHigh, `1`));
6342	transferMemOperands(N, Result: LoadHigh);
6343
6344	SDNode *Perm =
6345	CurDAG->getMachineNode(PPC::VPERM, dl, Type, SDValue(LoadLow, `0`),
6346	SDValue(LoadHigh, `0`), SDValue(Mask, `0`));
6347	CurDAG->SelectNodeTo(N, SplatOp, Type,
6348	CurDAG->getTargetConstant(SplatElemIndex, dl, MVT::i8),
6349	SDValue(Perm, `0`));
6350	return;
6351	}
6352	}
6353
6354	SelectCode(N);
6355	}
6356
6357	// If the target supports the cmpb instruction, do the idiom recognition here.
6358	// We don't do this as a DAG combine because we don't want to do it as nodes
6359	// are being combined (because we might miss part of the eventual idiom). We
6360	// don't want to do it during instruction selection because we want to reuse
6361	// the logic for lowering the masking operations already part of the
6362	// instruction selector.
6363	SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
6364	SDLoc dl(N);
6365
6366	assert(N->getOpcode() == ISD::OR &&
6367	"Only OR nodes are supported for CMPB");
6368
6369	SDValue Res;
6370	if (!Subtarget->hasCMPB())
6371	return Res;
6372
6373	if (N->getValueType(`0`) != MVT::i32 &&
6374	N->getValueType(`0`) != MVT::i64)
6375	return Res;
6376
6377	EVT VT = N->getValueType(ResNo: `0`);
6378
6379	SDValue RHS, LHS;
6380	bool BytesFound[`8`] = {false, false, false, false, false, false, false, false};
6381	uint64_t Mask = `0`, Alt = `0`;
6382
6383	auto IsByteSelectCC = [this](SDValue O, unsigned &b,
6384	uint64_t &Mask, uint64_t &Alt,
6385	SDValue &LHS, SDValue &RHS) {
6386	if (O.getOpcode() != ISD::SELECT_CC)
6387	return false;
6388	ISD::CondCode CC = cast<CondCodeSDNode>(Val: O.getOperand(i: `4`))->get();
6389
6390	if (!isa<ConstantSDNode>(Val: O.getOperand(i: `2`)) \|\|
6391	!isa<ConstantSDNode>(Val: O.getOperand(i: `3`)))
6392	return false;
6393
6394	uint64_t PM = O.getConstantOperandVal(i: `2`);
6395	uint64_t PAlt = O.getConstantOperandVal(i: `3`);
6396	for (b = `0`; b < `8`; ++b) {
6397	uint64_t Mask = UINT64_C(`0xFF`) << (`8`*b);
6398	if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt)
6399	break;
6400	}
6401
6402	if (b == `8`)
6403	return false;
6404	Mask \|= PM;
6405	Alt \|= PAlt;
6406
6407	if (!isa<ConstantSDNode>(Val: O.getOperand(i: `1`)) \|\|
6408	O.getConstantOperandVal(i: `1`) != `0`) {
6409	SDValue Op0 = O.getOperand(i: `0`), Op1 = O.getOperand(i: `1`);
6410	if (Op0.getOpcode() == ISD::TRUNCATE)
6411	Op0 = Op0.getOperand(i: `0`);
6412	if (Op1.getOpcode() == ISD::TRUNCATE)
6413	Op1 = Op1.getOperand(i: `0`);
6414
6415	if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&
6416	Op0.getOperand(i: `1`) == Op1.getOperand(i: `1`) && CC == ISD::SETEQ &&
6417	isa<ConstantSDNode>(Val: Op0.getOperand(i: `1`))) {
6418
6419	unsigned Bits = Op0.getValueSizeInBits();
6420	if (b != Bits/`8`-`1`)
6421	return false;
6422	if (Op0.getConstantOperandVal(i: `1`) != Bits-`8`)
6423	return false;
6424
6425	LHS = Op0.getOperand(i: `0`);
6426	RHS = Op1.getOperand(i: `0`);
6427	return true;
6428	}
6429
6430	// When we have small integers (i16 to be specific), the form present
6431	// post-legalization uses SETULT in the SELECT_CC for the
6432	// higher-order byte, depending on the fact that the
6433	// even-higher-order bytes are known to all be zero, for example:
6434	// select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
6435	// (so when the second byte is the same, because all higher-order
6436	// bits from bytes 3 and 4 are known to be zero, the result of the
6437	// xor can be at most 255)
6438	if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&
6439	isa<ConstantSDNode>(Val: O.getOperand(i: `1`))) {
6440
6441	uint64_t ULim = O.getConstantOperandVal(i: `1`);
6442	if (ULim != (UINT64_C(`1`) << b*`8`))
6443	return false;
6444
6445	// Now we need to make sure that the upper bytes are known to be
6446	// zero.
6447	unsigned Bits = Op0.getValueSizeInBits();
6448	if (!CurDAG->MaskedValueIsZero(
6449	Op: Op0, Mask: APInt::getHighBitsSet(numBits: Bits, hiBitsSet: Bits - (b + `1`) * `8`)))
6450	return false;
6451
6452	LHS = Op0.getOperand(i: `0`);
6453	RHS = Op0.getOperand(i: `1`);
6454	return true;
6455	}
6456
6457	return false;
6458	}
6459
6460	if (CC != ISD::SETEQ)
6461	return false;
6462
6463	SDValue Op = O.getOperand(i: `0`);
6464	if (Op.getOpcode() == ISD::AND) {
6465	if (!isa<ConstantSDNode>(Val: Op.getOperand(i: `1`)))
6466	return false;
6467	if (Op.getConstantOperandVal(i: `1`) != (UINT64_C(`0xFF`) << (`8`*b)))
6468	return false;
6469
6470	SDValue XOR = Op.getOperand(i: `0`);
6471	if (XOR.getOpcode() == ISD::TRUNCATE)
6472	XOR = XOR.getOperand(i: `0`);
6473	if (XOR.getOpcode() != ISD::XOR)
6474	return false;
6475
6476	LHS = XOR.getOperand(i: `0`);
6477	RHS = XOR.getOperand(i: `1`);
6478	return true;
6479	} else if (Op.getOpcode() == ISD::SRL) {
6480	if (!isa<ConstantSDNode>(Val: Op.getOperand(i: `1`)))
6481	return false;
6482	unsigned Bits = Op.getValueSizeInBits();
6483	if (b != Bits/`8`-`1`)
6484	return false;
6485	if (Op.getConstantOperandVal(i: `1`) != Bits-`8`)
6486	return false;
6487
6488	SDValue XOR = Op.getOperand(i: `0`);
6489	if (XOR.getOpcode() == ISD::TRUNCATE)
6490	XOR = XOR.getOperand(i: `0`);
6491	if (XOR.getOpcode() != ISD::XOR)
6492	return false;
6493
6494	LHS = XOR.getOperand(i: `0`);
6495	RHS = XOR.getOperand(i: `1`);
6496	return true;
6497	}
6498
6499	return false;
6500	};
6501
6502	SmallVector<SDValue, `8`> Queue(`1`, SDValue (N, `0`));
6503	while (!Queue.empty()) {
6504	SDValue V = Queue.pop_back_val();
6505
6506	for (const SDValue &O : V.getNode()->ops()) {
6507	unsigned b = `0`;
6508	uint64_t M = `0`, A = `0`;
6509	SDValue OLHS, ORHS;
6510	if (O.getOpcode() == ISD::OR) {
6511	Queue.push_back(Elt: O);
6512	} else if (IsByteSelectCC (O, b, M, A, OLHS, ORHS)) {
6513	if (!LHS) {
6514	LHS = OLHS;
6515	RHS = ORHS;
6516	BytesFound[b] = true;
6517	Mask \|= M;
6518	Alt \|= A;
6519	} else if ((LHS == ORHS && RHS == OLHS) \|\|
6520	(RHS == ORHS && LHS == OLHS)) {
6521	BytesFound[b] = true;
6522	Mask \|= M;
6523	Alt \|= A;
6524	} else {
6525	return Res;
6526	}
6527	} else {
6528	return Res;
6529	}
6530	}
6531	}
6532
6533	unsigned LastB = `0`, BCnt = `0`;
6534	for (unsigned i = `0`; i < `8`; ++i)
6535	if (BytesFound[LastB]) {
6536	++BCnt;
6537	LastB = i;
6538	}
6539
6540	if (!LastB \|\| BCnt < `2`)
6541	return Res;
6542
6543	// Because we'll be zero-extending the output anyway if we don't have a specific
6544	// value for each input byte (via the Mask), we can 'anyext' the inputs.
6545	if (LHS.getValueType() != VT) {
6546	LHS = CurDAG->getAnyExtOrTrunc(Op: LHS, DL: dl, VT);
6547	RHS = CurDAG->getAnyExtOrTrunc(Op: RHS, DL: dl, VT);
6548	}
6549
6550	Res = CurDAG->getNode(Opcode: PPCISD::CMPB, DL: dl, VT, N1: LHS, N2: RHS);
6551
6552	bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-`1`);
6553	if (NonTrivialMask && !Alt) {
6554	// Res = Mask & CMPB
6555	Res = CurDAG->getNode(Opcode: ISD::AND, DL: dl, VT, N1: Res,
6556	N2: CurDAG->getConstant(Val: Mask, DL: dl, VT));
6557	} else if (Alt) {
6558	// Res = (CMPB & Mask) \| (~CMPB & Alt)
6559	// Which, as suggested here:
6560	// https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
6561	// can be written as:
6562	// Res = Alt ^ ((Alt ^ Mask) & CMPB)
6563	// useful because the (Alt ^ Mask) can be pre-computed.
6564	Res = CurDAG->getNode(Opcode: ISD::AND, DL: dl, VT, N1: Res,
6565	N2: CurDAG->getConstant(Val: Mask ^ Alt, DL: dl, VT));
6566	Res = CurDAG->getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Res,
6567	N2: CurDAG->getConstant(Val: Alt, DL: dl, VT));
6568	}
6569
6570	return Res;
6571	}
6572
6573	// When CR bit registers are enabled, an extension of an i1 variable to a i32
6574	// or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
6575	// involves constant materialization of a 0 or a 1 or both. If the result of
6576	// the extension is then operated upon by some operator that can be constant
6577	// folded with a constant 0 or 1, and that constant can be materialized using
6578	// only one instruction (like a zero or one), then we should fold in those
6579	// operations with the select.
6580	void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
6581	if (!Subtarget->useCRBits())
6582	return;
6583
6584	if (N->getOpcode() != ISD::ZERO_EXTEND &&
6585	N->getOpcode() != ISD::SIGN_EXTEND &&
6586	N->getOpcode() != ISD::ANY_EXTEND)
6587	return;
6588
6589	if (N->getOperand(`0`).getValueType() != MVT::i1)
6590	return;
6591
6592	if (!N->hasOneUse())
6593	return;
6594
6595	SDLoc dl(N);
6596	EVT VT = N->getValueType(ResNo: `0`);
6597	SDValue Cond = N->getOperand(Num: `0`);
6598	SDValue ConstTrue =
6599	CurDAG->getConstant(Val: N->getOpcode() == ISD::SIGN_EXTEND ? -`1` : `1`, DL: dl, VT);
6600	SDValue ConstFalse = CurDAG->getConstant(Val: `0`, DL: dl, VT);
6601
6602	do {
6603	SDNode User = N->use_begin();
6604	if (User->getNumOperands() != `2`)
6605	break;
6606
6607	auto TryFold = [this, N, User, dl](SDValue Val) {
6608	SDValue UserO0 = User->getOperand(Num: `0`), UserO1 = User->getOperand(Num: `1`);
6609	SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
6610	SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
6611
6612	return CurDAG->FoldConstantArithmetic(Opcode: User->getOpcode(), DL: dl,
6613	VT: User->getValueType(ResNo: `0`), Ops: {O0, O1});
6614	};
6615
6616	// FIXME: When the semantics of the interaction between select and undef
6617	// are clearly defined, it may turn out to be unnecessary to break here.
6618	SDValue TrueRes = TryFold (ConstTrue);
6619	if (!TrueRes \|\| TrueRes.isUndef())
6620	break;
6621	SDValue FalseRes = TryFold (ConstFalse);
6622	if (!FalseRes \|\| FalseRes.isUndef())
6623	break;
6624
6625	// For us to materialize these using one instruction, we must be able to
6626	// represent them as signed 16-bit integers.
6627	uint64_t True = TrueRes ->getAsZExtVal(), False = FalseRes ->getAsZExtVal();
6628	if (!isInt<`16`>(x: True) \|\| !isInt<`16`>(x: False))
6629	break;
6630
6631	// We can replace User with a new SELECT node, and try again to see if we
6632	// can fold the select with its user.
6633	Res = CurDAG->getSelect(DL: dl, VT: User->getValueType(ResNo: `0`), Cond, LHS: TrueRes, RHS: FalseRes);
6634	N = User;
6635	ConstTrue = TrueRes;
6636	ConstFalse = FalseRes;
6637	} while (N->hasOneUse());
6638	}
6639
6640	void PPCDAGToDAGISel::PreprocessISelDAG() {
6641	SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
6642
6643	bool MadeChange = false;
6644	while (Position != CurDAG->allnodes_begin()) {
6645	SDNode N = &--Position;
6646	if (N->use_empty())
6647	continue;
6648
6649	SDValue Res;
6650	switch (N->getOpcode()) {
6651	default: break;
6652	case ISD::OR:
6653	Res = combineToCMPB(N);
6654	break;
6655	}
6656
6657	if (!Res)
6658	foldBoolExts(Res, N);
6659
6660	if (Res) {
6661	LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
6662	LLVM_DEBUG(N->dump(CurDAG));
6663	LLVM_DEBUG(dbgs() << "\nNew: ");
6664	LLVM_DEBUG(Res.getNode()->dump(CurDAG));
6665	LLVM_DEBUG(dbgs() << "\n");
6666
6667	CurDAG->ReplaceAllUsesOfValueWith(From: SDValue (N, `0`), To: Res);
6668	MadeChange = true;
6669	}
6670	}
6671
6672	if (MadeChange)
6673	CurDAG->RemoveDeadNodes();
6674	}
6675
6676	/// PostprocessISelDAG - Perform some late peephole optimizations
6677	/// on the DAG representation.
6678	void PPCDAGToDAGISel::PostprocessISelDAG() {
6679	// Skip peepholes at -O0.
6680	if (TM.getOptLevel() == CodeGenOptLevel::None)
6681	return;
6682
6683	PeepholePPC64();
6684	PeepholeCROps();
6685	PeepholePPC64ZExt();
6686	}
6687
6688	// Check if all users of this node will become isel where the second operand
6689	// is the constant zero. If this is so, and if we can negate the condition,
6690	// then we can flip the true and false operands. This will allow the zero to
6691	// be folded with the isel so that we don't need to materialize a register
6692	// containing zero.
6693	bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
6694	for (const SDNode *User : N->uses()) {
6695	if (!User->isMachineOpcode())
6696	return false;
6697	if (User->getMachineOpcode() != PPC::SELECT_I4 &&
6698	User->getMachineOpcode() != PPC::SELECT_I8)
6699	return false;
6700
6701	SDNode *Op1 = User->getOperand(Num: `1`).getNode();
6702	SDNode *Op2 = User->getOperand(Num: `2`).getNode();
6703	// If we have a degenerate select with two equal operands, swapping will
6704	// not do anything, and we may run into an infinite loop.
6705	if (Op1 == Op2)
6706	return false;
6707
6708	if (!Op2->isMachineOpcode())
6709	return false;
6710
6711	if (Op2->getMachineOpcode() != PPC::LI &&
6712	Op2->getMachineOpcode() != PPC::LI8)
6713	return false;
6714
6715	if (!isNullConstant(V: Op2->getOperand(Num: `0`)))
6716	return false;
6717	}
6718
6719	return true;
6720	}
6721
6722	void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
6723	SmallVector<SDNode *, `4`> ToReplace;
6724	for (SDNode *User : N->uses()) {
6725	assert((User->getMachineOpcode() == PPC::SELECT_I4 \|\|
6726	User->getMachineOpcode() == PPC::SELECT_I8) &&
6727	"Must have all select users");
6728	ToReplace.push_back(Elt: User);
6729	}
6730
6731	for (SDNode *User : ToReplace) {
6732	SDNode *ResNode =
6733	CurDAG->getMachineNode(Opcode: User->getMachineOpcode(), dl: SDLoc (User),
6734	VT: User->getValueType(ResNo: `0`), Op1: User->getOperand(Num: `0`),
6735	Op2: User->getOperand(Num: `2`),
6736	Op3: User->getOperand(Num: `1`));
6737
6738	LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
6739	LLVM_DEBUG(User->dump(CurDAG));
6740	LLVM_DEBUG(dbgs() << "\nNew: ");
6741	LLVM_DEBUG(ResNode->dump(CurDAG));
6742	LLVM_DEBUG(dbgs() << "\n");
6743
6744	ReplaceUses(F: User, T: ResNode);
6745	}
6746	}
6747
6748	void PPCDAGToDAGISel::PeepholeCROps() {
6749	bool IsModified;
6750	do {
6751	IsModified = false;
6752	for (SDNode &Node : CurDAG->allnodes()) {
6753	MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Val: &Node);
6754	if (!MachineNode \|\| MachineNode->use_empty())
6755	continue;
6756	SDNode *ResNode = MachineNode;
6757
6758	bool Op1Set = false, Op1Unset = false,
6759	Op1Not = false,
6760	Op2Set = false, Op2Unset = false,
6761	Op2Not = false;
6762
6763	unsigned Opcode = MachineNode->getMachineOpcode();
6764	switch (Opcode) {
6765	default: break;
6766	case PPC::CRAND:
6767	case PPC::CRNAND:
6768	case PPC::CROR:
6769	case PPC::CRXOR:
6770	case PPC::CRNOR:
6771	case PPC::CREQV:
6772	case PPC::CRANDC:
6773	case PPC::CRORC: {
6774	SDValue Op = MachineNode->getOperand(Num: `1`);
6775	if (Op.isMachineOpcode()) {
6776	if (Op.getMachineOpcode() == PPC::CRSET)
6777	Op2Set = true;
6778	else if (Op.getMachineOpcode() == PPC::CRUNSET)
6779	Op2Unset = true;
6780	else if ((Op.getMachineOpcode() == PPC::CRNOR &&
6781	Op.getOperand(`0`) == Op.getOperand(`1`)) \|\|
6782	Op.getMachineOpcode() == PPC::CRNOT)
6783	Op2Not = true;
6784	}
6785	[[fallthrough]];
6786	}
6787	case PPC::BC:
6788	case PPC::BCn:
6789	case PPC::SELECT_I4:
6790	case PPC::SELECT_I8:
6791	case PPC::SELECT_F4:
6792	case PPC::SELECT_F8:
6793	case PPC::SELECT_SPE:
6794	case PPC::SELECT_SPE4:
6795	case PPC::SELECT_VRRC:
6796	case PPC::SELECT_VSFRC:
6797	case PPC::SELECT_VSSRC:
6798	case PPC::SELECT_VSRC: {
6799	SDValue Op = MachineNode->getOperand(Num: `0`);
6800	if (Op.isMachineOpcode()) {
6801	if (Op.getMachineOpcode() == PPC::CRSET)
6802	Op1Set = true;
6803	else if (Op.getMachineOpcode() == PPC::CRUNSET)
6804	Op1Unset = true;
6805	else if ((Op.getMachineOpcode() == PPC::CRNOR &&
6806	Op.getOperand(`0`) == Op.getOperand(`1`)) \|\|
6807	Op.getMachineOpcode() == PPC::CRNOT)
6808	Op1Not = true;
6809	}
6810	}
6811	break;
6812	}
6813
6814	bool SelectSwap = false;
6815	switch (Opcode) {
6816	default: break;
6817	case PPC::CRAND:
6818	if (MachineNode->getOperand(Num: `0`) == MachineNode->getOperand(Num: `1`))
6819	// x & x = x
6820	ResNode = MachineNode->getOperand(Num: `0`).getNode();
6821	else if (Op1Set)
6822	// 1 & y = y
6823	ResNode = MachineNode->getOperand(Num: `1`).getNode();
6824	else if (Op2Set)
6825	// x & 1 = x
6826	ResNode = MachineNode->getOperand(Num: `0`).getNode();
6827	else if (Op1Unset \|\| Op2Unset)
6828	// x & 0 = 0 & y = 0
6829	ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
6830	MVT::i1);
6831	else if (Op1Not)
6832	// ~x & y = andc(y, x)
6833	ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
6834	MVT::i1, MachineNode->getOperand(`1`),
6835	MachineNode->getOperand(`0`).
6836	getOperand(`0`));
6837	else if (Op2Not)
6838	// x & ~y = andc(x, y)
6839	ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
6840	MVT::i1, MachineNode->getOperand(`0`),
6841	MachineNode->getOperand(`1`).
6842	getOperand(`0`));
6843	else if (AllUsersSelectZero(N: MachineNode)) {
6844	ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
6845	MVT::i1, MachineNode->getOperand(`0`),
6846	MachineNode->getOperand(`1`));
6847	SelectSwap = true;
6848	}
6849	break;
6850	case PPC::CRNAND:
6851	if (MachineNode->getOperand(Num: `0`) == MachineNode->getOperand(Num: `1`))
6852	// nand(x, x) -> nor(x, x)
6853	ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6854	MVT::i1, MachineNode->getOperand(`0`),
6855	MachineNode->getOperand(`0`));
6856	else if (Op1Set)
6857	// nand(1, y) -> nor(y, y)
6858	ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6859	MVT::i1, MachineNode->getOperand(`1`),
6860	MachineNode->getOperand(`1`));
6861	else if (Op2Set)
6862	// nand(x, 1) -> nor(x, x)
6863	ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6864	MVT::i1, MachineNode->getOperand(`0`),
6865	MachineNode->getOperand(`0`));
6866	else if (Op1Unset \|\| Op2Unset)
6867	// nand(x, 0) = nand(0, y) = 1
6868	ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
6869	MVT::i1);
6870	else if (Op1Not)
6871	// nand(~x, y) = ~(~x & y) = x \| ~y = orc(x, y)
6872	ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6873	MVT::i1, MachineNode->getOperand(`0`).
6874	getOperand(`0`),
6875	MachineNode->getOperand(`1`));
6876	else if (Op2Not)
6877	// nand(x, ~y) = ~x \| y = orc(y, x)
6878	ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6879	MVT::i1, MachineNode->getOperand(`1`).
6880	getOperand(`0`),
6881	MachineNode->getOperand(`0`));
6882	else if (AllUsersSelectZero(N: MachineNode)) {
6883	ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
6884	MVT::i1, MachineNode->getOperand(`0`),
6885	MachineNode->getOperand(`1`));
6886	SelectSwap = true;
6887	}
6888	break;
6889	case PPC::CROR:
6890	if (MachineNode->getOperand(Num: `0`) == MachineNode->getOperand(Num: `1`))
6891	// x \| x = x
6892	ResNode = MachineNode->getOperand(Num: `0`).getNode();
6893	else if (Op1Set \|\| Op2Set)
6894	// x \| 1 = 1 \| y = 1
6895	ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
6896	MVT::i1);
6897	else if (Op1Unset)
6898	// 0 \| y = y
6899	ResNode = MachineNode->getOperand(Num: `1`).getNode();
6900	else if (Op2Unset)
6901	// x \| 0 = x
6902	ResNode = MachineNode->getOperand(Num: `0`).getNode();
6903	else if (Op1Not)
6904	// ~x \| y = orc(y, x)
6905	ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6906	MVT::i1, MachineNode->getOperand(`1`),
6907	MachineNode->getOperand(`0`).
6908	getOperand(`0`));
6909	else if (Op2Not)
6910	// x \| ~y = orc(x, y)
6911	ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6912	MVT::i1, MachineNode->getOperand(`0`),
6913	MachineNode->getOperand(`1`).
6914	getOperand(`0`));
6915	else if (AllUsersSelectZero(N: MachineNode)) {
6916	ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6917	MVT::i1, MachineNode->getOperand(`0`),
6918	MachineNode->getOperand(`1`));
6919	SelectSwap = true;
6920	}
6921	break;
6922	case PPC::CRXOR:
6923	if (MachineNode->getOperand(Num: `0`) == MachineNode->getOperand(Num: `1`))
6924	// xor(x, x) = 0
6925	ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
6926	MVT::i1);
6927	else if (Op1Set)
6928	// xor(1, y) -> nor(y, y)
6929	ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6930	MVT::i1, MachineNode->getOperand(`1`),
6931	MachineNode->getOperand(`1`));
6932	else if (Op2Set)
6933	// xor(x, 1) -> nor(x, x)
6934	ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6935	MVT::i1, MachineNode->getOperand(`0`),
6936	MachineNode->getOperand(`0`));
6937	else if (Op1Unset)
6938	// xor(0, y) = y
6939	ResNode = MachineNode->getOperand(Num: `1`).getNode();
6940	else if (Op2Unset)
6941	// xor(x, 0) = x
6942	ResNode = MachineNode->getOperand(Num: `0`).getNode();
6943	else if (Op1Not)
6944	// xor(~x, y) = eqv(x, y)
6945	ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
6946	MVT::i1, MachineNode->getOperand(`0`).
6947	getOperand(`0`),
6948	MachineNode->getOperand(`1`));
6949	else if (Op2Not)
6950	// xor(x, ~y) = eqv(x, y)
6951	ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
6952	MVT::i1, MachineNode->getOperand(`0`),
6953	MachineNode->getOperand(`1`).
6954	getOperand(`0`));
6955	else if (AllUsersSelectZero(N: MachineNode)) {
6956	ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
6957	MVT::i1, MachineNode->getOperand(`0`),
6958	MachineNode->getOperand(`1`));
6959	SelectSwap = true;
6960	}
6961	break;
6962	case PPC::CRNOR:
6963	if (Op1Set \|\| Op2Set)
6964	// nor(1, y) -> 0
6965	ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
6966	MVT::i1);
6967	else if (Op1Unset)
6968	// nor(0, y) = ~y -> nor(y, y)
6969	ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6970	MVT::i1, MachineNode->getOperand(`1`),
6971	MachineNode->getOperand(`1`));
6972	else if (Op2Unset)
6973	// nor(x, 0) = ~x
6974	ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6975	MVT::i1, MachineNode->getOperand(`0`),
6976	MachineNode->getOperand(`0`));
6977	else if (Op1Not)
6978	// nor(~x, y) = andc(x, y)
6979	ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
6980	MVT::i1, MachineNode->getOperand(`0`).
6981	getOperand(`0`),
6982	MachineNode->getOperand(`1`));
6983	else if (Op2Not)
6984	// nor(x, ~y) = andc(y, x)
6985	ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
6986	MVT::i1, MachineNode->getOperand(`1`).
6987	getOperand(`0`),
6988	MachineNode->getOperand(`0`));
6989	else if (AllUsersSelectZero(N: MachineNode)) {
6990	ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
6991	MVT::i1, MachineNode->getOperand(`0`),
6992	MachineNode->getOperand(`1`));
6993	SelectSwap = true;
6994	}
6995	break;
6996	case PPC::CREQV:
6997	if (MachineNode->getOperand(Num: `0`) == MachineNode->getOperand(Num: `1`))
6998	// eqv(x, x) = 1
6999	ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
7000	MVT::i1);
7001	else if (Op1Set)
7002	// eqv(1, y) = y
7003	ResNode = MachineNode->getOperand(Num: `1`).getNode();
7004	else if (Op2Set)
7005	// eqv(x, 1) = x
7006	ResNode = MachineNode->getOperand(Num: `0`).getNode();
7007	else if (Op1Unset)
7008	// eqv(0, y) = ~y -> nor(y, y)
7009	ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7010	MVT::i1, MachineNode->getOperand(`1`),
7011	MachineNode->getOperand(`1`));
7012	else if (Op2Unset)
7013	// eqv(x, 0) = ~x
7014	ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7015	MVT::i1, MachineNode->getOperand(`0`),
7016	MachineNode->getOperand(`0`));
7017	else if (Op1Not)
7018	// eqv(~x, y) = xor(x, y)
7019	ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
7020	MVT::i1, MachineNode->getOperand(`0`).
7021	getOperand(`0`),
7022	MachineNode->getOperand(`1`));
7023	else if (Op2Not)
7024	// eqv(x, ~y) = xor(x, y)
7025	ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
7026	MVT::i1, MachineNode->getOperand(`0`),
7027	MachineNode->getOperand(`1`).
7028	getOperand(`0`));
7029	else if (AllUsersSelectZero(N: MachineNode)) {
7030	ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
7031	MVT::i1, MachineNode->getOperand(`0`),
7032	MachineNode->getOperand(`1`));
7033	SelectSwap = true;
7034	}
7035	break;
7036	case PPC::CRANDC:
7037	if (MachineNode->getOperand(Num: `0`) == MachineNode->getOperand(Num: `1`))
7038	// andc(x, x) = 0
7039	ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
7040	MVT::i1);
7041	else if (Op1Set)
7042	// andc(1, y) = ~y
7043	ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7044	MVT::i1, MachineNode->getOperand(`1`),
7045	MachineNode->getOperand(`1`));
7046	else if (Op1Unset \|\| Op2Set)
7047	// andc(0, y) = andc(x, 1) = 0
7048	ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
7049	MVT::i1);
7050	else if (Op2Unset)
7051	// andc(x, 0) = x
7052	ResNode = MachineNode->getOperand(Num: `0`).getNode();
7053	else if (Op1Not)
7054	// andc(~x, y) = ~(x \| y) = nor(x, y)
7055	ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7056	MVT::i1, MachineNode->getOperand(`0`).
7057	getOperand(`0`),
7058	MachineNode->getOperand(`1`));
7059	else if (Op2Not)
7060	// andc(x, ~y) = x & y
7061	ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
7062	MVT::i1, MachineNode->getOperand(`0`),
7063	MachineNode->getOperand(`1`).
7064	getOperand(`0`));
7065	else if (AllUsersSelectZero(N: MachineNode)) {
7066	ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
7067	MVT::i1, MachineNode->getOperand(`1`),
7068	MachineNode->getOperand(`0`));
7069	SelectSwap = true;
7070	}
7071	break;
7072	case PPC::CRORC:
7073	if (MachineNode->getOperand(Num: `0`) == MachineNode->getOperand(Num: `1`))
7074	// orc(x, x) = 1
7075	ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
7076	MVT::i1);
7077	else if (Op1Set \|\| Op2Unset)
7078	// orc(1, y) = orc(x, 0) = 1
7079	ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
7080	MVT::i1);
7081	else if (Op2Set)
7082	// orc(x, 1) = x
7083	ResNode = MachineNode->getOperand(Num: `0`).getNode();
7084	else if (Op1Unset)
7085	// orc(0, y) = ~y
7086	ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7087	MVT::i1, MachineNode->getOperand(`1`),
7088	MachineNode->getOperand(`1`));
7089	else if (Op1Not)
7090	// orc(~x, y) = ~(x & y) = nand(x, y)
7091	ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
7092	MVT::i1, MachineNode->getOperand(`0`).
7093	getOperand(`0`),
7094	MachineNode->getOperand(`1`));
7095	else if (Op2Not)
7096	// orc(x, ~y) = x \| y
7097	ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
7098	MVT::i1, MachineNode->getOperand(`0`),
7099	MachineNode->getOperand(`1`).
7100	getOperand(`0`));
7101	else if (AllUsersSelectZero(N: MachineNode)) {
7102	ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
7103	MVT::i1, MachineNode->getOperand(`1`),
7104	MachineNode->getOperand(`0`));
7105	SelectSwap = true;
7106	}
7107	break;
7108	case PPC::SELECT_I4:
7109	case PPC::SELECT_I8:
7110	case PPC::SELECT_F4:
7111	case PPC::SELECT_F8:
7112	case PPC::SELECT_SPE:
7113	case PPC::SELECT_SPE4:
7114	case PPC::SELECT_VRRC:
7115	case PPC::SELECT_VSFRC:
7116	case PPC::SELECT_VSSRC:
7117	case PPC::SELECT_VSRC:
7118	if (Op1Set)
7119	ResNode = MachineNode->getOperand(Num: `1`).getNode();
7120	else if (Op1Unset)
7121	ResNode = MachineNode->getOperand(Num: `2`).getNode();
7122	else if (Op1Not)
7123	ResNode = CurDAG->getMachineNode(Opcode: MachineNode->getMachineOpcode(),
7124	dl: SDLoc (MachineNode),
7125	VT: MachineNode->getValueType(ResNo: `0`),
7126	Op1: MachineNode->getOperand(Num: `0`).
7127	getOperand(i: `0`),
7128	Op2: MachineNode->getOperand(Num: `2`),
7129	Op3: MachineNode->getOperand(Num: `1`));
7130	break;
7131	case PPC::BC:
7132	case PPC::BCn:
7133	if (Op1Not)
7134	ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :
7135	PPC::BC,
7136	SDLoc(MachineNode),
7137	MVT::Other,
7138	MachineNode->getOperand(`0`).
7139	getOperand(`0`),
7140	MachineNode->getOperand(`1`),
7141	MachineNode->getOperand(`2`));
7142	// FIXME: Handle Op1Set, Op1Unset here too.
7143	break;
7144	}
7145
7146	// If we're inverting this node because it is used only by selects that
7147	// we'd like to swap, then swap the selects before the node replacement.
7148	if (SelectSwap)
7149	SwapAllSelectUsers(N: MachineNode);
7150
7151	if (ResNode != MachineNode) {
7152	LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
7153	LLVM_DEBUG(MachineNode->dump(CurDAG));
7154	LLVM_DEBUG(dbgs() << "\nNew: ");
7155	LLVM_DEBUG(ResNode->dump(CurDAG));
7156	LLVM_DEBUG(dbgs() << "\n");
7157
7158	ReplaceUses(F: MachineNode, T: ResNode);
7159	IsModified = true;
7160	}
7161	}
7162	if (IsModified)
7163	CurDAG->RemoveDeadNodes();
7164	} while (IsModified);
7165	}
7166
7167	// Gather the set of 32-bit operations that are known to have their
7168	// higher-order 32 bits zero, where ToPromote contains all such operations.
7169	static bool PeepholePPC64ZExtGather(SDValue Op32,
7170	SmallPtrSetImpl<SDNode *> &ToPromote) {
7171	if (!Op32.isMachineOpcode())
7172	return false;
7173
7174	// First, check for the "frontier" instructions (those that will clear the
7175	// higher-order 32 bits.
7176
7177	// For RLWINM and RLWNM, we need to make sure that the mask does not wrap
7178	// around. If it does not, then these instructions will clear the
7179	// higher-order bits.
7180	if ((Op32.getMachineOpcode() == PPC::RLWINM \|\|
7181	Op32.getMachineOpcode() == PPC::RLWNM) &&
7182	Op32.getConstantOperandVal(`2`) <= Op32.getConstantOperandVal(`3`)) {
7183	ToPromote.insert(Ptr: Op32.getNode());
7184	return true;
7185	}
7186
7187	// SLW and SRW always clear the higher-order bits.
7188	if (Op32.getMachineOpcode() == PPC::SLW \|\|
7189	Op32.getMachineOpcode() == PPC::SRW) {
7190	ToPromote.insert(Ptr: Op32.getNode());
7191	return true;
7192	}
7193
7194	// For LI and LIS, we need the immediate to be positive (so that it is not
7195	// sign extended).
7196	if (Op32.getMachineOpcode() == PPC::LI \|\|
7197	Op32.getMachineOpcode() == PPC::LIS) {
7198	if (!isUInt<`15`>(x: Op32.getConstantOperandVal(i: `0`)))
7199	return false;
7200
7201	ToPromote.insert(Ptr: Op32.getNode());
7202	return true;
7203	}
7204
7205	// LHBRX and LWBRX always clear the higher-order bits.
7206	if (Op32.getMachineOpcode() == PPC::LHBRX \|\|
7207	Op32.getMachineOpcode() == PPC::LWBRX) {
7208	ToPromote.insert(Ptr: Op32.getNode());
7209	return true;
7210	}
7211
7212	// CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended.
7213	if (Op32.getMachineOpcode() == PPC::CNTLZW \|\|
7214	Op32.getMachineOpcode() == PPC::CNTTZW) {
7215	ToPromote.insert(Ptr: Op32.getNode());
7216	return true;
7217	}
7218
7219	// Next, check for those instructions we can look through.
7220
7221	// Assuming the mask does not wrap around, then the higher-order bits are
7222	// taken directly from the first operand.
7223	if (Op32.getMachineOpcode() == PPC::RLWIMI &&
7224	Op32.getConstantOperandVal(`3`) <= Op32.getConstantOperandVal(`4`)) {
7225	SmallPtrSet<SDNode *, `16`> ToPromote1;
7226	if (!PeepholePPC64ZExtGather(Op32: Op32.getOperand(i: `0`), ToPromote&: ToPromote1))
7227	return false;
7228
7229	ToPromote.insert(Ptr: Op32.getNode());
7230	ToPromote.insert(I: ToPromote1.begin(), E: ToPromote1.end());
7231	return true;
7232	}
7233
7234	// For OR, the higher-order bits are zero if that is true for both operands.
7235	// For SELECT_I4, the same is true (but the relevant operand numbers are
7236	// shifted by 1).
7237	if (Op32.getMachineOpcode() == PPC::OR \|\|
7238	Op32.getMachineOpcode() == PPC::SELECT_I4) {
7239	unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? `1` : `0`;
7240	SmallPtrSet<SDNode *, `16`> ToPromote1;
7241	if (!PeepholePPC64ZExtGather(Op32: Op32.getOperand(i: B+`0`), ToPromote&: ToPromote1))
7242	return false;
7243	if (!PeepholePPC64ZExtGather(Op32: Op32.getOperand(i: B+`1`), ToPromote&: ToPromote1))
7244	return false;
7245
7246	ToPromote.insert(Ptr: Op32.getNode());
7247	ToPromote.insert(I: ToPromote1.begin(), E: ToPromote1.end());
7248	return true;
7249	}
7250
7251	// For ORI and ORIS, we need the higher-order bits of the first operand to be
7252	// zero, and also for the constant to be positive (so that it is not sign
7253	// extended).
7254	if (Op32.getMachineOpcode() == PPC::ORI \|\|
7255	Op32.getMachineOpcode() == PPC::ORIS) {
7256	SmallPtrSet<SDNode *, `16`> ToPromote1;
7257	if (!PeepholePPC64ZExtGather(Op32: Op32.getOperand(i: `0`), ToPromote&: ToPromote1))
7258	return false;
7259	if (!isUInt<`15`>(x: Op32.getConstantOperandVal(i: `1`)))
7260	return false;
7261
7262	ToPromote.insert(Ptr: Op32.getNode());
7263	ToPromote.insert(I: ToPromote1.begin(), E: ToPromote1.end());
7264	return true;
7265	}
7266
7267	// The higher-order bits of AND are zero if that is true for at least one of
7268	// the operands.
7269	if (Op32.getMachineOpcode() == PPC::AND) {
7270	SmallPtrSet<SDNode *, `16`> ToPromote1, ToPromote2;
7271	bool Op0OK =
7272	PeepholePPC64ZExtGather(Op32: Op32.getOperand(i: `0`), ToPromote&: ToPromote1);
7273	bool Op1OK =
7274	PeepholePPC64ZExtGather(Op32: Op32.getOperand(i: `1`), ToPromote&: ToPromote2);
7275	if (!Op0OK && !Op1OK)
7276	return false;
7277
7278	ToPromote.insert(Ptr: Op32.getNode());
7279
7280	if (Op0OK)
7281	ToPromote.insert(I: ToPromote1.begin(), E: ToPromote1.end());
7282
7283	if (Op1OK)
7284	ToPromote.insert(I: ToPromote2.begin(), E: ToPromote2.end());
7285
7286	return true;
7287	}
7288
7289	// For ANDI and ANDIS, the higher-order bits are zero if either that is true
7290	// of the first operand, or if the second operand is positive (so that it is
7291	// not sign extended).
7292	if (Op32.getMachineOpcode() == PPC::ANDI_rec \|\|
7293	Op32.getMachineOpcode() == PPC::ANDIS_rec) {
7294	SmallPtrSet<SDNode *, `16`> ToPromote1;
7295	bool Op0OK =
7296	PeepholePPC64ZExtGather(Op32: Op32.getOperand(i: `0`), ToPromote&: ToPromote1);
7297	bool Op1OK = isUInt<`15`>(x: Op32.getConstantOperandVal(i: `1`));
7298	if (!Op0OK && !Op1OK)
7299	return false;
7300
7301	ToPromote.insert(Ptr: Op32.getNode());
7302
7303	if (Op0OK)
7304	ToPromote.insert(I: ToPromote1.begin(), E: ToPromote1.end());
7305
7306	return true;
7307	}
7308
7309	return false;
7310	}
7311
7312	void PPCDAGToDAGISel::PeepholePPC64ZExt() {
7313	if (!Subtarget->isPPC64())
7314	return;
7315
7316	// When we zero-extend from i32 to i64, we use a pattern like this:
7317	// def : Pat<(i64 (zext i32:$in)),
7318	// (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
7319	// 0, 32)>;
7320	// There are several 32-bit shift/rotate instructions, however, that will
7321	// clear the higher-order bits of their output, rendering the RLDICL
7322	// unnecessary. When that happens, we remove it here, and redefine the
7323	// relevant 32-bit operation to be a 64-bit operation.
7324
7325	SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
7326
7327	bool MadeChange = false;
7328	while (Position != CurDAG->allnodes_begin()) {
7329	SDNode N = &--Position;
7330	// Skip dead nodes and any non-machine opcodes.
7331	if (N->use_empty() \|\| !N->isMachineOpcode())
7332	continue;
7333
7334	if (N->getMachineOpcode() != PPC::RLDICL)
7335	continue;
7336
7337	if (N->getConstantOperandVal(Num: `1`) != `0` \|\|
7338	N->getConstantOperandVal(Num: `2`) != `32`)
7339	continue;
7340
7341	SDValue ISR = N->getOperand(Num: `0`);
7342	if (!ISR.isMachineOpcode() \|\|
7343	ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)
7344	continue;
7345
7346	if (!ISR.hasOneUse())
7347	continue;
7348
7349	if (ISR.getConstantOperandVal(`2`) != PPC::sub_32)
7350	continue;
7351
7352	SDValue IDef = ISR.getOperand(i: `0`);
7353	if (!IDef.isMachineOpcode() \|\|
7354	IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)
7355	continue;
7356
7357	// We now know that we're looking at a canonical i32 -> i64 zext. See if we
7358	// can get rid of it.
7359
7360	SDValue Op32 = ISR ->getOperand(Num: `1`);
7361	if (!Op32.isMachineOpcode())
7362	continue;
7363
7364	// There are some 32-bit instructions that always clear the high-order 32
7365	// bits, there are also some instructions (like AND) that we can look
7366	// through.
7367	SmallPtrSet<SDNode *, `16`> ToPromote;
7368	if (!PeepholePPC64ZExtGather(Op32, ToPromote))
7369	continue;
7370
7371	// If the ToPromote set contains nodes that have uses outside of the set
7372	// (except for the original INSERT_SUBREG), then abort the transformation.
7373	bool OutsideUse = false;
7374	for (SDNode *PN : ToPromote) {
7375	for (SDNode *UN : PN->uses()) {
7376	if (!ToPromote.count(Ptr: UN) && UN != ISR.getNode()) {
7377	OutsideUse = true;
7378	break;
7379	}
7380	}
7381
7382	if (OutsideUse)
7383	break;
7384	}
7385	if (OutsideUse)
7386	continue;
7387
7388	MadeChange = true;
7389
7390	// We now know that this zero extension can be removed by promoting to
7391	// nodes in ToPromote to 64-bit operations, where for operations in the
7392	// frontier of the set, we need to insert INSERT_SUBREGs for their
7393	// operands.
7394	for (SDNode *PN : ToPromote) {
7395	unsigned NewOpcode;
7396	switch (PN->getMachineOpcode()) {
7397	default:
7398	llvm_unreachable("Don't know the 64-bit variant of this instruction");
7399	case PPC::RLWINM: NewOpcode = PPC::RLWINM8; break;
7400	case PPC::RLWNM: NewOpcode = PPC::RLWNM8; break;
7401	case PPC::SLW: NewOpcode = PPC::SLW8; break;
7402	case PPC::SRW: NewOpcode = PPC::SRW8; break;
7403	case PPC::LI: NewOpcode = PPC::LI8; break;
7404	case PPC::LIS: NewOpcode = PPC::LIS8; break;
7405	case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break;
7406	case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break;
7407	case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break;
7408	case PPC::CNTTZW: NewOpcode = PPC::CNTTZW8; break;
7409	case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break;
7410	case PPC::OR: NewOpcode = PPC::OR8; break;
7411	case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
7412	case PPC::ORI: NewOpcode = PPC::ORI8; break;
7413	case PPC::ORIS: NewOpcode = PPC::ORIS8; break;
7414	case PPC::AND: NewOpcode = PPC::AND8; break;
7415	case PPC::ANDI_rec:
7416	NewOpcode = PPC::ANDI8_rec;
7417	break;
7418	case PPC::ANDIS_rec:
7419	NewOpcode = PPC::ANDIS8_rec;
7420	break;
7421	}
7422
7423	// Note: During the replacement process, the nodes will be in an
7424	// inconsistent state (some instructions will have operands with values
7425	// of the wrong type). Once done, however, everything should be right
7426	// again.
7427
7428	SmallVector<SDValue, `4`> Ops;
7429	for (const SDValue &V : PN->ops()) {
7430	if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&
7431	!isa<ConstantSDNode>(V)) {
7432	SDValue ReplOpOps[] = { ISR.getOperand(i: `0`), V, ISR.getOperand(i: `2`) };
7433	SDNode *ReplOp =
7434	CurDAG->getMachineNode(Opcode: TargetOpcode::INSERT_SUBREG, dl: SDLoc (V),
7435	VTs: ISR.getNode()->getVTList(), Ops: ReplOpOps);
7436	Ops.push_back(Elt: SDValue (ReplOp, `0`));
7437	} else {
7438	Ops.push_back(Elt: V);
7439	}
7440	}
7441
7442	// Because all to-be-promoted nodes only have users that are other
7443	// promoted nodes (or the original INSERT_SUBREG), we can safely replace
7444	// the i32 result value type with i64.
7445
7446	SmallVector<EVT, `2`> NewVTs;
7447	SDVTList VTs = PN->getVTList();
7448	for (unsigned i = `0`, ie = VTs.NumVTs; i != ie; ++i)
7449	if (VTs.VTs[i] == MVT::i32)
7450	NewVTs.push_back(MVT::i64);
7451	else
7452	NewVTs.push_back(Elt: VTs.VTs[i]);
7453
7454	LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ");
7455	LLVM_DEBUG(PN->dump(CurDAG));
7456
7457	CurDAG->SelectNodeTo(N: PN, MachineOpc: NewOpcode, VTs: CurDAG->getVTList(VTs: NewVTs), Ops);
7458
7459	LLVM_DEBUG(dbgs() << "\nNew: ");
7460	LLVM_DEBUG(PN->dump(CurDAG));
7461	LLVM_DEBUG(dbgs() << "\n");
7462	}
7463
7464	// Now we replace the original zero extend and its associated INSERT_SUBREG
7465	// with the value feeding the INSERT_SUBREG (which has now been promoted to
7466	// return an i64).
7467
7468	LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ");
7469	LLVM_DEBUG(N->dump(CurDAG));
7470	LLVM_DEBUG(dbgs() << "\nNew: ");
7471	LLVM_DEBUG(Op32.getNode()->dump(CurDAG));
7472	LLVM_DEBUG(dbgs() << "\n");
7473
7474	ReplaceUses(F: N, T: Op32.getNode());
7475	}
7476
7477	if (MadeChange)
7478	CurDAG->RemoveDeadNodes();
7479	}
7480
7481	static bool isVSXSwap(SDValue N) {
7482	if (!N ->isMachineOpcode())
7483	return false;
7484	unsigned Opc = N ->getMachineOpcode();
7485
7486	// Single-operand XXPERMDI or the regular XXPERMDI/XXSLDWI where the immediate
7487	// operand is 2.
7488	if (Opc == PPC::XXPERMDIs) {
7489	return isa<ConstantSDNode>(Val: N ->getOperand(Num: `1`)) &&
7490	N ->getConstantOperandVal(Num: `1`) == `2`;
7491	} else if (Opc == PPC::XXPERMDI \|\| Opc == PPC::XXSLDWI) {
7492	return N ->getOperand(Num: `0`) == N ->getOperand(Num: `1`) &&
7493	isa<ConstantSDNode>(Val: N ->getOperand(Num: `2`)) &&
7494	N ->getConstantOperandVal(Num: `2`) == `2`;
7495	}
7496
7497	return false;
7498	}
7499
7500	// TODO: Make this complete and replace with a table-gen bit.
7501	static bool isLaneInsensitive(SDValue N) {
7502	if (!N ->isMachineOpcode())
7503	return false;
7504	unsigned Opc = N ->getMachineOpcode();
7505
7506	switch (Opc) {
7507	default:
7508	return false;
7509	case PPC::VAVGSB:
7510	case PPC::VAVGUB:
7511	case PPC::VAVGSH:
7512	case PPC::VAVGUH:
7513	case PPC::VAVGSW:
7514	case PPC::VAVGUW:
7515	case PPC::VMAXFP:
7516	case PPC::VMAXSB:
7517	case PPC::VMAXUB:
7518	case PPC::VMAXSH:
7519	case PPC::VMAXUH:
7520	case PPC::VMAXSW:
7521	case PPC::VMAXUW:
7522	case PPC::VMINFP:
7523	case PPC::VMINSB:
7524	case PPC::VMINUB:
7525	case PPC::VMINSH:
7526	case PPC::VMINUH:
7527	case PPC::VMINSW:
7528	case PPC::VMINUW:
7529	case PPC::VADDFP:
7530	case PPC::VADDUBM:
7531	case PPC::VADDUHM:
7532	case PPC::VADDUWM:
7533	case PPC::VSUBFP:
7534	case PPC::VSUBUBM:
7535	case PPC::VSUBUHM:
7536	case PPC::VSUBUWM:
7537	case PPC::VAND:
7538	case PPC::VANDC:
7539	case PPC::VOR:
7540	case PPC::VORC:
7541	case PPC::VXOR:
7542	case PPC::VNOR:
7543	case PPC::VMULUWM:
7544	return true;
7545	}
7546	}
7547
7548	// Try to simplify (xxswap (vec-op (xxswap) (xxswap))) where vec-op is
7549	// lane-insensitive.
7550	static void reduceVSXSwap(SDNode N, SelectionDAG DAG) {
7551	// Our desired xxswap might be source of COPY_TO_REGCLASS.
7552	// TODO: Can we put this a common method for DAG?
7553	auto SkipRCCopy = [](SDValue V) {
7554	while (V ->isMachineOpcode() &&
7555	V ->getMachineOpcode() == TargetOpcode::COPY_TO_REGCLASS) {
7556	// All values in the chain should have single use.
7557	if (V ->use_empty() \|\| !V ->use_begin()->isOnlyUserOf(N: V.getNode()))
7558	return SDValue ();
7559	V = V ->getOperand(Num: `0`);
7560	}
7561	return V.hasOneUse() ? V : SDValue ();
7562	};
7563
7564	SDValue VecOp = SkipRCCopy (N->getOperand(Num: `0`));
7565	if (!VecOp \|\| !isLaneInsensitive(N: VecOp))
7566	return;
7567
7568	SDValue LHS = SkipRCCopy (VecOp.getOperand(i: `0`)),
7569	RHS = SkipRCCopy (VecOp.getOperand(i: `1`));
7570	if (!LHS \|\| !RHS \|\| !isVSXSwap(N: LHS) \|\| !isVSXSwap(N: RHS))
7571	return;
7572
7573	// These swaps may still have chain-uses here, count on dead code elimination
7574	// in following passes to remove them.
7575	DAG->ReplaceAllUsesOfValueWith(From: LHS, To: LHS.getOperand(i: `0`));
7576	DAG->ReplaceAllUsesOfValueWith(From: RHS, To: RHS.getOperand(i: `0`));
7577	DAG->ReplaceAllUsesOfValueWith(From: SDValue (N, `0`), To: N->getOperand(Num: `0`));
7578	}
7579
7580	// Check if an SDValue has the 'aix-small-tls' global variable attribute.
7581	static bool hasAIXSmallTLSAttr(SDValue Val) {
7582	if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val))
7583	if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Val: GA->getGlobal()))
7584	if (GV->hasAttribute(Kind: "aix-small-tls"))
7585	return true;
7586
7587	return false;
7588	}
7589
7590	// Is an ADDI eligible for folding for non-TOC-based local-exec accesses?
7591	static bool isEligibleToFoldADDIForLocalExecAccesses(SelectionDAG *DAG,
7592	SDValue ADDIToFold) {
7593	// Check if ADDIToFold (the ADDI that we want to fold into local-exec
7594	// accesses), is truly an ADDI.
7595	if (!ADDIToFold.isMachineOpcode() \|\|
7596	(ADDIToFold.getMachineOpcode() != PPC::ADDI8))
7597	return false;
7598
7599	// Folding is only allowed for the AIX small-local-exec TLS target attribute
7600	// or when the 'aix-small-tls' global variable attribute is present.
7601	const PPCSubtarget &Subtarget =
7602	DAG->getMachineFunction().getSubtarget<PPCSubtarget>();
7603	SDValue TLSVarNode = ADDIToFold.getOperand(i: `1`);
7604	if (!(Subtarget.hasAIXSmallLocalExecTLS() \|\| hasAIXSmallTLSAttr(Val: TLSVarNode)))
7605	return false;
7606
7607	// The first operand of the ADDIToFold should be the thread pointer.
7608	// This transformation is only performed if the first operand of the
7609	// addi is the thread pointer.
7610	SDValue TPRegNode = ADDIToFold.getOperand(i: `0`);
7611	RegisterSDNode *TPReg = dyn_cast<RegisterSDNode>(Val: TPRegNode.getNode());
7612	if (!TPReg \|\| (TPReg->getReg() != Subtarget.getThreadPointerRegister()))
7613	return false;
7614
7615	// The second operand of the ADDIToFold should be the global TLS address
7616	// (the local-exec TLS variable). We only perform the folding if the TLS
7617	// variable is the second operand.
7618	GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val&: TLSVarNode);
7619	if (!GA)
7620	return false;
7621
7622	// The local-exec TLS variable should only have the MO_TPREL_FLAG target flag,
7623	// so this optimization is not performed otherwise if the flag is not set.
7624	unsigned TargetFlags = GA->getTargetFlags();
7625	if (TargetFlags != PPCII::MO_TPREL_FLAG)
7626	return false;
7627
7628	// If all conditions are satisfied, the ADDI is valid for folding.
7629	return true;
7630	}
7631
7632	// For non-TOC-based local-exec access where an addi is feeding into another
7633	// addi, fold this sequence into a single addi if possible.
7634	// Before this optimization, the sequence appears as:
7635	// addi rN, r13, sym@le
7636	// addi rM, rN, imm
7637	// After this optimization, we can fold the two addi into a single one:
7638	// addi rM, r13, sym@le + imm
7639	static void foldADDIForLocalExecAccesses(SDNode N, SelectionDAG DAG) {
7640	if (N->getMachineOpcode() != PPC::ADDI8)
7641	return;
7642
7643	// InitialADDI is the addi feeding into N (also an addi), and the addi that
7644	// we want optimized out.
7645	SDValue InitialADDI = N->getOperand(Num: `0`);
7646
7647	if (!isEligibleToFoldADDIForLocalExecAccesses(DAG, ADDIToFold: InitialADDI))
7648	return;
7649
7650	// At this point, InitialADDI can be folded into a non-TOC-based local-exec
7651	// access. The first operand of InitialADDI should be the thread pointer,
7652	// which has been checked in isEligibleToFoldADDIForLocalExecAccesses().
7653	SDValue TPRegNode = InitialADDI.getOperand(i: `0`);
7654	[[maybe_unused]] RegisterSDNode *TPReg = dyn_cast<RegisterSDNode>(Val: TPRegNode.getNode());
7655	[[maybe_unused]] const PPCSubtarget &Subtarget =
7656	DAG->getMachineFunction().getSubtarget<PPCSubtarget>();
7657	assert((TPReg && (TPReg->getReg() == Subtarget.getThreadPointerRegister())) &&
7658	"Expecting the first operand to be a thread pointer for folding addi "
7659	"in local-exec accesses!");
7660
7661	// The second operand of the InitialADDI should be the global TLS address
7662	// (the local-exec TLS variable), with the MO_TPREL_FLAG target flag.
7663	// This has been checked in isEligibleToFoldADDIForLocalExecAccesses().
7664	SDValue TLSVarNode = InitialADDI.getOperand(i: `1`);
7665	GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val&: TLSVarNode);
7666	assert(GA && "Expecting a valid GlobalAddressSDNode when folding addi into "
7667	"local-exec accesses!");
7668	unsigned TargetFlags = GA->getTargetFlags();
7669
7670	// The second operand of the addi that we want to preserve will be an
7671	// immediate. We add this immediate, together with the address of the TLS
7672	// variable found in InitialADDI, in order to preserve the correct TLS address
7673	// information during assembly printing. The offset is likely to be non-zero
7674	// when we end up in this case.
7675	int Offset = N->getConstantOperandVal(Num: `1`);
7676	TLSVarNode = DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA), MVT::i64,
7677	Offset, TargetFlags);
7678
7679	(void)DAG->UpdateNodeOperands(N, Op1: TPRegNode, Op2: TLSVarNode);
7680	if (InitialADDI.getNode()->use_empty())
7681	DAG->RemoveDeadNode(N: InitialADDI.getNode());
7682	}
7683
7684	void PPCDAGToDAGISel::PeepholePPC64() {
7685	SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
7686
7687	while (Position != CurDAG->allnodes_begin()) {
7688	SDNode N = &--Position;
7689	// Skip dead nodes and any non-machine opcodes.
7690	if (N->use_empty() \|\| !N->isMachineOpcode())
7691	continue;
7692
7693	if (isVSXSwap(N: SDValue (N, `0`)))
7694	reduceVSXSwap(N, DAG: CurDAG);
7695
7696	// This optimization is performed for non-TOC-based local-exec accesses.
7697	foldADDIForLocalExecAccesses(N, DAG: CurDAG);
7698
7699	unsigned FirstOp;
7700	unsigned StorageOpcode = N->getMachineOpcode();
7701	bool RequiresMod4Offset = false;
7702
7703	switch (StorageOpcode) {
7704	default: continue;
7705
7706	case PPC::LWA:
7707	case PPC::LD:
7708	case PPC::DFLOADf64:
7709	case PPC::DFLOADf32:
7710	RequiresMod4Offset = true;
7711	[[fallthrough]];
7712	case PPC::LBZ:
7713	case PPC::LBZ8:
7714	case PPC::LFD:
7715	case PPC::LFS:
7716	case PPC::LHA:
7717	case PPC::LHA8:
7718	case PPC::LHZ:
7719	case PPC::LHZ8:
7720	case PPC::LWZ:
7721	case PPC::LWZ8:
7722	FirstOp = `0`;
7723	break;
7724
7725	case PPC::STD:
7726	case PPC::DFSTOREf64:
7727	case PPC::DFSTOREf32:
7728	RequiresMod4Offset = true;
7729	[[fallthrough]];
7730	case PPC::STB:
7731	case PPC::STB8:
7732	case PPC::STFD:
7733	case PPC::STFS:
7734	case PPC::STH:
7735	case PPC::STH8:
7736	case PPC::STW:
7737	case PPC::STW8:
7738	FirstOp = `1`;
7739	break;
7740	}
7741
7742	// If this is a load or store with a zero offset, or within the alignment,
7743	// we may be able to fold an add-immediate into the memory operation.
7744	// The check against alignment is below, as it can't occur until we check
7745	// the arguments to N
7746	if (!isa<ConstantSDNode>(Val: N->getOperand(Num: FirstOp)))
7747	continue;
7748
7749	SDValue Base = N->getOperand(Num: FirstOp + `1`);
7750	if (!Base.isMachineOpcode())
7751	continue;
7752
7753	unsigned Flags = `0`;
7754	bool ReplaceFlags = true;
7755
7756	// When the feeding operation is an add-immediate of some sort,
7757	// determine whether we need to add relocation information to the
7758	// target flags on the immediate operand when we fold it into the
7759	// load instruction.
7760	//
7761	// For something like ADDItocL8, the relocation information is
7762	// inferred from the opcode; when we process it in the AsmPrinter,
7763	// we add the necessary relocation there. A load, though, can receive
7764	// relocation from various flavors of ADDIxxx, so we need to carry
7765	// the relocation information in the target flags.
7766	switch (Base.getMachineOpcode()) {
7767	default: continue;
7768
7769	case PPC::ADDI8:
7770	case PPC::ADDI:
7771	// In some cases (such as TLS) the relocation information
7772	// is already in place on the operand, so copying the operand
7773	// is sufficient.
7774	ReplaceFlags = false;
7775	break;
7776	case PPC::ADDIdtprelL:
7777	Flags = PPCII::MO_DTPREL_LO;
7778	break;
7779	case PPC::ADDItlsldL:
7780	Flags = PPCII::MO_TLSLD_LO;
7781	break;
7782	case PPC::ADDItocL8:
7783	Flags = PPCII::MO_TOC_LO;
7784	break;
7785	}
7786
7787	SDValue ImmOpnd = Base.getOperand(i: `1`);
7788
7789	// On PPC64, the TOC base pointer is guaranteed by the ABI only to have
7790	// 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
7791	// we might have needed different @ha relocation values for the offset
7792	// pointers).
7793	int MaxDisplacement = `7`;
7794	if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val&: ImmOpnd)) {
7795	const GlobalValue *GV = GA->getGlobal();
7796	Align Alignment = GV->getPointerAlignment(DL: CurDAG->getDataLayout());
7797	MaxDisplacement = std::min(a: (int)Alignment.value() - `1`, b: MaxDisplacement);
7798	}
7799
7800	bool UpdateHBase = false;
7801	SDValue HBase = Base.getOperand(i: `0`);
7802
7803	int Offset = N->getConstantOperandVal(Num: FirstOp);
7804	if (ReplaceFlags) {
7805	if (Offset < `0` \|\| Offset > MaxDisplacement) {
7806	// If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
7807	// one use, then we can do this for any offset, we just need to also
7808	// update the offset (i.e. the symbol addend) on the addis also.
7809	if (Base.getMachineOpcode() != PPC::ADDItocL8)
7810	continue;
7811
7812	if (!HBase.isMachineOpcode() \|\|
7813	HBase.getMachineOpcode() != PPC::ADDIStocHA8)
7814	continue;
7815
7816	if (!Base.hasOneUse() \|\| !HBase.hasOneUse())
7817	continue;
7818
7819	SDValue HImmOpnd = HBase.getOperand(i: `1`);
7820	if (HImmOpnd != ImmOpnd)
7821	continue;
7822
7823	UpdateHBase = true;
7824	}
7825	} else {
7826	// Global addresses can be folded, but only if they are sufficiently
7827	// aligned.
7828	if (RequiresMod4Offset) {
7829	if (GlobalAddressSDNode *GA =
7830	dyn_cast<GlobalAddressSDNode>(Val&: ImmOpnd)) {
7831	const GlobalValue *GV = GA->getGlobal();
7832	Align Alignment = GV->getPointerAlignment(DL: CurDAG->getDataLayout());
7833	if (Alignment < `4`)
7834	continue;
7835	}
7836	}
7837
7838	// If we're directly folding the addend from an addi instruction, then:
7839	// 1. In general, the offset on the memory access must be zero.
7840	// 2. If the addend is a constant, then it can be combined with a
7841	// non-zero offset, but only if the result meets the encoding
7842	// requirements.
7843	if (auto *C = dyn_cast<ConstantSDNode>(Val&: ImmOpnd)) {
7844	Offset += C->getSExtValue();
7845
7846	if (RequiresMod4Offset && (Offset % `4`) != `0`)
7847	continue;
7848
7849	if (!isInt<`16`>(x: Offset))
7850	continue;
7851
7852	ImmOpnd = CurDAG->getTargetConstant(Val: Offset, DL: SDLoc (ImmOpnd),
7853	VT: ImmOpnd.getValueType());
7854	} else if (Offset != `0`) {
7855	// This optimization is performed for non-TOC-based local-exec accesses.
7856	if (isEligibleToFoldADDIForLocalExecAccesses(DAG: CurDAG, ADDIToFold: Base)) {
7857	// Add the non-zero offset information into the load or store
7858	// instruction to be used for non-TOC-based local-exec accesses.
7859	GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val&: ImmOpnd);
7860	assert(GA && "Expecting a valid GlobalAddressSDNode when folding "
7861	"addi into local-exec accesses!");
7862	ImmOpnd = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
7863	MVT::i64, Offset,
7864	GA->getTargetFlags());
7865	} else
7866	continue;
7867	}
7868	}
7869
7870	// We found an opportunity. Reverse the operands from the add
7871	// immediate and substitute them into the load or store. If
7872	// needed, update the target flags for the immediate operand to
7873	// reflect the necessary relocation information.
7874	LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
7875	LLVM_DEBUG(Base ->dump(CurDAG));
7876	LLVM_DEBUG(dbgs() << "\nN: ");
7877	LLVM_DEBUG(N->dump(CurDAG));
7878	LLVM_DEBUG(dbgs() << "\n");
7879
7880	// If the relocation information isn't already present on the
7881	// immediate operand, add it now.
7882	if (ReplaceFlags) {
7883	if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val&: ImmOpnd)) {
7884	SDLoc dl(GA);
7885	const GlobalValue *GV = GA->getGlobal();
7886	Align Alignment = GV->getPointerAlignment(DL: CurDAG->getDataLayout());
7887	// We can't perform this optimization for data whose alignment
7888	// is insufficient for the instruction encoding.
7889	if (Alignment < `4` && (RequiresMod4Offset \|\| (Offset % `4`) != `0`)) {
7890	LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
7891	continue;
7892	}
7893	ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
7894	} else if (ConstantPoolSDNode *CP =
7895	dyn_cast<ConstantPoolSDNode>(Val&: ImmOpnd)) {
7896	const Constant *C = CP->getConstVal();
7897	ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(),
7898	Offset, Flags);
7899	}
7900	}
7901
7902	if (FirstOp == `1`) // Store
7903	(void)CurDAG->UpdateNodeOperands(N, Op1: N->getOperand(Num: `0`), Op2: ImmOpnd,
7904	Op3: Base.getOperand(i: `0`), Op4: N->getOperand(Num: `3`));
7905	else // Load
7906	(void)CurDAG->UpdateNodeOperands(N, Op1: ImmOpnd, Op2: Base.getOperand(i: `0`),
7907	Op3: N->getOperand(Num: `2`));
7908
7909	if (UpdateHBase)
7910	(void)CurDAG->UpdateNodeOperands(N: HBase.getNode(), Op1: HBase.getOperand(i: `0`),
7911	Op2: ImmOpnd);
7912
7913	// The add-immediate may now be dead, in which case remove it.
7914	if (Base.getNode()->use_empty())
7915	CurDAG->RemoveDeadNode(N: Base.getNode());
7916	}
7917	}
7918
7919	/// createPPCISelDag - This pass converts a legalized DAG into a
7920	/// PowerPC-specific DAG, ready for instruction scheduling.
7921	///
7922	FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM,
7923	CodeGenOptLevel OptLevel) {
7924	return new PPCDAGToDAGISel (TM, OptLevel);
7925	}
7926

source code of llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp