//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/DivisionByConstantInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cctype>
using namespace llvm;

/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}

const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}

bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}

/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                                          SDValue &Chain) const {
  const Function &F = DAG.getMachineFunction().getFunction();

  // First, check if tail calls have been disabled in this function.
  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
    return false;

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore the following attributes because they don't affect the
  // call sequence.
  AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
  for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
                           Attribute::DereferenceableOrNull, Attribute::NoAlias,
                           Attribute::NonNull, Attribute::NoUndef})
    CallerAttrs.removeAttribute(Attr);

  if (CallerAttrs.hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.contains(Attribute::ZExt) ||
      CallerAttrs.contains(Attribute::SExt))
    return false;

  // Check if the only use is a function return node.
  return isUsedByReturnOnly(Node, Chain);
}

bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
    const uint32_t *CallerPreservedMask,
    const SmallVectorImpl<CCValAssign> &ArgLocs,
    const SmallVectorImpl<SDValue> &OutVals) const {
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    const CCValAssign &ArgLoc = ArgLocs[I];
    if (!ArgLoc.isRegLoc())
      continue;
    MCRegister Reg = ArgLoc.getLocReg();
    // Only look at callee-saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
      continue;
    // Check that we pass the value used for the caller.
    // (We look for a CopyFromReg reading a virtual register that is used
    //  for the function live-in value of register Reg)
    SDValue Value = OutVals[I];
    if (Value->getOpcode() == ISD::AssertZext)
      Value = Value.getOperand(0);
    if (Value->getOpcode() != ISD::CopyFromReg)
      return false;
    Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
      return false;
  }
  return true;
}

/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgIdx);
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgIdx);
}

/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
                            ArrayRef<SDValue> Ops,
                            MakeLibCallOptions CallOptions,
                            const SDLoc &dl,
                            SDValue InChain) const {
  if (!InChain)
    InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  Args.reserve(Ops.size());

  TargetLowering::ArgListEntry Entry;
  for (unsigned i = 0; i < Ops.size(); ++i) {
    SDValue NewOp = Ops[i];
    Entry.Node = NewOp;
    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
    Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
                                                 CallOptions.IsSExt);
    Entry.IsZExt = !Entry.IsSExt;

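    // For softened FP libcalls, consult the type from before softening; if
    // the target does not want that type extended, clear both extension
    // flags.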
    if (CallOptions.IsSoften &&
        !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
      Entry.IsSExt = Entry.IsZExt = false;
    }
    Args.push_back(Entry);
  }

  if (LC == RTLIB::UNKNOWN_LIBCALL)
    report_fatal_error("Unsupported library call operation!");
  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
                                         getPointerTy(DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  TargetLowering::CallLoweringInfo CLI(DAG);
  bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
  bool zeroExtend = !signExtend;

  if (CallOptions.IsSoften &&
      !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
    signExtend = zeroExtend = false;
  }

  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
      .setNoReturn(CallOptions.DoesNotReturn)
      .setDiscardResult(!CallOptions.IsReturnValueUsed)
      .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
      .setSExtResult(signExtend)
      .setZExtResult(zeroExtend);
  return LowerCallTo(CLI);
}

bool TargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  EVT VT = getOptimalMemOpType(Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::i64;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::i64;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

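  // Greedily cover the remaining size with the current (widest) type,
  // narrowing the type whenever the remainder is smaller than it.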
  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector loads / stores for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}

/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS) const {
  SDValue Chain;
  return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
                             OldRHS, Chain);
}

void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to
  // libgcc not supporting them. We can update this code when libgcc provides
  // such functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 ||
          VT == MVT::ppcf128) &&
         "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
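  // LC2 remains UNKNOWN_LIBCALL unless the predicate must be decomposed into
  // two libcalls (e.g. SETUEQ is tested as unordered (UO) or equal (OEQ)).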
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  CCCode = getCmpLibcallCC(LC1);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
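    // Two libcalls are needed: evaluate the first predicate now, then combine
    // it below with the second result (AND if the conditions were inverted,
    // otherwise OR).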
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getCmpLibcallCC(LC2);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    NewRHS = SDValue();
  }
}

/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
  // In non-PIC modes, just use the address of a block.
  if (!isPositionIndependent())
    return MachineJumpTableInfo::EK_BlockAddress;

  // In PIC mode, if the target supports a GPRel32 directive, use it.
  if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
    return MachineJumpTableInfo::EK_GPRel32BlockAddress;

  // Otherwise, use a label difference.
  return MachineJumpTableInfo::EK_LabelDifference32;
}

SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  // If our PIC model is GP relative, use the global offset table as the base.
  unsigned JTEncoding = getJumpTableEncoding();

  if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
      (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));

  return Table;
}

/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI,
                                             MCContext &Ctx) const {
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}

SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
                                               SDValue Addr, int JTI,
                                               SelectionDAG &DAG) const {
  SDValue Chain = Value;
  // Jump table debug info is only needed if CodeView is enabled.
  if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
    Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
  }
  return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
}

bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  const TargetMachine &TM = getTargetMachine();
  const GlobalValue *GV = GA->getGlobal();

  // If the address is not even local to this DSO we will have to load it from
  // a GOT and then add the offset.
  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
    return false;

  // If the code is position independent we will have to add a base register.
  if (isPositionIndependent())
    return false;

  // Otherwise we can do it.
  return true;
}

//===----------------------------------------------------------------------===//
//  Optimization Methods
//===----------------------------------------------------------------------===//

/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Early-out if we've ended up calling an undemanded node; leave this to
  // constant folding.
  if (DemandedBits.isZero() || DemandedElts.isZero())
    return false;

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
      return false;

    if (!C.isSubsetOf(DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}

bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            TargetLoweringOpt &TLO) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}

/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
/// but it could be generalized for targets with other types of implicit
/// widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &DemandedBits,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  EVT VT = Op.getValueType();
  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (VT.isVector())
    return false;

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  unsigned DemandedSize = DemandedBits.getActiveBits();
  for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
       SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (TLI.isTruncateFree(VT, SmallVT) && TLI.isZExtFree(SmallVT, VT)) {
      // We found a type with free casts.
      SDValue X = DAG.getNode(
          Op.getOpcode(), dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          const APInt &DemandedElts,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified =
      SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          KnownBits &Known,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();

  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
                              AssumeSingleUse);
}

// TODO: Under what circumstances can we create nodes? Constant folding?
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  EVT VT = Op.getValueType();

  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Ignore UNDEFs.
  if (Op.isUndef())
    return SDValue();

  // Not demanding any bits/elts from Op.
  if (DemandedBits == 0 || DemandedElts == 0)
    return DAG.getUNDEF(VT);

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = DemandedElts.getBitWidth();
  unsigned BitWidth = DemandedBits.getBitWidth();
  KnownBits LHSKnown, RHSKnown;
  switch (Op.getOpcode()) {
  case ISD::BITCAST: {
    if (VT.isScalableVector())
      return SDValue();

    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (SrcVT == DstVT)
      return Src;

    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
    if (NumSrcEltBits == NumDstEltBits)
      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedBits, DemandedElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);

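    // Bitcast from narrower src elements to wider dst elements: each dst
    // element is built from Scale consecutive src elements, so map the
    // demanded dst bits/elts back onto the equivalent src bits/elts.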
    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
      unsigned Scale = NumDstEltBits / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
        unsigned BitOffset = EltOffset * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
        if (!Sub.isZero()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    // TODO - bigendian once we have test coverage.
    if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
      unsigned Scale = NumSrcEltBits / NumDstEltBits;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * NumDstEltBits;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    break;
  }
  case ISD::AND: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return Op.getOperand(1);
    break;
  }
  case ISD::OR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::XOR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::SHL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (const APInt *MaxSA =
            DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = MaxSA->getZExtValue();
      unsigned NumSignBits =
          DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
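      // If every demanded bit position (from the lowest demanded bit up) is
      // still covered by sign bits after shifting left by ShAmt, the shifted
      // and unshifted values agree on all demanded bits.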
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return Op0;
    }
    break;
  }
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return Op0;
    }
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    // If none of the extended bits are demanded, eliminate the sextinreg.
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExBits = ExVT.getScalarSizeInBits();
    if (DemandedBits.getActiveBits() <= ExBits &&
        shouldRemoveRedundantExtend(Op))
      return Op0;
    // If the input is already sign extended, just drop the extension.
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
    if (NumSignBits >= (BitWidth - ExBits + 1))
      return Op0;
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    if (VT.isScalableVector())
      return SDValue();

    // If we only want the lowest element and none of the extended bits, then
    // we can return the bitcasted source vector.
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (IsLE && DemandedElts == 1 &&
        DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
        DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
      return DAG.getBitcast(DstVT, Src);
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (VT.isScalableVector())
      return SDValue();

    // If we don't demand the inserted element, return the base vector.
    SDValue Vec = Op.getOperand(0);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    if (VT.isScalableVector())
      return SDValue();

    SDValue Vec = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    // If we don't demand the inserted subvector, return the base vector.
    if (DemandedSubElts == 0)
      return Vec;
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    assert(!VT.isScalableVector());
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // If all the demanded elts are from one operand and are inline,
    // then we can use the operand directly.
    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      AllUndef = false;
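      // A shuffle mask indexes the concatenation of both operands, so entries
      // taken from the RHS operand appear as NumElts + i.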
      IdentityLHS &= (M == (int)i);
      IdentityRHS &= ((M - NumElts) == i);
    }

    if (AllUndef)
      return DAG.getUNDEF(Op.getValueType());
    if (IdentityLHS)
      return Op.getOperand(0);
    if (IdentityRHS)
      return Op.getOperand(1);
    break;
  }
  default:
    // TODO: Probably okay to remove after audit; here to reduce change size
    // in initial enablement patch for scalable vectors
    if (VT.isScalableVector())
      return SDValue();

    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
      if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
              Op, DemandedBits, DemandedElts, DAG, Depth))
        return V;
    break;
  }
  return SDValue();
}

SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
    unsigned Depth) const {
  EVT VT = Op.getValueType();
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
    SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
    unsigned Depth) const {
  APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1),
// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
                                 const TargetLowering &TLI,
                                 const APInt &DemandedBits,
                                 const APInt &DemandedElts,
                                 unsigned Depth) {
  assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  // Is the right shift using an immediate value of 1?
  ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
  if (!N1C || !N1C->isOne())
    return SDValue();

  // We are looking for an avgfloor
  // add(ext, ext)
  // or one of these as an avgceil
  // add(add(ext, ext), 1)
  // add(add(ext, 1), ext)
  // add(ext, add(ext, 1))
  SDValue Add = Op.getOperand(0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  SDValue ExtOpA = Add.getOperand(0);
  SDValue ExtOpB = Add.getOperand(1);
  SDValue Add2;
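  // Helper to match one operand of the outer add as add(x, 1): on success the
  // two extended inputs land in ExtOpA/ExtOpB and Add2 records the inner add.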
  auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
    ConstantSDNode *ConstOp;
    if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op3;
      Add2 = A;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op2;
      Add2 = A;
      return true;
    }
    return false;
  };
  bool IsCeil =
      (ExtOpA.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB,
                     ExtOpA)) ||
      (ExtOpB.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA,
                     ExtOpB));

  // If the shift is signed (sra):
  //  - Needs >= 2 sign bits for both operands.
  //  - Needs >= 2 zero bits.
  // If the shift is unsigned (srl):
  //  - Needs >= 1 zero bit for both operands.
  //  - Needs 1 demanded bit zero and >= 2 sign bits.
  unsigned ShiftOpc = Op.getOpcode();
  bool IsSigned = false;
  unsigned KnownBits;
  unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
  unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
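  // The sum of two values that each have at least N sign bits has at least
  // N - 1 sign bits, so conservatively combine the two operand bounds.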
  unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
  unsigned NumZeroA =
      DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZeroB =
      DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZero = std::min(NumZeroA, NumZeroB);

  switch (ShiftOpc) {
  default:
    llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
  case ISD::SRA: {
    if (NumZero >= 2 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  case ISD::SRL: {
    if (NumZero >= 1 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  }

  unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
                           : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);

  // Find the smallest power-of-2 type that is legal for this vector size and
  // operation, given the original type size and the number of known sign/zero
  // bits.
  EVT VT = Op.getValueType();
  unsigned MinWidth =
      std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
  if (VT.isVector())
    NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
  if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT)) {
    // If we could not transform, and (both) adds are nuw/nsw, we can use the
    // larger type size to do the transform.
    if (!TLI.isOperationLegalOrCustom(AVGOpc, VT))
      return SDValue();
    if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
                               Add.getOperand(1)) &&
        (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
                                         Add2.getOperand(1))))
      NVT = VT;
    else
      return SDValue();
  }

  SDLoc DL(Op);
  SDValue ResultAVG =
      DAG.getNode(AVGOpc, DL, NVT,
                  DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
                  DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
  return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
}

/// Look at Op. At this point, we know that only the OriginalDemandedBits of
/// the result of Op are ever used downstream. If we can use this information
/// to simplify Op, create a new simplified DAG node and return true, returning
/// the original and new nodes in Old and New. Otherwise, analyze the
/// expression and return a mask of Known bits for the expression (used to
/// simplify the caller). The Known bits may only be accurate for those bits
/// in the OriginalDemandedBits and OriginalDemandedElts.
bool TargetLowering::SimplifyDemandedBits(
    SDValue Op, const APInt &OriginalDemandedBits,
    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
    unsigned Depth, bool AssumeSingleUse) const {
  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
  assert(Op.getScalarValueSizeInBits() == BitWidth &&
         "Mask size mismatches value type size!");

  // Don't know anything.
  Known = KnownBits(BitWidth);

  EVT VT = Op.getValueType();
  bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = OriginalDemandedElts.getBitWidth();
  assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
         "Unexpected vector size");

  APInt DemandedBits = OriginalDemandedBits;
  APInt DemandedElts = OriginalDemandedElts;
  SDLoc dl(Op);

  // Undef operand.
  if (Op.isUndef())
    return false;

  // We can't simplify target constants.
  if (Op.getOpcode() == ISD::TargetConstant)
    return false;

  if (Op.getOpcode() == ISD::Constant) {
    // We know all of the bits for a constant!
    Known = KnownBits::makeConstant(Op->getAsAPIntVal());
    return false;
  }

  if (Op.getOpcode() == ISD::ConstantFP) {
    // We know all of the bits for a floating point constant!
    Known = KnownBits::makeConstant(
        cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
    return false;
  }

  // Other users may use these bits.
  bool HasMultiUse = false;
  if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
    if (Depth >= SelectionDAG::MaxRecursionDepth) {
      // Limit search depth.
      return false;
    }
    // Allow multiple uses, just set the DemandedBits/Elts to all bits.
    DemandedBits = APInt::getAllOnes(BitWidth);
    DemandedElts = APInt::getAllOnes(NumElts);
    HasMultiUse = true;
  } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
    // Not demanding any bits/elts from Op.
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
    // Limit search depth.
    return false;
  }

  KnownBits Known2;
  switch (Op.getOpcode()) {
  case ISD::SCALAR_TO_VECTOR: {
    if (VT.isScalableVector())
      return false;
    if (!DemandedElts[0])
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));

    KnownBits SrcKnown;
    SDValue Src = Op.getOperand(0);
    unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
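    // The scalar source may be wider than the vector element type, so widen
    // the demanded-bits mask to the scalar's width before recursing.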
    APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
    if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
      return true;

    // Upper elements are undef, so only get the knownbits if we just demand
    // the bottom element.
    if (DemandedElts == 1)
      Known = SrcKnown.anyextOrTrunc(BitWidth);
    break;
  }
  case ISD::BUILD_VECTOR:
    // Collect the known bits that are shared by every demanded element.
    // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    return false; // Don't fall through, will infinitely loop.
  case ISD::SPLAT_VECTOR: {
    SDValue Scl = Op.getOperand(0);
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
    KnownBits KnownScl;
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    // Implicitly truncate the bits to match the official semantics of
    // SPLAT_VECTOR.
    Known = KnownScl.trunc(BitWidth);
    break;
  }
  case ISD::LOAD: {
    auto *LD = cast<LoadSDNode>(Op);
    if (getTargetConstantFromLoad(LD)) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false; // Don't fall through, will infinitely loop.
    }
    if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
      // If this is a ZEXTLoad and we are looking at the loaded value.
      EVT MemVT = LD->getMemoryVT();
      unsigned MemBits = MemVT.getScalarSizeInBits();
      Known.Zero.setBitsFrom(MemBits);
      return false; // Don't fall through, will infinitely loop.
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (VT.isScalableVector())
      return false;
    SDValue Vec = Op.getOperand(0);
    SDValue Scl = Op.getOperand(1);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();

    // If index isn't constant, assume we need all vector elements AND the
    // inserted element.
    APInt DemandedVecElts(DemandedElts);
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
      unsigned Idx = CIdx->getZExtValue();
      DemandedVecElts.clearBit(Idx);

      // Inserted element is not required.
      if (!DemandedElts[Idx])
        return TLO.CombineTo(Op, Vec);
    }

    KnownBits KnownScl;
    unsigned NumSclBits = Scl.getScalarValueSizeInBits();
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    Known = KnownScl.anyextOrTrunc(BitWidth);

    KnownBits KnownVec;
    if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
                             Depth + 1))
      return true;

    if (!!DemandedVecElts)
      Known = Known.intersectWith(KnownVec);

    return false;
  }
  case ISD::INSERT_SUBVECTOR: {
    if (VT.isScalableVector())
      return false;
    // Demand any elements from the subvector and the remainder from the src
    // it's inserted into.
    SDValue Src = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    APInt DemandedSrcElts = DemandedElts;
    DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);

    KnownBits KnownSub, KnownSrc;
    if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
                             Depth + 1))
      return true;

    Known.Zero.setAllBits();
    Known.One.setAllBits();
    if (!!DemandedSubElts)
      Known = Known.intersectWith(KnownSub);
    if (!!DemandedSrcElts)
      Known = Known.intersectWith(KnownSrc);

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
        !DemandedSrcElts.isAllOnes()) {
      SDValue NewSub = SimplifyMultipleUseDemandedBits(
          Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
      SDValue NewSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (NewSub || NewSrc) {
        NewSub = NewSub ? NewSub : Sub;
        NewSrc = NewSrc ? NewSrc : Src;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
                                        Op.getOperand(2));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    if (VT.isScalableVector())
      return false;
    // Offset the demanded elts by the subvector index.
    SDValue Src = Op.getOperand(0);
    if (Src.getValueType().isScalableVector())
      break;
    uint64_t Idx = Op.getConstantOperandVal(1);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);

    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
                             Depth + 1))
      return true;

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
      SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (DemandedSrc) {
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
                                        Op.getOperand(1));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    if (VT.isScalableVector())
      return false;
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      APInt DemandedSubElts =
          DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
                               Known2, TLO, Depth + 1))
        return true;
      // Known bits are shared by every demanded subvector element.
      if (!!DemandedSubElts)
        Known = Known.intersectWith(Known2);
    }
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    assert(!VT.isScalableVector());
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // Collect demanded elements from shuffle operands.
    APInt DemandedLHS, DemandedRHS;
    if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
                                DemandedRHS))
      break;

    if (!!DemandedLHS || !!DemandedRHS) {
      SDValue Op0 = Op.getOperand(0);
      SDValue Op1 = Op.getOperand(1);

      Known.Zero.setAllBits();
      Known.One.setAllBits();
      if (!!DemandedLHS) {
        if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = Known.intersectWith(Known2);
      }
      if (!!DemandedRHS) {
        if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = Known.intersectWith(Known2);
      }

      // Attempt to avoid multi-use ops if we don't need anything from them.
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::AND: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // If the RHS is a constant, check to see if the LHS would be zero without
    // using the bits from the RHS. Below, we use knowledge about the RHS to
    // simplify the LHS; here we're using information from the LHS to simplify
    // the RHS.
    if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
      // Do not increment Depth here; that can cause an infinite loop.
      KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
      // If the LHS already has zeros where RHSC does, this 'and' is dead.
      if ((LHSKnown.Zero & DemandedBits) ==
          (~RHSC->getAPIntValue() & DemandedBits))
        return TLO.CombineTo(Op, Op0);

      // If any of the set bits in the RHS are known zero on the LHS, shrink
      // the constant.
      if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
                                 DemandedElts, TLO))
        return true;

      // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
      // constant, but if this 'and' is only clearing bits that were just set by
      // the xor, then this 'and' can be eliminated by shrinking the mask of
      // the xor. For example, for a 32-bit X:
      // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
      if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
          LHSKnown.One == ~RHSC->getAPIntValue()) {
        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
        return TLO.CombineTo(Op, Xor);
      }
    }

    // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
    // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
    if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
        (Op0.getOperand(0).isUndef() ||
         ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
        Op0->hasOneUse()) {
      unsigned NumSubElts =
          Op0.getOperand(1).getValueType().getVectorNumElements();
      unsigned SubIdx = Op0.getConstantOperandVal(2);
      APInt DemandedSub =
          APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
      KnownBits KnownSubMask =
          TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
      if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
        SDValue NewAnd =
            TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
        SDValue NewInsert =
            TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
                            Op0.getOperand(1), Op0.getOperand(2));
        return TLO.CombineTo(Op, NewInsert);
      }
    }

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If all of the demanded bits are known one on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
      return TLO.CombineTo(Op, Op1);
    // If all of the demanded bits in the inputs are known zeros, return zero.
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
                               TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

1463 Known &= Known2;
1464 break;
1465 }
1466 case ISD::OR: {
1467 SDValue Op0 = Op.getOperand(i: 0);
1468 SDValue Op1 = Op.getOperand(i: 1);
1469 SDNodeFlags Flags = Op.getNode()->getFlags();
1470 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1471 Depth: Depth + 1)) {
1472 if (Flags.hasDisjoint()) {
1473 Flags.setDisjoint(false);
1474 Op->setFlags(Flags);
1475 }
1476 return true;
1477 }
1478 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1479 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: ~Known.One & DemandedBits, OriginalDemandedElts: DemandedElts,
1480 Known&: Known2, TLO, Depth: Depth + 1)) {
1481 if (Flags.hasDisjoint()) {
1482 Flags.setDisjoint(false);
1483 Op->setFlags(Flags);
1484 }
1485 return true;
1486 }
1487 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1488
1489 // If all of the demanded bits are known zero on one side, return the other.
1490 // These bits cannot contribute to the result of the 'or'.
1491 if (DemandedBits.isSubsetOf(RHS: Known2.One | Known.Zero))
1492 return TLO.CombineTo(O: Op, N: Op0);
1493 if (DemandedBits.isSubsetOf(RHS: Known.One | Known2.Zero))
1494 return TLO.CombineTo(O: Op, N: Op1);
1495 // If the RHS is a constant, see if we can simplify it.
1496 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1497 return true;
1498 // If the operation can be done in a smaller type, do so.
1499 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1500 return true;
1501
1502 // Attempt to avoid multi-use ops if we don't need anything from them.
1503 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1504 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1505 Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1506 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1507 Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1508 if (DemandedOp0 || DemandedOp1) {
1509 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1510 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1511 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1512 return TLO.CombineTo(O: Op, N: NewOp);
1513 }
1514 }
1515
1516 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1517 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1518 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1519 Op0->hasOneUse() && Op1->hasOneUse()) {
1520 // Attempt to match all commutations - m_c_Or would've been useful!
1521 for (int I = 0; I != 2; ++I) {
1522 SDValue X = Op.getOperand(i: I).getOperand(i: 0);
1523 SDValue C1 = Op.getOperand(i: I).getOperand(i: 1);
1524 SDValue Alt = Op.getOperand(i: 1 - I).getOperand(i: 0);
1525 SDValue C2 = Op.getOperand(i: 1 - I).getOperand(i: 1);
1526 if (Alt.getOpcode() == ISD::OR) {
1527 for (int J = 0; J != 2; ++J) {
1528 if (X == Alt.getOperand(i: J)) {
1529 SDValue Y = Alt.getOperand(i: 1 - J);
1530 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(Opcode: ISD::OR, DL: dl, VT,
1531 Ops: {C1, C2})) {
1532 SDValue MaskX = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: X, N2: C12);
1533 SDValue MaskY = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Y, N2: C2);
1534 return TLO.CombineTo(
1535 O: Op, N: TLO.DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: MaskX, N2: MaskY));
1536 }
1537 }
1538 }
1539 }
1540 }
1541 }
1542
1543 Known |= Known2;
1544 break;
1545 }
1546 case ISD::XOR: {
1547 SDValue Op0 = Op.getOperand(i: 0);
1548 SDValue Op1 = Op.getOperand(i: 1);
1549
1550 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1551 Depth: Depth + 1))
1552 return true;
1553 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1554 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
1555 Depth: Depth + 1))
1556 return true;
1557 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1558
1559 // If all of the demanded bits are known zero on one side, return the other.
1560 // These bits cannot contribute to the result of the 'xor'.
1561 if (DemandedBits.isSubsetOf(RHS: Known.Zero))
1562 return TLO.CombineTo(O: Op, N: Op0);
1563 if (DemandedBits.isSubsetOf(RHS: Known2.Zero))
1564 return TLO.CombineTo(O: Op, N: Op1);
1565 // If the operation can be done in a smaller type, do so.
1566 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1567 return true;
1568
    // If all of the unknown bits are known to be zero on one side or the
    // other, turn this into an *inclusive* or.
1571 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1572 if (DemandedBits.isSubsetOf(RHS: Known.Zero | Known2.Zero))
1573 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Op0, N2: Op1));
1574
1575 ConstantSDNode *C = isConstOrConstSplat(N: Op1, DemandedElts);
1576 if (C) {
1577 // If one side is a constant, and all of the set bits in the constant are
1578 // also known set on the other side, turn this into an AND, as we know
1579 // the bits will be cleared.
1580 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1581 // NB: it is okay if more bits are known than are requested
1582 if (C->getAPIntValue() == Known2.One) {
1583 SDValue ANDC =
1584 TLO.DAG.getConstant(Val: ~C->getAPIntValue() & DemandedBits, DL: dl, VT);
1585 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op0, N2: ANDC));
1586 }
1587
1588 // If the RHS is a constant, see if we can change it. Don't alter a -1
1589 // constant because that's a 'not' op, and that is better for combining
1590 // and codegen.
1591 if (!C->isAllOnes() && DemandedBits.isSubsetOf(RHS: C->getAPIntValue())) {
1592 // We're flipping all demanded bits. Flip the undemanded bits too.
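        // e.g. if only bits 3:0 are demanded: xor X, 0xF --> not X, since
        // the extra flipped bits are never observed.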
1593 SDValue New = TLO.DAG.getNOT(DL: dl, Val: Op0, VT);
1594 return TLO.CombineTo(O: Op, N: New);
1595 }
1596
1597 unsigned Op0Opcode = Op0.getOpcode();
1598 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1599 if (ConstantSDNode *ShiftC =
1600 isConstOrConstSplat(N: Op0.getOperand(i: 1), DemandedElts)) {
          // Don't crash on an oversized shift. We cannot guarantee that a
          // bogus shift has been simplified to undef.
1603 if (ShiftC->getAPIntValue().ult(RHS: BitWidth)) {
1604 uint64_t ShiftAmt = ShiftC->getZExtValue();
1605 APInt Ones = APInt::getAllOnes(numBits: BitWidth);
1606 Ones = Op0Opcode == ISD::SHL ? Ones.shl(shiftAmt: ShiftAmt)
1607 : Ones.lshr(shiftAmt: ShiftAmt);
1608 const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
1609 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1610 TLI.isDesirableToCommuteXorWithShift(N: Op.getNode())) {
1611 // If the xor constant is a demanded mask, do a 'not' before the
1612 // shift:
1613 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1614 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1615 SDValue Not = TLO.DAG.getNOT(DL: dl, Val: Op0.getOperand(i: 0), VT);
1616 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op0Opcode, DL: dl, VT, N1: Not,
1617 N2: Op0.getOperand(i: 1)));
1618 }
1619 }
1620 }
1621 }
1622 }
1623
1624 // If we can't turn this into a 'not', try to shrink the constant.
1625 if (!C || !C->isAllOnes())
1626 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1627 return true;
1628
1629 // Attempt to avoid multi-use ops if we don't need anything from them.
1630 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1631 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1632 Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1633 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1634 Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1635 if (DemandedOp0 || DemandedOp1) {
1636 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1637 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1638 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1639 return TLO.CombineTo(O: Op, N: NewOp);
1640 }
1641 }
1642
1643 Known ^= Known2;
1644 break;
1645 }
1646 case ISD::SELECT:
1647 if (SimplifyDemandedBits(Op: Op.getOperand(i: 2), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1648 Known, TLO, Depth: Depth + 1))
1649 return true;
1650 if (SimplifyDemandedBits(Op: Op.getOperand(i: 1), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1651 Known&: Known2, TLO, Depth: Depth + 1))
1652 return true;
1653 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1654 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1655
1656 // If the operands are constants, see if we can simplify them.
1657 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1658 return true;
1659
1660 // Only known if known in both the LHS and RHS.
1661 Known = Known.intersectWith(RHS: Known2);
1662 break;
1663 case ISD::VSELECT:
1664 if (SimplifyDemandedBits(Op: Op.getOperand(i: 2), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1665 Known, TLO, Depth: Depth + 1))
1666 return true;
1667 if (SimplifyDemandedBits(Op: Op.getOperand(i: 1), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1668 Known&: Known2, TLO, Depth: Depth + 1))
1669 return true;
1670 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1671 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1672
1673 // Only known if known in both the LHS and RHS.
1674 Known = Known.intersectWith(RHS: Known2);
1675 break;
1676 case ISD::SELECT_CC:
1677 if (SimplifyDemandedBits(Op: Op.getOperand(i: 3), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1678 Known, TLO, Depth: Depth + 1))
1679 return true;
1680 if (SimplifyDemandedBits(Op: Op.getOperand(i: 2), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1681 Known&: Known2, TLO, Depth: Depth + 1))
1682 return true;
1683 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1684 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1685
1686 // If the operands are constants, see if we can simplify them.
1687 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1688 return true;
1689
1690 // Only known if known in both the LHS and RHS.
1691 Known = Known.intersectWith(RHS: Known2);
1692 break;
1693 case ISD::SETCC: {
1694 SDValue Op0 = Op.getOperand(i: 0);
1695 SDValue Op1 = Op.getOperand(i: 1);
1696 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get();
1697 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1698 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1699 // -1, we may be able to bypass the setcc.
1700 if (DemandedBits.isSignMask() &&
1701 Op0.getScalarValueSizeInBits() == BitWidth &&
1702 getBooleanContents(Type: Op0.getValueType()) ==
1703 BooleanContent::ZeroOrNegativeOneBooleanContent) {
1704 // If we're testing X < 0, then this compare isn't needed - just use X!
1705 // FIXME: We're limiting to integer types here, but this should also work
1706 // if we don't care about FP signed-zero. The use of SETLT with FP means
1707 // that we don't care about NaNs.
1708 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1709 (isNullConstant(V: Op1) || ISD::isBuildVectorAllZeros(N: Op1.getNode())))
1710 return TLO.CombineTo(O: Op, N: Op0);
1711
1712 // TODO: Should we check for other forms of sign-bit comparisons?
1713 // Examples: X <= -1, X >= 0
1714 }
1715 if (getBooleanContents(Type: Op0.getValueType()) ==
1716 TargetLowering::ZeroOrOneBooleanContent &&
1717 BitWidth > 1)
1718 Known.Zero.setBitsFrom(1);
1719 break;
1720 }
1721 case ISD::SHL: {
1722 SDValue Op0 = Op.getOperand(i: 0);
1723 SDValue Op1 = Op.getOperand(i: 1);
1724 EVT ShiftVT = Op1.getValueType();
1725
1726 if (const APInt *SA =
1727 TLO.DAG.getValidShiftAmountConstant(V: Op, DemandedElts)) {
1728 unsigned ShAmt = SA->getZExtValue();
1729 if (ShAmt == 0)
1730 return TLO.CombineTo(O: Op, N: Op0);
1731
1732 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1733 // single shift. We can do this if the bottom bits (which are shifted
1734 // out) are never demanded.
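      // e.g. (shl (srl X, 1), 2) --> (shl X, 1) when the low 2 bits of the
      // result are not demanded.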
1735 // TODO - support non-uniform vector amounts.
1736 if (Op0.getOpcode() == ISD::SRL) {
1737 if (!DemandedBits.intersects(RHS: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ShAmt))) {
1738 if (const APInt *SA2 =
1739 TLO.DAG.getValidShiftAmountConstant(V: Op0, DemandedElts)) {
1740 unsigned C1 = SA2->getZExtValue();
1741 unsigned Opc = ISD::SHL;
1742 int Diff = ShAmt - C1;
1743 if (Diff < 0) {
1744 Diff = -Diff;
1745 Opc = ISD::SRL;
1746 }
1747 SDValue NewSA = TLO.DAG.getConstant(Val: Diff, DL: dl, VT: ShiftVT);
1748 return TLO.CombineTo(
1749 O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, N1: Op0.getOperand(i: 0), N2: NewSA));
1750 }
1751 }
1752 }
1753
1754 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1755 // are not demanded. This will likely allow the anyext to be folded away.
1756 // TODO - support non-uniform vector amounts.
1757 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1758 SDValue InnerOp = Op0.getOperand(i: 0);
1759 EVT InnerVT = InnerOp.getValueType();
1760 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1761 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1762 isTypeDesirableForOp(ISD::SHL, VT: InnerVT)) {
1763 SDValue NarrowShl = TLO.DAG.getNode(
1764 Opcode: ISD::SHL, DL: dl, VT: InnerVT, N1: InnerOp,
1765 N2: TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: InnerVT, DL: dl));
1766 return TLO.CombineTo(
1767 O: Op, N: TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: NarrowShl));
1768 }
1769
1770 // Repeat the SHL optimization above in cases where an extension
1771 // intervenes: (shl (anyext (shr x, c1)), c2) to
1772 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1773 // aren't demanded (as above) and that the shifted upper c1 bits of
1774 // x aren't demanded.
1775 // TODO - support non-uniform vector amounts.
1776 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1777 InnerOp.hasOneUse()) {
1778 if (const APInt *SA2 =
1779 TLO.DAG.getValidShiftAmountConstant(V: InnerOp, DemandedElts)) {
1780 unsigned InnerShAmt = SA2->getZExtValue();
1781 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1782 DemandedBits.getActiveBits() <=
1783 (InnerBits - InnerShAmt + ShAmt) &&
1784 DemandedBits.countr_zero() >= ShAmt) {
1785 SDValue NewSA =
1786 TLO.DAG.getConstant(Val: ShAmt - InnerShAmt, DL: dl, VT: ShiftVT);
1787 SDValue NewExt = TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT,
1788 Operand: InnerOp.getOperand(i: 0));
1789 return TLO.CombineTo(
1790 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: NewExt, N2: NewSA));
1791 }
1792 }
1793 }
1794 }
1795
1796 APInt InDemandedMask = DemandedBits.lshr(shiftAmt: ShAmt);
1797 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
1798 Depth: Depth + 1)) {
1799 SDNodeFlags Flags = Op.getNode()->getFlags();
1800 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1801 // Disable the nsw and nuw flags. We can no longer guarantee that we
1802 // won't wrap after simplification.
1803 Flags.setNoSignedWrap(false);
1804 Flags.setNoUnsignedWrap(false);
1805 Op->setFlags(Flags);
1806 }
1807 return true;
1808 }
1809 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1810 Known.Zero <<= ShAmt;
1811 Known.One <<= ShAmt;
1812 // low bits known zero.
1813 Known.Zero.setLowBits(ShAmt);
1814
1815 // Attempt to avoid multi-use ops if we don't need anything from them.
1816 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1817 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1818 Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1819 if (DemandedOp0) {
1820 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: DemandedOp0, N2: Op1);
1821 return TLO.CombineTo(O: Op, N: NewOp);
1822 }
1823 }
1824
1825 // Try shrinking the operation as long as the shift amount will still be
1826 // in range.
1827 if ((ShAmt < DemandedBits.getActiveBits()) &&
1828 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1829 return true;
1830
1831 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1832 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1833 // Only do this if we demand the upper half so the knownbits are correct.
1834 unsigned HalfWidth = BitWidth / 2;
1835 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1836 DemandedBits.countLeadingOnes() >= HalfWidth) {
1837 EVT HalfVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: HalfWidth);
1838 if (isNarrowingProfitable(SrcVT: VT, DestVT: HalfVT) &&
1839 isTypeDesirableForOp(ISD::SHL, VT: HalfVT) &&
1840 isTruncateFree(FromVT: VT, ToVT: HalfVT) && isZExtFree(FromTy: HalfVT, ToTy: VT) &&
1841 (!TLO.LegalOperations() || isOperationLegal(Op: ISD::SHL, VT: HalfVT))) {
1842 // If we're demanding the upper bits at all, we must ensure
1843 // that the upper bits of the shift result are known to be zero,
1844 // which is equivalent to the narrow shift being NUW.
1845 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1846 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1847 SDNodeFlags Flags;
1848 Flags.setNoSignedWrap(IsNSW);
1849 Flags.setNoUnsignedWrap(IsNUW);
1850 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HalfVT, Operand: Op0);
1851 SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
1852 Val: ShAmt, VT: HalfVT, DL: dl, LegalTypes: TLO.LegalTypes());
1853 SDValue NewShift = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HalfVT, N1: NewOp,
1854 N2: NewShiftAmt, Flags);
1855 SDValue NewExt =
1856 TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: NewShift);
1857 return TLO.CombineTo(O: Op, N: NewExt);
1858 }
1859 }
1860 }
1861 } else {
1862 // This is a variable shift, so we can't shift the demand mask by a known
1863 // amount. But if we are not demanding high bits, then we are not
1864 // demanding those bits from the pre-shifted operand either.
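      // e.g. if only the low 8 bits of (shl X, Y) are demanded, then only
      // the low 8 bits of X can ever contribute, whatever Y is.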
1865 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1866 APInt DemandedFromOp(APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - CTLZ));
1867 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedFromOp, OriginalDemandedElts: DemandedElts, Known, TLO,
1868 Depth: Depth + 1)) {
1869 SDNodeFlags Flags = Op.getNode()->getFlags();
1870 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1871 // Disable the nsw and nuw flags. We can no longer guarantee that we
1872 // won't wrap after simplification.
1873 Flags.setNoSignedWrap(false);
1874 Flags.setNoUnsignedWrap(false);
1875 Op->setFlags(Flags);
1876 }
1877 return true;
1878 }
1879 Known.resetAll();
1880 }
1881 }
1882
1883 // If we are only demanding sign bits then we can use the shift source
1884 // directly.
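    // e.g. if X has 4 sign bits, (shl X, 1) and X agree on the top 3 bits,
    // so if only those bits are demanded we can use X directly.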
1885 if (const APInt *MaxSA =
1886 TLO.DAG.getValidMaximumShiftAmountConstant(V: Op, DemandedElts)) {
1887 unsigned ShAmt = MaxSA->getZExtValue();
1888 unsigned NumSignBits =
1889 TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1);
1890 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1891 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1892 return TLO.CombineTo(O: Op, N: Op0);
1893 }
1894 break;
1895 }
1896 case ISD::SRL: {
1897 SDValue Op0 = Op.getOperand(i: 0);
1898 SDValue Op1 = Op.getOperand(i: 1);
1899 EVT ShiftVT = Op1.getValueType();
1900
1901 // Try to match AVG patterns.
1902 if (SDValue AVG = combineShiftToAVG(Op, DAG&: TLO.DAG, TLI: *this, DemandedBits,
1903 DemandedElts, Depth: Depth + 1))
1904 return TLO.CombineTo(O: Op, N: AVG);
1905
1906 if (const APInt *SA =
1907 TLO.DAG.getValidShiftAmountConstant(V: Op, DemandedElts)) {
1908 unsigned ShAmt = SA->getZExtValue();
1909 if (ShAmt == 0)
1910 return TLO.CombineTo(O: Op, N: Op0);
1911
1912 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1913 // single shift. We can do this if the top bits (which are shifted out)
1914 // are never demanded.
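      // e.g. (srl (shl X, 2), 1) --> (shl X, 1) when the top bit of the
      // result is not demanded.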
1915 // TODO - support non-uniform vector amounts.
1916 if (Op0.getOpcode() == ISD::SHL) {
1917 if (!DemandedBits.intersects(RHS: APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: ShAmt))) {
1918 if (const APInt *SA2 =
1919 TLO.DAG.getValidShiftAmountConstant(V: Op0, DemandedElts)) {
1920 unsigned C1 = SA2->getZExtValue();
1921 unsigned Opc = ISD::SRL;
1922 int Diff = ShAmt - C1;
1923 if (Diff < 0) {
1924 Diff = -Diff;
1925 Opc = ISD::SHL;
1926 }
1927 SDValue NewSA = TLO.DAG.getConstant(Val: Diff, DL: dl, VT: ShiftVT);
1928 return TLO.CombineTo(
1929 O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, N1: Op0.getOperand(i: 0), N2: NewSA));
1930 }
1931 }
1932 }
1933
1934 APInt InDemandedMask = (DemandedBits << ShAmt);
1935
1936 // If the shift is exact, then it does demand the low bits (and knows that
1937 // they are zero).
1938 if (Op->getFlags().hasExact())
1939 InDemandedMask.setLowBits(ShAmt);
1940
1941 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1942 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
1943 if ((BitWidth % 2) == 0 && !VT.isVector()) {
1944 APInt HiBits = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth / 2);
1945 EVT HalfVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: BitWidth / 2);
1946 if (isNarrowingProfitable(SrcVT: VT, DestVT: HalfVT) &&
1947 isTypeDesirableForOp(ISD::SRL, VT: HalfVT) &&
1948 isTruncateFree(FromVT: VT, ToVT: HalfVT) && isZExtFree(FromTy: HalfVT, ToTy: VT) &&
1949 (!TLO.LegalOperations() || isOperationLegal(Op: ISD::SRL, VT: HalfVT)) &&
1950 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
1951 TLO.DAG.MaskedValueIsZero(Op: Op0, Mask: HiBits))) {
1952 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HalfVT, Operand: Op0);
1953 SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
1954 Val: ShAmt, VT: HalfVT, DL: dl, LegalTypes: TLO.LegalTypes());
1955 SDValue NewShift =
1956 TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HalfVT, N1: NewOp, N2: NewShiftAmt);
1957 return TLO.CombineTo(
1958 O: Op, N: TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: NewShift));
1959 }
1960 }
1961
1962 // Compute the new bits that are at the top now.
1963 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
1964 Depth: Depth + 1))
1965 return true;
1966 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1967 Known.Zero.lshrInPlace(ShiftAmt: ShAmt);
1968 Known.One.lshrInPlace(ShiftAmt: ShAmt);
1969 // High bits known zero.
1970 Known.Zero.setHighBits(ShAmt);
1971
1972 // Attempt to avoid multi-use ops if we don't need anything from them.
1973 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1974 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1975 Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1976 if (DemandedOp0) {
1977 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: DemandedOp0, N2: Op1);
1978 return TLO.CombineTo(O: Op, N: NewOp);
1979 }
1980 }
1981 } else {
1982 // Use generic knownbits computation as it has support for non-uniform
1983 // shift amounts.
1984 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1985 }
1986 break;
1987 }
1988 case ISD::SRA: {
1989 SDValue Op0 = Op.getOperand(i: 0);
1990 SDValue Op1 = Op.getOperand(i: 1);
1991 EVT ShiftVT = Op1.getValueType();
1992
1993 // If we only want bits that already match the signbit then we don't need
1994 // to shift.
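    // e.g. if X has 4 sign bits, (sra X, 2) and X agree on the top 4 bits,
    // so if only those bits are demanded we can use X directly.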
1995 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
1996 if (TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1) >=
1997 NumHiDemandedBits)
1998 return TLO.CombineTo(O: Op, N: Op0);
1999
2000 // If this is an arithmetic shift right and only the low-bit is set, we can
2001 // always convert this into a logical shr, even if the shift amount is
2002 // variable. The low bit of the shift cannot be an input sign bit unless
2003 // the shift amount is >= the size of the datatype, which is undefined.
2004 if (DemandedBits.isOne())
2005 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1));
2006
2007 // Try to match AVG patterns.
2008 if (SDValue AVG = combineShiftToAVG(Op, DAG&: TLO.DAG, TLI: *this, DemandedBits,
2009 DemandedElts, Depth: Depth + 1))
2010 return TLO.CombineTo(O: Op, N: AVG);
2011
2012 if (const APInt *SA =
2013 TLO.DAG.getValidShiftAmountConstant(V: Op, DemandedElts)) {
2014 unsigned ShAmt = SA->getZExtValue();
2015 if (ShAmt == 0)
2016 return TLO.CombineTo(O: Op, N: Op0);
2017
2018 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2019 // supports sext_inreg.
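      // e.g. for i32: (sra (shl X, 24), 24) -> sext_inreg X, i8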
2020 if (Op0.getOpcode() == ISD::SHL) {
2021 if (const APInt *InnerSA =
2022 TLO.DAG.getValidShiftAmountConstant(V: Op0, DemandedElts)) {
2023 unsigned LowBits = BitWidth - ShAmt;
2024 EVT ExtVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: LowBits);
2025 if (VT.isVector())
2026 ExtVT = EVT::getVectorVT(Context&: *TLO.DAG.getContext(), VT: ExtVT,
2027 EC: VT.getVectorElementCount());
2028
2029 if (*InnerSA == ShAmt) {
2030 if (!TLO.LegalOperations() ||
2031 getOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: ExtVT) == Legal)
2032 return TLO.CombineTo(
2033 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT,
2034 N1: Op0.getOperand(i: 0),
2035 N2: TLO.DAG.getValueType(ExtVT)));
2036
2037 // Even if we can't convert to sext_inreg, we might be able to
2038 // remove this shift pair if the input is already sign extended.
2039 unsigned NumSignBits =
2040 TLO.DAG.ComputeNumSignBits(Op: Op0.getOperand(i: 0), DemandedElts);
2041 if (NumSignBits > ShAmt)
2042 return TLO.CombineTo(O: Op, N: Op0.getOperand(i: 0));
2043 }
2044 }
2045 }
2046
2047 APInt InDemandedMask = (DemandedBits << ShAmt);
2048
2049 // If the shift is exact, then it does demand the low bits (and knows that
2050 // they are zero).
2051 if (Op->getFlags().hasExact())
2052 InDemandedMask.setLowBits(ShAmt);
2053
2054 // If any of the demanded bits are produced by the sign extension, we also
2055 // demand the input sign bit.
2056 if (DemandedBits.countl_zero() < ShAmt)
2057 InDemandedMask.setSignBit();
2058
2059 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
2060 Depth: Depth + 1))
2061 return true;
2062 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2063 Known.Zero.lshrInPlace(ShiftAmt: ShAmt);
2064 Known.One.lshrInPlace(ShiftAmt: ShAmt);
2065
2066 // If the input sign bit is known to be zero, or if none of the top bits
2067 // are demanded, turn this into an unsigned shift right.
2068 if (Known.Zero[BitWidth - ShAmt - 1] ||
2069 DemandedBits.countl_zero() >= ShAmt) {
2070 SDNodeFlags Flags;
2071 Flags.setExact(Op->getFlags().hasExact());
2072 return TLO.CombineTo(
2073 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1, Flags));
2074 }
2075
2076 int Log2 = DemandedBits.exactLogBase2();
2077 if (Log2 >= 0) {
2078 // The bit must come from the sign.
2079 SDValue NewSA = TLO.DAG.getConstant(Val: BitWidth - 1 - Log2, DL: dl, VT: ShiftVT);
2080 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: NewSA));
2081 }
2082
2083 if (Known.One[BitWidth - ShAmt - 1])
2084 // New bits are known one.
2085 Known.One.setHighBits(ShAmt);
2086
2087 // Attempt to avoid multi-use ops if we don't need anything from them.
2088 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2089 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2090 Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2091 if (DemandedOp0) {
2092 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: DemandedOp0, N2: Op1);
2093 return TLO.CombineTo(O: Op, N: NewOp);
2094 }
2095 }
2096 }
2097 break;
2098 }
2099 case ISD::FSHL:
2100 case ISD::FSHR: {
2101 SDValue Op0 = Op.getOperand(i: 0);
2102 SDValue Op1 = Op.getOperand(i: 1);
2103 SDValue Op2 = Op.getOperand(i: 2);
2104 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2105
2106 if (ConstantSDNode *SA = isConstOrConstSplat(N: Op2, DemandedElts)) {
2107 unsigned Amt = SA->getAPIntValue().urem(RHS: BitWidth);
2108
2109 // For fshl, 0-shift returns the 1st arg.
2110 // For fshr, 0-shift returns the 2nd arg.
2111 if (Amt == 0) {
2112 if (SimplifyDemandedBits(Op: IsFSHL ? Op0 : Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
2113 Known, TLO, Depth: Depth + 1))
2114 return true;
2115 break;
2116 }
2117
2118 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2119 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2120 APInt Demanded0 = DemandedBits.lshr(shiftAmt: IsFSHL ? Amt : (BitWidth - Amt));
2121 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2122 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: Demanded0, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2123 Depth: Depth + 1))
2124 return true;
2125 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: Demanded1, OriginalDemandedElts: DemandedElts, Known, TLO,
2126 Depth: Depth + 1))
2127 return true;
2128
2129 Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2130 Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2131 Known.One.lshrInPlace(ShiftAmt: IsFSHL ? (BitWidth - Amt) : Amt);
2132 Known.Zero.lshrInPlace(ShiftAmt: IsFSHL ? (BitWidth - Amt) : Amt);
2133 Known = Known.unionWith(RHS: Known2);
2134
2135 // Attempt to avoid multi-use ops if we don't need anything from them.
2136 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2137 !DemandedElts.isAllOnes()) {
2138 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2139 Op: Op0, DemandedBits: Demanded0, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2140 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2141 Op: Op1, DemandedBits: Demanded1, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2142 if (DemandedOp0 || DemandedOp1) {
2143 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2144 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2145 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedOp0,
2146 N2: DemandedOp1, N3: Op2);
2147 return TLO.CombineTo(O: Op, N: NewOp);
2148 }
2149 }
2150 }
2151
    // For pow-2 bitwidths the shift amount is taken modulo the bit width,
    // so we only demand its low log2(BitWidth) bits.
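    // e.g. for i32, BitWidth - 1 == 31 == 0b11111, so only the low 5 bits
    // of the shift amount are demanded.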
2153 if (isPowerOf2_32(Value: BitWidth)) {
2154 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2155 if (SimplifyDemandedBits(Op: Op2, OriginalDemandedBits: DemandedAmtBits, OriginalDemandedElts: DemandedElts,
2156 Known&: Known2, TLO, Depth: Depth + 1))
2157 return true;
2158 }
2159 break;
2160 }
2161 case ISD::ROTL:
2162 case ISD::ROTR: {
2163 SDValue Op0 = Op.getOperand(i: 0);
2164 SDValue Op1 = Op.getOperand(i: 1);
2165 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2166
    // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
2168 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1))
2169 return TLO.CombineTo(O: Op, N: Op0);
2170
2171 if (ConstantSDNode *SA = isConstOrConstSplat(N: Op1, DemandedElts)) {
2172 unsigned Amt = SA->getAPIntValue().urem(RHS: BitWidth);
2173 unsigned RevAmt = BitWidth - Amt;
2174
2175 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2176 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2177 APInt Demanded0 = DemandedBits.rotr(rotateAmt: IsROTL ? Amt : RevAmt);
2178 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: Demanded0, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2179 Depth: Depth + 1))
2180 return true;
2181
2182 // rot*(x, 0) --> x
2183 if (Amt == 0)
2184 return TLO.CombineTo(O: Op, N: Op0);
2185
      // If the bits rotated in from one side are never demanded, the rotate
      // can be replaced by a plain shift.
2187 if ((!TLO.LegalOperations() || isOperationLegal(Op: ISD::SHL, VT)) &&
2188 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2189 Op1 = TLO.DAG.getConstant(Val: IsROTL ? Amt : RevAmt, DL: dl, VT: Op1.getValueType());
2190 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op0, N2: Op1));
2191 }
2192 if ((!TLO.LegalOperations() || isOperationLegal(Op: ISD::SRL, VT)) &&
2193 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2194 Op1 = TLO.DAG.getConstant(Val: IsROTL ? RevAmt : Amt, DL: dl, VT: Op1.getValueType());
2195 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1));
2196 }
2197 }
2198
    // For pow-2 bitwidths the rotate amount is taken modulo the bit width,
    // so we only demand its low log2(BitWidth) bits.
2200 if (isPowerOf2_32(Value: BitWidth)) {
2201 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2202 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedAmtBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2203 Depth: Depth + 1))
2204 return true;
2205 }
2206 break;
2207 }
2208 case ISD::SMIN:
2209 case ISD::SMAX:
2210 case ISD::UMIN:
2211 case ISD::UMAX: {
2212 unsigned Opc = Op.getOpcode();
2213 SDValue Op0 = Op.getOperand(i: 0);
2214 SDValue Op1 = Op.getOperand(i: 1);
2215
    // If we're only demanding sign bits, then we can simplify to an OR/AND
    // node.
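    // e.g. with 0/-1 operands: smin/umax behave as 'or', smax/umin as 'and'.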
2217 unsigned BitOp =
2218 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2219 unsigned NumSignBits =
2220 std::min(a: TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1),
2221 b: TLO.DAG.ComputeNumSignBits(Op: Op1, DemandedElts, Depth: Depth + 1));
2222 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2223 if (NumSignBits >= NumDemandedUpperBits)
2224 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: BitOp, DL: SDLoc(Op), VT, N1: Op0, N2: Op1));
2225
2226 // Check if one arg is always less/greater than (or equal) to the other arg.
2227 KnownBits Known0 = TLO.DAG.computeKnownBits(Op: Op0, DemandedElts, Depth: Depth + 1);
2228 KnownBits Known1 = TLO.DAG.computeKnownBits(Op: Op1, DemandedElts, Depth: Depth + 1);
2229 switch (Opc) {
2230 case ISD::SMIN:
2231 if (std::optional<bool> IsSLE = KnownBits::sle(LHS: Known0, RHS: Known1))
2232 return TLO.CombineTo(O: Op, N: *IsSLE ? Op0 : Op1);
2233 if (std::optional<bool> IsSLT = KnownBits::slt(LHS: Known0, RHS: Known1))
2234 return TLO.CombineTo(O: Op, N: *IsSLT ? Op0 : Op1);
2235 Known = KnownBits::smin(LHS: Known0, RHS: Known1);
2236 break;
2237 case ISD::SMAX:
2238 if (std::optional<bool> IsSGE = KnownBits::sge(LHS: Known0, RHS: Known1))
2239 return TLO.CombineTo(O: Op, N: *IsSGE ? Op0 : Op1);
2240 if (std::optional<bool> IsSGT = KnownBits::sgt(LHS: Known0, RHS: Known1))
2241 return TLO.CombineTo(O: Op, N: *IsSGT ? Op0 : Op1);
2242 Known = KnownBits::smax(LHS: Known0, RHS: Known1);
2243 break;
2244 case ISD::UMIN:
2245 if (std::optional<bool> IsULE = KnownBits::ule(LHS: Known0, RHS: Known1))
2246 return TLO.CombineTo(O: Op, N: *IsULE ? Op0 : Op1);
2247 if (std::optional<bool> IsULT = KnownBits::ult(LHS: Known0, RHS: Known1))
2248 return TLO.CombineTo(O: Op, N: *IsULT ? Op0 : Op1);
2249 Known = KnownBits::umin(LHS: Known0, RHS: Known1);
2250 break;
2251 case ISD::UMAX:
2252 if (std::optional<bool> IsUGE = KnownBits::uge(LHS: Known0, RHS: Known1))
2253 return TLO.CombineTo(O: Op, N: *IsUGE ? Op0 : Op1);
2254 if (std::optional<bool> IsUGT = KnownBits::ugt(LHS: Known0, RHS: Known1))
2255 return TLO.CombineTo(O: Op, N: *IsUGT ? Op0 : Op1);
2256 Known = KnownBits::umax(LHS: Known0, RHS: Known1);
2257 break;
2258 }
2259 break;
2260 }
2261 case ISD::BITREVERSE: {
2262 SDValue Src = Op.getOperand(i: 0);
2263 APInt DemandedSrcBits = DemandedBits.reverseBits();
2264 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2265 Depth: Depth + 1))
2266 return true;
2267 Known.One = Known2.One.reverseBits();
2268 Known.Zero = Known2.Zero.reverseBits();
2269 break;
2270 }
2271 case ISD::BSWAP: {
2272 SDValue Src = Op.getOperand(i: 0);
2273
2274 // If the only bits demanded come from one byte of the bswap result,
2275 // just shift the input byte into position to eliminate the bswap.
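    // e.g. for i32: if only bits 7:0 are demanded, bswap X --> srl X, 24,
    // which moves the original top byte into the low byte.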
2276 unsigned NLZ = DemandedBits.countl_zero();
2277 unsigned NTZ = DemandedBits.countr_zero();
2278
    // Round NTZ down to the nearest byte boundary: if we have 11 trailing
    // zeros, we need all the bits down to bit 8. Likewise, round NLZ down:
    // if we have 14 leading zeros, round to 8.
2282 NLZ = alignDown(Value: NLZ, Align: 8);
2283 NTZ = alignDown(Value: NTZ, Align: 8);
2284 // If we need exactly one byte, we can do this transformation.
2285 if (BitWidth - NLZ - NTZ == 8) {
2286 // Replace this with either a left or right shift to get the byte into
2287 // the right place.
2288 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2289 if (!TLO.LegalOperations() || isOperationLegal(Op: ShiftOpcode, VT)) {
2290 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2291 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(Val: ShiftAmount, VT, DL: dl);
2292 SDValue NewOp = TLO.DAG.getNode(Opcode: ShiftOpcode, DL: dl, VT, N1: Src, N2: ShAmt);
2293 return TLO.CombineTo(O: Op, N: NewOp);
2294 }
2295 }
2296
2297 APInt DemandedSrcBits = DemandedBits.byteSwap();
2298 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2299 Depth: Depth + 1))
2300 return true;
2301 Known.One = Known2.One.byteSwap();
2302 Known.Zero = Known2.Zero.byteSwap();
2303 break;
2304 }
2305 case ISD::CTPOP: {
2306 // If only 1 bit is demanded, replace with PARITY as long as we're before
2307 // op legalization.
2308 // FIXME: Limit to scalars for now.
2309 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2310 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::PARITY, DL: dl, VT,
2311 Operand: Op.getOperand(i: 0)));
2312
2313 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2314 break;
2315 }
2316 case ISD::SIGN_EXTEND_INREG: {
2317 SDValue Op0 = Op.getOperand(i: 0);
2318 EVT ExVT = cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT();
2319 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2320
2321 // If we only care about the highest bit, don't bother shifting right.
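    // e.g. for i32: if only bit 31 of (sext_inreg X, i8) is demanded, it
    // can be computed as (shl X, 24).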
2322 if (DemandedBits.isSignMask()) {
2323 unsigned MinSignedBits =
2324 TLO.DAG.ComputeMaxSignificantBits(Op: Op0, DemandedElts, Depth: Depth + 1);
2325 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
      // However, if the input is already sign extended, we expect the sign
      // extension to be dropped altogether later and do not simplify.
2328 if (!AlreadySignExtended) {
2329 // Compute the correct shift amount type, which must be getShiftAmountTy
2330 // for scalar types after legalization.
2331 SDValue ShiftAmt =
2332 TLO.DAG.getShiftAmountConstant(Val: BitWidth - ExVTBits, VT, DL: dl);
2333 return TLO.CombineTo(O: Op,
2334 N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op0, N2: ShiftAmt));
2335 }
2336 }
2337
2338 // If none of the extended bits are demanded, eliminate the sextinreg.
2339 if (DemandedBits.getActiveBits() <= ExVTBits)
2340 return TLO.CombineTo(O: Op, N: Op0);
2341
2342 APInt InputDemandedBits = DemandedBits.getLoBits(numBits: ExVTBits);
2343
2344 // Since the sign extended bits are demanded, we know that the sign
2345 // bit is demanded.
2346 InputDemandedBits.setBit(ExVTBits - 1);
2347
2348 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InputDemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
2349 Depth: Depth + 1))
2350 return true;
2351 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2352
2353 // If the sign bit of the input is known set or clear, then we know the
2354 // top bits of the result.
2355
2356 // If the input sign bit is known zero, convert this into a zero extension.
2357 if (Known.Zero[ExVTBits - 1])
2358 return TLO.CombineTo(O: Op, N: TLO.DAG.getZeroExtendInReg(Op: Op0, DL: dl, VT: ExVT));
2359
2360 APInt Mask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ExVTBits);
2361 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2362 Known.One.setBitsFrom(ExVTBits);
2363 Known.Zero &= Mask;
2364 } else { // Input sign bit unknown
2365 Known.Zero &= Mask;
2366 Known.One &= Mask;
2367 }
2368 break;
2369 }
2370 case ISD::BUILD_PAIR: {
2371 EVT HalfVT = Op.getOperand(i: 0).getValueType();
2372 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2373
2374 APInt MaskLo = DemandedBits.getLoBits(numBits: HalfBitWidth).trunc(width: HalfBitWidth);
2375 APInt MaskHi = DemandedBits.getHiBits(numBits: HalfBitWidth).trunc(width: HalfBitWidth);
2376
2377 KnownBits KnownLo, KnownHi;
2378
2379 if (SimplifyDemandedBits(Op: Op.getOperand(i: 0), DemandedBits: MaskLo, Known&: KnownLo, TLO, Depth: Depth + 1))
2380 return true;
2381
2382 if (SimplifyDemandedBits(Op: Op.getOperand(i: 1), DemandedBits: MaskHi, Known&: KnownHi, TLO, Depth: Depth + 1))
2383 return true;
2384
2385 Known = KnownHi.concat(Lo: KnownLo);
2386 break;
2387 }
2388 case ISD::ZERO_EXTEND_VECTOR_INREG:
2389 if (VT.isScalableVector())
2390 return false;
2391 [[fallthrough]];
2392 case ISD::ZERO_EXTEND: {
2393 SDValue Src = Op.getOperand(i: 0);
2394 EVT SrcVT = Src.getValueType();
2395 unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts =
        SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2397 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2398
2399 // If none of the top bits are demanded, convert this into an any_extend.
2400 if (DemandedBits.getActiveBits() <= InBits) {
2401 // If we only need the non-extended bits of the bottom element
2402 // then we can just bitcast to the result.
2403 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2404 VT.getSizeInBits() == SrcVT.getSizeInBits())
2405 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2406
2407 unsigned Opc =
2408 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2409 if (!TLO.LegalOperations() || isOperationLegal(Op: Opc, VT))
2410 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src));
2411 }
2412
2413 SDNodeFlags Flags = Op->getFlags();
2414 APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2415 APInt InDemandedElts = DemandedElts.zext(width: InElts);
2416 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2417 Depth: Depth + 1)) {
2418 if (Flags.hasNonNeg()) {
2419 Flags.setNonNeg(false);
2420 Op->setFlags(Flags);
2421 }
2422 return true;
2423 }
2424 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2425 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2426 Known = Known.zext(BitWidth);
2427
2428 // Attempt to avoid multi-use ops if we don't need anything from them.
2429 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2430 Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
2431 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2432 break;
2433 }
2434 case ISD::SIGN_EXTEND_VECTOR_INREG:
2435 if (VT.isScalableVector())
2436 return false;
2437 [[fallthrough]];
2438 case ISD::SIGN_EXTEND: {
2439 SDValue Src = Op.getOperand(i: 0);
2440 EVT SrcVT = Src.getValueType();
2441 unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts =
        SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2443 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2444
2445 APInt InDemandedElts = DemandedElts.zext(width: InElts);
2446 APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2447
2448 // Since some of the sign extended bits are demanded, we know that the sign
2449 // bit is demanded.
2450 InDemandedBits.setBit(InBits - 1);
2451
2452 // If none of the top bits are demanded, convert this into an any_extend.
2453 if (DemandedBits.getActiveBits() <= InBits) {
2454 // If we only need the non-extended bits of the bottom element
2455 // then we can just bitcast to the result.
2456 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2457 VT.getSizeInBits() == SrcVT.getSizeInBits())
2458 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2459
2460 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2461 if (getBooleanContents(Type: VT) != ZeroOrNegativeOneBooleanContent ||
2462 TLO.DAG.ComputeNumSignBits(Op: Src, DemandedElts: InDemandedElts, Depth: Depth + 1) !=
2463 InBits) {
2464 unsigned Opc =
2465 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2466 if (!TLO.LegalOperations() || isOperationLegal(Op: Opc, VT))
2467 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src));
2468 }
2469 }
2470
2471 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2472 Depth: Depth + 1))
2473 return true;
2474 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2475 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2476
2477 // If the sign bit is known one, the top bits match.
2478 Known = Known.sext(BitWidth);
2479
2480 // If the sign bit is known zero, convert this to a zero extend.
2481 if (Known.isNonNegative()) {
2482 unsigned Opc =
2483 IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2484 if (!TLO.LegalOperations() || isOperationLegal(Op: Opc, VT)) {
2485 SDNodeFlags Flags;
2486 if (!IsVecInReg)
2487 Flags.setNonNeg(true);
2488 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src, Flags));
2489 }
2490 }
2491
2492 // Attempt to avoid multi-use ops if we don't need anything from them.
2493 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2494 Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
2495 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2496 break;
2497 }
2498 case ISD::ANY_EXTEND_VECTOR_INREG:
2499 if (VT.isScalableVector())
2500 return false;
2501 [[fallthrough]];
2502 case ISD::ANY_EXTEND: {
2503 SDValue Src = Op.getOperand(i: 0);
2504 EVT SrcVT = Src.getValueType();
2505 unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts =
        SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2507 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2508
2509 // If we only need the bottom element then we can just bitcast.
2510 // TODO: Handle ANY_EXTEND?
2511 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2512 VT.getSizeInBits() == SrcVT.getSizeInBits())
2513 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2514
2515 APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2516 APInt InDemandedElts = DemandedElts.zext(width: InElts);
2517 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2518 Depth: Depth + 1))
2519 return true;
2520 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2521 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2522 Known = Known.anyext(BitWidth);
2523
2524 // Attempt to avoid multi-use ops if we don't need anything from them.
2525 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2526 Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
2527 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2528 break;
2529 }
2530 case ISD::TRUNCATE: {
2531 SDValue Src = Op.getOperand(i: 0);
2532
2533 // Simplify the input, using demanded bit information, and compute the known
2534 // zero/one bits live out.
2535 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2536 APInt TruncMask = DemandedBits.zext(width: OperandBitWidth);
2537 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: TruncMask, OriginalDemandedElts: DemandedElts, Known, TLO,
2538 Depth: Depth + 1))
2539 return true;
2540 Known = Known.trunc(BitWidth);
2541
2542 // Attempt to avoid multi-use ops if we don't need anything from them.
2543 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2544 Op: Src, DemandedBits: TruncMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
2545 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: NewSrc));
2546
2547 // If the input is only used by this truncate, see if we can shrink it based
2548 // on the known demanded bits.
2549 switch (Src.getOpcode()) {
2550 default:
2551 break;
2552 case ISD::SRL:
2553 // Shrink SRL by a constant if none of the high bits shifted in are
2554 // demanded.
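      // e.g. (i32 (truncate (srl i64:X, 16))) --> (srl (truncate X), 16)
      // when only bits 15:0 of the result are demanded.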
2555 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2556 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2557 // undesirable.
2558 break;
2559
2560 if (Src.getNode()->hasOneUse()) {
2561 const APInt *ShAmtC =
2562 TLO.DAG.getValidShiftAmountConstant(V: Src, DemandedElts);
2563 if (!ShAmtC || ShAmtC->uge(RHS: BitWidth))
2564 break;
2565 uint64_t ShVal = ShAmtC->getZExtValue();
2566
2567 APInt HighBits =
2568 APInt::getHighBitsSet(numBits: OperandBitWidth, hiBitsSet: OperandBitWidth - BitWidth);
2569 HighBits.lshrInPlace(ShiftAmt: ShVal);
2570 HighBits = HighBits.trunc(width: BitWidth);
2571
2572 if (!(HighBits & DemandedBits)) {
2573 // None of the shifted in bits are needed. Add a truncate of the
2574 // shift input, then shift it.
2575 SDValue NewShAmt =
2576 TLO.DAG.getShiftAmountConstant(Val: ShVal, VT, DL: dl, LegalTypes: TLO.LegalTypes());
2577 SDValue NewTrunc =
2578 TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Src.getOperand(i: 0));
2579 return TLO.CombineTo(
2580 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: NewTrunc, N2: NewShAmt));
2581 }
2582 }
2583 break;
2584 }
2585
2586 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2587 break;
2588 }
2589 case ISD::AssertZext: {
2590 // AssertZext demands all of the high bits, plus any of the low bits
2591 // demanded by its users.
2592 EVT ZVT = cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT();
2593 APInt InMask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ZVT.getSizeInBits());
2594 if (SimplifyDemandedBits(Op: Op.getOperand(i: 0), DemandedBits: ~InMask | DemandedBits, Known,
2595 TLO, Depth: Depth + 1))
2596 return true;
2597 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2598
2599 Known.Zero |= ~InMask;
2600 Known.One &= (~Known.Zero);
2601 break;
2602 }
2603 case ISD::EXTRACT_VECTOR_ELT: {
2604 SDValue Src = Op.getOperand(i: 0);
2605 SDValue Idx = Op.getOperand(i: 1);
2606 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2607 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2608
2609 if (SrcEltCnt.isScalable())
2610 return false;
2611
2612 // Demand the bits from every vector element without a constant index.
2613 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2614 APInt DemandedSrcElts = APInt::getAllOnes(numBits: NumSrcElts);
2615 if (auto *CIdx = dyn_cast<ConstantSDNode>(Val&: Idx))
2616 if (CIdx->getAPIntValue().ult(RHS: NumSrcElts))
2617 DemandedSrcElts = APInt::getOneBitSet(numBits: NumSrcElts, BitNo: CIdx->getZExtValue());
2618
    // If BitWidth > EltBitWidth the value is any-extended, so we do not know
    // anything about the extended bits.
2621 APInt DemandedSrcBits = DemandedBits;
2622 if (BitWidth > EltBitWidth)
2623 DemandedSrcBits = DemandedSrcBits.trunc(width: EltBitWidth);
2624
2625 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts, Known&: Known2, TLO,
2626 Depth: Depth + 1))
2627 return true;
2628
2629 // Attempt to avoid multi-use ops if we don't need anything from them.
2630 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2631 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2632 Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1)) {
2633 SDValue NewOp =
2634 TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedSrc, N2: Idx);
2635 return TLO.CombineTo(O: Op, N: NewOp);
2636 }
2637 }
2638
2639 Known = Known2;
2640 if (BitWidth > EltBitWidth)
2641 Known = Known.anyext(BitWidth);
2642 break;
2643 }
2644 case ISD::BITCAST: {
2645 if (VT.isScalableVector())
2646 return false;
2647 SDValue Src = Op.getOperand(i: 0);
2648 EVT SrcVT = Src.getValueType();
2649 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2650
2651 // If this is an FP->Int bitcast and if the sign bit is the only
2652 // thing demanded, turn this into a FGETSIGN.
2653 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2654 DemandedBits == APInt::getSignMask(BitWidth: Op.getValueSizeInBits()) &&
2655 SrcVT.isFloatingPoint()) {
2656 bool OpVTLegal = isOperationLegalOrCustom(Op: ISD::FGETSIGN, VT);
2657 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2658 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2659 SrcVT != MVT::f128) {
2660 // Cannot eliminate/lower SHL for f128 yet.
2661 EVT Ty = OpVTLegal ? VT : MVT::i32;
2662 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2663 // place. We expect the SHL to be eliminated by other optimizations.
2664 SDValue Sign = TLO.DAG.getNode(Opcode: ISD::FGETSIGN, DL: dl, VT: Ty, Operand: Src);
2665 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2666 if (!OpVTLegal && OpVTSizeInBits > 32)
2667 Sign = TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Sign);
2668 unsigned ShVal = Op.getValueSizeInBits() - 1;
2669 SDValue ShAmt = TLO.DAG.getConstant(Val: ShVal, DL: dl, VT);
2670 return TLO.CombineTo(O: Op,
2671 N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Sign, N2: ShAmt));
2672 }
2673 }
2674
2675 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2676 // Demand the elt/bit if any of the original elts/bits are demanded.
2677 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2678 unsigned Scale = BitWidth / NumSrcEltBits;
2679 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2680 APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
2681 APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
2682 for (unsigned i = 0; i != Scale; ++i) {
2683 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2684 unsigned BitOffset = EltOffset * NumSrcEltBits;
2685 APInt Sub = DemandedBits.extractBits(numBits: NumSrcEltBits, bitPosition: BitOffset);
2686 if (!Sub.isZero()) {
2687 DemandedSrcBits |= Sub;
2688 for (unsigned j = 0; j != NumElts; ++j)
2689 if (DemandedElts[j])
2690 DemandedSrcElts.setBit((j * Scale) + i);
2691 }
2692 }
2693
2694 APInt KnownSrcUndef, KnownSrcZero;
2695 if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedSrcElts, KnownUndef&: KnownSrcUndef,
2696 KnownZero&: KnownSrcZero, TLO, Depth: Depth + 1))
2697 return true;
2698
2699 KnownBits KnownSrcBits;
2700 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts,
2701 Known&: KnownSrcBits, TLO, Depth: Depth + 1))
2702 return true;
2703 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2704 // TODO - bigendian once we have test coverage.
2705 unsigned Scale = NumSrcEltBits / BitWidth;
2706 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2707 APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
2708 APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
2709 for (unsigned i = 0; i != NumElts; ++i)
2710 if (DemandedElts[i]) {
2711 unsigned Offset = (i % Scale) * BitWidth;
2712 DemandedSrcBits.insertBits(SubBits: DemandedBits, bitPosition: Offset);
2713 DemandedSrcElts.setBit(i / Scale);
2714 }
2715
2716 if (SrcVT.isVector()) {
2717 APInt KnownSrcUndef, KnownSrcZero;
2718 if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedSrcElts, KnownUndef&: KnownSrcUndef,
2719 KnownZero&: KnownSrcZero, TLO, Depth: Depth + 1))
2720 return true;
2721 }
2722
2723 KnownBits KnownSrcBits;
2724 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts,
2725 Known&: KnownSrcBits, TLO, Depth: Depth + 1))
2726 return true;
2727
2728 // Attempt to avoid multi-use ops if we don't need anything from them.
2729 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2730 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2731 Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1)) {
2732 SDValue NewOp = TLO.DAG.getBitcast(VT, V: DemandedSrc);
2733 return TLO.CombineTo(O: Op, N: NewOp);
2734 }
2735 }
2736 }
2737
2738 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2739 // recursive call where Known may be useful to the caller.
2740 if (Depth > 0) {
2741 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2742 return false;
2743 }
2744 break;
2745 }
2746 case ISD::MUL:
2747 if (DemandedBits.isPowerOf2()) {
2748 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2749 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2750 // odd (has LSB set), then the left-shifted low bit of X is the answer.
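// e.g. (illustrative): if only bit 3 of (X * 24) is demanded, then since
// 24 == 3 << 3 with 3 odd, bit 3 of the product is bit 0 of X, and the
// multiply can be rewritten as (X << 3).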
2751 unsigned CTZ = DemandedBits.countr_zero();
2752 ConstantSDNode *C = isConstOrConstSplat(N: Op.getOperand(i: 1), DemandedElts);
2753 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2754 SDValue AmtC = TLO.DAG.getShiftAmountConstant(Val: CTZ, VT, DL: dl);
2755 SDValue Shl = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op.getOperand(i: 0), N2: AmtC);
2756 return TLO.CombineTo(O: Op, N: Shl);
2757 }
2758 }
2759 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2760 // bit[0]: X * X is odd iff X is odd.
2761 // bit[1]: writing X = 2a + X[0], X * X = 4a^2 + 4a*X[0] + X[0], so bit 1 is always zero.
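// e.g. (illustrative): if only bits [1:0] of (X * X) are demanded, the
// result simplifies to (X & 1).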
2762 if (Op.getOperand(i: 0) == Op.getOperand(i: 1) && DemandedBits.ult(RHS: 4)) {
2763 SDValue One = TLO.DAG.getConstant(Val: 1, DL: dl, VT);
2764 SDValue And1 = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op.getOperand(i: 0), N2: One);
2765 return TLO.CombineTo(O: Op, N: And1);
2766 }
2767 [[fallthrough]];
2768 case ISD::ADD:
2769 case ISD::SUB: {
2770 // Add, Sub, and Mul don't demand any bits in positions beyond that
2771 // of the highest bit demanded of them.
2772 SDValue Op0 = Op.getOperand(i: 0), Op1 = Op.getOperand(i: 1);
2773 SDNodeFlags Flags = Op.getNode()->getFlags();
2774 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2775 APInt LoMask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - DemandedBitsLZ);
2776 KnownBits KnownOp0, KnownOp1;
2777 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: LoMask, OriginalDemandedElts: DemandedElts, Known&: KnownOp0, TLO,
2778 Depth: Depth + 1) ||
2779 SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: LoMask, OriginalDemandedElts: DemandedElts, Known&: KnownOp1, TLO,
2780 Depth: Depth + 1) ||
2781 // See if the operation should be performed at a smaller bit width.
2782 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2783 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
2784 // Disable the nsw and nuw flags. We can no longer guarantee that we
2785 // won't wrap after simplification.
2786 Flags.setNoSignedWrap(false);
2787 Flags.setNoUnsignedWrap(false);
2788 Op->setFlags(Flags);
2789 }
2790 return true;
2791 }
2792
2793 // neg x with only low bit demanded is simply x.
2794 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2795 isNullConstant(V: Op0))
2796 return TLO.CombineTo(O: Op, N: Op1);
2797
2798 // Attempt to avoid multi-use ops if we don't need anything from them.
2799 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2800 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2801 Op: Op0, DemandedBits: LoMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2802 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2803 Op: Op1, DemandedBits: LoMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2804 if (DemandedOp0 || DemandedOp1) {
2805 Flags.setNoSignedWrap(false);
2806 Flags.setNoUnsignedWrap(false);
2807 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2808 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2809 SDValue NewOp =
2810 TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1, Flags);
2811 return TLO.CombineTo(O: Op, N: NewOp);
2812 }
2813 }
2814
2815 // If we have a constant operand, we may be able to turn it into -1 if we
2816 // do not demand the high bits. This can make the constant smaller to
2817 // encode, allow more general folding, or match specialized instruction
2818 // patterns (e.g., 'blsr' on x86). Don't bother changing 1 to -1 because that
2819 // is probably not useful (and could be detrimental).
2820 ConstantSDNode *C = isConstOrConstSplat(N: Op1);
2821 APInt HighMask = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: DemandedBitsLZ);
2822 if (C && !C->isAllOnes() && !C->isOne() &&
2823 (C->getAPIntValue() | HighMask).isAllOnes()) {
2824 SDValue Neg1 = TLO.DAG.getAllOnesConstant(DL: dl, VT);
2825 // Disable the nsw and nuw flags. We can no longer guarantee that we
2826 // won't wrap after simplification.
2827 Flags.setNoSignedWrap(false);
2828 Flags.setNoUnsignedWrap(false);
2829 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Neg1, Flags);
2830 return TLO.CombineTo(O: Op, N: NewOp);
2831 }
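// e.g. (illustrative): (add X, 255) with only the low 8 bits demanded can
// be rewritten as (add X, -1), which is typically cheaper to encode.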
2832
2833 // Match a multiply with a disguised negated-power-of-2 and convert to
2834 // an equivalent shift-left amount.
2835 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2836 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2837 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2838 return 0;
2839
2840 // Don't touch opaque constants. Also, ignore zero and power-of-2
2841 // multiplies. Those will get folded later.
2842 ConstantSDNode *MulC = isConstOrConstSplat(N: Mul.getOperand(i: 1));
2843 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2844 !MulC->getAPIntValue().isPowerOf2()) {
2845 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2846 if (UnmaskedC.isNegatedPowerOf2())
2847 return (-UnmaskedC).logBase2();
2848 }
2849 return 0;
2850 };
2851
2852 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2853 unsigned ShlAmt) {
2854 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(Val: ShlAmt, VT, DL: dl);
2855 SDValue Shl = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: X, N2: ShlAmtC);
2856 SDValue Res = TLO.DAG.getNode(Opcode: NT, DL: dl, VT, N1: Y, N2: Shl);
2857 return TLO.CombineTo(O: Op, N: Res);
2858 };
2859
2860 if (isOperationLegalOrCustom(Op: ISD::SHL, VT)) {
2861 if (Op.getOpcode() == ISD::ADD) {
2862 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2863 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2864 return foldMul(ISD::SUB, Op0.getOperand(i: 0), Op1, ShAmt);
2865 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2866 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2867 return foldMul(ISD::SUB, Op1.getOperand(i: 0), Op0, ShAmt);
2868 }
2869 if (Op.getOpcode() == ISD::SUB) {
2870 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2871 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2872 return foldMul(ISD::ADD, Op1.getOperand(i: 0), Op0, ShAmt);
2873 }
2874 }
2875
2876 if (Op.getOpcode() == ISD::MUL) {
2877 Known = KnownBits::mul(LHS: KnownOp0, RHS: KnownOp1);
2878 } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2879 Known = KnownBits::computeForAddSub(Add: Op.getOpcode() == ISD::ADD,
2880 NSW: Flags.hasNoSignedWrap(), LHS: KnownOp0,
2881 RHS: KnownOp1);
2882 }
2883 break;
2884 }
2885 default:
2886 // We also ask the target about intrinsics (which could be specific to it).
2887 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2888 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2889 // TODO: Probably okay to remove after audit; here to reduce change size
2890 // in initial enablement patch for scalable vectors
2891 if (Op.getValueType().isScalableVector())
2892 break;
2893 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2894 Known, TLO, Depth))
2895 return true;
2896 break;
2897 }
2898
2899 // Just use computeKnownBits to compute output bits.
2900 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2901 break;
2902 }
2903
2904 // If we know the value of all of the demanded bits, return this as a
2905 // constant.
2906 if (!isTargetCanonicalConstantNode(Op) &&
2907 DemandedBits.isSubsetOf(RHS: Known.Zero | Known.One)) {
2908 // Avoid folding to a constant if any OpaqueConstant is involved.
2909 const SDNode *N = Op.getNode();
2910 for (SDNode *Op :
2911 llvm::make_range(x: SDNodeIterator::begin(N), y: SDNodeIterator::end(N))) {
2912 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: Op))
2913 if (C->isOpaque())
2914 return false;
2915 }
2916 if (VT.isInteger())
2917 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: Known.One, DL: dl, VT));
2918 if (VT.isFloatingPoint())
2919 return TLO.CombineTo(
2920 O: Op,
2921 N: TLO.DAG.getConstantFP(
2922 Val: APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), DL: dl, VT));
2923 }
2924
2925 // A multi-use 'all demanded elts' simplify failed to find any known bits.
2926 // Try again just for the original demanded elts.
2927 // Ensure we do this AFTER constant folding above.
2928 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
2929 Known = TLO.DAG.computeKnownBits(Op, DemandedElts: OriginalDemandedElts, Depth);
2930
2931 return false;
2932}
2933
2934bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2935 const APInt &DemandedElts,
2936 DAGCombinerInfo &DCI) const {
2937 SelectionDAG &DAG = DCI.DAG;
2938 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2939 !DCI.isBeforeLegalizeOps());
2940
2941 APInt KnownUndef, KnownZero;
2942 bool Simplified =
2943 SimplifyDemandedVectorElts(Op, DemandedEltMask: DemandedElts, KnownUndef, KnownZero, TLO);
2944 if (Simplified) {
2945 DCI.AddToWorklist(N: Op.getNode());
2946 DCI.CommitTargetLoweringOpt(TLO);
2947 }
2948
2949 return Simplified;
2950}
2951
2952/// Given a vector binary operation and known undefined elements for each input
2953/// operand, compute whether each element of the output is undefined.
2954static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
2955 const APInt &UndefOp0,
2956 const APInt &UndefOp1) {
2957 EVT VT = BO.getValueType();
2958 assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
2959 "Vector binop only");
2960
2961 EVT EltVT = VT.getVectorElementType();
2962 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
2963 assert(UndefOp0.getBitWidth() == NumElts &&
2964 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2965
2966 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2967 const APInt &UndefVals) {
2968 if (UndefVals[Index])
2969 return DAG.getUNDEF(VT: EltVT);
2970
2971 if (auto *BV = dyn_cast<BuildVectorSDNode>(Val&: V)) {
2972 // Try hard to make sure that the getNode() call is not creating temporary
2973 // nodes. Ignore opaque integers because they do not constant fold.
2974 SDValue Elt = BV->getOperand(Num: Index);
2975 auto *C = dyn_cast<ConstantSDNode>(Val&: Elt);
2976 if (isa<ConstantFPSDNode>(Val: Elt) || Elt.isUndef() || (C && !C->isOpaque()))
2977 return Elt;
2978 }
2979
2980 return SDValue();
2981 };
2982
2983 APInt KnownUndef = APInt::getZero(numBits: NumElts);
2984 for (unsigned i = 0; i != NumElts; ++i) {
2985 // If both inputs for this element are either constant or undef and match
2986 // the element type, compute the constant/undef result for this element of
2987 // the vector.
2988 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2989 // not handle FP constants. The code within getNode() should be refactored
2990 // to avoid the danger of creating a bogus temporary node here.
2991 SDValue C0 = getUndefOrConstantElt(BO.getOperand(i: 0), i, UndefOp0);
2992 SDValue C1 = getUndefOrConstantElt(BO.getOperand(i: 1), i, UndefOp1);
2993 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
2994 if (DAG.getNode(Opcode: BO.getOpcode(), DL: SDLoc(BO), VT: EltVT, N1: C0, N2: C1).isUndef())
2995 KnownUndef.setBit(i);
2996 }
2997 return KnownUndef;
2998}
2999
3000bool TargetLowering::SimplifyDemandedVectorElts(
3001 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3002 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3003 bool AssumeSingleUse) const {
3004 EVT VT = Op.getValueType();
3005 unsigned Opcode = Op.getOpcode();
3006 APInt DemandedElts = OriginalDemandedElts;
3007 unsigned NumElts = DemandedElts.getBitWidth();
3008 assert(VT.isVector() && "Expected vector op");
3009
3010 KnownUndef = KnownZero = APInt::getZero(numBits: NumElts);
3011
3012 const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
3013 if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO))
3014 return false;
3015
3016 // TODO: For now we assume we know nothing about scalable vectors.
3017 if (VT.isScalableVector())
3018 return false;
3019
3020 assert(VT.getVectorNumElements() == NumElts &&
3021 "Mask size mismatches value type element count!");
3022
3023 // Undef operand.
3024 if (Op.isUndef()) {
3025 KnownUndef.setAllBits();
3026 return false;
3027 }
3028
3029 // If Op has other users, assume that all elements are needed.
3030 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3031 DemandedElts.setAllBits();
3032
3033 // Not demanding any elements from Op.
3034 if (DemandedElts == 0) {
3035 KnownUndef.setAllBits();
3036 return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3037 }
3038
3039 // Limit search depth.
3040 if (Depth >= SelectionDAG::MaxRecursionDepth)
3041 return false;
3042
3043 SDLoc DL(Op);
3044 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3045 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3046
3047 // Helper for demanding the specified elements and all the bits of both binary
3048 // operands.
3049 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3050 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op: Op0, DemandedElts,
3051 DAG&: TLO.DAG, Depth: Depth + 1);
3052 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op: Op1, DemandedElts,
3053 DAG&: TLO.DAG, Depth: Depth + 1);
3054 if (NewOp0 || NewOp1) {
3055 SDValue NewOp =
3056 TLO.DAG.getNode(Opcode, DL: SDLoc(Op), VT, N1: NewOp0 ? NewOp0 : Op0,
3057 N2: NewOp1 ? NewOp1 : Op1, Flags: Op->getFlags());
3058 return TLO.CombineTo(O: Op, N: NewOp);
3059 }
3060 return false;
3061 };
3062
3063 switch (Opcode) {
3064 case ISD::SCALAR_TO_VECTOR: {
3065 if (!DemandedElts[0]) {
3066 KnownUndef.setAllBits();
3067 return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3068 }
3069 SDValue ScalarSrc = Op.getOperand(i: 0);
3070 if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3071 SDValue Src = ScalarSrc.getOperand(i: 0);
3072 SDValue Idx = ScalarSrc.getOperand(i: 1);
3073 EVT SrcVT = Src.getValueType();
3074
3075 ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
3076
3077 if (SrcEltCnt.isScalable())
3078 return false;
3079
3080 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
3081 if (isNullConstant(V: Idx)) {
3082 APInt SrcDemandedElts = APInt::getOneBitSet(numBits: NumSrcElts, BitNo: 0);
3083 APInt SrcUndef = KnownUndef.zextOrTrunc(width: NumSrcElts);
3084 APInt SrcZero = KnownZero.zextOrTrunc(width: NumSrcElts);
3085 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
3086 TLO, Depth: Depth + 1))
3087 return true;
3088 }
3089 }
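// Only element 0 is defined by SCALAR_TO_VECTOR; all higher elements are
// implicitly undef.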
3090 KnownUndef.setHighBits(NumElts - 1);
3091 break;
3092 }
3093 case ISD::BITCAST: {
3094 SDValue Src = Op.getOperand(i: 0);
3095 EVT SrcVT = Src.getValueType();
3096
3097 // We only handle vectors here.
3098 // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
3099 if (!SrcVT.isVector())
3100 break;
3101
3102 // Fast handling of 'identity' bitcasts.
3103 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3104 if (NumSrcElts == NumElts)
3105 return SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedElts, KnownUndef,
3106 KnownZero, TLO, Depth: Depth + 1);
3107
3108 APInt SrcDemandedElts, SrcZero, SrcUndef;
3109
3110 // Bitcast from a 'large element' src vector to a 'small element' vector: we
3111 // must demand a source element if any DemandedElt maps to it.
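// e.g. (illustrative): for a v2i64 -> v4i32 bitcast, demanding i32 element 1
// requires demanding i64 source element 0.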
3112 if ((NumElts % NumSrcElts) == 0) {
3113 unsigned Scale = NumElts / NumSrcElts;
3114 SrcDemandedElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
3115 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
3116 TLO, Depth: Depth + 1))
3117 return true;
3118
3119 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3120 // of the large element.
3121 // TODO - bigendian once we have test coverage.
3122 if (IsLE) {
3123 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3124 APInt SrcDemandedBits = APInt::getZero(numBits: SrcEltSizeInBits);
3125 for (unsigned i = 0; i != NumElts; ++i)
3126 if (DemandedElts[i]) {
3127 unsigned Ofs = (i % Scale) * EltSizeInBits;
3128 SrcDemandedBits.setBits(loBit: Ofs, hiBit: Ofs + EltSizeInBits);
3129 }
3130
3131 KnownBits Known;
3132 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: SrcDemandedBits, OriginalDemandedElts: SrcDemandedElts, Known,
3133 TLO, Depth: Depth + 1))
3134 return true;
3135
3136 // The bitcast has split each wide element into a number of
3137 // narrow subelements. We have just computed the Known bits
3138 // for wide elements. See if element splitting results in
3139 // some subelements being zero. Only for demanded elements!
3140 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3141 if (!Known.Zero.extractBits(numBits: EltSizeInBits, bitPosition: SubElt * EltSizeInBits)
3142 .isAllOnes())
3143 continue;
3144 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3145 unsigned Elt = Scale * SrcElt + SubElt;
3146 if (DemandedElts[Elt])
3147 KnownZero.setBit(Elt);
3148 }
3149 }
3150 }
3151
3152 // If the src element is zero/undef then all the output elements covering it
3153 // will be as well - only demanded elements are guaranteed to be correct.
3154 for (unsigned i = 0; i != NumSrcElts; ++i) {
3155 if (SrcDemandedElts[i]) {
3156 if (SrcZero[i])
3157 KnownZero.setBits(loBit: i * Scale, hiBit: (i + 1) * Scale);
3158 if (SrcUndef[i])
3159 KnownUndef.setBits(loBit: i * Scale, hiBit: (i + 1) * Scale);
3160 }
3161 }
3162 }
3163
3164 // Bitcast from a 'small element' src vector to a 'large element' vector: we
3165 // demand all the smaller source elements covered by the larger demanded
3166 // element of this vector.
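// e.g. (illustrative): for a v4i32 -> v2i64 bitcast, demanding i64 element 0
// requires demanding i32 source elements 0 and 1.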
3167 if ((NumSrcElts % NumElts) == 0) {
3168 unsigned Scale = NumSrcElts / NumElts;
3169 SrcDemandedElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
3170 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
3171 TLO, Depth: Depth + 1))
3172 return true;
3173
3174 // If all the src elements covering an output element are zero/undef, then
3175 // the output element will be as well, assuming it was demanded.
3176 for (unsigned i = 0; i != NumElts; ++i) {
3177 if (DemandedElts[i]) {
3178 if (SrcZero.extractBits(numBits: Scale, bitPosition: i * Scale).isAllOnes())
3179 KnownZero.setBit(i);
3180 if (SrcUndef.extractBits(numBits: Scale, bitPosition: i * Scale).isAllOnes())
3181 KnownUndef.setBit(i);
3182 }
3183 }
3184 }
3185 break;
3186 }
3187 case ISD::BUILD_VECTOR: {
3188 // Check all elements and simplify any unused elements with UNDEF.
3189 if (!DemandedElts.isAllOnes()) {
3190 // Don't simplify BROADCASTS.
3191 if (llvm::any_of(Range: Op->op_values(),
3192 P: [&](SDValue Elt) { return Op.getOperand(i: 0) != Elt; })) {
3193 SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
3194 bool Updated = false;
3195 for (unsigned i = 0; i != NumElts; ++i) {
3196 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3197 Ops[i] = TLO.DAG.getUNDEF(VT: Ops[0].getValueType());
3198 KnownUndef.setBit(i);
3199 Updated = true;
3200 }
3201 }
3202 if (Updated)
3203 return TLO.CombineTo(O: Op, N: TLO.DAG.getBuildVector(VT, DL, Ops));
3204 }
3205 }
3206 for (unsigned i = 0; i != NumElts; ++i) {
3207 SDValue SrcOp = Op.getOperand(i);
3208 if (SrcOp.isUndef()) {
3209 KnownUndef.setBit(i);
3210 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3211 (isNullConstant(V: SrcOp) || isNullFPConstant(V: SrcOp))) {
3212 KnownZero.setBit(i);
3213 }
3214 }
3215 break;
3216 }
3217 case ISD::CONCAT_VECTORS: {
3218 EVT SubVT = Op.getOperand(i: 0).getValueType();
3219 unsigned NumSubVecs = Op.getNumOperands();
3220 unsigned NumSubElts = SubVT.getVectorNumElements();
3221 for (unsigned i = 0; i != NumSubVecs; ++i) {
3222 SDValue SubOp = Op.getOperand(i);
3223 APInt SubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
3224 APInt SubUndef, SubZero;
3225 if (SimplifyDemandedVectorElts(Op: SubOp, OriginalDemandedElts: SubElts, KnownUndef&: SubUndef, KnownZero&: SubZero, TLO,
3226 Depth: Depth + 1))
3227 return true;
3228 KnownUndef.insertBits(SubBits: SubUndef, bitPosition: i * NumSubElts);
3229 KnownZero.insertBits(SubBits: SubZero, bitPosition: i * NumSubElts);
3230 }
3231
3232 // Attempt to avoid multi-use ops if we don't need anything from them.
3233 if (!DemandedElts.isAllOnes()) {
3234 bool FoundNewSub = false;
3235 SmallVector<SDValue, 2> DemandedSubOps;
3236 for (unsigned i = 0; i != NumSubVecs; ++i) {
3237 SDValue SubOp = Op.getOperand(i);
3238 APInt SubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
3239 SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3240 Op: SubOp, DemandedElts: SubElts, DAG&: TLO.DAG, Depth: Depth + 1);
3241 DemandedSubOps.push_back(Elt: NewSubOp ? NewSubOp : SubOp);
3242 FoundNewSub = NewSubOp ? true : FoundNewSub;
3243 }
3244 if (FoundNewSub) {
3245 SDValue NewOp =
3246 TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc(Op), VT, Ops: DemandedSubOps);
3247 return TLO.CombineTo(O: Op, N: NewOp);
3248 }
3249 }
3250 break;
3251 }
3252 case ISD::INSERT_SUBVECTOR: {
3253 // Demand any elements from the subvector and the remainder from the src it
3254 // is inserted into.
3255 SDValue Src = Op.getOperand(i: 0);
3256 SDValue Sub = Op.getOperand(i: 1);
3257 uint64_t Idx = Op.getConstantOperandVal(i: 2);
3258 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3259 APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx);
3260 APInt DemandedSrcElts = DemandedElts;
3261 DemandedSrcElts.insertBits(SubBits: APInt::getZero(numBits: NumSubElts), bitPosition: Idx);
3262
3263 APInt SubUndef, SubZero;
3264 if (SimplifyDemandedVectorElts(Op: Sub, OriginalDemandedElts: DemandedSubElts, KnownUndef&: SubUndef, KnownZero&: SubZero, TLO,
3265 Depth: Depth + 1))
3266 return true;
3267
3268 // If none of the src operand elements are demanded, replace it with undef.
3269 if (!DemandedSrcElts && !Src.isUndef())
3270 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT,
3271 N1: TLO.DAG.getUNDEF(VT), N2: Sub,
3272 N3: Op.getOperand(i: 2)));
3273
3274 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef, KnownZero,
3275 TLO, Depth: Depth + 1))
3276 return true;
3277 KnownUndef.insertBits(SubBits: SubUndef, bitPosition: Idx);
3278 KnownZero.insertBits(SubBits: SubZero, bitPosition: Idx);
3279
3280 // Attempt to avoid multi-use ops if we don't need anything from them.
3281 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3282 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3283 Op: Src, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1);
3284 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3285 Op: Sub, DemandedElts: DemandedSubElts, DAG&: TLO.DAG, Depth: Depth + 1);
3286 if (NewSrc || NewSub) {
3287 NewSrc = NewSrc ? NewSrc : Src;
3288 NewSub = NewSub ? NewSub : Sub;
3289 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc(Op), VT, N1: NewSrc,
3290 N2: NewSub, N3: Op.getOperand(i: 2));
3291 return TLO.CombineTo(O: Op, N: NewOp);
3292 }
3293 }
3294 break;
3295 }
3296 case ISD::EXTRACT_SUBVECTOR: {
3297 // Offset the demanded elts by the subvector index.
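// e.g. (illustrative): extracting a v2i32 subvector from v8i32 at index 4
// with element 1 demanded requires demanding source element 5.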
3298 SDValue Src = Op.getOperand(i: 0);
3299 if (Src.getValueType().isScalableVector())
3300 break;
3301 uint64_t Idx = Op.getConstantOperandVal(i: 1);
3302 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3303 APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts).shl(shiftAmt: Idx);
3304
3305 APInt SrcUndef, SrcZero;
3306 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3307 Depth: Depth + 1))
3308 return true;
3309 KnownUndef = SrcUndef.extractBits(numBits: NumElts, bitPosition: Idx);
3310 KnownZero = SrcZero.extractBits(numBits: NumElts, bitPosition: Idx);
3311
3312 // Attempt to avoid multi-use ops if we don't need anything from them.
3313 if (!DemandedElts.isAllOnes()) {
3314 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3315 Op: Src, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1);
3316 if (NewSrc) {
3317 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc(Op), VT, N1: NewSrc,
3318 N2: Op.getOperand(i: 1));
3319 return TLO.CombineTo(O: Op, N: NewOp);
3320 }
3321 }
3322 break;
3323 }
3324 case ISD::INSERT_VECTOR_ELT: {
3325 SDValue Vec = Op.getOperand(i: 0);
3326 SDValue Scl = Op.getOperand(i: 1);
3327 auto *CIdx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));
3328
3329 // For a legal, constant insertion index, if we don't need this insertion
3330 // then strip it, else remove it from the demanded elts.
3331 if (CIdx && CIdx->getAPIntValue().ult(RHS: NumElts)) {
3332 unsigned Idx = CIdx->getZExtValue();
3333 if (!DemandedElts[Idx])
3334 return TLO.CombineTo(O: Op, N: Vec);
3335
3336 APInt DemandedVecElts(DemandedElts);
3337 DemandedVecElts.clearBit(BitPosition: Idx);
3338 if (SimplifyDemandedVectorElts(Op: Vec, OriginalDemandedElts: DemandedVecElts, KnownUndef,
3339 KnownZero, TLO, Depth: Depth + 1))
3340 return true;
3341
3342 KnownUndef.setBitVal(BitPosition: Idx, BitValue: Scl.isUndef());
3343
3344 KnownZero.setBitVal(BitPosition: Idx, BitValue: isNullConstant(V: Scl) || isNullFPConstant(V: Scl));
3345 break;
3346 }
3347
3348 APInt VecUndef, VecZero;
3349 if (SimplifyDemandedVectorElts(Op: Vec, OriginalDemandedElts: DemandedElts, KnownUndef&: VecUndef, KnownZero&: VecZero, TLO,
3350 Depth: Depth + 1))
3351 return true;
3352 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3353 break;
3354 }
3355 case ISD::VSELECT: {
3356 SDValue Sel = Op.getOperand(i: 0);
3357 SDValue LHS = Op.getOperand(i: 1);
3358 SDValue RHS = Op.getOperand(i: 2);
3359
3360 // Try to transform the select condition based on the current demanded
3361 // elements.
3362 APInt UndefSel, ZeroSel;
3363 if (SimplifyDemandedVectorElts(Op: Sel, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefSel, KnownZero&: ZeroSel, TLO,
3364 Depth: Depth + 1))
3365 return true;
3366
3367 // See if we can simplify either vselect operand.
3368 APInt DemandedLHS(DemandedElts);
3369 APInt DemandedRHS(DemandedElts);
3370 APInt UndefLHS, ZeroLHS;
3371 APInt UndefRHS, ZeroRHS;
3372 if (SimplifyDemandedVectorElts(Op: LHS, OriginalDemandedElts: DemandedLHS, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3373 Depth: Depth + 1))
3374 return true;
3375 if (SimplifyDemandedVectorElts(Op: RHS, OriginalDemandedElts: DemandedRHS, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3376 Depth: Depth + 1))
3377 return true;
3378
3379 KnownUndef = UndefLHS & UndefRHS;
3380 KnownZero = ZeroLHS & ZeroRHS;
3381
3382 // If we know that the selected element is always zero, we don't need the
3383 // select value element.
3384 APInt DemandedSel = DemandedElts & ~KnownZero;
3385 if (DemandedSel != DemandedElts)
3386 if (SimplifyDemandedVectorElts(Op: Sel, OriginalDemandedElts: DemandedSel, KnownUndef&: UndefSel, KnownZero&: ZeroSel, TLO,
3387 Depth: Depth + 1))
3388 return true;
3389
3390 break;
3391 }
3392 case ISD::VECTOR_SHUFFLE: {
3393 SDValue LHS = Op.getOperand(i: 0);
3394 SDValue RHS = Op.getOperand(i: 1);
3395 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Val&: Op)->getMask();
3396
3397 // Collect demanded elements from the shuffle operands.
3398 APInt DemandedLHS(NumElts, 0);
3399 APInt DemandedRHS(NumElts, 0);
3400 for (unsigned i = 0; i != NumElts; ++i) {
3401 int M = ShuffleMask[i];
3402 if (M < 0 || !DemandedElts[i])
3403 continue;
3404 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3405 if (M < (int)NumElts)
3406 DemandedLHS.setBit(M);
3407 else
3408 DemandedRHS.setBit(M - NumElts);
3409 }
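// e.g. (illustrative): for a v4i32 shuffle mask <0, 5, 2, 7> with all result
// elements demanded, LHS elements {0, 2} and RHS elements {1, 3} are demanded.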
3410
3411 // See if we can simplify either shuffle operand.
3412 APInt UndefLHS, ZeroLHS;
3413 APInt UndefRHS, ZeroRHS;
3414 if (SimplifyDemandedVectorElts(Op: LHS, OriginalDemandedElts: DemandedLHS, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3415 Depth: Depth + 1))
3416 return true;
3417 if (SimplifyDemandedVectorElts(Op: RHS, OriginalDemandedElts: DemandedRHS, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3418 Depth: Depth + 1))
3419 return true;
3420
3421 // Simplify mask using undef elements from LHS/RHS.
3422 bool Updated = false;
3423 bool IdentityLHS = true, IdentityRHS = true;
3424 SmallVector<int, 32> NewMask(ShuffleMask);
3425 for (unsigned i = 0; i != NumElts; ++i) {
3426 int &M = NewMask[i];
3427 if (M < 0)
3428 continue;
3429 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3430 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3431 Updated = true;
3432 M = -1;
3433 }
3434 IdentityLHS &= (M < 0) || (M == (int)i);
3435 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3436 }
3437
3438 // Update legal shuffle masks based on demanded elements if doing so won't
3439 // reduce the mask to an identity, which could cause premature removal of the shuffle.
3440 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3441 SDValue LegalShuffle =
3442 buildLegalVectorShuffle(VT, DL, N0: LHS, N1: RHS, Mask: NewMask, DAG&: TLO.DAG);
3443 if (LegalShuffle)
3444 return TLO.CombineTo(O: Op, N: LegalShuffle);
3445 }
3446
3447 // Propagate undef/zero elements from LHS/RHS.
3448 for (unsigned i = 0; i != NumElts; ++i) {
3449 int M = ShuffleMask[i];
3450 if (M < 0) {
3451 KnownUndef.setBit(i);
3452 } else if (M < (int)NumElts) {
3453 if (UndefLHS[M])
3454 KnownUndef.setBit(i);
3455 if (ZeroLHS[M])
3456 KnownZero.setBit(i);
3457 } else {
3458 if (UndefRHS[M - NumElts])
3459 KnownUndef.setBit(i);
3460 if (ZeroRHS[M - NumElts])
3461 KnownZero.setBit(i);
3462 }
3463 }
3464 break;
3465 }
3466 case ISD::ANY_EXTEND_VECTOR_INREG:
3467 case ISD::SIGN_EXTEND_VECTOR_INREG:
3468 case ISD::ZERO_EXTEND_VECTOR_INREG: {
3469 APInt SrcUndef, SrcZero;
3470 SDValue Src = Op.getOperand(i: 0);
3471 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3472 APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts);
3473 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3474 Depth: Depth + 1))
3475 return true;
3476 KnownZero = SrcZero.zextOrTrunc(width: NumElts);
3477 KnownUndef = SrcUndef.zextOrTrunc(width: NumElts);
3478
3479 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3480 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3481 DemandedSrcElts == 1) {
3482 // aext - if we just need the bottom element then we can bitcast.
3483 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
3484 }
3485
3486 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3487 // zext(undef) upper bits are guaranteed to be zero.
3488 if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3489 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: SDLoc(Op), VT));
3490 KnownUndef.clearAllBits();
3491
3492 // zext - if we just need the bottom element then we can mask:
3493 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3494 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3495 Op->isOnlyUserOf(N: Src.getNode()) &&
3496 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3497 SDLoc DL(Op);
3498 EVT SrcVT = Src.getValueType();
3499 EVT SrcSVT = SrcVT.getScalarType();
3500 SmallVector<SDValue> MaskElts;
3501 MaskElts.push_back(Elt: TLO.DAG.getAllOnesConstant(DL, VT: SrcSVT));
3502 MaskElts.append(NumInputs: NumSrcElts - 1, Elt: TLO.DAG.getConstant(Val: 0, DL, VT: SrcSVT));
3503 SDValue Mask = TLO.DAG.getBuildVector(VT: SrcVT, DL, Ops: MaskElts);
3504 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3505 Opcode: ISD::AND, DL, VT: SrcVT, Ops: {Src.getOperand(i: 1), Mask})) {
3506 Fold = TLO.DAG.getNode(Opcode: ISD::AND, DL, VT: SrcVT, N1: Src.getOperand(i: 0), N2: Fold);
3507 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Fold));
3508 }
3509 }
3510 }
3511 break;
3512 }
3513
3514 // TODO: There are more binop opcodes that could be handled here - MIN,
3515 // MAX, saturated math, etc.
3516 case ISD::ADD: {
3517 SDValue Op0 = Op.getOperand(i: 0);
3518 SDValue Op1 = Op.getOperand(i: 1);
3519 if (Op0 == Op1 && Op->isOnlyUserOf(N: Op0.getNode())) {
3520 APInt UndefLHS, ZeroLHS;
3521 if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3522 Depth: Depth + 1, /*AssumeSingleUse*/ true))
3523 return true;
3524 }
3525 [[fallthrough]];
3526 }
3527 case ISD::OR:
3528 case ISD::XOR:
3529 case ISD::SUB:
3530 case ISD::FADD:
3531 case ISD::FSUB:
3532 case ISD::FMUL:
3533 case ISD::FDIV:
3534 case ISD::FREM: {
3535 SDValue Op0 = Op.getOperand(i: 0);
3536 SDValue Op1 = Op.getOperand(i: 1);
3537
3538 APInt UndefRHS, ZeroRHS;
3539 if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3540 Depth: Depth + 1))
3541 return true;
3542 APInt UndefLHS, ZeroLHS;
3543 if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3544 Depth: Depth + 1))
3545 return true;
3546
3547 KnownZero = ZeroLHS & ZeroRHS;
3548 KnownUndef = getKnownUndefForVectorBinop(BO: Op, DAG&: TLO.DAG, UndefOp0: UndefLHS, UndefOp1: UndefRHS);
3549
3550 // Attempt to avoid multi-use ops if we don't need anything from them.
3551 // TODO - use KnownUndef to relax the demandedelts?
3552 if (!DemandedElts.isAllOnes())
3553 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3554 return true;
3555 break;
3556 }
3557 case ISD::SHL:
3558 case ISD::SRL:
3559 case ISD::SRA:
3560 case ISD::ROTL:
3561 case ISD::ROTR: {
3562 SDValue Op0 = Op.getOperand(i: 0);
3563 SDValue Op1 = Op.getOperand(i: 1);
3564
3565 APInt UndefRHS, ZeroRHS;
3566 if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3567 Depth: Depth + 1))
3568 return true;
3569 APInt UndefLHS, ZeroLHS;
3570 if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3571 Depth: Depth + 1))
3572 return true;
3573
3574 KnownZero = ZeroLHS;
3575 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3576
3577 // Attempt to avoid multi-use ops if we don't need anything from them.
3578 // TODO - use KnownUndef to relax the demandedelts?
3579 if (!DemandedElts.isAllOnes())
3580 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3581 return true;
3582 break;
3583 }
3584 case ISD::MUL:
3585 case ISD::MULHU:
3586 case ISD::MULHS:
3587 case ISD::AND: {
3588 SDValue Op0 = Op.getOperand(i: 0);
3589 SDValue Op1 = Op.getOperand(i: 1);
3590
3591 APInt SrcUndef, SrcZero;
3592 if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3593 Depth: Depth + 1))
3594 return true;
3595 // If we know that a demanded element was zero in Op1 we don't need to
3596 // demand it in Op0 - it's guaranteed to be zero.
3597 APInt DemandedElts0 = DemandedElts & ~SrcZero;
3598 if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts0, KnownUndef, KnownZero,
3599 TLO, Depth: Depth + 1))
3600 return true;
3601
3602 KnownUndef &= DemandedElts0;
3603 KnownZero &= DemandedElts0;
3604
3605 // If every element pair has a zero/undef then just fold to zero.
3606 // fold (and x, undef) -> 0 / (and x, 0) -> 0
3607 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3608 if (DemandedElts.isSubsetOf(RHS: SrcZero | KnownZero | SrcUndef | KnownUndef))
3609 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: SDLoc(Op), VT));
3610
3611 // If either side has a zero element, then the result element is zero, even
3612 // if the other is an UNDEF.
3613 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3614 // and then handle 'and' nodes with the rest of the binop opcodes.
3615 KnownZero |= SrcZero;
3616 KnownUndef &= SrcUndef;
3617 KnownUndef &= ~KnownZero;
3618
3619 // Attempt to avoid multi-use ops if we don't need anything from them.
3620 if (!DemandedElts.isAllOnes())
3621 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3622 return true;
3623 break;
3624 }
3625 case ISD::TRUNCATE:
3626 case ISD::SIGN_EXTEND:
3627 case ISD::ZERO_EXTEND:
3628 if (SimplifyDemandedVectorElts(Op: Op.getOperand(i: 0), OriginalDemandedElts: DemandedElts, KnownUndef,
3629 KnownZero, TLO, Depth: Depth + 1))
3630 return true;
3631
3632 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3633 // zext(undef) upper bits are guaranteed to be zero.
3634 if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3635 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: SDLoc(Op), VT));
3636 KnownUndef.clearAllBits();
3637 }
3638 break;
3639 default: {
3640 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3641 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3642 KnownZero, TLO, Depth))
3643 return true;
3644 } else {
3645 KnownBits Known;
3646 APInt DemandedBits = APInt::getAllOnes(numBits: EltSizeInBits);
3647 if (SimplifyDemandedBits(Op, OriginalDemandedBits: DemandedBits, OriginalDemandedElts, Known,
3648 TLO, Depth, AssumeSingleUse))
3649 return true;
3650 }
3651 break;
3652 }
3653 }
3654 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3655
3656 // Constant fold all undef cases.
3657 // TODO: Handle zero cases as well.
3658 if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3659 return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3660
3661 return false;
3662}
3663
3664 /// Determine which of the bits specified in Mask are known to be either zero
3665 /// or one and return them in Known.
3666void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3667 KnownBits &Known,
3668 const APInt &DemandedElts,
3669 const SelectionDAG &DAG,
3670 unsigned Depth) const {
3671 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3672 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3673 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3674 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3675 "Should use MaskedValueIsZero if you don't know whether Op"
3676 " is a target node!");
3677 Known.resetAll();
3678}
3679
3680void TargetLowering::computeKnownBitsForTargetInstr(
3681 GISelKnownBits &Analysis, Register R, KnownBits &Known,
3682 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3683 unsigned Depth) const {
3684 Known.resetAll();
3685}
3686
3687void TargetLowering::computeKnownBitsForFrameIndex(
3688 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3689 // The low bits are known zero if the pointer is aligned.
3690 Known.Zero.setLowBits(Log2(A: MF.getFrameInfo().getObjectAlign(ObjectIdx: FrameIdx)));
3691}
3692
3693Align TargetLowering::computeKnownAlignForTargetInstr(
3694 GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
3695 unsigned Depth) const {
3696 return Align(1);
3697}
3698
3699/// This method can be implemented by targets that want to expose additional
3700/// information about sign bits to the DAG Combiner.
3701unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3702 const APInt &,
3703 const SelectionDAG &,
3704 unsigned Depth) const {
3705 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3706 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3707 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3708 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3709 "Should use ComputeNumSignBits if you don't know whether Op"
3710 " is a target node!");
3711 return 1;
3712}
3713
3714unsigned TargetLowering::computeNumSignBitsForTargetInstr(
3715 GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
3716 const MachineRegisterInfo &MRI, unsigned Depth) const {
3717 return 1;
3718}
3719
3720bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3721 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3722 TargetLoweringOpt &TLO, unsigned Depth) const {
3723 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3724 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3725 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3726 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3727 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3728 " is a target node!");
3729 return false;
3730}
3731
3732bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3733 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3734 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3735 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3736 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3737 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3738 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3739 "Should use SimplifyDemandedBits if you don't know whether Op"
3740 " is a target node!");
3741 computeKnownBitsForTargetNode(Op, Known, DemandedElts, DAG: TLO.DAG, Depth);
3742 return false;
3743}
3744
3745SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3746 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3747 SelectionDAG &DAG, unsigned Depth) const {
3748 assert(
3749 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3750 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3751 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3752 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3753 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3754 " is a target node!");
3755 return SDValue();
3756}
3757
3758SDValue
3759TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3760 SDValue N1, MutableArrayRef<int> Mask,
3761 SelectionDAG &DAG) const {
3762 bool LegalMask = isShuffleMaskLegal(Mask, VT);
3763 if (!LegalMask) {
3764 std::swap(a&: N0, b&: N1);
3765 ShuffleVectorSDNode::commuteMask(Mask);
3766 LegalMask = isShuffleMaskLegal(Mask, VT);
3767 }
3768
3769 if (!LegalMask)
3770 return SDValue();
3771
3772 return DAG.getVectorShuffle(VT, dl: DL, N1: N0, N2: N1, Mask);
3773}
3774
3775const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
3776 return nullptr;
3777}
3778
3779bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
3780 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3781 bool PoisonOnly, unsigned Depth) const {
3782 assert(
3783 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3784 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3785 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3786 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3787 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3788 " is a target node!");
3789 return false;
3790}
3791
3792bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
3793 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3794 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
3795 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3796 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3797 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3798 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3799 "Should use canCreateUndefOrPoison if you don't know whether Op"
3800 " is a target node!");
3801 // Be conservative and return true.
3802 return true;
3803}
3804
3805bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
3806 const SelectionDAG &DAG,
3807 bool SNaN,
3808 unsigned Depth) const {
3809 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3810 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3811 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3812 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3813 "Should use isKnownNeverNaN if you don't know whether Op"
3814 " is a target node!");
3815 return false;
3816}
3817
3818bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
3819 const APInt &DemandedElts,
3820 APInt &UndefElts,
3821 const SelectionDAG &DAG,
3822 unsigned Depth) const {
3823 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3824 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3825 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3826 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3827 "Should use isSplatValue if you don't know whether Op"
3828 " is a target node!");
3829 return false;
3830}
3831
3832// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3833// work with truncating build vectors and vectors with elements of less than
3834// 8 bits.
3835bool TargetLowering::isConstTrueVal(SDValue N) const {
3836 if (!N)
3837 return false;
3838
3839 unsigned EltWidth;
3840 APInt CVal;
3841 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3842 /*AllowTruncation=*/true)) {
3843 CVal = CN->getAPIntValue();
3844 EltWidth = N.getValueType().getScalarSizeInBits();
3845 } else
3846 return false;
3847
3848 // If this is a truncating splat, truncate the splat value.
3849 // Otherwise, we may fail to match the expected values below.
3850 if (EltWidth < CVal.getBitWidth())
3851 CVal = CVal.trunc(width: EltWidth);
3852
3853 switch (getBooleanContents(Type: N.getValueType())) {
3854 case UndefinedBooleanContent:
3855 return CVal[0];
3856 case ZeroOrOneBooleanContent:
3857 return CVal.isOne();
3858 case ZeroOrNegativeOneBooleanContent:
3859 return CVal.isAllOnes();
3860 }
3861
3862 llvm_unreachable("Invalid boolean contents");
3863}
3864
3865bool TargetLowering::isConstFalseVal(SDValue N) const {
3866 if (!N)
3867 return false;
3868
3869 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: N);
3870 if (!CN) {
3871 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Val&: N);
3872 if (!BV)
3873 return false;
3874
3875 // Only interested in constant splats. We don't care about undef
3876 // elements when identifying boolean constants, and getConstantSplatNode
3877 // returns null if all ops are undef.
3878 CN = BV->getConstantSplatNode();
3879 if (!CN)
3880 return false;
3881 }
3882
3883 if (getBooleanContents(Type: N->getValueType(ResNo: 0)) == UndefinedBooleanContent)
3884 return !CN->getAPIntValue()[0];
3885
3886 return CN->isZero();
3887}
3888
3889bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
3890 bool SExt) const {
3891 if (VT == MVT::i1)
3892 return N->isOne();
3893
3894 TargetLowering::BooleanContent Cnt = getBooleanContents(Type: VT);
3895 switch (Cnt) {
3896 case TargetLowering::ZeroOrOneBooleanContent:
3897 // An extended value of 1 is always true, unless its original type is i1,
3898 // in which case it will be sign extended to -1.
3899 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
3900 case TargetLowering::UndefinedBooleanContent:
3901 case TargetLowering::ZeroOrNegativeOneBooleanContent:
3902 return N->isAllOnes() && SExt;
3903 }
3904 llvm_unreachable("Unexpected enumeration.");
3905}
3906
3907/// This helper function of SimplifySetCC tries to optimize the comparison when
3908/// either operand of the SetCC node is a bitwise-and instruction.
3909SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
3910 ISD::CondCode Cond, const SDLoc &DL,
3911 DAGCombinerInfo &DCI) const {
3912 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
3913 std::swap(a&: N0, b&: N1);
3914
3915 SelectionDAG &DAG = DCI.DAG;
3916 EVT OpVT = N0.getValueType();
3917 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
3918 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
3919 return SDValue();
3920
3921 // (X & Y) != 0 --> zextOrTrunc(X & Y)
3922 // iff everything but LSB is known zero:
3923 if (Cond == ISD::SETNE && isNullConstant(V: N1) &&
3924 (getBooleanContents(Type: OpVT) == TargetLowering::UndefinedBooleanContent ||
3925 getBooleanContents(Type: OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
3926 unsigned NumEltBits = OpVT.getScalarSizeInBits();
3927 APInt UpperBits = APInt::getHighBitsSet(numBits: NumEltBits, hiBitsSet: NumEltBits - 1);
3928 if (DAG.MaskedValueIsZero(Op: N0, Mask: UpperBits))
3929 return DAG.getBoolExtOrTrunc(Op: N0, SL: DL, VT, OpVT);
3930 }
3931
3932 // Try to eliminate a power-of-2 mask constant by converting to a signbit
3933 // test in a narrow type that we can truncate to with no cost. Examples:
3934 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
3935 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
3936 // TODO: This conservatively checks for type legality on the source and
3937 // destination types. That may inhibit optimizations, but it also
3938 // allows setcc->shift transforms that may be more beneficial.
3939 auto *AndC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
3940 if (AndC && isNullConstant(V: N1) && AndC->getAPIntValue().isPowerOf2() &&
3941 isTypeLegal(VT: OpVT) && N0.hasOneUse()) {
3942 EVT NarrowVT = EVT::getIntegerVT(Context&: *DAG.getContext(),
3943 BitWidth: AndC->getAPIntValue().getActiveBits());
3944 if (isTruncateFree(FromVT: OpVT, ToVT: NarrowVT) && isTypeLegal(VT: NarrowVT)) {
3945 SDValue Trunc = DAG.getZExtOrTrunc(Op: N0.getOperand(i: 0), DL, VT: NarrowVT);
3946 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: NarrowVT);
3947 return DAG.getSetCC(DL, VT, LHS: Trunc, RHS: Zero,
3948 Cond: Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
3949 }
3950 }
3951
3952 // Match these patterns in any of their permutations:
3953 // (X & Y) == Y
3954 // (X & Y) != Y
3955 SDValue X, Y;
3956 if (N0.getOperand(i: 0) == N1) {
3957 X = N0.getOperand(i: 1);
3958 Y = N0.getOperand(i: 0);
3959 } else if (N0.getOperand(i: 1) == N1) {
3960 X = N0.getOperand(i: 0);
3961 Y = N0.getOperand(i: 1);
3962 } else {
3963 return SDValue();
3964 }
3965
3966 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
3967 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
3968 // it's liable to create an infinite loop.
3969 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: OpVT);
3970 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
3971 DAG.isKnownToBeAPowerOfTwo(Val: Y)) {
3972 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
3973 // Note that where Y is variable and is known to have at most one bit set
3974 // (for example, if it is Z & 1) we cannot do this; the expressions are not
3975 // equivalent when Y == 0.
3976 assert(OpVT.isInteger());
3977 Cond = ISD::getSetCCInverse(Operation: Cond, Type: OpVT);
3978 if (DCI.isBeforeLegalizeOps() ||
3979 isCondCodeLegal(CC: Cond, VT: N0.getSimpleValueType()))
3980 return DAG.getSetCC(DL, VT, LHS: N0, RHS: Zero, Cond);
3981 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
3982 // If the target supports an 'and-not' or 'and-complement' logic operation,
3983 // try to use that to make a comparison operation more efficient.
3984 // But don't do this transform if the mask is a single bit because there are
3985 // more efficient ways to deal with that case (for example, 'bt' on x86 or
3986 // 'rlwinm' on PPC).
3987
3988 // Bail out if the compare operand that we want to turn into a zero is
3989 // already a zero (otherwise, infinite loop).
3990 if (isNullConstant(V: Y))
3991 return SDValue();
3992
3993 // Transform this into: ~X & Y == 0.
3994 SDValue NotX = DAG.getNOT(DL: SDLoc(X), Val: X, VT: OpVT);
3995 SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(N0), VT: OpVT, N1: NotX, N2: Y);
3996 return DAG.getSetCC(DL, VT, LHS: NewAnd, RHS: Zero, Cond);
3997 }
3998
3999 return SDValue();
4000}
4001
4002/// There are multiple IR patterns that could be checking whether certain
4003 /// truncation of a signed number would be lossy or not. The pattern that is
4004 /// best at the IR level may not lower optimally. Thus, we want to unfold it.
4005/// We are looking for the following pattern: (KeptBits is a constant)
4006/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4007 /// KeptBits won't be bitwidth(x); that would be constant-folded to true/false.
4008 /// KeptBits also can't be 1; that would have been folded to %x dstcond 0.
4009/// We will unfold it into the natural trunc+sext pattern:
4010/// ((%x << C) a>> C) dstcond %x
4011/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
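/// e.g. (illustrative, i16 %x with KeptBits == 8):
///   (add %x, 128) u< 256  -->  ((%x << 8) a>> 8) == %x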
4012SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4013 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4014 const SDLoc &DL) const {
4015 // We must be comparing with a constant.
4016 ConstantSDNode *C1;
4017 if (!(C1 = dyn_cast<ConstantSDNode>(Val&: N1)))
4018 return SDValue();
4019
4020 // N0 should be: add %x, (1 << (KeptBits-1))
4021 if (N0->getOpcode() != ISD::ADD)
4022 return SDValue();
4023
4024 // And we must be 'add'ing a constant.
4025 ConstantSDNode *C01;
4026 if (!(C01 = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1))))
4027 return SDValue();
4028
4029 SDValue X = N0->getOperand(Num: 0);
4030 EVT XVT = X.getValueType();
4031
4032 // Validate constants ...
4033
4034 APInt I1 = C1->getAPIntValue();
4035
4036 ISD::CondCode NewCond;
4037 if (Cond == ISD::CondCode::SETULT) {
4038 NewCond = ISD::CondCode::SETEQ;
4039 } else if (Cond == ISD::CondCode::SETULE) {
4040 NewCond = ISD::CondCode::SETEQ;
4041 // But need to 'canonicalize' the constant.
4042 I1 += 1;
4043 } else if (Cond == ISD::CondCode::SETUGT) {
4044 NewCond = ISD::CondCode::SETNE;
4045 // But need to 'canonicalize' the constant.
4046 I1 += 1;
4047 } else if (Cond == ISD::CondCode::SETUGE) {
4048 NewCond = ISD::CondCode::SETNE;
4049 } else
4050 return SDValue();
4051
4052 APInt I01 = C01->getAPIntValue();
4053
4054 auto checkConstants = [&I1, &I01]() -> bool {
4055 // Both of them must be powers of two, and the constant from the setcc must be bigger.
4056 return I1.ugt(RHS: I01) && I1.isPowerOf2() && I01.isPowerOf2();
4057 };
4058
4059 if (checkConstants()) {
4060 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4061 } else {
4062 // What if we invert constants? (and the target predicate)
4063 I1.negate();
4064 I01.negate();
4065 assert(XVT.isInteger());
4066 NewCond = getSetCCInverse(Operation: NewCond, Type: XVT);
4067 if (!checkConstants())
4068 return SDValue();
4069 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4070 }
4071
4072 // They are powers of two, so which bit is set?
4073 const unsigned KeptBits = I1.logBase2();
4074 const unsigned KeptBitsMinusOne = I01.logBase2();
4075
4076 // Magic!
4077 if (KeptBits != (KeptBitsMinusOne + 1))
4078 return SDValue();
4079 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4080
4081 // We don't want to do this in every single case.
4082 SelectionDAG &DAG = DCI.DAG;
4083 if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
4084 XVT, KeptBits))
4085 return SDValue();
4086
4087 const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
4088 assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
4089
4090 // Unfold into: ((%x << C) a>> C) cond %x
4091 // Where 'cond' will be either 'eq' or 'ne'.
4092 SDValue ShiftAmt = DAG.getConstant(Val: MaskedBits, DL, VT: XVT);
4093 SDValue T0 = DAG.getNode(Opcode: ISD::SHL, DL, VT: XVT, N1: X, N2: ShiftAmt);
4094 SDValue T1 = DAG.getNode(Opcode: ISD::SRA, DL, VT: XVT, N1: T0, N2: ShiftAmt);
4095 SDValue T2 = DAG.getSetCC(DL, VT: SCCVT, LHS: T1, RHS: X, Cond: NewCond);
4096
4097 return T2;
4098}
4099
4100// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
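// e.g. (illustrative): (X & (0x80 l>> Y)) != 0 --> ((X << Y) & 0x80) != 0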
4101SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4102 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4103 DAGCombinerInfo &DCI, const SDLoc &DL) const {
4104 assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() &&
4105 "Should be a comparison with 0.");
4106 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4107 "Valid only for [in]equality comparisons.");
4108
4109 unsigned NewShiftOpcode;
4110 SDValue X, C, Y;
4111
4112 SelectionDAG &DAG = DCI.DAG;
4113 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4114
4115 // Look for '(C l>>/<< Y)'.
4116 auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
4117 // The shift should be one-use.
4118 if (!V.hasOneUse())
4119 return false;
4120 unsigned OldShiftOpcode = V.getOpcode();
4121 switch (OldShiftOpcode) {
4122 case ISD::SHL:
4123 NewShiftOpcode = ISD::SRL;
4124 break;
4125 case ISD::SRL:
4126 NewShiftOpcode = ISD::SHL;
4127 break;
4128 default:
4129 return false; // must be a logical shift.
4130 }
4131 // We should be shifting a constant.
4132 // FIXME: best to use isConstantOrConstantVector().
4133 C = V.getOperand(i: 0);
4134 ConstantSDNode *CC =
4135 isConstOrConstSplat(N: C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4136 if (!CC)
4137 return false;
4138 Y = V.getOperand(i: 1);
4139
4140 ConstantSDNode *XC =
4141 isConstOrConstSplat(N: X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4142 return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
4143 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4144 };
4145
4146 // LHS of comparison should be a one-use 'and'.
4147 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4148 return SDValue();
4149
4150 X = N0.getOperand(i: 0);
4151 SDValue Mask = N0.getOperand(i: 1);
4152
4153 // 'and' is commutative!
4154 if (!Match(Mask)) {
4155 std::swap(a&: X, b&: Mask);
4156 if (!Match(Mask))
4157 return SDValue();
4158 }
4159
4160 EVT VT = X.getValueType();
4161
4162 // Produce:
4163 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
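// e.g. (X & (1 << Y)) != 0 --> ((X >> Y) & 1) != 0. Hoisting the constant
// out of the shift lets targets test a single known bit (often with a
// test-bit style instruction) instead of materializing the variable mask.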
4164 SDValue T0 = DAG.getNode(Opcode: NewShiftOpcode, DL, VT, N1: X, N2: Y);
4165 SDValue T1 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: T0, N2: C);
4166 SDValue T2 = DAG.getSetCC(DL, VT: SCCVT, LHS: T1, RHS: N1C, Cond);
4167 return T2;
4168}
4169
4170/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4171/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4172/// handle the commuted versions of these patterns.
4173SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4174 ISD::CondCode Cond, const SDLoc &DL,
4175 DAGCombinerInfo &DCI) const {
4176 unsigned BOpcode = N0.getOpcode();
4177 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4178 "Unexpected binop");
4179 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4180
4181 // (X + Y) == X --> Y == 0
4182 // (X - Y) == X --> Y == 0
4183 // (X ^ Y) == X --> Y == 0
4184 SelectionDAG &DAG = DCI.DAG;
4185 EVT OpVT = N0.getValueType();
4186 SDValue X = N0.getOperand(i: 0);
4187 SDValue Y = N0.getOperand(i: 1);
4188 if (X == N1)
4189 return DAG.getSetCC(DL, VT, LHS: Y, RHS: DAG.getConstant(Val: 0, DL, VT: OpVT), Cond);
4190
4191 if (Y != N1)
4192 return SDValue();
4193
4194 // (X + Y) == Y --> X == 0
4195 // (X ^ Y) == Y --> X == 0
4196 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4197 return DAG.getSetCC(DL, VT, LHS: X, RHS: DAG.getConstant(Val: 0, DL, VT: OpVT), Cond);
4198
4199 // The shift would not be valid if the operands are boolean (i1).
4200 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4201 return SDValue();
4202
4203 // (X - Y) == Y --> X == Y << 1
4204 SDValue One =
4205 DAG.getShiftAmountConstant(Val: 1, VT: OpVT, DL, LegalTypes: !DCI.isBeforeLegalize());
4206 SDValue YShl1 = DAG.getNode(Opcode: ISD::SHL, DL, VT: N1.getValueType(), N1: Y, N2: One);
4207 if (!DCI.isCalledByLegalizer())
4208 DCI.AddToWorklist(N: YShl1.getNode());
4209 return DAG.getSetCC(DL, VT, LHS: X, RHS: YShl1, Cond);
4210}
4211
4212static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
4213 SDValue N0, const APInt &C1,
4214 ISD::CondCode Cond, const SDLoc &dl,
4215 SelectionDAG &DAG) {
4216 // Look through truncs that don't change the value of a ctpop.
4217 // FIXME: Add vector support? Need to be careful with setcc result type below.
4218 SDValue CTPOP = N0;
4219 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4220 N0.getScalarValueSizeInBits() > Log2_32(Value: N0.getOperand(i: 0).getScalarValueSizeInBits()))
4221 CTPOP = N0.getOperand(i: 0);
4222
4223 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4224 return SDValue();
4225
4226 EVT CTVT = CTPOP.getValueType();
4227 SDValue CTOp = CTPOP.getOperand(i: 0);
4228
4229 // Expand a power-of-2-or-zero comparison based on ctpop:
4230 // (ctpop x) u< 2 -> (x & x-1) == 0
4231 // (ctpop x) u> 1 -> (x & x-1) != 0
4232 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4233 // Keep the CTPOP if it is a cheap vector op.
4234 if (CTVT.isVector() && TLI.isCtpopFast(VT: CTVT))
4235 return SDValue();
4236
4237 unsigned CostLimit = TLI.getCustomCtpopCost(VT: CTVT, Cond);
4238 if (C1.ugt(RHS: CostLimit + (Cond == ISD::SETULT)))
4239 return SDValue();
4240 if (C1 == 0 && (Cond == ISD::SETULT))
4241 return SDValue(); // This is handled elsewhere.
4242
4243 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4244
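// Each 'Result & (Result - 1)' pass clears the lowest set bit that remains,
// so after 'Passes' iterations the result is zero iff the population count
// of CTOp was at most 'Passes'.
// e.g. (ctpop x) u< 3 --> (x & (x - 1) & ((x & (x - 1)) - 1)) == 0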
4245 SDValue NegOne = DAG.getAllOnesConstant(DL: dl, VT: CTVT);
4246 SDValue Result = CTOp;
4247 for (unsigned i = 0; i < Passes; i++) {
4248 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: CTVT, N1: Result, N2: NegOne);
4249 Result = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: CTVT, N1: Result, N2: Add);
4250 }
4251 ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
4252 return DAG.getSetCC(DL: dl, VT, LHS: Result, RHS: DAG.getConstant(Val: 0, DL: dl, VT: CTVT), Cond: CC);
4253 }
4254
4255 // Expand a power-of-2 comparison based on ctpop
4256 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4257 // Keep the CTPOP if it is cheap.
4258 if (TLI.isCtpopFast(VT: CTVT))
4259 return SDValue();
4260
4261 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: CTVT);
4262 SDValue NegOne = DAG.getAllOnesConstant(DL: dl, VT: CTVT);
4263 assert(CTVT.isInteger());
4264 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: CTVT, N1: CTOp, N2: NegOne);
4265
4266 // It's not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4267 // check before emitting a potentially unnecessary op.
4268 if (DAG.isKnownNeverZero(Op: CTOp)) {
4269 // (ctpop x) == 1 --> (x & x-1) == 0
4270 // (ctpop x) != 1 --> (x & x-1) != 0
4271 SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: CTVT, N1: CTOp, N2: Add);
4272 SDValue RHS = DAG.getSetCC(DL: dl, VT, LHS: And, RHS: Zero, Cond);
4273 return RHS;
4274 }
4275
4276 // (ctpop x) == 1 --> (x ^ x-1) > x-1
4277 // (ctpop x) != 1 --> (x ^ x-1) <= x-1
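// Why this works: if x == 2^k, then x-1 flips bit k and everything below it,
// so x ^ (x-1) sets bits [0, k] and strictly exceeds x-1. If x == 0, both
// sides are all-ones and the strict u> fails. If x has two or more bits set,
// the xor only covers bits up to and including the lowest set bit, while
// x-1 still keeps a higher bit set, so the u> fails as well.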
4278 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: CTVT, N1: CTOp, N2: Add);
4279 ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
4280 return DAG.getSetCC(DL: dl, VT, LHS: Xor, RHS: Add, Cond: CmpCond);
4281 }
4282
4283 return SDValue();
4284}
4285
4286static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4287 ISD::CondCode Cond, const SDLoc &dl,
4288 SelectionDAG &DAG) {
4289 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4290 return SDValue();
4291
4292 auto *C1 = isConstOrConstSplat(N: N1, /* AllowUndefs */ true);
4293 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4294 return SDValue();
4295
4296 auto getRotateSource = [](SDValue X) {
4297 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4298 return X.getOperand(i: 0);
4299 return SDValue();
4300 };
4301
4302 // Peek through a rotated value compared against 0 or -1:
4303 // (rot X, Y) == 0/-1 --> X == 0/-1
4304 // (rot X, Y) != 0/-1 --> X != 0/-1
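// A rotate only permutes the bits of its input, so the all-zeros and
// all-ones patterns are invariant under it.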
4305 if (SDValue R = getRotateSource(N0))
4306 return DAG.getSetCC(DL: dl, VT, LHS: R, RHS: N1, Cond);
4307
4308 // Peek through an 'or' of a rotated value compared against 0:
4309 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4310 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4311 //
4312 // TODO: Add the 'and' with -1 sibling.
4313 // TODO: Recurse through a series of 'or' ops to find the rotate.
4314 EVT OpVT = N0.getValueType();
4315 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4316 if (SDValue R = getRotateSource(N0.getOperand(i: 0))) {
4317 SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: R, N2: N0.getOperand(i: 1));
4318 return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4319 }
4320 if (SDValue R = getRotateSource(N0.getOperand(i: 1))) {
4321 SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: R, N2: N0.getOperand(i: 0));
4322 return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4323 }
4324 }
4325
4326 return SDValue();
4327}
4328
4329static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
4330 ISD::CondCode Cond, const SDLoc &dl,
4331 SelectionDAG &DAG) {
4332 // If we are testing for all-bits-clear, we might be able to do that with
4333 // less shifting since bit-order does not matter.
4334 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4335 return SDValue();
4336
4337 auto *C1 = isConstOrConstSplat(N: N1, /* AllowUndefs */ true);
4338 if (!C1 || !C1->isZero())
4339 return SDValue();
4340
4341 if (!N0.hasOneUse() ||
4342 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4343 return SDValue();
4344
4345 unsigned BitWidth = N0.getScalarValueSizeInBits();
4346 auto *ShAmtC = isConstOrConstSplat(N: N0.getOperand(i: 2));
4347 if (!ShAmtC || ShAmtC->getAPIntValue().uge(RHS: BitWidth))
4348 return SDValue();
4349
4350 // Canonicalize fshr as fshl to reduce pattern-matching.
4351 unsigned ShAmt = ShAmtC->getZExtValue();
4352 if (N0.getOpcode() == ISD::FSHR)
4353 ShAmt = BitWidth - ShAmt;
4354
4355 // Match an 'or' with a specific operand 'Other' in either commuted variant.
4356 SDValue X, Y;
4357 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4358 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4359 return false;
4360 if (Or.getOperand(i: 0) == Other) {
4361 X = Or.getOperand(i: 0);
4362 Y = Or.getOperand(i: 1);
4363 return true;
4364 }
4365 if (Or.getOperand(i: 1) == Other) {
4366 X = Or.getOperand(i: 1);
4367 Y = Or.getOperand(i: 0);
4368 return true;
4369 }
4370 return false;
4371 };
4372
4373 EVT OpVT = N0.getValueType();
4374 EVT ShAmtVT = N0.getOperand(i: 2).getValueType();
4375 SDValue F0 = N0.getOperand(i: 0);
4376 SDValue F1 = N0.getOperand(i: 1);
4377 if (matchOr(F0, F1)) {
4378 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
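// fshl(X|Y, X, C) is ((X|Y) << C) | (X >> (BW-C)); that is zero iff the low
// BW-C bits of both X and Y and the high C bits of X are all zero, i.e. iff
// X == 0 and (Y << C) == 0, which is exactly ((Y << C) | X) == 0.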
4379 SDValue NewShAmt = DAG.getConstant(Val: ShAmt, DL: dl, VT: ShAmtVT);
4380 SDValue Shift = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: OpVT, N1: Y, N2: NewShAmt);
4381 SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: Shift, N2: X);
4382 return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4383 }
4384 if (matchOr(F1, F0)) {
4385 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4386 SDValue NewShAmt = DAG.getConstant(Val: BitWidth - ShAmt, DL: dl, VT: ShAmtVT);
4387 SDValue Shift = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: OpVT, N1: Y, N2: NewShAmt);
4388 SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: Shift, N2: X);
4389 return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4390 }
4391
4392 return SDValue();
4393}
4394
4395/// Try to simplify a setcc built with the specified operands and cc. If it is
4396/// unable to simplify it, return a null SDValue.
4397SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4398 ISD::CondCode Cond, bool foldBooleans,
4399 DAGCombinerInfo &DCI,
4400 const SDLoc &dl) const {
4401 SelectionDAG &DAG = DCI.DAG;
4402 const DataLayout &Layout = DAG.getDataLayout();
4403 EVT OpVT = N0.getValueType();
4404 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4405
4406 // Constant fold or commute setcc.
4407 if (SDValue Fold = DAG.FoldSetCC(VT, N1: N0, N2: N1, Cond, dl))
4408 return Fold;
4409
4410 bool N0ConstOrSplat =
4411 isConstOrConstSplat(N: N0, /*AllowUndefs*/ false, /*AllowTruncation*/ true);
4412 bool N1ConstOrSplat =
4413 isConstOrConstSplat(N: N1, /*AllowUndefs*/ false, /*AllowTruncation*/ true);
4414
4415 // Canonicalize toward having the constant on the RHS.
4416 // TODO: Handle non-splat vector constants. All undef causes trouble.
4417 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4418 // infinite loop here when we encounter one.
4419 ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Operation: Cond);
4420 if (N0ConstOrSplat && !N1ConstOrSplat &&
4421 (DCI.isBeforeLegalizeOps() ||
4422 isCondCodeLegal(CC: SwappedCC, VT: N0.getSimpleValueType())))
4423 return DAG.getSetCC(DL: dl, VT, LHS: N1, RHS: N0, Cond: SwappedCC);
4424
4425 // If we have a subtract with the same 2 non-constant operands as this setcc
4426 // -- but in reverse order -- then try to commute the operands of this setcc
4427 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4428 // instruction on some targets.
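// e.g. on targets where the subtract also sets condition flags, an existing
// (sub %y, %x) can double as the compare for (setcc %y, %x), so prefer the
// operand order that the subtract already uses.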
4429 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4430 (DCI.isBeforeLegalizeOps() ||
4431 isCondCodeLegal(CC: SwappedCC, VT: N0.getSimpleValueType())) &&
4432 DAG.doesNodeExist(Opcode: ISD::SUB, VTList: DAG.getVTList(VT: OpVT), Ops: {N1, N0}) &&
4433 !DAG.doesNodeExist(Opcode: ISD::SUB, VTList: DAG.getVTList(VT: OpVT), Ops: {N0, N1}))
4434 return DAG.getSetCC(DL: dl, VT, LHS: N1, RHS: N0, Cond: SwappedCC);
4435
4436 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4437 return V;
4438
4439 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4440 return V;
4441
4442 if (auto *N1C = isConstOrConstSplat(N: N1)) {
4443 const APInt &C1 = N1C->getAPIntValue();
4444
4445 // Optimize some CTPOP cases.
4446 if (SDValue V = simplifySetCCWithCTPOP(TLI: *this, VT, N0, C1, Cond, dl, DAG))
4447 return V;
4448
4449 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4450 // X * Y == 0 --> (X == 0) || (Y == 0)
4451 // X * Y != 0 --> (X != 0) && (Y != 0)
4452 // TODO: This bails out if minsize is set, but if the target doesn't have a
4453 // single instruction multiply for this type, it would likely be
4454 // smaller to decompose.
4455 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4456 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4457 (N0->getFlags().hasNoUnsignedWrap() ||
4458 N0->getFlags().hasNoSignedWrap()) &&
4459 !Attr.hasFnAttr(Attribute::MinSize)) {
4460 SDValue IsXZero = DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1, Cond);
4461 SDValue IsYZero = DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 1), RHS: N1, Cond);
4462 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4463 return DAG.getNode(Opcode: LogicOp, DL: dl, VT, N1: IsXZero, N2: IsYZero);
4464 }
4465
4466 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4467 // equality comparison, then we're just comparing whether X itself is
4468 // zero.
4469 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4470 N0.getOperand(i: 0).getOpcode() == ISD::CTLZ &&
4471 llvm::has_single_bit<uint32_t>(Value: N0.getScalarValueSizeInBits())) {
4472 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N: N0.getOperand(i: 1))) {
4473 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4474 ShAmt->getAPIntValue() == Log2_32(Value: N0.getScalarValueSizeInBits())) {
4475 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4476 // (srl (ctlz x), 5) == 0 -> X != 0
4477 // (srl (ctlz x), 5) != 1 -> X != 0
4478 Cond = ISD::SETNE;
4479 } else {
4480 // (srl (ctlz x), 5) != 0 -> X == 0
4481 // (srl (ctlz x), 5) == 1 -> X == 0
4482 Cond = ISD::SETEQ;
4483 }
4484 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: N0.getValueType());
4485 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0).getOperand(i: 0), RHS: Zero,
4486 Cond);
4487 }
4488 }
4489 }
4490 }
4491
4492 // FIXME: Support vectors.
4493 if (auto *N1C = dyn_cast<ConstantSDNode>(Val: N1.getNode())) {
4494 const APInt &C1 = N1C->getAPIntValue();
4495
4496 // (zext x) == C --> x == (trunc C)
4497 // (sext x) == C --> x == (trunc C)
4498 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4499 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4500 unsigned MinBits = N0.getValueSizeInBits();
4501 SDValue PreExt;
4502 bool Signed = false;
4503 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4504 // ZExt
4505 MinBits = N0->getOperand(Num: 0).getValueSizeInBits();
4506 PreExt = N0->getOperand(Num: 0);
4507 } else if (N0->getOpcode() == ISD::AND) {
4508 // DAGCombine turns costly ZExts into ANDs
4509 if (auto *C = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1)))
4510 if ((C->getAPIntValue()+1).isPowerOf2()) {
4511 MinBits = C->getAPIntValue().countr_one();
4512 PreExt = N0->getOperand(Num: 0);
4513 }
4514 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4515 // SExt
4516 MinBits = N0->getOperand(Num: 0).getValueSizeInBits();
4517 PreExt = N0->getOperand(Num: 0);
4518 Signed = true;
4519 } else if (auto *LN0 = dyn_cast<LoadSDNode>(Val&: N0)) {
4520 // ZEXTLOAD / SEXTLOAD
4521 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4522 MinBits = LN0->getMemoryVT().getSizeInBits();
4523 PreExt = N0;
4524 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4525 Signed = true;
4526 MinBits = LN0->getMemoryVT().getSizeInBits();
4527 PreExt = N0;
4528 }
4529 }
4530
4531 // Figure out how many bits we need to preserve this constant.
4532 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4533
4534 // Make sure we're not losing bits from the constant.
4535 if (MinBits > 0 &&
4536 MinBits < C1.getBitWidth() &&
4537 MinBits >= ReqdBits) {
4538 EVT MinVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: MinBits);
4539 if (isTypeDesirableForOp(ISD::SETCC, VT: MinVT)) {
4540 // Will get folded away.
4541 SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MinVT, Operand: PreExt);
4542 if (MinBits == 1 && C1 == 1)
4543 // Invert the condition.
4544 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4545 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4546 SDValue C = DAG.getConstant(Val: C1.trunc(width: MinBits), DL: dl, VT: MinVT);
4547 return DAG.getSetCC(DL: dl, VT, LHS: Trunc, RHS: C, Cond);
4548 }
4549
4550 // If truncating the setcc operands is not desirable, we can still
4551 // simplify the expression in some cases:
4552 // setcc ([sz]ext (setcc x, y, cc)), 0, setne -> setcc (x, y, cc)
4553 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq -> setcc (x, y, inv(cc))
4554 // setcc (zext (setcc x, y, cc)), 1, setne -> setcc (x, y, inv(cc))
4555 // setcc (zext (setcc x, y, cc)), 1, seteq -> setcc (x, y, cc)
4556 // setcc (sext (setcc x, y, cc)), -1, setne -> setcc (x, y, inv(cc))
4557 // setcc (sext (setcc x, y, cc)), -1, seteq -> setcc (x, y, cc)
4558 SDValue TopSetCC = N0->getOperand(Num: 0);
4559 unsigned N0Opc = N0->getOpcode();
4560 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4561 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4562 TopSetCC.getOpcode() == ISD::SETCC &&
4563 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4564 (isConstFalseVal(N1) ||
4565 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4566
4567 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4568 (!N1C->isZero() && Cond == ISD::SETNE);
4569
4570 if (!Inverse)
4571 return TopSetCC;
4572
4573 ISD::CondCode InvCond = ISD::getSetCCInverse(
4574 Operation: cast<CondCodeSDNode>(Val: TopSetCC.getOperand(i: 2))->get(),
4575 Type: TopSetCC.getOperand(i: 0).getValueType());
4576 return DAG.getSetCC(DL: dl, VT, LHS: TopSetCC.getOperand(i: 0),
4577 RHS: TopSetCC.getOperand(i: 1),
4578 Cond: InvCond);
4579 }
4580 }
4581 }
4582
4583 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4584 // equality or unsigned, and all 1 bits of the const are in the same
4585 // partial word, see if we can shorten the load.
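// e.g. on a little-endian target, '(and (load i32 %p), 0xff00) == 0' can
// become '(and (load i8 %p+1), 0xff) == 0', shrinking the word load to the
// one byte that the mask actually inspects.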
4586 if (DCI.isBeforeLegalize() &&
4587 !ISD::isSignedIntSetCC(Code: Cond) &&
4588 N0.getOpcode() == ISD::AND && C1 == 0 &&
4589 N0.getNode()->hasOneUse() &&
4590 isa<LoadSDNode>(Val: N0.getOperand(i: 0)) &&
4591 N0.getOperand(i: 0).getNode()->hasOneUse() &&
4592 isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
4593 LoadSDNode *Lod = cast<LoadSDNode>(Val: N0.getOperand(i: 0));
4594 APInt bestMask;
4595 unsigned bestWidth = 0, bestOffset = 0;
4596 if (Lod->isSimple() && Lod->isUnindexed()) {
4597 unsigned origWidth = N0.getValueSizeInBits();
4598 unsigned maskWidth = origWidth;
4599 // We can narrow (e.g.) 16-bit extending loads on a 32-bit target to
4600 // 8 bits, but have to be careful...
4601 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4602 origWidth = Lod->getMemoryVT().getSizeInBits();
4603 const APInt &Mask = N0.getConstantOperandAPInt(i: 1);
4604 for (unsigned width = origWidth / 2; width >= 8; width /= 2) {
4605 APInt newMask = APInt::getLowBitsSet(numBits: maskWidth, loBitsSet: width);
4606 for (unsigned offset = 0; offset < origWidth / width; offset++) {
4607 if (Mask.isSubsetOf(RHS: newMask)) {
4608 if (Layout.isLittleEndian())
4609 bestOffset = (uint64_t)offset * (width/8);
4610 else
4611 bestOffset = (origWidth/width - offset - 1) * (width/8);
4612 bestMask = Mask.lshr(shiftAmt: offset * (width/8) * 8);
4613 bestWidth = width;
4614 break;
4615 }
4616 newMask <<= width;
4617 }
4618 }
4619 }
4620 if (bestWidth) {
4621 EVT newVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: bestWidth);
4622 if (newVT.isRound() &&
4623 shouldReduceLoadWidth(Load: Lod, ExtTy: ISD::NON_EXTLOAD, NewVT: newVT)) {
4624 SDValue Ptr = Lod->getBasePtr();
4625 if (bestOffset != 0)
4626 Ptr = DAG.getMemBasePlusOffset(Base: Ptr, Offset: TypeSize::getFixed(ExactSize: bestOffset),
4627 DL: dl);
4628 SDValue NewLoad =
4629 DAG.getLoad(VT: newVT, dl, Chain: Lod->getChain(), Ptr,
4630 PtrInfo: Lod->getPointerInfo().getWithOffset(O: bestOffset),
4631 Alignment: Lod->getOriginalAlign());
4632 return DAG.getSetCC(DL: dl, VT,
4633 LHS: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: newVT, N1: NewLoad,
4634 N2: DAG.getConstant(Val: bestMask.trunc(width: bestWidth),
4635 DL: dl, VT: newVT)),
4636 RHS: DAG.getConstant(Val: 0LL, DL: dl, VT: newVT), Cond);
4637 }
4638 }
4639 }
4640
4641 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4642 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4643 unsigned InSize = N0.getOperand(i: 0).getValueSizeInBits();
4644
4645 // If the comparison constant has bits in the upper part, the
4646 // zero-extended value could never match.
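// e.g. (zext i8 %x to i32) == 0x100 is always false, since bits 8 and above
// of the zero-extended value are known to be zero.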
4647 if (C1.intersects(RHS: APInt::getHighBitsSet(numBits: C1.getBitWidth(),
4648 hiBitsSet: C1.getBitWidth() - InSize))) {
4649 switch (Cond) {
4650 case ISD::SETUGT:
4651 case ISD::SETUGE:
4652 case ISD::SETEQ:
4653 return DAG.getConstant(Val: 0, DL: dl, VT);
4654 case ISD::SETULT:
4655 case ISD::SETULE:
4656 case ISD::SETNE:
4657 return DAG.getConstant(Val: 1, DL: dl, VT);
4658 case ISD::SETGT:
4659 case ISD::SETGE:
4660 // True if the sign bit of C1 is set.
4661 return DAG.getConstant(Val: C1.isNegative(), DL: dl, VT);
4662 case ISD::SETLT:
4663 case ISD::SETLE:
4664 // True if the sign bit of C1 isn't set.
4665 return DAG.getConstant(Val: C1.isNonNegative(), DL: dl, VT);
4666 default:
4667 break;
4668 }
4669 }
4670
4671 // Otherwise, we can perform the comparison with the low bits.
4672 switch (Cond) {
4673 case ISD::SETEQ:
4674 case ISD::SETNE:
4675 case ISD::SETUGT:
4676 case ISD::SETUGE:
4677 case ISD::SETULT:
4678 case ISD::SETULE: {
4679 EVT newVT = N0.getOperand(i: 0).getValueType();
4680 if (DCI.isBeforeLegalizeOps() ||
4681 (isOperationLegal(Op: ISD::SETCC, VT: newVT) &&
4682 isCondCodeLegal(CC: Cond, VT: newVT.getSimpleVT()))) {
4683 EVT NewSetCCVT = getSetCCResultType(DL: Layout, Context&: *DAG.getContext(), VT: newVT);
4684 SDValue NewConst = DAG.getConstant(Val: C1.trunc(width: InSize), DL: dl, VT: newVT);
4685
4686 SDValue NewSetCC = DAG.getSetCC(DL: dl, VT: NewSetCCVT, LHS: N0.getOperand(i: 0),
4687 RHS: NewConst, Cond);
4688 return DAG.getBoolExtOrTrunc(Op: NewSetCC, SL: dl, VT, OpVT: N0.getValueType());
4689 }
4690 break;
4691 }
4692 default:
4693 break; // TODO: Be more careful with signed comparisons.
4694 }
4695 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4696 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4697 !isSExtCheaperThanZExt(FromTy: cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT(),
4698 ToTy: OpVT)) {
4699 EVT ExtSrcTy = cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT();
4700 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4701 EVT ExtDstTy = N0.getValueType();
4702 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4703
4704 // If the constant doesn't fit into the number of bits for the source of
4705 // the sign extension, it is impossible for both sides to be equal.
4706 if (C1.getSignificantBits() > ExtSrcTyBits)
4707 return DAG.getBoolConstant(V: Cond == ISD::SETNE, DL: dl, VT, OpVT);
4708
4709 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4710 ExtDstTy != ExtSrcTy && "Unexpected types!");
4711 APInt Imm = APInt::getLowBitsSet(numBits: ExtDstTyBits, loBitsSet: ExtSrcTyBits);
4712 SDValue ZextOp = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ExtDstTy, N1: N0.getOperand(i: 0),
4713 N2: DAG.getConstant(Val: Imm, DL: dl, VT: ExtDstTy));
4714 if (!DCI.isCalledByLegalizer())
4715 DCI.AddToWorklist(N: ZextOp.getNode());
4716 // Otherwise, make this a use of a zext.
4717 return DAG.getSetCC(DL: dl, VT, LHS: ZextOp,
4718 RHS: DAG.getConstant(Val: C1 & Imm, DL: dl, VT: ExtDstTy), Cond);
4719 } else if ((N1C->isZero() || N1C->isOne()) &&
4720 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4721 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
4722 // excluded as they are handled below whilst checking for foldBooleans.
4723 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
4724 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
4725 (N0.getValueType() == MVT::i1 ||
4726 getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
4727 DAG.MaskedValueIsZero(
4728 N0, APInt::getBitsSetFrom(N0.getValueSizeInBits(), 1))) {
4729 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
4730 if (TrueWhenTrue)
4731 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: N0);
4732 // Invert the condition.
4733 if (N0.getOpcode() == ISD::SETCC) {
4734 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N0.getOperand(i: 2))->get();
4735 CC = ISD::getSetCCInverse(Operation: CC, Type: N0.getOperand(i: 0).getValueType());
4736 if (DCI.isBeforeLegalizeOps() ||
4737 isCondCodeLegal(CC, VT: N0.getOperand(i: 0).getSimpleValueType()))
4738 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N0.getOperand(i: 1), Cond: CC);
4739 }
4740 }
4741
4742 if ((N0.getOpcode() == ISD::XOR ||
4743 (N0.getOpcode() == ISD::AND &&
4744 N0.getOperand(i: 0).getOpcode() == ISD::XOR &&
4745 N0.getOperand(i: 1) == N0.getOperand(i: 0).getOperand(i: 1))) &&
4746 isOneConstant(V: N0.getOperand(i: 1))) {
4747 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
4748 // can only do this if the top bits are known zero.
4749 unsigned BitWidth = N0.getValueSizeInBits();
4750 if (DAG.MaskedValueIsZero(Op: N0,
4751 Mask: APInt::getHighBitsSet(numBits: BitWidth,
4752 hiBitsSet: BitWidth-1))) {
4753 // Okay, get the un-inverted input value.
4754 SDValue Val;
4755 if (N0.getOpcode() == ISD::XOR) {
4756 Val = N0.getOperand(i: 0);
4757 } else {
4758 assert(N0.getOpcode() == ISD::AND &&
4759 N0.getOperand(0).getOpcode() == ISD::XOR);
4760 // ((X^1)&1)^1 -> X & 1
4761 Val = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: N0.getValueType(),
4762 N1: N0.getOperand(i: 0).getOperand(i: 0),
4763 N2: N0.getOperand(i: 1));
4764 }
4765
4766 return DAG.getSetCC(DL: dl, VT, LHS: Val, RHS: N1,
4767 Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4768 }
4769 } else if (N1C->isOne()) {
4770 SDValue Op0 = N0;
4771 if (Op0.getOpcode() == ISD::TRUNCATE)
4772 Op0 = Op0.getOperand(i: 0);
4773
4774 if ((Op0.getOpcode() == ISD::XOR) &&
4775 Op0.getOperand(i: 0).getOpcode() == ISD::SETCC &&
4776 Op0.getOperand(i: 1).getOpcode() == ISD::SETCC) {
4777 SDValue XorLHS = Op0.getOperand(i: 0);
4778 SDValue XorRHS = Op0.getOperand(i: 1);
4779 // Ensure that the input setccs return an i1 type or 0/1 value.
4780 if (Op0.getValueType() == MVT::i1 ||
4781 (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
4782 ZeroOrOneBooleanContent &&
4783 getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
4784 ZeroOrOneBooleanContent)) {
4785 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
4786 Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
4787 return DAG.getSetCC(DL: dl, VT, LHS: XorLHS, RHS: XorRHS, Cond);
4788 }
4789 }
4790 if (Op0.getOpcode() == ISD::AND && isOneConstant(V: Op0.getOperand(i: 1))) {
4791 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
4792 if (Op0.getValueType().bitsGT(VT))
4793 Op0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
4794 N1: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Op0.getOperand(i: 0)),
4795 N2: DAG.getConstant(Val: 1, DL: dl, VT));
4796 else if (Op0.getValueType().bitsLT(VT))
4797 Op0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
4798 N1: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: Op0.getOperand(i: 0)),
4799 N2: DAG.getConstant(Val: 1, DL: dl, VT));
4800
4801 return DAG.getSetCC(DL: dl, VT, LHS: Op0,
4802 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Op0.getValueType()),
4803 Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4804 }
4805 if (Op0.getOpcode() == ISD::AssertZext &&
4806 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
4807 return DAG.getSetCC(DL: dl, VT, LHS: Op0,
4808 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Op0.getValueType()),
4809 Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4810 }
4811 }
4812
4813 // Given:
4814 // icmp eq/ne (urem %x, %y), 0
4815 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
4816 // icmp eq/ne %x, 0
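// A non-zero %x with at most one bit set is a power of two, whose divisors
// are all powers of two; a %y with two or more bits set is not a power of
// two and can never divide it, so the remainder is zero iff %x is zero.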
4817 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
4818 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4819 KnownBits XKnown = DAG.computeKnownBits(Op: N0.getOperand(i: 0));
4820 KnownBits YKnown = DAG.computeKnownBits(Op: N0.getOperand(i: 1));
4821 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
4822 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1, Cond);
4823 }
4824
4825 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
4826 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
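// (ashr X, BW-1) replicates the sign bit across the whole value, so it is
// all-ones exactly when X is negative.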
4827 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4828 N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) &&
4829 N0.getConstantOperandAPInt(i: 1) == OpVT.getScalarSizeInBits() - 1 &&
4830 N1C && N1C->isAllOnes()) {
4831 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0),
4832 RHS: DAG.getConstant(Val: 0, DL: dl, VT: OpVT),
4833 Cond: Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
4834 }
4835
4836 if (SDValue V =
4837 optimizeSetCCOfSignedTruncationCheck(SCCVT: VT, N0, N1, Cond, DCI, DL: dl))
4838 return V;
4839 }
4840
4841 // These simplifications apply to splat vectors as well.
4842 // TODO: Handle more splat vector cases.
4843 if (auto *N1C = isConstOrConstSplat(N: N1)) {
4844 const APInt &C1 = N1C->getAPIntValue();
4845
4846 APInt MinVal, MaxVal;
4847 unsigned OperandBitSize = N1C->getValueType(ResNo: 0).getScalarSizeInBits();
4848 if (ISD::isSignedIntSetCC(Code: Cond)) {
4849 MinVal = APInt::getSignedMinValue(numBits: OperandBitSize);
4850 MaxVal = APInt::getSignedMaxValue(numBits: OperandBitSize);
4851 } else {
4852 MinVal = APInt::getMinValue(numBits: OperandBitSize);
4853 MaxVal = APInt::getMaxValue(numBits: OperandBitSize);
4854 }
4855
4856 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
4857 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
4858 // X >= MIN --> true
4859 if (C1 == MinVal)
4860 return DAG.getBoolConstant(V: true, DL: dl, VT, OpVT);
4861
4862 if (!VT.isVector()) { // TODO: Support this for vectors.
4863 // X >= C0 --> X > (C0 - 1)
4864 APInt C = C1 - 1;
4865 ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
4866 if ((DCI.isBeforeLegalizeOps() ||
4867 isCondCodeLegal(CC: NewCC, VT: VT.getSimpleVT())) &&
4868 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4869 isLegalICmpImmediate(C.getSExtValue())))) {
4870 return DAG.getSetCC(DL: dl, VT, LHS: N0,
4871 RHS: DAG.getConstant(Val: C, DL: dl, VT: N1.getValueType()),
4872 Cond: NewCC);
4873 }
4874 }
4875 }
4876
4877 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
4878 // X <= MAX --> true
4879 if (C1 == MaxVal)
4880 return DAG.getBoolConstant(V: true, DL: dl, VT, OpVT);
4881
4882 // X <= C0 --> X < (C0 + 1)
4883 if (!VT.isVector()) { // TODO: Support this for vectors.
4884 APInt C = C1 + 1;
4885 ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
4886 if ((DCI.isBeforeLegalizeOps() ||
4887 isCondCodeLegal(CC: NewCC, VT: VT.getSimpleVT())) &&
4888 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4889 isLegalICmpImmediate(C.getSExtValue())))) {
4890 return DAG.getSetCC(DL: dl, VT, LHS: N0,
4891 RHS: DAG.getConstant(Val: C, DL: dl, VT: N1.getValueType()),
4892 Cond: NewCC);
4893 }
4894 }
4895 }
4896
4897 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
4898 if (C1 == MinVal)
4899 return DAG.getBoolConstant(V: false, DL: dl, VT, OpVT); // X < MIN --> false
4900
4901 // TODO: Support this for vectors after legalize ops.
4902 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4903 // Canonicalize setlt X, Max --> setne X, Max
4904 if (C1 == MaxVal)
4905 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: ISD::SETNE);
4906
4907 // If we have setult X, 1, turn it into seteq X, 0
4908 if (C1 == MinVal+1)
4909 return DAG.getSetCC(DL: dl, VT, LHS: N0,
4910 RHS: DAG.getConstant(Val: MinVal, DL: dl, VT: N0.getValueType()),
4911 Cond: ISD::SETEQ);
4912 }
4913 }
4914
4915 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
4916 if (C1 == MaxVal)
4917 return DAG.getBoolConstant(V: false, DL: dl, VT, OpVT); // X > MAX --> false
4918
4919 // TODO: Support this for vectors after legalize ops.
4920 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4921 // Canonicalize setgt X, Min --> setne X, Min
4922 if (C1 == MinVal)
4923 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: ISD::SETNE);
4924
4925 // If we have setugt X, Max-1, turn it into seteq X, Max
4926 if (C1 == MaxVal-1)
4927 return DAG.getSetCC(DL: dl, VT, LHS: N0,
4928 RHS: DAG.getConstant(Val: MaxVal, DL: dl, VT: N0.getValueType()),
4929 Cond: ISD::SETEQ);
4930 }
4931 }
4932
4933 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
4934 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
4935 if (C1.isZero())
4936 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
4937 SCCVT: VT, N0, N1C: N1, Cond, DCI, DL: dl))
4938 return CC;
4939
4940 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
4941 // For example, when high 32-bits of i64 X are known clear:
4942 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
4943 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
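// With the high half of X known zero, (X | (Y<<32)) is the concatenation
// Y:X, so the test can be performed on the unshifted operands directly,
// saving the shift.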
4944 bool CmpZero = N1C->isZero();
4945 bool CmpNegOne = N1C->isAllOnes();
4946 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
4947 // Match or(lo,shl(hi,bw/2)) pattern.
4948 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
4949 unsigned EltBits = V.getScalarValueSizeInBits();
4950 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
4951 return false;
4952 SDValue LHS = V.getOperand(i: 0);
4953 SDValue RHS = V.getOperand(i: 1);
4954 APInt HiBits = APInt::getHighBitsSet(numBits: EltBits, hiBitsSet: EltBits / 2);
4955 // Unshifted element must have zero upper bits.
4956 if (RHS.getOpcode() == ISD::SHL &&
4957 isa<ConstantSDNode>(Val: RHS.getOperand(i: 1)) &&
4958 RHS.getConstantOperandAPInt(i: 1) == (EltBits / 2) &&
4959 DAG.MaskedValueIsZero(Op: LHS, Mask: HiBits)) {
4960 Lo = LHS;
4961 Hi = RHS.getOperand(i: 0);
4962 return true;
4963 }
4964 if (LHS.getOpcode() == ISD::SHL &&
4965 isa<ConstantSDNode>(Val: LHS.getOperand(i: 1)) &&
4966 LHS.getConstantOperandAPInt(i: 1) == (EltBits / 2) &&
4967 DAG.MaskedValueIsZero(Op: RHS, Mask: HiBits)) {
4968 Lo = RHS;
4969 Hi = LHS.getOperand(i: 0);
4970 return true;
4971 }
4972 return false;
4973 };
4974
4975 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
4976 unsigned EltBits = N0.getScalarValueSizeInBits();
4977 unsigned HalfBits = EltBits / 2;
4978 APInt HiBits = APInt::getHighBitsSet(numBits: EltBits, hiBitsSet: HalfBits);
4979 SDValue LoBits = DAG.getConstant(Val: ~HiBits, DL: dl, VT: OpVT);
4980 SDValue HiMask = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1: Hi, N2: LoBits);
4981 SDValue NewN0 =
4982 DAG.getNode(Opcode: CmpZero ? ISD::OR : ISD::AND, DL: dl, VT: OpVT, N1: Lo, N2: HiMask);
4983 SDValue NewN1 = CmpZero ? DAG.getConstant(Val: 0, DL: dl, VT: OpVT) : LoBits;
4984 return DAG.getSetCC(DL: dl, VT, LHS: NewN0, RHS: NewN1, Cond);
4985 };
4986
4987 SDValue Lo, Hi;
4988 if (IsConcat(N0, Lo, Hi))
4989 return MergeConcat(Lo, Hi);
4990
4991 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
4992 SDValue Lo0, Lo1, Hi0, Hi1;
4993 if (IsConcat(N0.getOperand(i: 0), Lo0, Hi0) &&
4994 IsConcat(N0.getOperand(i: 1), Lo1, Hi1)) {
4995 return MergeConcat(DAG.getNode(Opcode: N0.getOpcode(), DL: dl, VT: OpVT, N1: Lo0, N2: Lo1),
4996 DAG.getNode(Opcode: N0.getOpcode(), DL: dl, VT: OpVT, N1: Hi0, N2: Hi1));
4997 }
4998 }
4999 }
5000 }
5001
5002 // If we have "setcc X, C0", check to see if we can shrink the immediate
5003 // by changing cc.
5004 // TODO: Support this for vectors after legalize ops.
5005 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5006 // SETUGT X, SINTMAX -> SETLT X, 0
5007 // SETUGE X, SINTMIN -> SETLT X, 0
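// An unsigned value exceeds SINTMAX (equivalently, reaches SINTMIN) exactly
// when its sign bit is set, i.e. when it is negative as a signed value.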
5008 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5009 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5010 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5011 RHS: DAG.getConstant(Val: 0, DL: dl, VT: N1.getValueType()),
5012 Cond: ISD::SETLT);
5013
5014 // SETULT X, SINTMIN -> SETGT X, -1
5015 // SETULE X, SINTMAX -> SETGT X, -1
5016 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5017 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5018 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5019 RHS: DAG.getAllOnesConstant(DL: dl, VT: N1.getValueType()),
5020 Cond: ISD::SETGT);
5021 }
5022 }
5023
5024 // Back to non-vector simplifications.
5025 // TODO: Can we do these for vector splats?
5026 if (auto *N1C = dyn_cast<ConstantSDNode>(Val: N1.getNode())) {
5027 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5028 const APInt &C1 = N1C->getAPIntValue();
5029 EVT ShValTy = N0.getValueType();
5030
5031 // Fold bit comparisons when we can. This will result in an
5032 // incorrect value when boolean false is negative one, unless
5033 // the bitsize is 1 in which case the false value is the same
5034 // in practice regardless of the representation.
5035 if ((VT.getSizeInBits() == 1 ||
5036 getBooleanContents(Type: N0.getValueType()) == ZeroOrOneBooleanContent) &&
5037 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5038 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(VT: ShValTy))) &&
5039 N0.getOpcode() == ISD::AND) {
5040 if (auto *AndRHS = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
5041 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5042 // Perform the xform if the AND RHS is a single bit.
5043 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5044 if (AndRHS->getAPIntValue().isPowerOf2() &&
5045 !TLI.shouldAvoidTransformToShift(VT: ShValTy, Amount: ShCt)) {
5046 return DAG.getNode(
5047 Opcode: ISD::TRUNCATE, DL: dl, VT,
5048 Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5049 N2: DAG.getShiftAmountConstant(
5050 Val: ShCt, VT: ShValTy, DL: dl, LegalTypes: !DCI.isBeforeLegalize())));
5051 }
5052 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5053 // (X & 8) == 8 --> (X & 8) >> 3
5054 // Perform the xform if C1 is a single bit.
5055 unsigned ShCt = C1.logBase2();
5056 if (C1.isPowerOf2() &&
5057 !TLI.shouldAvoidTransformToShift(VT: ShValTy, Amount: ShCt)) {
5058 return DAG.getNode(
5059 Opcode: ISD::TRUNCATE, DL: dl, VT,
5060 Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5061 N2: DAG.getShiftAmountConstant(
5062 Val: ShCt, VT: ShValTy, DL: dl, LegalTypes: !DCI.isBeforeLegalize())));
5063 }
5064 }
5065 }
5066 }
5067
5068 if (C1.getSignificantBits() <= 64 &&
5069 !isLegalICmpImmediate(C1.getSExtValue())) {
5070 // (X & -256) == 256 -> (X >> 8) == 1
5071 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5072 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5073 if (auto *AndRHS = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
5074 const APInt &AndRHSC = AndRHS->getAPIntValue();
5075 if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) {
5076 unsigned ShiftBits = AndRHSC.countr_zero();
5077 if (!TLI.shouldAvoidTransformToShift(VT: ShValTy, Amount: ShiftBits)) {
5078 SDValue Shift = DAG.getNode(
5079 Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0.getOperand(i: 0),
5080 N2: DAG.getShiftAmountConstant(Val: ShiftBits, VT: ShValTy, DL: dl,
5081 LegalTypes: !DCI.isBeforeLegalize()));
5082 SDValue CmpRHS = DAG.getConstant(Val: C1.lshr(shiftAmt: ShiftBits), DL: dl, VT: ShValTy);
5083 return DAG.getSetCC(DL: dl, VT, LHS: Shift, RHS: CmpRHS, Cond);
5084 }
5085 }
5086 }
5087 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5088 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5089 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5090 // X < 0x100000000 -> (X >> 32) < 1
5091 // X >= 0x100000000 -> (X >> 32) >= 1
5092 // X <= 0x0ffffffff -> (X >> 32) < 1
5093 // X > 0x0ffffffff -> (X >> 32) >= 1
5094 unsigned ShiftBits;
5095 APInt NewC = C1;
5096 ISD::CondCode NewCond = Cond;
5097 if (AdjOne) {
5098 ShiftBits = C1.countr_one();
5099 NewC = NewC + 1;
5100 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5101 } else {
5102 ShiftBits = C1.countr_zero();
5103 }
5104 NewC.lshrInPlace(ShiftAmt: ShiftBits);
5105 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5106 isLegalICmpImmediate(NewC.getSExtValue()) &&
5107 !TLI.shouldAvoidTransformToShift(VT: ShValTy, Amount: ShiftBits)) {
5108 SDValue Shift =
5109 DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5110 N2: DAG.getShiftAmountConstant(Val: ShiftBits, VT: ShValTy, DL: dl,
5111 LegalTypes: !DCI.isBeforeLegalize()));
5112 SDValue CmpRHS = DAG.getConstant(Val: NewC, DL: dl, VT: ShValTy);
5113 return DAG.getSetCC(DL: dl, VT, LHS: Shift, RHS: CmpRHS, Cond: NewCond);
5114 }
5115 }
5116 }
5117 }
5118
5119 if (!isa<ConstantFPSDNode>(Val: N0) && isa<ConstantFPSDNode>(Val: N1)) {
5120 auto *CFP = cast<ConstantFPSDNode>(Val&: N1);
5121 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5122
5123 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5124 // constant if knowing that the operand is non-NaN is enough. We prefer to
5125 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5126 // materialize 0.0.
5127 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5128 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N0, Cond);
5129
5130 // setcc (fneg x), C -> setcc swap(pred) x, -C
5131 if (N0.getOpcode() == ISD::FNEG) {
5132 ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Operation: Cond);
5133 if (DCI.isBeforeLegalizeOps() ||
5134 isCondCodeLegal(CC: SwapCond, VT: N0.getSimpleValueType())) {
5135 SDValue NegN1 = DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT: N0.getValueType(), Operand: N1);
5136 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: NegN1, Cond: SwapCond);
5137 }
5138 }
5139
5140 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5141 if (isOperationLegalOrCustom(Op: ISD::IS_FPCLASS, VT: N0.getValueType()) &&
5142 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(ResNo: 0))) {
5143 bool IsFabs = N0.getOpcode() == ISD::FABS;
5144 SDValue Op = IsFabs ? N0.getOperand(i: 0) : N0;
5145 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5146 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5147 : (IsFabs ? fcInf : fcPosInf);
5148 if (Cond == ISD::SETUEQ)
5149 Flag |= fcNan;
5150 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5151 DAG.getTargetConstant(Flag, dl, MVT::i32));
5152 }
5153 }
5154
5155 // If the condition is not legal, see if we can find an equivalent one
5156 // which is legal.
5157 if (!isCondCodeLegal(CC: Cond, VT: N0.getSimpleValueType())) {
5158 // If the comparison was an awkward floating-point == or != and one of
5159 // the comparison operands is infinity or negative infinity, convert the
5160 // condition to a less-awkward <= or >=.
5161 if (CFP->getValueAPF().isInfinity()) {
5162 bool IsNegInf = CFP->getValueAPF().isNegative();
5163 ISD::CondCode NewCond = ISD::SETCC_INVALID;
5164 switch (Cond) {
5165 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5166 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5167 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5168 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5169 default: break;
5170 }
5171 if (NewCond != ISD::SETCC_INVALID &&
5172 isCondCodeLegal(CC: NewCond, VT: N0.getSimpleValueType()))
5173 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: NewCond);
5174 }
5175 }
5176 }
5177
5178 if (N0 == N1) {
5179 // The sext(setcc()) => setcc() optimization relies on the appropriate
5180 // constant being emitted.
5181 assert(!N0.getValueType().isInteger() &&
5182 "Integer types should be handled by FoldSetCC");
5183
5184 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5185 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5186 if (UOF == 2) // FP operators that are undefined on NaNs.
5187 return DAG.getBoolConstant(V: EqTrue, DL: dl, VT, OpVT);
5188 if (UOF == unsigned(EqTrue))
5189 return DAG.getBoolConstant(V: EqTrue, DL: dl, VT, OpVT);
5190 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5191 // if it is not already.
5192 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5193 if (NewCond != Cond &&
5194 (DCI.isBeforeLegalizeOps() ||
5195 isCondCodeLegal(CC: NewCond, VT: N0.getSimpleValueType())))
5196 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: NewCond);
5197 }
5198
5199 // ~X > ~Y --> Y > X
5200 // ~X < ~Y --> Y < X
5201 // ~X < C --> X > ~C
5202 // ~X > C --> X < ~C
5203 if ((isSignedIntSetCC(Code: Cond) || isUnsignedIntSetCC(Code: Cond)) &&
5204 N0.getValueType().isInteger()) {
5205 if (isBitwiseNot(V: N0)) {
5206 if (isBitwiseNot(V: N1))
5207 return DAG.getSetCC(DL: dl, VT, LHS: N1.getOperand(i: 0), RHS: N0.getOperand(i: 0), Cond);
5208
5209 if (DAG.isConstantIntBuildVectorOrConstantInt(N: N1) &&
5210 !DAG.isConstantIntBuildVectorOrConstantInt(N: N0.getOperand(i: 0))) {
5211 SDValue Not = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5212 return DAG.getSetCC(DL: dl, VT, LHS: Not, RHS: N0.getOperand(i: 0), Cond);
5213 }
5214 }
5215 }
5216
5217 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5218 N0.getValueType().isInteger()) {
5219 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5220 N0.getOpcode() == ISD::XOR) {
5221 // Simplify (X+Y) == (X+Z) --> Y == Z
5222 if (N0.getOpcode() == N1.getOpcode()) {
5223 if (N0.getOperand(i: 0) == N1.getOperand(i: 0))
5224 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 1), RHS: N1.getOperand(i: 1), Cond);
5225 if (N0.getOperand(i: 1) == N1.getOperand(i: 1))
5226 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1.getOperand(i: 0), Cond);
5227 if (isCommutativeBinOp(Opcode: N0.getOpcode())) {
5228 // If X op Y == Y op X, try other combinations.
5229 if (N0.getOperand(i: 0) == N1.getOperand(i: 1))
5230 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 1), RHS: N1.getOperand(i: 0),
5231 Cond);
5232 if (N0.getOperand(i: 1) == N1.getOperand(i: 0))
5233 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1.getOperand(i: 1),
5234 Cond);
5235 }
5236 }
5237
5238 // If RHS is a legal immediate value for a compare instruction, we need
5239 // to be careful about increasing register pressure needlessly.
5240 bool LegalRHSImm = false;
5241
5242 if (auto *RHSC = dyn_cast<ConstantSDNode>(Val&: N1)) {
5243 if (auto *LHSR = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
5244 // Turn (X+C1) == C2 --> X == C2-C1
5245 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5246 return DAG.getSetCC(
5247 DL: dl, VT, LHS: N0.getOperand(i: 0),
5248 RHS: DAG.getConstant(Val: RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5249 DL: dl, VT: N0.getValueType()),
5250 Cond);
5251
5252 // Turn (X^C1) == C2 --> X == C1^C2
5253 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5254 return DAG.getSetCC(
5255 DL: dl, VT, LHS: N0.getOperand(i: 0),
5256 RHS: DAG.getConstant(Val: LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5257 DL: dl, VT: N0.getValueType()),
5258 Cond);
5259 }
5260
5261 // Turn (C1-X) == C2 --> X == C1-C2
5262 if (auto *SUBC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 0)))
5263 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5264 return DAG.getSetCC(
5265 DL: dl, VT, LHS: N0.getOperand(i: 1),
5266 RHS: DAG.getConstant(Val: SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5267 DL: dl, VT: N0.getValueType()),
5268 Cond);
5269
5270 // Could RHSC fold directly into a compare?
5271 if (RHSC->getValueType(ResNo: 0).getSizeInBits() <= 64)
5272 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5273 }
5274
5275 // (X+Y) == X --> Y == 0 and similar folds.
5276 // Don't do this if X is an immediate that can fold into a cmp
5277 // instruction and X+Y has other uses. It could be an induction variable
5278 // chain, and the transform would increase register pressure.
5279 if (!LegalRHSImm || N0.hasOneUse())
5280 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, DL: dl, DCI))
5281 return V;
5282 }
5283
5284 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5285 N1.getOpcode() == ISD::XOR)
5286 if (SDValue V = foldSetCCWithBinOp(VT, N0: N1, N1: N0, Cond, DL: dl, DCI))
5287 return V;
5288
5289 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, DL: dl, DCI))
5290 return V;
5291 }
5292
5293 // Fold remainder of division by a constant.
5294 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5295 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5296 // When division is cheap or optimizing for minimum size,
5297 // fall through to DIVREM creation by skipping this fold.
5298 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5299 if (N0.getOpcode() == ISD::UREM) {
5300 if (SDValue Folded = buildUREMEqFold(SETCCVT: VT, REMNode: N0, CompTargetNode: N1, Cond, DCI, DL: dl))
5301 return Folded;
5302 } else if (N0.getOpcode() == ISD::SREM) {
5303 if (SDValue Folded = buildSREMEqFold(SETCCVT: VT, REMNode: N0, CompTargetNode: N1, Cond, DCI, DL: dl))
5304 return Folded;
5305 }
5306 }
5307 }
5308
5309 // Fold away ALL boolean setcc's.
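// In i1 the only values are 0 and 1 (-1 when interpreted as signed), so
// every comparison can be rewritten as bitwise logic on the operands.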
5310 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5311 SDValue Temp;
5312 switch (Cond) {
5313 default: llvm_unreachable("Unknown integer setcc!");
5314 case ISD::SETEQ: // X == Y -> ~(X^Y)
5315 Temp = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OpVT, N1: N0, N2: N1);
5316 N0 = DAG.getNOT(DL: dl, Val: Temp, VT: OpVT);
5317 if (!DCI.isCalledByLegalizer())
5318 DCI.AddToWorklist(N: Temp.getNode());
5319 break;
5320 case ISD::SETNE: // X != Y --> (X^Y)
5321 N0 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OpVT, N1: N0, N2: N1);
5322 break;
5323 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5324 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5325 Temp = DAG.getNOT(DL: dl, Val: N0, VT: OpVT);
5326 N0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1, N2: Temp);
5327 if (!DCI.isCalledByLegalizer())
5328 DCI.AddToWorklist(N: Temp.getNode());
5329 break;
5330 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5331 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5332 Temp = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5333 N0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1: N0, N2: Temp);
5334 if (!DCI.isCalledByLegalizer())
5335 DCI.AddToWorklist(N: Temp.getNode());
5336 break;
5337 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5338 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5339 Temp = DAG.getNOT(DL: dl, Val: N0, VT: OpVT);
5340 N0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1, N2: Temp);
5341 if (!DCI.isCalledByLegalizer())
5342 DCI.AddToWorklist(N: Temp.getNode());
5343 break;
5344 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5345 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5346 Temp = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5347 N0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: N0, N2: Temp);
5348 break;
5349 }
5350 if (VT.getScalarType() != MVT::i1) {
5351 if (!DCI.isCalledByLegalizer())
5352 DCI.AddToWorklist(N: N0.getNode());
5353 // FIXME: If running after legalize, we probably can't do this.
5354 ISD::NodeType ExtendCode = getExtendForContent(Content: getBooleanContents(Type: OpVT));
5355 N0 = DAG.getNode(Opcode: ExtendCode, DL: dl, VT, Operand: N0);
5356 }
5357 return N0;
5358 }
5359
5360 // Could not fold it.
5361 return SDValue();
5362}
5363
5364/// Returns true (and the GlobalValue and the offset) if the node is a
5365/// GlobalAddress + offset.
5366bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
5367 int64_t &Offset) const {
5368
5369 SDNode *N = unwrapAddress(N: SDValue(WN, 0)).getNode();
5370
5371 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(Val: N)) {
5372 GA = GASD->getGlobal();
5373 Offset += GASD->getOffset();
5374 return true;
5375 }
5376
5377 if (N->getOpcode() == ISD::ADD) {
5378 SDValue N1 = N->getOperand(Num: 0);
5379 SDValue N2 = N->getOperand(Num: 1);
5380 if (isGAPlusOffset(WN: N1.getNode(), GA, Offset)) {
5381 if (auto *V = dyn_cast<ConstantSDNode>(Val&: N2)) {
5382 Offset += V->getSExtValue();
5383 return true;
5384 }
5385 } else if (isGAPlusOffset(WN: N2.getNode(), GA, Offset)) {
5386 if (auto *V = dyn_cast<ConstantSDNode>(Val&: N1)) {
5387 Offset += V->getSExtValue();
5388 return true;
5389 }
5390 }
5391 }
5392
5393 return false;
5394}
5395
5396SDValue TargetLowering::PerformDAGCombine(SDNode *N,
5397 DAGCombinerInfo &DCI) const {
5398 // Default implementation: no optimization.
5399 return SDValue();
5400}
5401
5402//===----------------------------------------------------------------------===//
5403// Inline Assembler Implementation Methods
5404//===----------------------------------------------------------------------===//
5405
5406TargetLowering::ConstraintType
5407TargetLowering::getConstraintType(StringRef Constraint) const {
5408 unsigned S = Constraint.size();
5409
5410 if (S == 1) {
5411 switch (Constraint[0]) {
5412 default: break;
5413 case 'r':
5414 return C_RegisterClass;
5415 case 'm': // memory
5416 case 'o': // offsetable
5417 case 'V': // not offsetable
5418 return C_Memory;
5419 case 'p': // Address.
5420 return C_Address;
5421 case 'n': // Simple Integer
5422 case 'E': // Floating Point Constant
5423 case 'F': // Floating Point Constant
5424 return C_Immediate;
5425 case 'i': // Simple Integer or Relocatable Constant
5426 case 's': // Relocatable Constant
5427 case 'X': // Allow ANY value.
5428 case 'I': // Target registers.
5429 case 'J':
5430 case 'K':
5431 case 'L':
5432 case 'M':
5433 case 'N':
5434 case 'O':
5435 case 'P':
5436 case '<':
5437 case '>':
5438 return C_Other;
5439 }
5440 }
5441
5442 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5443 if (S == 8 && Constraint.substr(Start: 1, N: 6) == "memory") // "{memory}"
5444 return C_Memory;
5445 return C_Register;
5446 }
5447 return C_Unknown;
5448}
5449
5450/// Try to replace an X constraint, which matches anything, with another that
5451/// has more specific requirements based on the type of the corresponding
5452/// operand.
5453const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5454 if (ConstraintVT.isInteger())
5455 return "r";
5456 if (ConstraintVT.isFloatingPoint())
5457 return "f"; // works for many targets
5458 return nullptr;
5459}
5460
5461SDValue TargetLowering::LowerAsmOutputForConstraint(
5462 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5463 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5464 return SDValue();
5465}
5466
5467/// Lower the specified operand into the Ops vector.
5468/// If it is invalid, don't add anything to Ops.
5469void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
5470 StringRef Constraint,
5471 std::vector<SDValue> &Ops,
5472 SelectionDAG &DAG) const {
5473
5474 if (Constraint.size() > 1)
5475 return;
5476
5477 char ConstraintLetter = Constraint[0];
5478 switch (ConstraintLetter) {
5479 default: break;
5480 case 'X': // Allows any operand
5481 case 'i': // Simple Integer or Relocatable Constant
5482 case 'n': // Simple Integer
5483 case 's': { // Relocatable Constant
5484
5485 ConstantSDNode *C;
5486 uint64_t Offset = 0;
5487
5488 // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5489 // etc., since getelementptr is variadic. We can't use
5490 // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5491 // while in this case the GA may be furthest from the root node which is
5492 // likely an ISD::ADD.
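    // For illustration: a constant GEP such as `getelementptr i8, ptr @g,
    // i64 4`, itself offset by another constant, typically reaches us as
    // (add (add GA:@g, 4), 8); the loop below peels off one constant per
    // iteration, accumulating Offset = 12 before emitting the final target
    // global address.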
5493 while (true) {
5494 if ((C = dyn_cast<ConstantSDNode>(Val&: Op)) && ConstraintLetter != 's') {
        // GCC prints these as sign-extended. Sign-extend the value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
5498 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5499 BooleanContent BCont = getBooleanContents(MVT::i64);
5500 ISD::NodeType ExtOpc =
5501 IsBool ? getExtendForContent(Content: BCont) : ISD::SIGN_EXTEND;
5502 int64_t ExtVal =
5503 ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5504 Ops.push_back(
5505 DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
5506 return;
5507 }
5508 if (ConstraintLetter != 'n') {
5509 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Val&: Op)) {
5510 Ops.push_back(x: DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL: SDLoc(Op),
5511 VT: GA->getValueType(ResNo: 0),
5512 offset: Offset + GA->getOffset()));
5513 return;
5514 }
5515 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Val&: Op)) {
5516 Ops.push_back(x: DAG.getTargetBlockAddress(
5517 BA: BA->getBlockAddress(), VT: BA->getValueType(ResNo: 0),
5518 Offset: Offset + BA->getOffset(), TargetFlags: BA->getTargetFlags()));
5519 return;
5520 }
5521 if (isa<BasicBlockSDNode>(Val: Op)) {
5522 Ops.push_back(x: Op);
5523 return;
5524 }
5525 }
5526 const unsigned OpCode = Op.getOpcode();
5527 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5528 if ((C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 0))))
5529 Op = Op.getOperand(i: 1);
5530 // Subtraction is not commutative.
5531 else if (OpCode == ISD::ADD &&
5532 (C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1))))
5533 Op = Op.getOperand(i: 0);
5534 else
5535 return;
5536 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5537 continue;
5538 }
5539 return;
5540 }
5541 break;
5542 }
5543 }
5544}
5545
5546void TargetLowering::CollectTargetIntrinsicOperands(
5547 const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
5548}
5549
5550std::pair<unsigned, const TargetRegisterClass *>
5551TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
5552 StringRef Constraint,
5553 MVT VT) const {
5554 if (!Constraint.starts_with(Prefix: "{"))
5555 return std::make_pair(x: 0u, y: static_cast<TargetRegisterClass *>(nullptr));
5556 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5557
5558 // Remove the braces from around the name.
5559 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
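  // E.g. for the constraint "{eax}" on X86, RegName is now "eax" and the
  // search below finds X86::EAX in a general-purpose register class
  // (illustrative; the class chosen depends on the requested VT).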
5560
5561 std::pair<unsigned, const TargetRegisterClass *> R =
5562 std::make_pair(x: 0u, y: static_cast<const TargetRegisterClass *>(nullptr));
5563
5564 // Figure out which register class contains this reg.
5565 for (const TargetRegisterClass *RC : RI->regclasses()) {
5566 // If none of the value types for this register class are valid, we
5567 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5568 if (!isLegalRC(TRI: *RI, RC: *RC))
5569 continue;
5570
5571 for (const MCPhysReg &PR : *RC) {
5572 if (RegName.equals_insensitive(RHS: RI->getRegAsmName(Reg: PR))) {
5573 std::pair<unsigned, const TargetRegisterClass *> S =
5574 std::make_pair(x: PR, y&: RC);
5575
        // If this register class has the requested value type, return it;
        // otherwise keep searching, and fall back to the first class found
        // if none explicitly supports the requested type.
5579 if (RI->isTypeLegalForClass(RC: *RC, T: VT))
5580 return S;
5581 if (!R.second)
5582 R = S;
5583 }
5584 }
5585 }
5586
5587 return R;
5588}
5589
5590//===----------------------------------------------------------------------===//
5591// Constraint Selection.
5592
/// Return true if this is an input operand that is a matching constraint,
/// like "4".
5595bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
5596 assert(!ConstraintCode.empty() && "No known constraint!");
5597 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5598}
5599
5600/// If this is an input matching constraint, this method returns the output
5601/// operand it matches.
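/// For example, an input with the constraint string "0" is tied to output
/// operand 0, and this returns 0.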
5602unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
5603 assert(!ConstraintCode.empty() && "No known constraint!");
5604 return atoi(nptr: ConstraintCode.c_str());
5605}
5606
5607/// Split up the constraint string from the inline assembly value into the
5608/// specific constraints and their prefixes, and also tie in the associated
5609/// operand values.
5610/// If this returns an empty vector, and if the constraint string itself
5611/// isn't empty, there was an error parsing.
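/// For example (illustrative), the constraint string "=r,0,~{memory}" yields
/// three AsmOperandInfos: a register output, an input tied to that output,
/// and a memory clobber.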
5612TargetLowering::AsmOperandInfoVector
5613TargetLowering::ParseConstraints(const DataLayout &DL,
5614 const TargetRegisterInfo *TRI,
5615 const CallBase &Call) const {
5616 /// Information about all of the constraints.
5617 AsmOperandInfoVector ConstraintOperands;
5618 const InlineAsm *IA = cast<InlineAsm>(Val: Call.getCalledOperand());
5619 unsigned maCount = 0; // Largest number of multiple alternative constraints.
5620
5621 // Do a prepass over the constraints, canonicalizing them, and building up the
5622 // ConstraintOperands list.
5623 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
5624 unsigned ResNo = 0; // ResNo - The result number of the next output.
5625 unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
5626
5627 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
5628 ConstraintOperands.emplace_back(args: std::move(CI));
5629 AsmOperandInfo &OpInfo = ConstraintOperands.back();
5630
5631 // Update multiple alternative constraint count.
5632 if (OpInfo.multipleAlternatives.size() > maCount)
5633 maCount = OpInfo.multipleAlternatives.size();
5634
5635 OpInfo.ConstraintVT = MVT::Other;
5636
5637 // Compute the value type for each operand.
5638 switch (OpInfo.Type) {
5639 case InlineAsm::isOutput:
5640 // Indirect outputs just consume an argument.
5641 if (OpInfo.isIndirect) {
5642 OpInfo.CallOperandVal = Call.getArgOperand(i: ArgNo);
5643 break;
5644 }
5645
5646 // The return value of the call is this value. As such, there is no
5647 // corresponding argument.
5648 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
5649 if (StructType *STy = dyn_cast<StructType>(Val: Call.getType())) {
5650 OpInfo.ConstraintVT =
5651 getSimpleValueType(DL, Ty: STy->getElementType(N: ResNo));
5652 } else {
5653 assert(ResNo == 0 && "Asm only has one result!");
5654 OpInfo.ConstraintVT =
5655 getAsmOperandValueType(DL, Ty: Call.getType()).getSimpleVT();
5656 }
5657 ++ResNo;
5658 break;
5659 case InlineAsm::isInput:
5660 OpInfo.CallOperandVal = Call.getArgOperand(i: ArgNo);
5661 break;
5662 case InlineAsm::isLabel:
5663 OpInfo.CallOperandVal = cast<CallBrInst>(Val: &Call)->getIndirectDest(i: LabelNo);
5664 ++LabelNo;
5665 continue;
5666 case InlineAsm::isClobber:
5667 // Nothing to do.
5668 break;
5669 }
5670
5671 if (OpInfo.CallOperandVal) {
5672 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
5673 if (OpInfo.isIndirect) {
5674 OpTy = Call.getParamElementType(ArgNo);
5675 assert(OpTy && "Indirect operand must have elementtype attribute");
5676 }
5677
      // Look for a vector wrapped in a struct, e.g. { <16 x i8> }.
5679 if (StructType *STy = dyn_cast<StructType>(Val: OpTy))
5680 if (STy->getNumElements() == 1)
5681 OpTy = STy->getElementType(N: 0);
5682
5683 // If OpTy is not a single value, it may be a struct/union that we
5684 // can tile with integers.
5685 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
5686 unsigned BitSize = DL.getTypeSizeInBits(Ty: OpTy);
5687 switch (BitSize) {
5688 default: break;
5689 case 1:
5690 case 8:
5691 case 16:
5692 case 32:
5693 case 64:
5694 case 128:
5695 OpTy = IntegerType::get(C&: OpTy->getContext(), NumBits: BitSize);
5696 break;
5697 }
5698 }
5699
5700 EVT VT = getAsmOperandValueType(DL, Ty: OpTy, AllowUnknown: true);
5701 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
5702 ArgNo++;
5703 }
5704 }
5705
5706 // If we have multiple alternative constraints, select the best alternative.
5707 if (!ConstraintOperands.empty()) {
5708 if (maCount) {
5709 unsigned bestMAIndex = 0;
5710 int bestWeight = -1;
      // weight: -1 = invalid match; 0 = so-so match, up to 5 = good match.
5712 int weight = -1;
5713 unsigned maIndex;
5714 // Compute the sums of the weights for each alternative, keeping track
5715 // of the best (highest weight) one so far.
5716 for (maIndex = 0; maIndex < maCount; ++maIndex) {
5717 int weightSum = 0;
5718 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
5719 cIndex != eIndex; ++cIndex) {
5720 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
5721 if (OpInfo.Type == InlineAsm::isClobber)
5722 continue;
5723
          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch (e.g. one
          // is an integer, the other is floating point) or their sizes
          // differ, flag this alternative as impossible to match.
5728 if (OpInfo.hasMatchingInput()) {
5729 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
5730 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
5731 if ((OpInfo.ConstraintVT.isInteger() !=
5732 Input.ConstraintVT.isInteger()) ||
5733 (OpInfo.ConstraintVT.getSizeInBits() !=
5734 Input.ConstraintVT.getSizeInBits())) {
5735 weightSum = -1; // Can't match.
5736 break;
5737 }
5738 }
5739 }
5740 weight = getMultipleConstraintMatchWeight(info&: OpInfo, maIndex);
5741 if (weight == -1) {
5742 weightSum = -1;
5743 break;
5744 }
5745 weightSum += weight;
5746 }
5747 // Update best.
5748 if (weightSum > bestWeight) {
5749 bestWeight = weightSum;
5750 bestMAIndex = maIndex;
5751 }
5752 }
5753
5754 // Now select chosen alternative in each constraint.
5755 for (AsmOperandInfo &cInfo : ConstraintOperands)
5756 if (cInfo.Type != InlineAsm::isClobber)
5757 cInfo.selectAlternative(index: bestMAIndex);
5758 }
5759 }
5760
5761 // Check and hook up tied operands, choose constraint code to use.
5762 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
5763 cIndex != eIndex; ++cIndex) {
5764 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
5765
5766 // If this is an output operand with a matching input operand, look up the
5767 // matching input. If their types mismatch, e.g. one is an integer, the
5768 // other is floating point, or their sizes are different, flag it as an
5769 // error.
5770 if (OpInfo.hasMatchingInput()) {
5771 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
5772
5773 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
5774 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
5775 getRegForInlineAsmConstraint(RI: TRI, Constraint: OpInfo.ConstraintCode,
5776 VT: OpInfo.ConstraintVT);
5777 std::pair<unsigned, const TargetRegisterClass *> InputRC =
5778 getRegForInlineAsmConstraint(RI: TRI, Constraint: Input.ConstraintCode,
5779 VT: Input.ConstraintVT);
5780 if ((OpInfo.ConstraintVT.isInteger() !=
5781 Input.ConstraintVT.isInteger()) ||
5782 (MatchRC.second != InputRC.second)) {
5783 report_fatal_error(reason: "Unsupported asm: input constraint"
5784 " with a matching output constraint of"
5785 " incompatible type!");
5786 }
5787 }
5788 }
5789 }
5790
5791 return ConstraintOperands;
5792}
5793
/// Return a number indicating our preference for choosing a type of constraint
/// over another, for the purpose of sorting them. Immediates are almost always
/// preferable (when they can be emitted). A higher return value means a
5797/// stronger preference for one constraint type relative to another.
5798/// FIXME: We should prefer registers over memory but doing so may lead to
5799/// unrecoverable register exhaustion later.
5800/// https://github.com/llvm/llvm-project/issues/20571
static unsigned getConstraintPriority(TargetLowering::ConstraintType CT) {
5802 switch (CT) {
5803 case TargetLowering::C_Immediate:
5804 case TargetLowering::C_Other:
5805 return 4;
5806 case TargetLowering::C_Memory:
5807 case TargetLowering::C_Address:
5808 return 3;
5809 case TargetLowering::C_RegisterClass:
5810 return 2;
5811 case TargetLowering::C_Register:
5812 return 1;
5813 case TargetLowering::C_Unknown:
5814 return 0;
5815 }
5816 llvm_unreachable("Invalid constraint type");
5817}
5818
5819/// Examine constraint type and operand type and determine a weight value.
5820/// This object must already have been set up with the operand type
5821/// and the current alternative constraint selected.
5822TargetLowering::ConstraintWeight
5823 TargetLowering::getMultipleConstraintMatchWeight(
5824 AsmOperandInfo &info, int maIndex) const {
5825 InlineAsm::ConstraintCodeVector *rCodes;
5826 if (maIndex >= (int)info.multipleAlternatives.size())
5827 rCodes = &info.Codes;
5828 else
5829 rCodes = &info.multipleAlternatives[maIndex].Codes;
5830 ConstraintWeight BestWeight = CW_Invalid;
5831
  // Loop over the options, keeping track of the highest-weight one.
5833 for (const std::string &rCode : *rCodes) {
5834 ConstraintWeight weight =
5835 getSingleConstraintMatchWeight(info, constraint: rCode.c_str());
5836 if (weight > BestWeight)
5837 BestWeight = weight;
5838 }
5839
5840 return BestWeight;
5841}
5842
5843/// Examine constraint type and operand type and determine a weight value.
5844/// This object must already have been set up with the operand type
5845/// and the current alternative constraint selected.
5846TargetLowering::ConstraintWeight
5847 TargetLowering::getSingleConstraintMatchWeight(
5848 AsmOperandInfo &info, const char *constraint) const {
5849 ConstraintWeight weight = CW_Invalid;
5850 Value *CallOperandVal = info.CallOperandVal;
5851 // If we don't have a value, we can't do a match,
5852 // but allow it at the lowest weight.
5853 if (!CallOperandVal)
5854 return CW_Default;
5855 // Look at the constraint type.
5856 switch (*constraint) {
5857 case 'i': // immediate integer.
5858 case 'n': // immediate integer with a known value.
5859 if (isa<ConstantInt>(Val: CallOperandVal))
5860 weight = CW_Constant;
5861 break;
  case 's': // non-explicit integral immediate.
5863 if (isa<GlobalValue>(Val: CallOperandVal))
5864 weight = CW_Constant;
5865 break;
  case 'E': // immediate float, if in host format.
5867 case 'F': // immediate float.
5868 if (isa<ConstantFP>(Val: CallOperandVal))
5869 weight = CW_Constant;
5870 break;
5871 case '<': // memory operand with autodecrement.
5872 case '>': // memory operand with autoincrement.
5873 case 'm': // memory operand.
5874 case 'o': // offsettable memory operand
5875 case 'V': // non-offsettable memory operand
5876 weight = CW_Memory;
5877 break;
5878 case 'r': // general register.
5879 case 'g': // general register, memory operand or immediate integer.
5880 // note: Clang converts "g" to "imr".
5881 if (CallOperandVal->getType()->isIntegerTy())
5882 weight = CW_Register;
5883 break;
5884 case 'X': // any operand.
5885 default:
5886 weight = CW_Default;
5887 break;
5888 }
5889 return weight;
5890}
5891
5892/// If there are multiple different constraints that we could pick for this
5893/// operand (e.g. "imr") try to pick the 'best' one.
5894/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
5895/// into seven classes:
5896/// Register -> one specific register
5897/// RegisterClass -> a group of regs
5898/// Memory -> memory
5899/// Address -> a symbolic memory reference
5900/// Immediate -> immediate values
5901/// Other -> magic values (such as "Flag Output Operands")
5902/// Unknown -> something we don't recognize yet and can't handle
5903/// Ideally, we would pick the most specific constraint possible: if we have
5904/// something that fits into a register, we would pick it. The problem here
5905/// is that if we have something that could either be in a register or in
/// memory, then using the register could cause selection of *other*
/// operands to fail: they might succeed only if we pick memory. Because of
/// this, the heuristic we use is:
5909///
5910/// 1) If there is an 'other' constraint, and if the operand is valid for
5911/// that constraint, use it. This makes us take advantage of 'i'
5912/// constraints when available.
5913/// 2) Otherwise, pick the most general constraint present. This prefers
5914/// 'm' over 'r', for example.
5915///
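/// For example (illustrative), given "imr" and a constant integer operand,
/// rule 1 picks 'i'; given "mr" and a non-constant operand, rule 2 picks 'm'
/// over 'r'.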
5916TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
5917 TargetLowering::AsmOperandInfo &OpInfo) const {
5918 ConstraintGroup Ret;
5919
5920 Ret.reserve(N: OpInfo.Codes.size());
5921 for (StringRef Code : OpInfo.Codes) {
5922 TargetLowering::ConstraintType CType = getConstraintType(Constraint: Code);
5923
5924 // Indirect 'other' or 'immediate' constraints are not allowed.
5925 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
5926 CType == TargetLowering::C_Register ||
5927 CType == TargetLowering::C_RegisterClass))
5928 continue;
5929
5930 // Things with matching constraints can only be registers, per gcc
5931 // documentation. This mainly affects "g" constraints.
5932 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
5933 continue;
5934
5935 Ret.emplace_back(Args&: Code, Args&: CType);
5936 }
5937
5938 std::stable_sort(
5939 first: Ret.begin(), last: Ret.end(), comp: [](ConstraintPair a, ConstraintPair b) {
        return getConstraintPriority(CT: a.second) >
               getConstraintPriority(CT: b.second);
5941 });
5942
5943 return Ret;
5944}
5945
5946/// If we have an immediate, see if we can lower it. Return true if we can,
5947/// false otherwise.
5948static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
5949 SDValue Op, SelectionDAG *DAG,
5950 const TargetLowering &TLI) {
5951
5952 assert((P.second == TargetLowering::C_Other ||
5953 P.second == TargetLowering::C_Immediate) &&
5954 "need immediate or other");
5955
5956 if (!Op.getNode())
5957 return false;
5958
5959 std::vector<SDValue> ResultOps;
5960 TLI.LowerAsmOperandForConstraint(Op, Constraint: P.first, Ops&: ResultOps, DAG&: *DAG);
5961 return !ResultOps.empty();
5962}
5963
5964/// Determines the constraint code and constraint type to use for the specific
5965/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
5966void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
5967 SDValue Op,
5968 SelectionDAG *DAG) const {
5969 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
5970
5971 // Single-letter constraints ('r') are very common.
5972 if (OpInfo.Codes.size() == 1) {
5973 OpInfo.ConstraintCode = OpInfo.Codes[0];
5974 OpInfo.ConstraintType = getConstraintType(Constraint: OpInfo.ConstraintCode);
5975 } else {
5976 ConstraintGroup G = getConstraintPreferences(OpInfo);
5977 if (G.empty())
5978 return;
5979
5980 unsigned BestIdx = 0;
5981 for (const unsigned E = G.size();
5982 BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
5983 G[BestIdx].second == TargetLowering::C_Immediate);
5984 ++BestIdx) {
5985 if (lowerImmediateIfPossible(P&: G[BestIdx], Op, DAG, TLI: *this))
5986 break;
5987 // If we're out of constraints, just pick the first one.
5988 if (BestIdx + 1 == E) {
5989 BestIdx = 0;
5990 break;
5991 }
5992 }
5993
5994 OpInfo.ConstraintCode = G[BestIdx].first;
5995 OpInfo.ConstraintType = G[BestIdx].second;
5996 }
5997
5998 // 'X' matches anything.
5999 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6000 // Constants are handled elsewhere. For Functions, the type here is the
6001 // type of the result, which is not what we want to look at; leave them
6002 // alone.
6003 Value *v = OpInfo.CallOperandVal;
6004 if (isa<ConstantInt>(Val: v) || isa<Function>(Val: v)) {
6005 return;
6006 }
6007
6008 if (isa<BasicBlock>(Val: v) || isa<BlockAddress>(Val: v)) {
6009 OpInfo.ConstraintCode = "i";
6010 return;
6011 }
6012
6013 // Otherwise, try to resolve it to something we know about by looking at
6014 // the actual operand type.
6015 if (const char *Repl = LowerXConstraint(ConstraintVT: OpInfo.ConstraintVT)) {
6016 OpInfo.ConstraintCode = Repl;
6017 OpInfo.ConstraintType = getConstraintType(Constraint: OpInfo.ConstraintCode);
6018 }
6019 }
6020}
6021
6022/// Given an exact SDIV by a constant, create a multiplication
6023/// with the multiplicative inverse of the constant.
6024static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
6025 const SDLoc &dl, SelectionDAG &DAG,
6026 SmallVectorImpl<SDNode *> &Created) {
6027 SDValue Op0 = N->getOperand(Num: 0);
6028 SDValue Op1 = N->getOperand(Num: 1);
6029 EVT VT = N->getValueType(ResNo: 0);
6030 EVT SVT = VT.getScalarType();
6031 EVT ShVT = TLI.getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6032 EVT ShSVT = ShVT.getScalarType();
6033
6034 bool UseSRA = false;
6035 SmallVector<SDValue, 16> Shifts, Factors;
6036
6037 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6038 if (C->isZero())
6039 return false;
6040 APInt Divisor = C->getAPIntValue();
6041 unsigned Shift = Divisor.countr_zero();
6042 if (Shift) {
6043 Divisor.ashrInPlace(ShiftAmt: Shift);
6044 UseSRA = true;
6045 }
6046 // Calculate the multiplicative inverse, using Newton's method.
6047 APInt t;
6048 APInt Factor = Divisor;
6049 while ((t = Divisor * Factor) != 1)
6050 Factor *= APInt(Divisor.getBitWidth(), 2) - t;
6051 Shifts.push_back(Elt: DAG.getConstant(Val: Shift, DL: dl, VT: ShSVT));
6052 Factors.push_back(Elt: DAG.getConstant(Val: Factor, DL: dl, VT: SVT));
6053 return true;
6054 };
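  // Worked example (illustrative): an exact i8 sdiv by 6. Since 6 = 3 * 2^1,
  // Shift = 1 and Factor = inv(3, 2^8) = 171 (3 * 171 = 513 = 2 * 256 + 1).
  // For x = 42: (42 >>exact 1) = 21, and 21 * 171 = 3591 = 7 (mod 256), which
  // is indeed 42 / 6.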
6055
6056 // Collect all magic values from the build vector.
6057 if (!ISD::matchUnaryPredicate(Op: Op1, Match: BuildSDIVPattern))
6058 return SDValue();
6059
6060 SDValue Shift, Factor;
6061 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6062 Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
6063 Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
6064 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6065 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6066 "Expected matchUnaryPredicate to return one element for scalable "
6067 "vectors");
6068 Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts[0]);
6069 Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors[0]);
6070 } else {
6071 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6072 Shift = Shifts[0];
6073 Factor = Factors[0];
6074 }
6075
6076 SDValue Res = Op0;
6077
  // If the divisor was even, shift the dividend right upfront; the factors
  // computed above invert only the odd part of the divisor, whose LSB is one.
6079 if (UseSRA) {
6080 // TODO: For UDIV use SRL instead of SRA.
6081 SDNodeFlags Flags;
6082 Flags.setExact(true);
6083 Res = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Res, N2: Shift, Flags);
6084 Created.push_back(Elt: Res.getNode());
6085 }
6086
6087 return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Res, N2: Factor);
6088}
6089
6090SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
6091 SelectionDAG &DAG,
6092 SmallVectorImpl<SDNode *> &Created) const {
6093 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6094 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6095 if (TLI.isIntDivCheap(VT: N->getValueType(ResNo: 0), Attr))
6096 return SDValue(N, 0); // Lower SDIV as SDIV
6097 return SDValue();
6098}
6099
6100SDValue
6101TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
6102 SelectionDAG &DAG,
6103 SmallVectorImpl<SDNode *> &Created) const {
6104 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6105 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6106 if (TLI.isIntDivCheap(VT: N->getValueType(ResNo: 0), Attr))
6107 return SDValue(N, 0); // Lower SREM as SREM
6108 return SDValue();
6109}
6110
6111/// Build sdiv by power-of-2 with conditional move instructions
6112/// Ref: "Hacker's Delight" by Henry Warren 10-1
6113/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6114/// bgez x, label
6115/// add x, x, 2**k-1
6116/// label:
6117/// sra res, x, k
6118/// neg res, res (when the divisor is negative)
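/// For example (illustrative), sdiv -5, 4: since -5 < 0 we add 2**2-1 = 3 to
/// get -2, and -2 >> 2 (arithmetic) = -1, matching truncating division.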
6119SDValue TargetLowering::buildSDIVPow2WithCMov(
6120 SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6121 SmallVectorImpl<SDNode *> &Created) const {
6122 unsigned Lg2 = Divisor.countr_zero();
6123 EVT VT = N->getValueType(ResNo: 0);
6124
6125 SDLoc DL(N);
6126 SDValue N0 = N->getOperand(Num: 0);
6127 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
6128 APInt Lg2Mask = APInt::getLowBitsSet(numBits: VT.getSizeInBits(), loBitsSet: Lg2);
6129 SDValue Pow2MinusOne = DAG.getConstant(Val: Lg2Mask, DL, VT);
6130
6131 // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6132 EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
6133 SDValue Cmp = DAG.getSetCC(DL, VT: CCVT, LHS: N0, RHS: Zero, Cond: ISD::SETLT);
6134 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0, N2: Pow2MinusOne);
6135 SDValue CMov = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: Cmp, N2: Add, N3: N0);
6136
6137 Created.push_back(Elt: Cmp.getNode());
6138 Created.push_back(Elt: Add.getNode());
6139 Created.push_back(Elt: CMov.getNode());
6140
6141 // Divide by pow2.
6142 SDValue SRA =
6143 DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: CMov, N2: DAG.getConstant(Val: Lg2, DL, VT));
6144
6145 // If we're dividing by a positive value, we're done. Otherwise, we must
6146 // negate the result.
6147 if (Divisor.isNonNegative())
6148 return SRA;
6149
6150 Created.push_back(Elt: SRA.getNode());
6151 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Zero, N2: SRA);
6152}
6153
6154/// Given an ISD::SDIV node expressing a divide by constant,
6155/// return a DAG expression to select that will generate the same value by
6156/// multiplying by a magic number.
6157/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
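/// For example (illustrative), a 32-bit sdiv by 7 uses the magic constant
/// 0x92492493 with a post-shift of 2: q = mulhs(n, M); q += n; q >>= 2;
/// q += q >>u 31. The final add is the sign-bit fixup built below.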
6158SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
6159 bool IsAfterLegalization,
6160 SmallVectorImpl<SDNode *> &Created) const {
6161 SDLoc dl(N);
6162 EVT VT = N->getValueType(ResNo: 0);
6163 EVT SVT = VT.getScalarType();
6164 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6165 EVT ShSVT = ShVT.getScalarType();
6166 unsigned EltBits = VT.getScalarSizeInBits();
6167 EVT MulVT;
6168
6169 // Check to see if we can do this.
6170 // FIXME: We should be more aggressive here.
6171 if (!isTypeLegal(VT)) {
6172 // Limit this to simple scalars for now.
6173 if (VT.isVector() || !VT.isSimple())
6174 return SDValue();
6175
6176 // If this type will be promoted to a large enough type with a legal
6177 // multiply operation, we can go ahead and do this transform.
6178 if (getTypeAction(VT: VT.getSimpleVT()) != TypePromoteInteger)
6179 return SDValue();
6180
6181 MulVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT);
6182 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6183 !isOperationLegal(Op: ISD::MUL, VT: MulVT))
6184 return SDValue();
6185 }
6186
6187 // If the sdiv has an 'exact' bit we can use a simpler lowering.
6188 if (N->getFlags().hasExact())
6189 return BuildExactSDIV(TLI: *this, N, dl, DAG, Created);
6190
6191 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6192
6193 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6194 if (C->isZero())
6195 return false;
6196
6197 const APInt &Divisor = C->getAPIntValue();
6198 SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(D: Divisor);
6199 int NumeratorFactor = 0;
6200 int ShiftMask = -1;
6201
6202 if (Divisor.isOne() || Divisor.isAllOnes()) {
6203 // If d is +1/-1, we just multiply the numerator by +1/-1.
6204 NumeratorFactor = Divisor.getSExtValue();
6205 magics.Magic = 0;
6206 magics.ShiftAmount = 0;
6207 ShiftMask = 0;
6208 } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6209 // If d > 0 and m < 0, add the numerator.
6210 NumeratorFactor = 1;
6211 } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6212 // If d < 0 and m > 0, subtract the numerator.
6213 NumeratorFactor = -1;
6214 }
6215
6216 MagicFactors.push_back(Elt: DAG.getConstant(Val: magics.Magic, DL: dl, VT: SVT));
6217 Factors.push_back(Elt: DAG.getConstant(Val: NumeratorFactor, DL: dl, VT: SVT));
6218 Shifts.push_back(Elt: DAG.getConstant(Val: magics.ShiftAmount, DL: dl, VT: ShSVT));
6219 ShiftMasks.push_back(Elt: DAG.getConstant(Val: ShiftMask, DL: dl, VT: SVT));
6220 return true;
6221 };
6222
6223 SDValue N0 = N->getOperand(Num: 0);
6224 SDValue N1 = N->getOperand(Num: 1);
6225
6226 // Collect the shifts / magic values from each element.
6227 if (!ISD::matchUnaryPredicate(Op: N1, Match: BuildSDIVPattern))
6228 return SDValue();
6229
6230 SDValue MagicFactor, Factor, Shift, ShiftMask;
6231 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6232 MagicFactor = DAG.getBuildVector(VT, DL: dl, Ops: MagicFactors);
6233 Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
6234 Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
6235 ShiftMask = DAG.getBuildVector(VT, DL: dl, Ops: ShiftMasks);
6236 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6237 assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6238 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6239 "Expected matchUnaryPredicate to return one element for scalable "
6240 "vectors");
6241 MagicFactor = DAG.getSplatVector(VT, DL: dl, Op: MagicFactors[0]);
6242 Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors[0]);
6243 Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts[0]);
6244 ShiftMask = DAG.getSplatVector(VT, DL: dl, Op: ShiftMasks[0]);
6245 } else {
6246 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6247 MagicFactor = MagicFactors[0];
6248 Factor = Factors[0];
6249 Shift = Shifts[0];
6250 ShiftMask = ShiftMasks[0];
6251 }
6252
6253 // Multiply the numerator (operand 0) by the magic value.
6254 // FIXME: We should support doing a MUL in a wider type.
6255 auto GetMULHS = [&](SDValue X, SDValue Y) {
6256 // If the type isn't legal, use a wider mul of the type calculated
6257 // earlier.
6258 if (!isTypeLegal(VT)) {
6259 X = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MulVT, Operand: X);
6260 Y = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MulVT, Operand: Y);
6261 Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MulVT, N1: X, N2: Y);
6262 Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MulVT, N1: Y,
6263 N2: DAG.getShiftAmountConstant(Val: EltBits, VT: MulVT, DL: dl));
6264 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6265 }
6266
6267 if (isOperationLegalOrCustom(Op: ISD::MULHS, VT, LegalOnly: IsAfterLegalization))
6268 return DAG.getNode(Opcode: ISD::MULHS, DL: dl, VT, N1: X, N2: Y);
6269 if (isOperationLegalOrCustom(Op: ISD::SMUL_LOHI, VT, LegalOnly: IsAfterLegalization)) {
6270 SDValue LoHi =
6271 DAG.getNode(Opcode: ISD::SMUL_LOHI, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: X, N2: Y);
6272 return SDValue(LoHi.getNode(), 1);
6273 }
    // If a type twice as wide is legal, widen and use a mul plus a shift.
6275 unsigned Size = VT.getScalarSizeInBits();
6276 EVT WideVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: Size * 2);
6277 if (VT.isVector())
6278 WideVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT,
6279 EC: VT.getVectorElementCount());
6280 if (isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT)) {
6281 X = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: WideVT, Operand: X);
6282 Y = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: WideVT, Operand: Y);
6283 Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: X, N2: Y);
6284 Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideVT, N1: Y,
6285 N2: DAG.getShiftAmountConstant(Val: EltBits, VT: WideVT, DL: dl));
6286 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6287 }
6288 return SDValue();
6289 };
6290
6291 SDValue Q = GetMULHS(N0, MagicFactor);
6292 if (!Q)
6293 return SDValue();
6294
6295 Created.push_back(Elt: Q.getNode());
6296
6297 // (Optionally) Add/subtract the numerator using Factor.
6298 Factor = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: N0, N2: Factor);
6299 Created.push_back(Elt: Factor.getNode());
6300 Q = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Q, N2: Factor);
6301 Created.push_back(Elt: Q.getNode());
6302
6303 // Shift right algebraic by shift value.
6304 Q = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Q, N2: Shift);
6305 Created.push_back(Elt: Q.getNode());
6306
6307 // Extract the sign bit, mask it and add it to the quotient.
6308 SDValue SignShift = DAG.getConstant(Val: EltBits - 1, DL: dl, VT: ShVT);
6309 SDValue T = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: SignShift);
6310 Created.push_back(Elt: T.getNode());
6311 T = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: T, N2: ShiftMask);
6312 Created.push_back(Elt: T.getNode());
6313 return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Q, N2: T);
6314}
6315
6316/// Given an ISD::UDIV node expressing a divide by constant,
6317/// return a DAG expression to select that will generate the same value by
6318/// multiplying by a magic number.
6319/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
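/// For example (illustrative), a 32-bit udiv by 7 uses the magic constant
/// 0x24924925 with the NPQ fixup built below (UseNPQ): q = mulhu(n, M);
/// npq = (n - q) >> 1; result = (npq + q) >> 2.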
6320SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
6321 bool IsAfterLegalization,
6322 SmallVectorImpl<SDNode *> &Created) const {
6323 SDLoc dl(N);
6324 EVT VT = N->getValueType(ResNo: 0);
6325 EVT SVT = VT.getScalarType();
6326 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6327 EVT ShSVT = ShVT.getScalarType();
6328 unsigned EltBits = VT.getScalarSizeInBits();
6329 EVT MulVT;
6330
6331 // Check to see if we can do this.
6332 // FIXME: We should be more aggressive here.
6333 if (!isTypeLegal(VT)) {
6334 // Limit this to simple scalars for now.
6335 if (VT.isVector() || !VT.isSimple())
6336 return SDValue();
6337
6338 // If this type will be promoted to a large enough type with a legal
6339 // multiply operation, we can go ahead and do this transform.
6340 if (getTypeAction(VT: VT.getSimpleVT()) != TypePromoteInteger)
6341 return SDValue();
6342
6343 MulVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT);
6344 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6345 !isOperationLegal(Op: ISD::MUL, VT: MulVT))
6346 return SDValue();
6347 }
6348
6349 SDValue N0 = N->getOperand(Num: 0);
6350 SDValue N1 = N->getOperand(Num: 1);
6351
6352 // Try to use leading zeros of the dividend to reduce the multiplier and
6353 // avoid expensive fixups.
6354 // TODO: Support vectors.
6355 unsigned LeadingZeros = 0;
6356 if (!VT.isVector() && isa<ConstantSDNode>(Val: N1)) {
6357 assert(!isOneConstant(N1) && "Unexpected divisor");
6358 LeadingZeros = DAG.computeKnownBits(Op: N0).countMinLeadingZeros();
    // UnsignedDivisionByConstantInfo doesn't work correctly if the number of
    // leading zeros in the dividend exceeds that of the divisor.
6361 LeadingZeros = std::min(a: LeadingZeros, b: N1->getAsAPIntVal().countl_zero());
6362 }
6363
6364 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6365 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6366
6367 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6368 if (C->isZero())
6369 return false;
6370 const APInt& Divisor = C->getAPIntValue();
6371
6372 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6373
    // The magic algorithm doesn't work for division by 1, so we need to emit a
    // select at the end.
6376 if (Divisor.isOne()) {
6377 PreShift = PostShift = DAG.getUNDEF(VT: ShSVT);
6378 MagicFactor = NPQFactor = DAG.getUNDEF(VT: SVT);
6379 } else {
6380 UnsignedDivisionByConstantInfo magics =
6381 UnsignedDivisionByConstantInfo::get(D: Divisor, LeadingZeros);
6382
6383 MagicFactor = DAG.getConstant(Val: magics.Magic, DL: dl, VT: SVT);
6384
6385 assert(magics.PreShift < Divisor.getBitWidth() &&
6386 "We shouldn't generate an undefined shift!");
6387 assert(magics.PostShift < Divisor.getBitWidth() &&
6388 "We shouldn't generate an undefined shift!");
6389 assert((!magics.IsAdd || magics.PreShift == 0) &&
6390 "Unexpected pre-shift");
6391 PreShift = DAG.getConstant(Val: magics.PreShift, DL: dl, VT: ShSVT);
6392 PostShift = DAG.getConstant(Val: magics.PostShift, DL: dl, VT: ShSVT);
6393 NPQFactor = DAG.getConstant(
6394 Val: magics.IsAdd ? APInt::getOneBitSet(numBits: EltBits, BitNo: EltBits - 1)
6395 : APInt::getZero(numBits: EltBits),
6396 DL: dl, VT: SVT);
6397 UseNPQ |= magics.IsAdd;
6398 UsePreShift |= magics.PreShift != 0;
6399 UsePostShift |= magics.PostShift != 0;
6400 }
6401
6402 PreShifts.push_back(Elt: PreShift);
6403 MagicFactors.push_back(Elt: MagicFactor);
6404 NPQFactors.push_back(Elt: NPQFactor);
6405 PostShifts.push_back(Elt: PostShift);
6406 return true;
6407 };
6408
6409 // Collect the shifts/magic values from each element.
6410 if (!ISD::matchUnaryPredicate(Op: N1, Match: BuildUDIVPattern))
6411 return SDValue();
6412
6413 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6414 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6415 PreShift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: PreShifts);
6416 MagicFactor = DAG.getBuildVector(VT, DL: dl, Ops: MagicFactors);
6417 NPQFactor = DAG.getBuildVector(VT, DL: dl, Ops: NPQFactors);
6418 PostShift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: PostShifts);
6419 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6420 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6421 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
6423 PreShift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: PreShifts[0]);
6424 MagicFactor = DAG.getSplatVector(VT, DL: dl, Op: MagicFactors[0]);
6425 NPQFactor = DAG.getSplatVector(VT, DL: dl, Op: NPQFactors[0]);
6426 PostShift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: PostShifts[0]);
6427 } else {
6428 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6429 PreShift = PreShifts[0];
6430 MagicFactor = MagicFactors[0];
6431 PostShift = PostShifts[0];
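    // NPQFactor is deliberately left unset here: for scalars, the NPQ fixup
    // below uses an explicit SRL-by-1 instead of a MULHU.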
6432 }
6433
6434 SDValue Q = N0;
6435 if (UsePreShift) {
6436 Q = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: PreShift);
6437 Created.push_back(Elt: Q.getNode());
6438 }
6439
6440 // FIXME: We should support doing a MUL in a wider type.
6441 auto GetMULHU = [&](SDValue X, SDValue Y) {
6442 // If the type isn't legal, use a wider mul of the type calculated
6443 // earlier.
6444 if (!isTypeLegal(VT)) {
6445 X = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MulVT, Operand: X);
6446 Y = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MulVT, Operand: Y);
6447 Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MulVT, N1: X, N2: Y);
6448 Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MulVT, N1: Y,
6449 N2: DAG.getShiftAmountConstant(Val: EltBits, VT: MulVT, DL: dl));
6450 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6451 }
6452
6453 if (isOperationLegalOrCustom(Op: ISD::MULHU, VT, LegalOnly: IsAfterLegalization))
6454 return DAG.getNode(Opcode: ISD::MULHU, DL: dl, VT, N1: X, N2: Y);
6455 if (isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT, LegalOnly: IsAfterLegalization)) {
6456 SDValue LoHi =
6457 DAG.getNode(Opcode: ISD::UMUL_LOHI, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: X, N2: Y);
6458 return SDValue(LoHi.getNode(), 1);
6459 }
    // If a type twice as wide is legal, widen and use a mul plus a shift.
6461 unsigned Size = VT.getScalarSizeInBits();
6462 EVT WideVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: Size * 2);
6463 if (VT.isVector())
6464 WideVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT,
6465 EC: VT.getVectorElementCount());
6466 if (isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT)) {
6467 X = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: WideVT, Operand: X);
6468 Y = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: WideVT, Operand: Y);
6469 Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: X, N2: Y);
6470 Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideVT, N1: Y,
6471 N2: DAG.getShiftAmountConstant(Val: EltBits, VT: WideVT, DL: dl));
6472 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6473 }
6474 return SDValue(); // No mulhu or equivalent
6475 };
6476
6477 // Multiply the numerator (operand 0) by the magic value.
6478 Q = GetMULHU(Q, MagicFactor);
6479 if (!Q)
6480 return SDValue();
6481
6482 Created.push_back(Elt: Q.getNode());
6483
6484 if (UseNPQ) {
6485 SDValue NPQ = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: N0, N2: Q);
6486 Created.push_back(Elt: NPQ.getNode());
6487
6488 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
6489 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
6490 if (VT.isVector())
6491 NPQ = GetMULHU(NPQ, NPQFactor);
6492 else
6493 NPQ = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: NPQ, N2: DAG.getConstant(Val: 1, DL: dl, VT: ShVT));
6494
6495 Created.push_back(Elt: NPQ.getNode());
6496
6497 Q = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: NPQ, N2: Q);
6498 Created.push_back(Elt: Q.getNode());
6499 }
6500
6501 if (UsePostShift) {
6502 Q = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: PostShift);
6503 Created.push_back(Elt: Q.getNode());
6504 }
6505
6506 EVT SetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
6507
6508 SDValue One = DAG.getConstant(Val: 1, DL: dl, VT);
6509 SDValue IsOne = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: N1, RHS: One, Cond: ISD::SETEQ);
6510 return DAG.getSelect(DL: dl, VT, Cond: IsOne, LHS: N0, RHS: Q);
6511}
6512
/// If all values in Values that *don't* match the predicate are the same
/// 'splat' value, then replace all values with that splat value.
6515/// Else, if AlternativeReplacement was provided, then replace all values that
6516/// do match predicate with AlternativeReplacement value.
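/// For example (illustrative), {5, 0, 5} with an is-zero predicate becomes
/// {5, 5, 5}; for {5, 0, 7} no common splat value exists, so the matching
/// lane is replaced only if AlternativeReplacement is provided.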
6517static void
6518turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
6519 std::function<bool(SDValue)> Predicate,
6520 SDValue AlternativeReplacement = SDValue()) {
6521 SDValue Replacement;
6522 // Is there a value for which the Predicate does *NOT* match? What is it?
6523 auto SplatValue = llvm::find_if_not(Range&: Values, P: Predicate);
6524 if (SplatValue != Values.end()) {
6525 // Does Values consist only of SplatValue's and values matching Predicate?
6526 if (llvm::all_of(Range&: Values, P: [Predicate, SplatValue](SDValue Value) {
6527 return Value == *SplatValue || Predicate(Value);
6528 })) // Then we shall replace values matching predicate with SplatValue.
6529 Replacement = *SplatValue;
6530 }
6531 if (!Replacement) {
6532 // Oops, we did not find the "baseline" splat value.
6533 if (!AlternativeReplacement)
6534 return; // Nothing to do.
6535 // Let's replace with provided value then.
6536 Replacement = AlternativeReplacement;
6537 }
6538 std::replace_if(first: Values.begin(), last: Values.end(), pred: Predicate, new_value: Replacement);
6539}
6540
6541/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6542/// where the divisor is constant and the comparison target is zero,
6543/// return a DAG expression that will generate the same comparison result
6544/// using only multiplications, additions and shifts/rotations.
6545/// Ref: "Hacker's Delight" 10-17.
6546SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6547 SDValue CompTargetNode,
6548 ISD::CondCode Cond,
6549 DAGCombinerInfo &DCI,
6550 const SDLoc &DL) const {
6551 SmallVector<SDNode *, 5> Built;
6552 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6553 DCI, DL, Created&: Built)) {
6554 for (SDNode *N : Built)
6555 DCI.AddToWorklist(N);
6556 return Folded;
6557 }
6558
6559 return SDValue();
6560}
6561
6562SDValue
6563TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
6564 SDValue CompTargetNode, ISD::CondCode Cond,
6565 DAGCombinerInfo &DCI, const SDLoc &DL,
6566 SmallVectorImpl<SDNode *> &Created) const {
6567 // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
6568 // - D must be constant, with D = D0 * 2^K where D0 is odd
6569 // - P is the multiplicative inverse of D0 modulo 2^W
6570 // - Q = floor(((2^W) - 1) / D)
6571 // where W is the width of the common type of N and D.
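  // Worked example (illustrative, W = 8): for (seteq (urem x, 6), 0) we get
  // D0 = 3, K = 1, P = inv(3, 2^8) = 171 and Q = floor(255 / 6) = 42. For
  // x = 12: 12 * 171 = 4 (mod 256) and rotr(4, 1) = 2 u<= 42, so true; for
  // x = 13: 13 * 171 = 175 (mod 256) and rotr(175, 1) = 215 u> 42, so false.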
6572 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6573 "Only applicable for (in)equality comparisons.");
6574
6575 SelectionDAG &DAG = DCI.DAG;
6576
6577 EVT VT = REMNode.getValueType();
6578 EVT SVT = VT.getScalarType();
6579 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout(), LegalTypes: !DCI.isBeforeLegalize());
6580 EVT ShSVT = ShVT.getScalarType();
6581
6582 // If MUL is unavailable, we cannot proceed in any case.
6583 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::MUL, VT))
6584 return SDValue();
6585
6586 bool ComparingWithAllZeros = true;
6587 bool AllComparisonsWithNonZerosAreTautological = true;
6588 bool HadTautologicalLanes = false;
6589 bool AllLanesAreTautological = true;
6590 bool HadEvenDivisor = false;
6591 bool AllDivisorsArePowerOfTwo = true;
6592 bool HadTautologicalInvertedLanes = false;
6593 SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
6594
6595 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
6596 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6597 if (CDiv->isZero())
6598 return false;
6599
6600 const APInt &D = CDiv->getAPIntValue();
6601 const APInt &Cmp = CCmp->getAPIntValue();
6602
6603 ComparingWithAllZeros &= Cmp.isZero();
6604
    // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
    // if C2 is not less than C1, the comparison is always false.
    // But we will only be able to produce the comparison that will give the
    // opposite tautological answer. So this lane would need to be fixed up.
6609 bool TautologicalInvertedLane = D.ule(RHS: Cmp);
6610 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
6611
6612 // If all lanes are tautological (either all divisors are ones, or divisor
6613 // is not greater than the constant we are comparing with),
6614 // we will prefer to avoid the fold.
6615 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
6616 HadTautologicalLanes |= TautologicalLane;
6617 AllLanesAreTautological &= TautologicalLane;
6618
    // If we are comparing with non-zero, we'll need to subtract said
    // comparison value from the LHS. But there is no point in doing that if
    // every lane where we are comparing with non-zero is tautological.
6622 if (!Cmp.isZero())
6623 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
6624
6625 // Decompose D into D0 * 2^K
6626 unsigned K = D.countr_zero();
6627 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6628 APInt D0 = D.lshr(shiftAmt: K);
6629
6630 // D is even if it has trailing zeros.
6631 HadEvenDivisor |= (K != 0);
6632 // D is a power-of-two if D0 is one.
6633 // If all divisors are power-of-two, we will prefer to avoid the fold.
6634 AllDivisorsArePowerOfTwo &= D0.isOne();
6635
6636 // P = inv(D0, 2^W)
6637 // 2^W requires W + 1 bits, so we have to extend and then truncate.
6638 unsigned W = D.getBitWidth();
6639 APInt P = D0.zext(width: W + 1)
6640 .multiplicativeInverse(modulo: APInt::getSignedMinValue(numBits: W + 1))
6641 .trunc(width: W);
6642 assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
6643 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6644
6645 // Q = floor((2^W - 1) u/ D)
6646 // R = ((2^W - 1) u% D)
6647 APInt Q, R;
6648 APInt::udivrem(LHS: APInt::getAllOnes(numBits: W), RHS: D, Quotient&: Q, Remainder&: R);
6649
6650 // If we are comparing with zero, then that comparison constant is okay,
6651 // else it may need to be one less than that.
6652 if (Cmp.ugt(RHS: R))
6653 Q -= 1;
6654
6655 assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
6656 "We are expecting that K is always less than all-ones for ShSVT");
6657
6658 // If the lane is tautological the result can be constant-folded.
6659 if (TautologicalLane) {
      // Set P and K to bogus values so we can try to splat them.
      P = 0;
      K = -1;
      // And ensure that the comparison constant is tautological, i.e. it will
      // always compare true/false.
      Q = -1;
6666 }
6667
6668 PAmts.push_back(Elt: DAG.getConstant(Val: P, DL, VT: SVT));
6669 KAmts.push_back(
6670 Elt: DAG.getConstant(Val: APInt(ShSVT.getSizeInBits(), K), DL, VT: ShSVT));
6671 QAmts.push_back(Elt: DAG.getConstant(Val: Q, DL, VT: SVT));
6672 return true;
6673 };
6674
6675 SDValue N = REMNode.getOperand(i: 0);
6676 SDValue D = REMNode.getOperand(i: 1);
6677
6678 // Collect the values from each element.
6679 if (!ISD::matchBinaryPredicate(LHS: D, RHS: CompTargetNode, Match: BuildUREMPattern))
6680 return SDValue();
6681
6682 // If all lanes are tautological, the result can be constant-folded.
6683 if (AllLanesAreTautological)
6684 return SDValue();
6685
  // If this is a urem by a power of two, avoid the fold since it can be
  // best implemented as a bit test.
6688 if (AllDivisorsArePowerOfTwo)
6689 return SDValue();
6690
6691 SDValue PVal, KVal, QVal;
6692 if (D.getOpcode() == ISD::BUILD_VECTOR) {
6693 if (HadTautologicalLanes) {
6694 // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep the '0's.
6696 turnVectorIntoSplatVector(Values: PAmts, Predicate: isNullConstant);
6697 // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0's.
6699 turnVectorIntoSplatVector(Values: KAmts, Predicate: isAllOnesConstant,
6700 AlternativeReplacement: DAG.getConstant(Val: 0, DL, VT: ShSVT));
6701 }
6702
6703 PVal = DAG.getBuildVector(VT, DL, Ops: PAmts);
6704 KVal = DAG.getBuildVector(VT: ShVT, DL, Ops: KAmts);
6705 QVal = DAG.getBuildVector(VT, DL, Ops: QAmts);
6706 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
6707 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
6708 "Expected matchBinaryPredicate to return one element for "
6709 "SPLAT_VECTORs");
6710 PVal = DAG.getSplatVector(VT, DL, Op: PAmts[0]);
6711 KVal = DAG.getSplatVector(VT: ShVT, DL, Op: KAmts[0]);
6712 QVal = DAG.getSplatVector(VT, DL, Op: QAmts[0]);
6713 } else {
6714 PVal = PAmts[0];
6715 KVal = KAmts[0];
6716 QVal = QAmts[0];
6717 }
6718
6719 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
6720 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::SUB, VT))
6721 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
6722 assert(CompTargetNode.getValueType() == N.getValueType() &&
6723 "Expecting that the types on LHS and RHS of comparisons match.");
6724 N = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: N, N2: CompTargetNode);
6725 }
6726
6727 // (mul N, P)
6728 SDValue Op0 = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N, N2: PVal);
6729 Created.push_back(Elt: Op0.getNode());
6730
6731 // Rotate right only if any divisor was even. We avoid rotates for all-odd
6732 // divisors as a performance improvement, since rotating by 0 is a no-op.
6733 if (HadEvenDivisor) {
6734 // We need ROTR to do this.
6735 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ROTR, VT))
6736 return SDValue();
6737 // UREM: (rotr (mul N, P), K)
6738 Op0 = DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: Op0, N2: KVal);
6739 Created.push_back(Elt: Op0.getNode());
6740 }
6741
6742 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
6743 SDValue NewCC =
6744 DAG.getSetCC(DL, VT: SETCCVT, LHS: Op0, RHS: QVal,
6745 Cond: ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
6746 if (!HadTautologicalInvertedLanes)
6747 return NewCC;
6748
  // If any lanes previously compared always-false, the NewCC will give an
  // always-true result for them, so we need to fix up those lanes.
  // Or the other way around for the inequality predicate.
6752 assert(VT.isVector() && "Can/should only get here for vectors.");
6753 Created.push_back(Elt: NewCC.getNode());
6754
  // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
  // if C2 is not less than C1, the comparison is always false.
  // But we have produced the comparison that will give the
  // opposite tautological answer. So these lanes would need to be fixed up.
6759 SDValue TautologicalInvertedChannels =
6760 DAG.getSetCC(DL, VT: SETCCVT, LHS: D, RHS: CompTargetNode, Cond: ISD::SETULE);
6761 Created.push_back(Elt: TautologicalInvertedChannels.getNode());
6762
  // NOTE: we avoid letting illegal types through even if we're before legalize
  // ops; legalization has a hard time producing good code for this.
6765 if (isOperationLegalOrCustom(Op: ISD::VSELECT, VT: SETCCVT)) {
6766 // If we have a vector select, let's replace the comparison results in the
6767 // affected lanes with the correct tautological result.
    SDValue Replacement = DAG.getBoolConstant(V: Cond == ISD::SETNE, DL,
                                              VT: SETCCVT, OpVT: SETCCVT);
6770 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: SETCCVT, N1: TautologicalInvertedChannels,
6771 N2: Replacement, N3: NewCC);
6772 }
6773
6774 // Else, we can just invert the comparison result in the appropriate lanes.
6775 //
  // NOTE: see the note above the VSELECT case above.
6777 if (isOperationLegalOrCustom(Op: ISD::XOR, VT: SETCCVT))
6778 return DAG.getNode(Opcode: ISD::XOR, DL, VT: SETCCVT, N1: NewCC,
6779 N2: TautologicalInvertedChannels);
6780
6781 return SDValue(); // Don't know how to lower.
6782}
6783
6784/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
6785/// where the divisor is constant and the comparison target is zero,
6786/// return a DAG expression that will generate the same comparison result
6787/// using only multiplications, additions and shifts/rotations.
6788/// Ref: "Hacker's Delight" 10-17.
6789SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
6790 SDValue CompTargetNode,
6791 ISD::CondCode Cond,
6792 DAGCombinerInfo &DCI,
6793 const SDLoc &DL) const {
6794 SmallVector<SDNode *, 7> Built;
6795 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6796 DCI, DL, Created&: Built)) {
6797 assert(Built.size() <= 7 && "Max size prediction failed.");
6798 for (SDNode *N : Built)
6799 DCI.AddToWorklist(N);
6800 return Folded;
6801 }
6802
6803 return SDValue();
6804}
6805
6806SDValue
6807TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
6808 SDValue CompTargetNode, ISD::CondCode Cond,
6809 DAGCombinerInfo &DCI, const SDLoc &DL,
6810 SmallVectorImpl<SDNode *> &Created) const {
6811 // Fold:
6812 // (seteq/ne (srem N, D), 0)
6813 // To:
6814 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
6815 //
6816 // - D must be constant, with D = D0 * 2^K where D0 is odd
6817 // - P is the multiplicative inverse of D0 modulo 2^W
6818 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
6819 // - Q = floor((2 * A) / (2^K))
6820 // where W is the width of the common type of N and D.
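  // Worked example (illustrative, W = 8): for (seteq (srem x, 6), 0) we get
  // D0 = 3, K = 1, P = 171, A = floor(127 / 3) & -2 = 42 and Q = 42. For
  // x = 12: 12 * 171 + 42 = 46 (mod 256) and rotr(46, 1) = 23 u<= 42 (true);
  // for x = -3: -3 * 171 + 42 = 41 (mod 256) and rotr(41, 1) = 148 u> 42
  // (false).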
6821 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6822 "Only applicable for (in)equality comparisons.");
6823
6824 SelectionDAG &DAG = DCI.DAG;
6825
6826 EVT VT = REMNode.getValueType();
6827 EVT SVT = VT.getScalarType();
6828 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout(), LegalTypes: !DCI.isBeforeLegalize());
6829 EVT ShSVT = ShVT.getScalarType();
6830
  // If we are after ops legalization and MUL is unavailable, we cannot
  // proceed.
6833 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::MUL, VT))
6834 return SDValue();
6835
6836 // TODO: Could support comparing with non-zero too.
6837 ConstantSDNode *CompTarget = isConstOrConstSplat(N: CompTargetNode);
6838 if (!CompTarget || !CompTarget->isZero())
6839 return SDValue();
6840
6841 bool HadIntMinDivisor = false;
6842 bool HadOneDivisor = false;
6843 bool AllDivisorsAreOnes = true;
6844 bool HadEvenDivisor = false;
6845 bool NeedToApplyOffset = false;
6846 bool AllDivisorsArePowerOfTwo = true;
6847 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
6848
6849 auto BuildSREMPattern = [&](ConstantSDNode *C) {
6850 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6851 if (C->isZero())
6852 return false;
6853
6854 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
6855
6856 // WARNING: this fold is only valid for positive divisors!
6857 APInt D = C->getAPIntValue();
6858 if (D.isNegative())
6859 D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
6860
6861 HadIntMinDivisor |= D.isMinSignedValue();
6862
6863 // If all divisors are ones, we will prefer to avoid the fold.
6864 HadOneDivisor |= D.isOne();
6865 AllDivisorsAreOnes &= D.isOne();
6866
6867 // Decompose D into D0 * 2^K
6868 unsigned K = D.countr_zero();
6869 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6870 APInt D0 = D.lshr(shiftAmt: K);
6871
6872 if (!D.isMinSignedValue()) {
      // D is even if it has trailing zeros; but if it's INT_MIN we don't care
      // about this lane in this fold, since we'll special-handle it.
6875 HadEvenDivisor |= (K != 0);
6876 }
6877
6878 // D is a power-of-two if D0 is one. This includes INT_MIN.
6879 // If all divisors are power-of-two, we will prefer to avoid the fold.
6880 AllDivisorsArePowerOfTwo &= D0.isOne();
6881
6882 // P = inv(D0, 2^W)
6883 // 2^W requires W + 1 bits, so we have to extend and then truncate.
6884 unsigned W = D.getBitWidth();
6885 APInt P = D0.zext(width: W + 1)
6886 .multiplicativeInverse(modulo: APInt::getSignedMinValue(numBits: W + 1))
6887 .trunc(width: W);
6888 assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
6889 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6890
6891 // A = floor((2^(W - 1) - 1) / D0) & -2^K
6892 APInt A = APInt::getSignedMaxValue(numBits: W).udiv(RHS: D0);
6893 A.clearLowBits(loBits: K);
6894
6895 if (!D.isMinSignedValue()) {
      // If the divisor is INT_MIN, we don't care about this lane in this
      // fold; we'll special-handle it.
6898 NeedToApplyOffset |= A != 0;
6899 }
6900
6901 // Q = floor((2 * A) / (2^K))
6902 APInt Q = (2 * A).udiv(RHS: APInt::getOneBitSet(numBits: W, BitNo: K));
6903
6904 assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
6905 "We are expecting that A is always less than all-ones for SVT");
6906 assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
6907 "We are expecting that K is always less than all-ones for ShSVT");
6908
    // If the divisor is 1 the result can be constant-folded. Likewise, we
    // don't care about INT_MIN lanes; those can be set to undef if appropriate.
    if (D.isOne()) {
      // Set P, A and K to bogus values so we can try to splat them.
6913 P = 0;
6914 A = -1;
6915 K = -1;
6916
6917 // x ?% 1 == 0 <--> true <--> x u<= -1
6918 Q = -1;
6919 }
6920
6921 PAmts.push_back(Elt: DAG.getConstant(Val: P, DL, VT: SVT));
6922 AAmts.push_back(Elt: DAG.getConstant(Val: A, DL, VT: SVT));
6923 KAmts.push_back(
6924 Elt: DAG.getConstant(Val: APInt(ShSVT.getSizeInBits(), K), DL, VT: ShSVT));
6925 QAmts.push_back(Elt: DAG.getConstant(Val: Q, DL, VT: SVT));
6926 return true;
6927 };
6928
6929 SDValue N = REMNode.getOperand(i: 0);
6930 SDValue D = REMNode.getOperand(i: 1);
6931
6932 // Collect the values from each element.
6933 if (!ISD::matchUnaryPredicate(Op: D, Match: BuildSREMPattern))
6934 return SDValue();
6935
  // If this is an srem by one, avoid the fold since it can be constant-folded.
6937 if (AllDivisorsAreOnes)
6938 return SDValue();
6939
  // If this is an srem by a power-of-two (including INT_MIN), avoid the fold
  // since it is best implemented as a bit test.
6942 if (AllDivisorsArePowerOfTwo)
6943 return SDValue();
6944
6945 SDValue PVal, AVal, KVal, QVal;
6946 if (D.getOpcode() == ISD::BUILD_VECTOR) {
6947 if (HadOneDivisor) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0's.
      turnVectorIntoSplatVector(Values: PAmts, Predicate: isNullConstant);
      // Try to turn AAmts into a splat, since we don't care about the
      // values that are currently '-1'. If we can't, change them to '0's.
      turnVectorIntoSplatVector(Values: AAmts, Predicate: isAllOnesConstant,
                                AlternativeReplacement: DAG.getConstant(Val: 0, DL, VT: SVT));
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0's.
      turnVectorIntoSplatVector(Values: KAmts, Predicate: isAllOnesConstant,
                                AlternativeReplacement: DAG.getConstant(Val: 0, DL, VT: ShSVT));
6959 }
6960
6961 PVal = DAG.getBuildVector(VT, DL, Ops: PAmts);
6962 AVal = DAG.getBuildVector(VT, DL, Ops: AAmts);
6963 KVal = DAG.getBuildVector(VT: ShVT, DL, Ops: KAmts);
6964 QVal = DAG.getBuildVector(VT, DL, Ops: QAmts);
6965 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
6966 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
6967 QAmts.size() == 1 &&
6968 "Expected matchUnaryPredicate to return one element for scalable "
6969 "vectors");
6970 PVal = DAG.getSplatVector(VT, DL, Op: PAmts[0]);
6971 AVal = DAG.getSplatVector(VT, DL, Op: AAmts[0]);
6972 KVal = DAG.getSplatVector(VT: ShVT, DL, Op: KAmts[0]);
6973 QVal = DAG.getSplatVector(VT, DL, Op: QAmts[0]);
6974 } else {
6975 assert(isa<ConstantSDNode>(D) && "Expected a constant");
6976 PVal = PAmts[0];
6977 AVal = AAmts[0];
6978 KVal = KAmts[0];
6979 QVal = QAmts[0];
6980 }
6981
6982 // (mul N, P)
6983 SDValue Op0 = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N, N2: PVal);
6984 Created.push_back(Elt: Op0.getNode());
6985
6986 if (NeedToApplyOffset) {
6987 // We need ADD to do this.
6988 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ADD, VT))
6989 return SDValue();
6990
6991 // (add (mul N, P), A)
6992 Op0 = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0, N2: AVal);
6993 Created.push_back(Elt: Op0.getNode());
6994 }
6995
6996 // Rotate right only if any divisor was even. We avoid rotates for all-odd
6997 // divisors as a performance improvement, since rotating by 0 is a no-op.
6998 if (HadEvenDivisor) {
6999 // We need ROTR to do this.
7000 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ROTR, VT))
7001 return SDValue();
7002 // SREM: (rotr (add (mul N, P), A), K)
7003 Op0 = DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: Op0, N2: KVal);
7004 Created.push_back(Elt: Op0.getNode());
7005 }
7006
7007 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7008 SDValue Fold =
7009 DAG.getSetCC(DL, VT: SETCCVT, LHS: Op0, RHS: QVal,
7010 Cond: ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7011
7012 // If we didn't have lanes with INT_MIN divisor, then we're done.
7013 if (!HadIntMinDivisor)
7014 return Fold;
7015
  // That fold is only valid for positive divisors, which effectively means it
  // is invalid for INT_MIN divisors. So if we have such a lane, we must
  // fix-up the results for said lanes.
7019 assert(VT.isVector() && "Can/should only get here for vectors.");
7020
  // NOTE: we avoid letting illegal types through even if we're before legalize
  // ops; legalization has a hard time producing good code for the code that
  // follows.
7024 if (!isOperationLegalOrCustom(Op: ISD::SETCC, VT: SETCCVT) ||
7025 !isOperationLegalOrCustom(Op: ISD::AND, VT) ||
7026 !isCondCodeLegalOrCustom(CC: Cond, VT: VT.getSimpleVT()) ||
7027 !isOperationLegalOrCustom(Op: ISD::VSELECT, VT: SETCCVT))
7028 return SDValue();
7029
7030 Created.push_back(Elt: Fold.getNode());
7031
7032 SDValue IntMin = DAG.getConstant(
7033 Val: APInt::getSignedMinValue(numBits: SVT.getScalarSizeInBits()), DL, VT);
7034 SDValue IntMax = DAG.getConstant(
7035 Val: APInt::getSignedMaxValue(numBits: SVT.getScalarSizeInBits()), DL, VT);
7036 SDValue Zero =
7037 DAG.getConstant(Val: APInt::getZero(numBits: SVT.getScalarSizeInBits()), DL, VT);
7038
7039 // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7040 SDValue DivisorIsIntMin = DAG.getSetCC(DL, VT: SETCCVT, LHS: D, RHS: IntMin, Cond: ISD::SETEQ);
7041 Created.push_back(Elt: DivisorIsIntMin.getNode());
7042
7043 // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
7044 SDValue Masked = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: N, N2: IntMax);
7045 Created.push_back(Elt: Masked.getNode());
7046 SDValue MaskedIsZero = DAG.getSetCC(DL, VT: SETCCVT, LHS: Masked, RHS: Zero, Cond);
7047 Created.push_back(Elt: MaskedIsZero.getNode());
7048
  // To produce the final result we need to blend 2 vectors: 'Fold' and
  // 'MaskedIsZero'. If the divisor for a channel was *NOT* INT_MIN, we pick
  // from 'Fold', else we pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
  // constant-folded, the select can get lowered to a shuffle with constant mask.
7053 SDValue Blended = DAG.getNode(Opcode: ISD::VSELECT, DL, VT: SETCCVT, N1: DivisorIsIntMin,
7054 N2: MaskedIsZero, N3: Fold);
7055
7056 return Blended;
7057}
7058
7059bool TargetLowering::
7060verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
7061 if (!isa<ConstantSDNode>(Val: Op.getOperand(i: 0))) {
7062 DAG.getContext()->emitError(ErrorStr: "argument to '__builtin_return_address' must "
7063 "be a constant integer");
7064 return true;
7065 }
7066
7067 return false;
7068}
7069
7070SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
7071 const DenormalMode &Mode) const {
7072 SDLoc DL(Op);
7073 EVT VT = Op.getValueType();
7074 EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
7075 SDValue FPZero = DAG.getConstantFP(Val: 0.0, DL, VT);
7076
7077 // This is specifically a check for the handling of denormal inputs, not the
7078 // result.
7079 if (Mode.Input == DenormalMode::PreserveSign ||
7080 Mode.Input == DenormalMode::PositiveZero) {
7081 // Test = X == 0.0
7082 return DAG.getSetCC(DL, VT: CCVT, LHS: Op, RHS: FPZero, Cond: ISD::SETEQ);
7083 }
7084
  // Test for denormal inputs to avoid a wrong estimate.
7086 //
7087 // Test = fabs(X) < SmallestNormal
7088 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
7089 APFloat SmallestNorm = APFloat::getSmallestNormalized(Sem: FltSem);
7090 SDValue NormC = DAG.getConstantFP(Val: SmallestNorm, DL, VT);
7091 SDValue Fabs = DAG.getNode(Opcode: ISD::FABS, DL, VT, Operand: Op);
7092 return DAG.getSetCC(DL, VT: CCVT, LHS: Fabs, RHS: NormC, Cond: ISD::SETLT);
7093}
7094
7095SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
7096 bool LegalOps, bool OptForSize,
7097 NegatibleCost &Cost,
7098 unsigned Depth) const {
7099 // fneg is removable even if it has multiple uses.
7100 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
7101 Cost = NegatibleCost::Cheaper;
7102 return Op.getOperand(i: 0);
7103 }
7104
7105 // Don't recurse exponentially.
7106 if (Depth > SelectionDAG::MaxRecursionDepth)
7107 return SDValue();
7108
7109 // Pre-increment recursion depth for use in recursive calls.
7110 ++Depth;
7111 const SDNodeFlags Flags = Op->getFlags();
7112 const TargetOptions &Options = DAG.getTarget().Options;
7113 EVT VT = Op.getValueType();
7114 unsigned Opcode = Op.getOpcode();
7115
7116 // Don't allow anything with multiple uses unless we know it is free.
7117 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7118 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7119 isFPExtFree(DestVT: VT, SrcVT: Op.getOperand(i: 0).getValueType());
7120 if (!IsFreeExtend)
7121 return SDValue();
7122 }
7123
7124 auto RemoveDeadNode = [&](SDValue N) {
7125 if (N && N.getNode()->use_empty())
7126 DAG.RemoveDeadNode(N: N.getNode());
7127 };
7128
7129 SDLoc DL(Op);
7130
7131 // Because getNegatedExpression can delete nodes we need a handle to keep
7132 // temporary nodes alive in case the recursion manages to create an identical
7133 // node.
7134 std::list<HandleSDNode> Handles;
7135
7136 switch (Opcode) {
7137 case ISD::ConstantFP: {
7138 // Don't invert constant FP values after legalization unless the target says
7139 // the negated constant is legal.
7140 bool IsOpLegal =
7141 isOperationLegal(Op: ISD::ConstantFP, VT) ||
7142 isFPImmLegal(neg(X: cast<ConstantFPSDNode>(Val&: Op)->getValueAPF()), VT,
7143 ForCodeSize: OptForSize);
7144
7145 if (LegalOps && !IsOpLegal)
7146 break;
7147
7148 APFloat V = cast<ConstantFPSDNode>(Val&: Op)->getValueAPF();
7149 V.changeSign();
7150 SDValue CFP = DAG.getConstantFP(Val: V, DL, VT);
7151
    // If we already have the use of the negated floating constant, it is free
    // to negate it even if it has multiple uses.
7154 if (!Op.hasOneUse() && CFP.use_empty())
7155 break;
7156 Cost = NegatibleCost::Neutral;
7157 return CFP;
7158 }
7159 case ISD::BUILD_VECTOR: {
7160 // Only permit BUILD_VECTOR of constants.
7161 if (llvm::any_of(Range: Op->op_values(), P: [&](SDValue N) {
7162 return !N.isUndef() && !isa<ConstantFPSDNode>(Val: N);
7163 }))
7164 break;
7165
7166 bool IsOpLegal =
7167 (isOperationLegal(Op: ISD::ConstantFP, VT) &&
7168 isOperationLegal(Op: ISD::BUILD_VECTOR, VT)) ||
7169 llvm::all_of(Range: Op->op_values(), P: [&](SDValue N) {
7170 return N.isUndef() ||
7171 isFPImmLegal(neg(X: cast<ConstantFPSDNode>(Val&: N)->getValueAPF()), VT,
7172 ForCodeSize: OptForSize);
7173 });
7174
7175 if (LegalOps && !IsOpLegal)
7176 break;
7177
7178 SmallVector<SDValue, 4> Ops;
7179 for (SDValue C : Op->op_values()) {
7180 if (C.isUndef()) {
7181 Ops.push_back(Elt: C);
7182 continue;
7183 }
7184 APFloat V = cast<ConstantFPSDNode>(Val&: C)->getValueAPF();
7185 V.changeSign();
7186 Ops.push_back(Elt: DAG.getConstantFP(Val: V, DL, VT: C.getValueType()));
7187 }
7188 Cost = NegatibleCost::Neutral;
7189 return DAG.getBuildVector(VT, DL, Ops);
7190 }
7191 case ISD::FADD: {
7192 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7193 break;
7194
7195 // After operation legalization, it might not be legal to create new FSUBs.
7196 if (LegalOps && !isOperationLegalOrCustom(Op: ISD::FSUB, VT))
7197 break;
7198 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1);
7199
7200 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7201 NegatibleCost CostX = NegatibleCost::Expensive;
7202 SDValue NegX =
7203 getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7204 // Prevent this node from being deleted by the next call.
7205 if (NegX)
7206 Handles.emplace_back(args&: NegX);
7207
7208 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7209 NegatibleCost CostY = NegatibleCost::Expensive;
7210 SDValue NegY =
7211 getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7212
7213 // We're done with the handles.
7214 Handles.clear();
7215
    // Negate X if its cost is less than or equal to the cost of Y.
7217 if (NegX && (CostX <= CostY)) {
7218 Cost = CostX;
7219 SDValue N = DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: NegX, N2: Y, Flags);
7220 if (NegY != N)
7221 RemoveDeadNode(NegY);
7222 return N;
7223 }
7224
    // Negate Y if it is not expensive.
7226 if (NegY) {
7227 Cost = CostY;
7228 SDValue N = DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: NegY, N2: X, Flags);
7229 if (NegX != N)
7230 RemoveDeadNode(NegX);
7231 return N;
7232 }
7233 break;
7234 }
7235 case ISD::FSUB: {
7236 // We can't turn -(A-B) into B-A when we honor signed zeros.
7237 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7238 break;
7239
7240 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1);
7241 // fold (fneg (fsub 0, Y)) -> Y
7242 if (ConstantFPSDNode *C = isConstOrConstSplatFP(N: X, /*AllowUndefs*/ true))
7243 if (C->isZero()) {
7244 Cost = NegatibleCost::Cheaper;
7245 return Y;
7246 }
7247
7248 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7249 Cost = NegatibleCost::Neutral;
7250 return DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: Y, N2: X, Flags);
7251 }
7252 case ISD::FMUL:
7253 case ISD::FDIV: {
7254 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1);
7255
7256 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7257 NegatibleCost CostX = NegatibleCost::Expensive;
7258 SDValue NegX =
7259 getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7260 // Prevent this node from being deleted by the next call.
7261 if (NegX)
7262 Handles.emplace_back(args&: NegX);
7263
7264 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7265 NegatibleCost CostY = NegatibleCost::Expensive;
7266 SDValue NegY =
7267 getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7268
7269 // We're done with the handles.
7270 Handles.clear();
7271
    // Negate X if its cost is less than or equal to the cost of Y.
7273 if (NegX && (CostX <= CostY)) {
7274 Cost = CostX;
7275 SDValue N = DAG.getNode(Opcode, DL, VT, N1: NegX, N2: Y, Flags);
7276 if (NegY != N)
7277 RemoveDeadNode(NegY);
7278 return N;
7279 }
7280
7281 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7282 if (auto *C = isConstOrConstSplatFP(N: Op.getOperand(i: 1)))
7283 if (C->isExactlyValue(V: 2.0) && Op.getOpcode() == ISD::FMUL)
7284 break;
7285
    // Negate Y if it is not expensive.
7287 if (NegY) {
7288 Cost = CostY;
7289 SDValue N = DAG.getNode(Opcode, DL, VT, N1: X, N2: NegY, Flags);
7290 if (NegX != N)
7291 RemoveDeadNode(NegX);
7292 return N;
7293 }
7294 break;
7295 }
7296 case ISD::FMA:
7297 case ISD::FMAD: {
7298 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7299 break;
7300
7301 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1), Z = Op.getOperand(i: 2);
7302 NegatibleCost CostZ = NegatibleCost::Expensive;
7303 SDValue NegZ =
7304 getNegatedExpression(Op: Z, DAG, LegalOps, OptForSize, Cost&: CostZ, Depth);
    // Give up if we fail to negate Z.
7306 if (!NegZ)
7307 break;
7308
7309 // Prevent this node from being deleted by the next two calls.
7310 Handles.emplace_back(args&: NegZ);
7311
7312 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7313 NegatibleCost CostX = NegatibleCost::Expensive;
7314 SDValue NegX =
7315 getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7316 // Prevent this node from being deleted by the next call.
7317 if (NegX)
7318 Handles.emplace_back(args&: NegX);
7319
7320 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7321 NegatibleCost CostY = NegatibleCost::Expensive;
7322 SDValue NegY =
7323 getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7324
7325 // We're done with the handles.
7326 Handles.clear();
7327
    // Negate X if its cost is less than or equal to the cost of Y.
7329 if (NegX && (CostX <= CostY)) {
7330 Cost = std::min(a: CostX, b: CostZ);
7331 SDValue N = DAG.getNode(Opcode, DL, VT, N1: NegX, N2: Y, N3: NegZ, Flags);
7332 if (NegY != N)
7333 RemoveDeadNode(NegY);
7334 return N;
7335 }
7336
    // Negate Y if it is not expensive.
7338 if (NegY) {
7339 Cost = std::min(a: CostY, b: CostZ);
7340 SDValue N = DAG.getNode(Opcode, DL, VT, N1: X, N2: NegY, N3: NegZ, Flags);
7341 if (NegX != N)
7342 RemoveDeadNode(NegX);
7343 return N;
7344 }
7345 break;
7346 }
7347
7348 case ISD::FP_EXTEND:
7349 case ISD::FSIN:
7350 if (SDValue NegV = getNegatedExpression(Op: Op.getOperand(i: 0), DAG, LegalOps,
7351 OptForSize, Cost, Depth))
7352 return DAG.getNode(Opcode, DL, VT, Operand: NegV);
7353 break;
7354 case ISD::FP_ROUND:
7355 if (SDValue NegV = getNegatedExpression(Op: Op.getOperand(i: 0), DAG, LegalOps,
7356 OptForSize, Cost, Depth))
7357 return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT, N1: NegV, N2: Op.getOperand(i: 1));
7358 break;
7359 case ISD::SELECT:
7360 case ISD::VSELECT: {
7361 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7362 // iff at least one cost is cheaper and the other is neutral/cheaper
7363 SDValue LHS = Op.getOperand(i: 1);
7364 NegatibleCost CostLHS = NegatibleCost::Expensive;
7365 SDValue NegLHS =
7366 getNegatedExpression(Op: LHS, DAG, LegalOps, OptForSize, Cost&: CostLHS, Depth);
7367 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7368 RemoveDeadNode(NegLHS);
7369 break;
7370 }
7371
7372 // Prevent this node from being deleted by the next call.
7373 Handles.emplace_back(args&: NegLHS);
7374
7375 SDValue RHS = Op.getOperand(i: 2);
7376 NegatibleCost CostRHS = NegatibleCost::Expensive;
7377 SDValue NegRHS =
7378 getNegatedExpression(Op: RHS, DAG, LegalOps, OptForSize, Cost&: CostRHS, Depth);
7379
7380 // We're done with the handles.
7381 Handles.clear();
7382
7383 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7384 (CostLHS != NegatibleCost::Cheaper &&
7385 CostRHS != NegatibleCost::Cheaper)) {
7386 RemoveDeadNode(NegLHS);
7387 RemoveDeadNode(NegRHS);
7388 break;
7389 }
7390
7391 Cost = std::min(a: CostLHS, b: CostRHS);
7392 return DAG.getSelect(DL, VT, Cond: Op.getOperand(i: 0), LHS: NegLHS, RHS: NegRHS);
7393 }
7394 }
7395
7396 return SDValue();
7397}
7398
7399//===----------------------------------------------------------------------===//
7400// Legalization Utilities
7401//===----------------------------------------------------------------------===//
7402
7403bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7404 SDValue LHS, SDValue RHS,
7405 SmallVectorImpl<SDValue> &Result,
7406 EVT HiLoVT, SelectionDAG &DAG,
7407 MulExpansionKind Kind, SDValue LL,
7408 SDValue LH, SDValue RL, SDValue RH) const {
7409 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7410 Opcode == ISD::SMUL_LOHI);
7411
7412 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7413 isOperationLegalOrCustom(Op: ISD::MULHS, VT: HiLoVT);
7414 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7415 isOperationLegalOrCustom(Op: ISD::MULHU, VT: HiLoVT);
7416 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7417 isOperationLegalOrCustom(Op: ISD::SMUL_LOHI, VT: HiLoVT);
7418 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7419 isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT: HiLoVT);
7420
7421 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7422 return false;
7423
7424 unsigned OuterBitSize = VT.getScalarSizeInBits();
7425 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7426
7427 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7428 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7429 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7430
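  // In outline, the expansion below uses the schoolbook decomposition: with
  // half-width n and L = LH * 2^n + LL, R = RH * 2^n + RL, the full product is
  //   L * R = LH*RH * 2^(2n) + (LH*RL + LL*RH) * 2^n + LL*RL,
  // assembled from half-width multiplies plus carry propagation.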
7431 SDVTList VTs = DAG.getVTList(VT1: HiLoVT, VT2: HiLoVT);
7432 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7433 bool Signed) -> bool {
7434 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7435 Lo = DAG.getNode(Opcode: Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, DL: dl, VTList: VTs, N1: L, N2: R);
7436 Hi = SDValue(Lo.getNode(), 1);
7437 return true;
7438 }
7439 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7440 Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: L, N2: R);
7441 Hi = DAG.getNode(Opcode: Signed ? ISD::MULHS : ISD::MULHU, DL: dl, VT: HiLoVT, N1: L, N2: R);
7442 return true;
7443 }
7444 return false;
7445 };
7446
7447 SDValue Lo, Hi;
7448
7449 if (!LL.getNode() && !RL.getNode() &&
7450 isOperationLegalOrCustom(Op: ISD::TRUNCATE, VT: HiLoVT)) {
7451 LL = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: LHS);
7452 RL = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: RHS);
7453 }
7454
7455 if (!LL.getNode())
7456 return false;
7457
7458 APInt HighMask = APInt::getHighBitsSet(numBits: OuterBitSize, hiBitsSet: InnerBitSize);
7459 if (DAG.MaskedValueIsZero(Op: LHS, Mask: HighMask) &&
7460 DAG.MaskedValueIsZero(Op: RHS, Mask: HighMask)) {
7461 // The inputs are both zero-extended.
7462 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7463 Result.push_back(Elt: Lo);
7464 Result.push_back(Elt: Hi);
7465 if (Opcode != ISD::MUL) {
7466 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT);
7467 Result.push_back(Elt: Zero);
7468 Result.push_back(Elt: Zero);
7469 }
7470 return true;
7471 }
7472 }
7473
7474 if (!VT.isVector() && Opcode == ISD::MUL &&
7475 DAG.ComputeMaxSignificantBits(Op: LHS) <= InnerBitSize &&
7476 DAG.ComputeMaxSignificantBits(Op: RHS) <= InnerBitSize) {
7477 // The input values are both sign-extended.
7478 // TODO non-MUL case?
7479 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
7480 Result.push_back(Elt: Lo);
7481 Result.push_back(Elt: Hi);
7482 return true;
7483 }
7484 }
7485
7486 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7487 SDValue Shift = DAG.getShiftAmountConstant(Val: ShiftAmount, VT, DL: dl);
7488
7489 if (!LH.getNode() && !RH.getNode() &&
7490 isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
7491 isOperationLegalOrCustom(Op: ISD::TRUNCATE, VT: HiLoVT)) {
7492 LH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: LHS, N2: Shift);
7493 LH = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: LH);
7494 RH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: RHS, N2: Shift);
7495 RH = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: RH);
7496 }
7497
7498 if (!LH.getNode())
7499 return false;
7500
7501 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
7502 return false;
7503
7504 Result.push_back(Elt: Lo);
7505
7506 if (Opcode == ISD::MUL) {
7507 RH = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: LL, N2: RH);
7508 LH = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: LH, N2: RL);
7509 Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Hi, N2: RH);
7510 Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Hi, N2: LH);
7511 Result.push_back(Elt: Hi);
7512 return true;
7513 }
7514
7515 // Compute the full width result.
7516 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
7517 Lo = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Lo);
7518 Hi = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Hi);
7519 Hi = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Hi, N2: Shift);
7520 return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Lo, N2: Hi);
7521 };
7522
7523 SDValue Next = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Hi);
7524 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
7525 return false;
7526
7527 // This is effectively the add part of a multiply-add of half-sized operands,
7528 // so it cannot overflow.
7529 Next = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Next, N2: Merge(Lo, Hi));
7530
7531 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
7532 return false;
7533
7534 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT);
7535 EVT BoolType = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
7536
7537 bool UseGlue = (isOperationLegalOrCustom(Op: ISD::ADDC, VT) &&
7538 isOperationLegalOrCustom(Op: ISD::ADDE, VT));
7539 if (UseGlue)
7540 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
7541 Merge(Lo, Hi));
7542 else
7543 Next = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolType), N1: Next,
7544 N2: Merge(Lo, Hi), N3: DAG.getConstant(Val: 0, DL: dl, VT: BoolType));
7545
7546 SDValue Carry = Next.getValue(R: 1);
7547 Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
7548 Next = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Next, N2: Shift);
7549
7550 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
7551 return false;
7552
7553 if (UseGlue)
7554 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
7555 Carry);
7556 else
7557 Hi = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList: DAG.getVTList(VT1: HiLoVT, VT2: BoolType), N1: Hi,
7558 N2: Zero, N3: Carry);
7559
7560 Next = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Next, N2: Merge(Lo, Hi));
7561
7562 if (Opcode == ISD::SMUL_LOHI) {
7563 SDValue NextSub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Next,
7564 N2: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: RL));
7565 Next = DAG.getSelectCC(DL: dl, LHS: LH, RHS: Zero, True: NextSub, False: Next, Cond: ISD::SETLT);
7566
7567 NextSub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Next,
7568 N2: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: LL));
7569 Next = DAG.getSelectCC(DL: dl, LHS: RH, RHS: Zero, True: NextSub, False: Next, Cond: ISD::SETLT);
7570 }
7571
7572 Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
7573 Next = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Next, N2: Shift);
7574 Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
7575 return true;
7576}
7577
7578bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
7579 SelectionDAG &DAG, MulExpansionKind Kind,
7580 SDValue LL, SDValue LH, SDValue RL,
7581 SDValue RH) const {
7582 SmallVector<SDValue, 2> Result;
7583 bool Ok = expandMUL_LOHI(Opcode: N->getOpcode(), VT: N->getValueType(ResNo: 0), dl: SDLoc(N),
7584 LHS: N->getOperand(Num: 0), RHS: N->getOperand(Num: 1), Result, HiLoVT,
7585 DAG, Kind, LL, LH, RL, RH);
7586 if (Ok) {
7587 assert(Result.size() == 2);
7588 Lo = Result[0];
7589 Hi = Result[1];
7590 }
7591 return Ok;
7592}
7593
7594// Optimize unsigned division or remainder by constants for types twice as large
7595// as a legal VT.
7596//
7597// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
7598// can be computed
7599// as:
7600// Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7601// Remainder = Sum % Constant
7602// This is based on "Remainder by Summing Digits" from Hacker's Delight.
7603//
7604// For division, we can compute the remainder using the algorithm described
7605// above, subtract it from the dividend to get an exact multiple of Constant.
7606// Then multiply that extact multiply by the multiplicative inverse modulo
7607// (1 << (BitWidth / 2)) to get the quotient.
7608
7609// If Constant is even, we can shift right the dividend and the divisor by the
7610// number of trailing zeros in Constant before applying the remainder algorithm.
7611// If we're after the quotient, we can subtract this value from the shifted
7612// dividend and multiply by the multiplicative inverse of the shifted divisor.
7613// If we want the remainder, we shift the value left by the number of trailing
7614// zeros and add the bits that were shifted out of the dividend.
7615bool TargetLowering::expandDIVREMByConstant(SDNode *N,
7616 SmallVectorImpl<SDValue> &Result,
7617 EVT HiLoVT, SelectionDAG &DAG,
7618 SDValue LL, SDValue LH) const {
7619 unsigned Opcode = N->getOpcode();
7620 EVT VT = N->getValueType(ResNo: 0);
7621
7622 // TODO: Support signed division/remainder.
7623 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
7624 return false;
7625 assert(
7626 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
7627 "Unexpected opcode");
7628
7629 auto *CN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
7630 if (!CN)
7631 return false;
7632
7633 APInt Divisor = CN->getAPIntValue();
7634 unsigned BitWidth = Divisor.getBitWidth();
7635 unsigned HBitWidth = BitWidth / 2;
7636 assert(VT.getScalarSizeInBits() == BitWidth &&
7637 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
7638
  // Divisor needs to be less than (1 << HBitWidth).
7640 APInt HalfMaxPlus1 = APInt::getOneBitSet(numBits: BitWidth, BitNo: HBitWidth);
7641 if (Divisor.uge(RHS: HalfMaxPlus1))
7642 return false;
7643
  // We depend on the UREM by constant optimization in DAGCombiner that
  // requires a high multiply.
7646 if (!isOperationLegalOrCustom(Op: ISD::MULHU, VT: HiLoVT) &&
7647 !isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT: HiLoVT))
7648 return false;
7649
7650 // Don't expand if optimizing for size.
7651 if (DAG.shouldOptForSize())
7652 return false;
7653
7654 // Early out for 0 or 1 divisors.
7655 if (Divisor.ule(RHS: 1))
7656 return false;
7657
7658 // If the divisor is even, shift it until it becomes odd.
7659 unsigned TrailingZeros = 0;
7660 if (!Divisor[0]) {
7661 TrailingZeros = Divisor.countr_zero();
7662 Divisor.lshrInPlace(ShiftAmt: TrailingZeros);
7663 }
7664
7665 SDLoc dl(N);
7666 SDValue Sum;
7667 SDValue PartialRem;
7668
7669 // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
7670 // then add in the carry.
7671 // TODO: If we can't split it in half, we might be able to split into 3 or
7672 // more pieces using a smaller bit width.
7673 if (HalfMaxPlus1.urem(RHS: Divisor).isOne()) {
7674 assert(!LL == !LH && "Expected both input halves or no input halves!");
7675 if (!LL)
7676 std::tie(args&: LL, args&: LH) = DAG.SplitScalar(N: N->getOperand(Num: 0), DL: dl, LoVT: HiLoVT, HiVT: HiLoVT);
7677
7678 // Shift the input by the number of TrailingZeros in the divisor. The
7679 // shifted out bits will be added to the remainder later.
7680 if (TrailingZeros) {
7681 // Save the shifted off bits if we need the remainder.
7682 if (Opcode != ISD::UDIV) {
7683 APInt Mask = APInt::getLowBitsSet(numBits: HBitWidth, loBitsSet: TrailingZeros);
7684 PartialRem = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: HiLoVT, N1: LL,
7685 N2: DAG.getConstant(Val: Mask, DL: dl, VT: HiLoVT));
7686 }
7687
7688 LL = DAG.getNode(
7689 Opcode: ISD::OR, DL: dl, VT: HiLoVT,
7690 N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HiLoVT, N1: LL,
7691 N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl)),
7692 N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HiLoVT, N1: LH,
7693 N2: DAG.getShiftAmountConstant(Val: HBitWidth - TrailingZeros,
7694 VT: HiLoVT, DL: dl)));
7695 LH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HiLoVT, N1: LH,
7696 N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl));
7697 }
7698
7699 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
7700 EVT SetCCType =
7701 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: HiLoVT);
7702 if (isOperationLegalOrCustom(Op: ISD::UADDO_CARRY, VT: HiLoVT)) {
7703 SDVTList VTList = DAG.getVTList(VT1: HiLoVT, VT2: SetCCType);
7704 Sum = DAG.getNode(Opcode: ISD::UADDO, DL: dl, VTList, N1: LL, N2: LH);
7705 Sum = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList, N1: Sum,
7706 N2: DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT), N3: Sum.getValue(R: 1));
7707 } else {
7708 Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: LL, N2: LH);
7709 SDValue Carry = DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Sum, RHS: LL, Cond: ISD::SETULT);
7710 // If the boolean for the target is 0 or 1, we can add the setcc result
7711 // directly.
7712 if (getBooleanContents(Type: HiLoVT) ==
7713 TargetLoweringBase::ZeroOrOneBooleanContent)
7714 Carry = DAG.getZExtOrTrunc(Op: Carry, DL: dl, VT: HiLoVT);
7715 else
7716 Carry = DAG.getSelect(DL: dl, VT: HiLoVT, Cond: Carry, LHS: DAG.getConstant(Val: 1, DL: dl, VT: HiLoVT),
7717 RHS: DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT));
7718 Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Sum, N2: Carry);
7719 }
7720 }
7721
7722 // If we didn't find a sum, we can't do the expansion.
7723 if (!Sum)
7724 return false;
7725
7726 // Perform a HiLoVT urem on the Sum using truncated divisor.
7727 SDValue RemL =
7728 DAG.getNode(Opcode: ISD::UREM, DL: dl, VT: HiLoVT, N1: Sum,
7729 N2: DAG.getConstant(Val: Divisor.trunc(width: HBitWidth), DL: dl, VT: HiLoVT));
7730 SDValue RemH = DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT);
7731
7732 if (Opcode != ISD::UREM) {
7733 // Subtract the remainder from the shifted dividend.
7734 SDValue Dividend = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT, N1: LL, N2: LH);
7735 SDValue Rem = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT, N1: RemL, N2: RemH);
7736
7737 Dividend = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Dividend, N2: Rem);
7738
7739 // Multiply by the multiplicative inverse of the divisor modulo
7740 // (1 << BitWidth).
7741 APInt Mod = APInt::getSignedMinValue(numBits: BitWidth + 1);
7742 APInt MulFactor = Divisor.zext(width: BitWidth + 1);
7743 MulFactor = MulFactor.multiplicativeInverse(modulo: Mod);
7744 MulFactor = MulFactor.trunc(width: BitWidth);
7745
7746 SDValue Quotient = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Dividend,
7747 N2: DAG.getConstant(Val: MulFactor, DL: dl, VT));
7748
7749 // Split the quotient into low and high parts.
7750 SDValue QuotL, QuotH;
7751 std::tie(args&: QuotL, args&: QuotH) = DAG.SplitScalar(N: Quotient, DL: dl, LoVT: HiLoVT, HiVT: HiLoVT);
7752 Result.push_back(Elt: QuotL);
7753 Result.push_back(Elt: QuotH);
7754 }
7755
7756 if (Opcode != ISD::UDIV) {
7757 // If we shifted the input, shift the remainder left and add the bits we
7758 // shifted off the input.
7759 if (TrailingZeros) {
7760 APInt Mask = APInt::getLowBitsSet(numBits: HBitWidth, loBitsSet: TrailingZeros);
7761 RemL = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HiLoVT, N1: RemL,
7762 N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl));
7763 RemL = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: RemL, N2: PartialRem);
7764 }
7765 Result.push_back(Elt: RemL);
7766 Result.push_back(Elt: DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT));
7767 }
7768
7769 return true;
7770}
7771
7772// Check that (every element of) Z is undef or not an exact multiple of BW.
7773static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
7774 return ISD::matchUnaryPredicate(
7775 Op: Z,
7776 Match: [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(RHS: BW) != 0; },
7777 AllowUndefs: true);
7778}
7779
7780static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
7781 EVT VT = Node->getValueType(ResNo: 0);
7782 SDValue ShX, ShY;
7783 SDValue ShAmt, InvShAmt;
7784 SDValue X = Node->getOperand(Num: 0);
7785 SDValue Y = Node->getOperand(Num: 1);
7786 SDValue Z = Node->getOperand(Num: 2);
7787 SDValue Mask = Node->getOperand(Num: 3);
7788 SDValue VL = Node->getOperand(Num: 4);
7789
7790 unsigned BW = VT.getScalarSizeInBits();
7791 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
7792 SDLoc DL(SDValue(Node, 0));
7793
7794 EVT ShVT = Z.getValueType();
7795 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7796 // fshl: X << C | Y >> (BW - C)
7797 // fshr: X << (BW - C) | Y >> C
7798 // where C = Z % BW is not zero
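    // (e.g. for i8, fshl X, Y, 3 == (X << 3) | (Y >> 5))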
7799 SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
7800 ShAmt = DAG.getNode(Opcode: ISD::VP_UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC, N3: Mask, N4: VL);
7801 InvShAmt = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT: ShVT, N1: BitWidthC, N2: ShAmt, N3: Mask, N4: VL);
7802 ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: IsFSHL ? ShAmt : InvShAmt, N3: Mask,
7803 N4: VL);
7804 ShY = DAG.getNode(Opcode: ISD::VP_LSHR, DL, VT, N1: Y, N2: IsFSHL ? InvShAmt : ShAmt, N3: Mask,
7805 N4: VL);
7806 } else {
7807 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7808 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
7809 SDValue BitMask = DAG.getConstant(Val: BW - 1, DL, VT: ShVT);
7810 if (isPowerOf2_32(Value: BW)) {
7811 // Z % BW -> Z & (BW - 1)
7812 ShAmt = DAG.getNode(Opcode: ISD::VP_AND, DL, VT: ShVT, N1: Z, N2: BitMask, N3: Mask, N4: VL);
7813 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7814 SDValue NotZ = DAG.getNode(Opcode: ISD::VP_XOR, DL, VT: ShVT, N1: Z,
7815 N2: DAG.getAllOnesConstant(DL, VT: ShVT), N3: Mask, N4: VL);
7816 InvShAmt = DAG.getNode(Opcode: ISD::VP_AND, DL, VT: ShVT, N1: NotZ, N2: BitMask, N3: Mask, N4: VL);
7817 } else {
7818 SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
7819 ShAmt = DAG.getNode(Opcode: ISD::VP_UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC, N3: Mask, N4: VL);
7820 InvShAmt = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT: ShVT, N1: BitMask, N2: ShAmt, N3: Mask, N4: VL);
7821 }
7822
7823 SDValue One = DAG.getConstant(Val: 1, DL, VT: ShVT);
7824 if (IsFSHL) {
7825 ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: ShAmt, N3: Mask, N4: VL);
7826 SDValue ShY1 = DAG.getNode(Opcode: ISD::VP_LSHR, DL, VT, N1: Y, N2: One, N3: Mask, N4: VL);
7827 ShY = DAG.getNode(Opcode: ISD::VP_LSHR, DL, VT, N1: ShY1, N2: InvShAmt, N3: Mask, N4: VL);
7828 } else {
7829 SDValue ShX1 = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: One, N3: Mask, N4: VL);
7830 ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: ShX1, N2: InvShAmt, N3: Mask, N4: VL);
7831 ShY = DAG.getNode(Opcode: ISD::VP_LSHR, DL, VT, N1: Y, N2: ShAmt, N3: Mask, N4: VL);
7832 }
7833 }
7834 return DAG.getNode(Opcode: ISD::VP_OR, DL, VT, N1: ShX, N2: ShY, N3: Mask, N4: VL);
7835}
7836
7837SDValue TargetLowering::expandFunnelShift(SDNode *Node,
7838 SelectionDAG &DAG) const {
7839 if (Node->isVPOpcode())
7840 return expandVPFunnelShift(Node, DAG);
7841
7842 EVT VT = Node->getValueType(ResNo: 0);
7843
7844 if (VT.isVector() && (!isOperationLegalOrCustom(Op: ISD::SHL, VT) ||
7845 !isOperationLegalOrCustom(Op: ISD::SRL, VT) ||
7846 !isOperationLegalOrCustom(Op: ISD::SUB, VT) ||
7847 !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)))
7848 return SDValue();
7849
7850 SDValue X = Node->getOperand(Num: 0);
7851 SDValue Y = Node->getOperand(Num: 1);
7852 SDValue Z = Node->getOperand(Num: 2);
7853
7854 unsigned BW = VT.getScalarSizeInBits();
7855 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
7856 SDLoc DL(SDValue(Node, 0));
7857
7858 EVT ShVT = Z.getValueType();
7859
  // If a funnel shift in the other direction is better supported, use it.
7861 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
7862 if (!isOperationLegalOrCustom(Op: Node->getOpcode(), VT) &&
7863 isOperationLegalOrCustom(Op: RevOpcode, VT) && isPowerOf2_32(Value: BW)) {
7864 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7865 // fshl X, Y, Z -> fshr X, Y, -Z
7866 // fshr X, Y, Z -> fshl X, Y, -Z
7867 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: ShVT);
7868 Z = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Zero, N2: Z);
7869 } else {
7870 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7871 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7872 SDValue One = DAG.getConstant(Val: 1, DL, VT: ShVT);
7873 if (IsFSHL) {
7874 Y = DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: One);
7875 X = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: X, N2: One);
7876 } else {
7877 X = DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: One);
7878 Y = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Y, N2: One);
7879 }
7880 Z = DAG.getNOT(DL, Val: Z, VT: ShVT);
7881 }
7882 return DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: Z);
7883 }
7884
7885 SDValue ShX, ShY;
7886 SDValue ShAmt, InvShAmt;
7887 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7888 // fshl: X << C | Y >> (BW - C)
7889 // fshr: X << (BW - C) | Y >> C
7890 // where C = Z % BW is not zero
7891 SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
7892 ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC);
7893 InvShAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: BitWidthC, N2: ShAmt);
7894 ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: IsFSHL ? ShAmt : InvShAmt);
7895 ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: IsFSHL ? InvShAmt : ShAmt);
7896 } else {
7897 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7898 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
7899 SDValue Mask = DAG.getConstant(Val: BW - 1, DL, VT: ShVT);
7900 if (isPowerOf2_32(Value: BW)) {
7901 // Z % BW -> Z & (BW - 1)
7902 ShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: Z, N2: Mask);
7903 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7904 InvShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: DAG.getNOT(DL, Val: Z, VT: ShVT), N2: Mask);
7905 } else {
7906 SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
7907 ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC);
7908 InvShAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Mask, N2: ShAmt);
7909 }
7910
7911 SDValue One = DAG.getConstant(Val: 1, DL, VT: ShVT);
7912 if (IsFSHL) {
7913 ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: ShAmt);
7914 SDValue ShY1 = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: One);
7915 ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: ShY1, N2: InvShAmt);
7916 } else {
7917 SDValue ShX1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: One);
7918 ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ShX1, N2: InvShAmt);
7919 ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: ShAmt);
7920 }
7921 }
7922 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShX, N2: ShY);
7923}
7924
7925// TODO: Merge with expandFunnelShift.
7926SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
7927 SelectionDAG &DAG) const {
7928 EVT VT = Node->getValueType(ResNo: 0);
7929 unsigned EltSizeInBits = VT.getScalarSizeInBits();
7930 bool IsLeft = Node->getOpcode() == ISD::ROTL;
7931 SDValue Op0 = Node->getOperand(Num: 0);
7932 SDValue Op1 = Node->getOperand(Num: 1);
7933 SDLoc DL(SDValue(Node, 0));
7934
7935 EVT ShVT = Op1.getValueType();
7936 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: ShVT);
7937
  // If a rotate in the other direction is better supported, use it.
7939 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
7940 if (!isOperationLegalOrCustom(Op: Node->getOpcode(), VT) &&
7941 isOperationLegalOrCustom(Op: RevRot, VT) && isPowerOf2_32(Value: EltSizeInBits)) {
7942 SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Zero, N2: Op1);
7943 return DAG.getNode(Opcode: RevRot, DL, VT, N1: Op0, N2: Sub);
7944 }
7945
7946 if (!AllowVectorOps && VT.isVector() &&
7947 (!isOperationLegalOrCustom(Op: ISD::SHL, VT) ||
7948 !isOperationLegalOrCustom(Op: ISD::SRL, VT) ||
7949 !isOperationLegalOrCustom(Op: ISD::SUB, VT) ||
7950 !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT) ||
7951 !isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT)))
7952 return SDValue();
7953
7954 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
7955 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
7956 SDValue BitWidthMinusOneC = DAG.getConstant(Val: EltSizeInBits - 1, DL, VT: ShVT);
7957 SDValue ShVal;
7958 SDValue HsVal;
7959 if (isPowerOf2_32(Value: EltSizeInBits)) {
7960 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
7961 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
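    // (e.g. for i8, rotl x, 3 == (x << 3) | (x >> 5), since -3 & 7 == 5)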
7962 SDValue NegOp1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Zero, N2: Op1);
7963 SDValue ShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: Op1, N2: BitWidthMinusOneC);
7964 ShVal = DAG.getNode(Opcode: ShOpc, DL, VT, N1: Op0, N2: ShAmt);
7965 SDValue HsAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: NegOp1, N2: BitWidthMinusOneC);
7966 HsVal = DAG.getNode(Opcode: HsOpc, DL, VT, N1: Op0, N2: HsAmt);
7967 } else {
7968 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
7969 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
7970 SDValue BitWidthC = DAG.getConstant(Val: EltSizeInBits, DL, VT: ShVT);
7971 SDValue ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Op1, N2: BitWidthC);
7972 ShVal = DAG.getNode(Opcode: ShOpc, DL, VT, N1: Op0, N2: ShAmt);
7973 SDValue HsAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: BitWidthMinusOneC, N2: ShAmt);
7974 SDValue One = DAG.getConstant(Val: 1, DL, VT: ShVT);
7975 HsVal =
7976 DAG.getNode(Opcode: HsOpc, DL, VT, N1: DAG.getNode(Opcode: HsOpc, DL, VT, N1: Op0, N2: One), N2: HsAmt);
7977 }
7978 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShVal, N2: HsVal);
7979}
7980
7981void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
7982 SelectionDAG &DAG) const {
7983 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
7984 EVT VT = Node->getValueType(ResNo: 0);
7985 unsigned VTBits = VT.getScalarSizeInBits();
7986 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
7987
7988 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
7989 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
7990 SDValue ShOpLo = Node->getOperand(Num: 0);
7991 SDValue ShOpHi = Node->getOperand(Num: 1);
7992 SDValue ShAmt = Node->getOperand(Num: 2);
7993 EVT ShAmtVT = ShAmt.getValueType();
7994 EVT ShAmtCCVT =
7995 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: ShAmtVT);
7996 SDLoc dl(Node);
7997
  // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
  // ISD::SRA/SRL nodes do not. Insert an AND to be safe; it's usually
  // optimized away during isel.
8001 SDValue SafeShAmt = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ShAmtVT, N1: ShAmt,
8002 N2: DAG.getConstant(Val: VTBits - 1, DL: dl, VT: ShAmtVT));
8003 SDValue Tmp1 = IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: ShOpHi,
8004 N2: DAG.getConstant(Val: VTBits - 1, DL: dl, VT: ShAmtVT))
8005 : DAG.getConstant(Val: 0, DL: dl, VT);
8006
8007 SDValue Tmp2, Tmp3;
8008 if (IsSHL) {
8009 Tmp2 = DAG.getNode(Opcode: ISD::FSHL, DL: dl, VT, N1: ShOpHi, N2: ShOpLo, N3: ShAmt);
8010 Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: ShOpLo, N2: SafeShAmt);
8011 } else {
8012 Tmp2 = DAG.getNode(Opcode: ISD::FSHR, DL: dl, VT, N1: ShOpHi, N2: ShOpLo, N3: ShAmt);
8013 Tmp3 = DAG.getNode(Opcode: IsSRA ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: ShOpHi, N2: SafeShAmt);
8014 }
8015
  // If the shift amount is larger than or equal to the width of a part, we
  // don't use the result from the FSHL/FSHR. Insert a test and select the
  // appropriate values for large shift amounts.
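  // (For instance, with 32-bit parts and ShAmt == 40, SHL_PARTS produces
  // Hi = Lo << 8 and Lo = 0 via the selects below, rather than the
  // FSHL-based result for small shift amounts.)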
8019 SDValue AndNode = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ShAmtVT, N1: ShAmt,
8020 N2: DAG.getConstant(Val: VTBits, DL: dl, VT: ShAmtVT));
8021 SDValue Cond = DAG.getSetCC(DL: dl, VT: ShAmtCCVT, LHS: AndNode,
8022 RHS: DAG.getConstant(Val: 0, DL: dl, VT: ShAmtVT), Cond: ISD::SETNE);
8023
8024 if (IsSHL) {
8025 Hi = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp3, N3: Tmp2);
8026 Lo = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp1, N3: Tmp3);
8027 } else {
8028 Lo = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp3, N3: Tmp2);
8029 Hi = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp1, N3: Tmp3);
8030 }
8031}
8032
8033bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
8034 SelectionDAG &DAG) const {
8035 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8036 SDValue Src = Node->getOperand(Num: OpNo);
8037 EVT SrcVT = Src.getValueType();
8038 EVT DstVT = Node->getValueType(ResNo: 0);
8039 SDLoc dl(SDValue(Node, 0));
8040
8041 // FIXME: Only f32 to i64 conversions are supported.
8042 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
8043 return false;
8044
8045 if (Node->isStrictFPOpcode())
    // When a NaN is converted to an integer, a trap is allowed. We can't
    // use this expansion here because it would eliminate that trap. Other
    // traps are also allowed and cannot be eliminated. See
    // IEEE 754-2008 sec 5.8.
8050 return false;
8051
8052 // Expand f32 -> i64 conversion
8053 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8054 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
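  //
  // As a worked instance, 1.5f has bits 0x3FC00000: the biased exponent is
  // 127 (unbiased 0) and the significand with the implicit bit restored is
  // 0x00C00000. Since the unbiased exponent is below 23, the result is
  // 0x00C00000 >> (23 - 0) == 1, matching truncation toward zero.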
8055 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8056 EVT IntVT = SrcVT.changeTypeToInteger();
8057 EVT IntShVT = getShiftAmountTy(LHSTy: IntVT, DL: DAG.getDataLayout());
8058
8059 SDValue ExponentMask = DAG.getConstant(Val: 0x7F800000, DL: dl, VT: IntVT);
8060 SDValue ExponentLoBit = DAG.getConstant(Val: 23, DL: dl, VT: IntVT);
8061 SDValue Bias = DAG.getConstant(Val: 127, DL: dl, VT: IntVT);
8062 SDValue SignMask = DAG.getConstant(Val: APInt::getSignMask(BitWidth: SrcEltBits), DL: dl, VT: IntVT);
8063 SDValue SignLowBit = DAG.getConstant(Val: SrcEltBits - 1, DL: dl, VT: IntVT);
8064 SDValue MantissaMask = DAG.getConstant(Val: 0x007FFFFF, DL: dl, VT: IntVT);
8065
8066 SDValue Bits = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: IntVT, Operand: Src);
8067
8068 SDValue ExponentBits = DAG.getNode(
8069 Opcode: ISD::SRL, DL: dl, VT: IntVT, N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: ExponentMask),
8070 N2: DAG.getZExtOrTrunc(Op: ExponentLoBit, DL: dl, VT: IntShVT));
8071 SDValue Exponent = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: ExponentBits, N2: Bias);
8072
8073 SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: IntVT,
8074 N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: SignMask),
8075 N2: DAG.getZExtOrTrunc(Op: SignLowBit, DL: dl, VT: IntShVT));
8076 Sign = DAG.getSExtOrTrunc(Op: Sign, DL: dl, VT: DstVT);
8077
8078 SDValue R = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: IntVT,
8079 N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: MantissaMask),
8080 N2: DAG.getConstant(Val: 0x00800000, DL: dl, VT: IntVT));
8081
8082 R = DAG.getZExtOrTrunc(Op: R, DL: dl, VT: DstVT);
8083
8084 R = DAG.getSelectCC(
8085 DL: dl, LHS: Exponent, RHS: ExponentLoBit,
8086 True: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: DstVT, N1: R,
8087 N2: DAG.getZExtOrTrunc(
8088 Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: Exponent, N2: ExponentLoBit),
8089 DL: dl, VT: IntShVT)),
8090 False: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: DstVT, N1: R,
8091 N2: DAG.getZExtOrTrunc(
8092 Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: ExponentLoBit, N2: Exponent),
8093 DL: dl, VT: IntShVT)),
8094 Cond: ISD::SETGT);
8095
8096 SDValue Ret = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: DstVT,
8097 N1: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: R, N2: Sign), N2: Sign);
8098
8099 Result = DAG.getSelectCC(DL: dl, LHS: Exponent, RHS: DAG.getConstant(Val: 0, DL: dl, VT: IntVT),
8100 True: DAG.getConstant(Val: 0, DL: dl, VT: DstVT), False: Ret, Cond: ISD::SETLT);
8101 return true;
8102}
8103
8104bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
8105 SDValue &Chain,
8106 SelectionDAG &DAG) const {
8107 SDLoc dl(SDValue(Node, 0));
8108 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8109 SDValue Src = Node->getOperand(Num: OpNo);
8110
8111 EVT SrcVT = Src.getValueType();
8112 EVT DstVT = Node->getValueType(ResNo: 0);
8113 EVT SetCCVT =
8114 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: SrcVT);
8115 EVT DstSetCCVT =
8116 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: DstVT);
8117
8118 // Only expand vector types if we have the appropriate vector bit operations.
8119 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
8120 ISD::FP_TO_SINT;
8121 if (DstVT.isVector() && (!isOperationLegalOrCustom(Op: SIntOpcode, VT: DstVT) ||
8122 !isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT: SrcVT)))
8123 return false;
8124
  // If the maximum float value is smaller than the signed integer range, the
  // destination signmask can't be represented by the float, so we can just
  // use FP_TO_SINT directly.
8128 const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(VT: SrcVT);
8129 APFloat APF(APFSem, APInt::getZero(numBits: SrcVT.getScalarSizeInBits()));
8130 APInt SignMask = APInt::getSignMask(BitWidth: DstVT.getScalarSizeInBits());
8131 if (APFloat::opOverflow &
8132 APF.convertFromAPInt(Input: SignMask, IsSigned: false, RM: APFloat::rmNearestTiesToEven)) {
8133 if (Node->isStrictFPOpcode()) {
8134 Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8135 { Node->getOperand(0), Src });
8136 Chain = Result.getValue(R: 1);
8137 } else
8138 Result = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Src);
8139 return true;
8140 }
8141
  // Don't expand it if there isn't a cheap fsub instruction.
8143 if (!isOperationLegalOrCustom(
8144 Op: Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, VT: SrcVT))
8145 return false;
8146
8147 SDValue Cst = DAG.getConstantFP(Val: APF, DL: dl, VT: SrcVT);
8148 SDValue Sel;
8149
8150 if (Node->isStrictFPOpcode()) {
8151 Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT,
8152 Chain: Node->getOperand(Num: 0), /*IsSignaling*/ true);
8153 Chain = Sel.getValue(R: 1);
8154 } else {
8155 Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT);
8156 }
8157
8158 bool Strict = Node->isStrictFPOpcode() ||
8159 shouldUseStrictFP_TO_INT(FpVT: SrcVT, IntVT: DstVT, /*IsSigned*/ false);
8160
8161 if (Strict) {
    // Expand based on maximum range of FP_TO_SINT: if the value exceeds the
    // signmask, offset it first (the result of which should be fully
    // representable).
8164 // Sel = Src < 0x8000000000000000
8165 // FltOfs = select Sel, 0, 0x8000000000000000
8166 // IntOfs = select Sel, 0, 0x8000000000000000
8167 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8168
8169 // TODO: Should any fast-math-flags be set for the FSUB?
8170 SDValue FltOfs = DAG.getSelect(DL: dl, VT: SrcVT, Cond: Sel,
8171 LHS: DAG.getConstantFP(Val: 0.0, DL: dl, VT: SrcVT), RHS: Cst);
8172 Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT);
8173 SDValue IntOfs = DAG.getSelect(DL: dl, VT: DstVT, Cond: Sel,
8174 LHS: DAG.getConstant(Val: 0, DL: dl, VT: DstVT),
8175 RHS: DAG.getConstant(Val: SignMask, DL: dl, VT: DstVT));
8176 SDValue SInt;
8177 if (Node->isStrictFPOpcode()) {
8178 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
8179 { Chain, Src, FltOfs });
8180 SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8181 { Val.getValue(1), Val });
8182 Chain = SInt.getValue(R: 1);
8183 } else {
8184 SDValue Val = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: SrcVT, N1: Src, N2: FltOfs);
8185 SInt = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Val);
8186 }
8187 Result = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: SInt, N2: IntOfs);
8188 } else {
8189 // Expand based on maximum range of FP_TO_SINT:
8190 // True = fp_to_sint(Src)
8191 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
8192 // Result = select (Src < 0x8000000000000000), True, False
8193
8194 SDValue True = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Src);
8195 // TODO: Should any fast-math-flags be set for the FSUB?
8196 SDValue False = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT,
8197 Operand: DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: SrcVT, N1: Src, N2: Cst));
8198 False = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: False,
8199 N2: DAG.getConstant(Val: SignMask, DL: dl, VT: DstVT));
8200 Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT);
8201 Result = DAG.getSelect(DL: dl, VT: DstVT, Cond: Sel, LHS: True, RHS: False);
8202 }
8203 return true;
8204}
8205
8206bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
8207 SDValue &Chain,
8208 SelectionDAG &DAG) const {
  // This transform is not correct for converting 0 when the rounding mode is
  // set to round toward negative infinity, as it will produce -0.0. So
  // disable it under strictfp.
8212 if (Node->isStrictFPOpcode())
8213 return false;
8214
8215 SDValue Src = Node->getOperand(Num: 0);
8216 EVT SrcVT = Src.getValueType();
8217 EVT DstVT = Node->getValueType(ResNo: 0);
8218
8219 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
8220 return false;
8221
8222 // Only expand vector types if we have the appropriate vector bit operations.
8223 if (SrcVT.isVector() && (!isOperationLegalOrCustom(Op: ISD::SRL, VT: SrcVT) ||
8224 !isOperationLegalOrCustom(Op: ISD::FADD, VT: DstVT) ||
8225 !isOperationLegalOrCustom(Op: ISD::FSUB, VT: DstVT) ||
8226 !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT: SrcVT) ||
8227 !isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT: SrcVT)))
8228 return false;
8229
8230 SDLoc dl(SDValue(Node, 0));
8231 EVT ShiftVT = getShiftAmountTy(LHSTy: SrcVT, DL: DAG.getDataLayout());
8232
8233 // Implementation of unsigned i64 to f64 following the algorithm in
8234 // __floatundidf in compiler_rt. This implementation performs rounding
8235 // correctly in all rounding modes with the exception of converting 0
8236 // when rounding toward negative infinity. In that case the fsub will produce
  // -0.0. This will be added to +0.0 and produce -0.0, which is incorrect.
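  //
  // In scalar terms the expansion below computes:
  //   LoFlt = bitcast<double>(0x4330000000000000 | (x & 0xFFFFFFFF))
  //   HiFlt = bitcast<double>(0x4530000000000000 | (x >> 32))
  //   result = (HiFlt - (2^84 + 2^52)) + LoFlt
  // where 0x4330000000000000 is the bit pattern of 2^52 and
  // 0x4530000000000000 that of 2^84: OR'ing a 32-bit value into their zero
  // mantissas yields exactly 2^52 + Lo and 2^84 + Hi * 2^32, so the FSUB
  // leaves Hi * 2^32 - 2^52 and the final FADD computes Hi * 2^32 + Lo with
  // a single rounding.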
8238 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), DL: dl, VT: SrcVT);
8239 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
8240 Val: llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), DL: dl, VT: DstVT);
8241 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), DL: dl, VT: SrcVT);
8242 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), DL: dl, VT: SrcVT);
8243 SDValue HiShift = DAG.getConstant(Val: 32, DL: dl, VT: ShiftVT);
8244
8245 SDValue Lo = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: SrcVT, N1: Src, N2: LoMask);
8246 SDValue Hi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: SrcVT, N1: Src, N2: HiShift);
8247 SDValue LoOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: SrcVT, N1: Lo, N2: TwoP52);
8248 SDValue HiOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: SrcVT, N1: Hi, N2: TwoP84);
8249 SDValue LoFlt = DAG.getBitcast(VT: DstVT, V: LoOr);
8250 SDValue HiFlt = DAG.getBitcast(VT: DstVT, V: HiOr);
8251 SDValue HiSub =
8252 DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: DstVT, N1: HiFlt, N2: TwoP84PlusTwoP52);
8253 Result = DAG.getNode(Opcode: ISD::FADD, DL: dl, VT: DstVT, N1: LoFlt, N2: HiSub);
8254 return true;
8255}
8256
8257SDValue
8258TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
8259 SelectionDAG &DAG) const {
8260 unsigned Opcode = Node->getOpcode();
8261 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8262 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8263 "Wrong opcode");
8264
8265 if (Node->getFlags().hasNoNaNs()) {
8266 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8267 SDValue Op1 = Node->getOperand(Num: 0);
8268 SDValue Op2 = Node->getOperand(Num: 1);
8269 SDValue SelCC = DAG.getSelectCC(DL: SDLoc(Node), LHS: Op1, RHS: Op2, True: Op1, False: Op2, Cond: Pred);
8270 // Copy FMF flags, but always set the no-signed-zeros flag
8271 // as this is implied by the FMINNUM/FMAXNUM semantics.
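    // (For instance, with Op1 = +0.0 and Op2 = -0.0 the select may return
    // either zero; FMINNUM/FMAXNUM likewise leave the sign of a zero result
    // unspecified, so advertising no-signed-zeros here is sound.)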
8272 SDNodeFlags Flags = Node->getFlags();
8273 Flags.setNoSignedZeros(true);
8274 SelCC->setFlags(Flags);
8275 return SelCC;
8276 }
8277
8278 return SDValue();
8279}
8280
8281SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
8282 SelectionDAG &DAG) const {
8283 SDLoc dl(Node);
8284 unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
8285 ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8286 EVT VT = Node->getValueType(ResNo: 0);
8287
8288 if (VT.isScalableVector())
8289 report_fatal_error(
8290 reason: "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
8291
8292 if (isOperationLegalOrCustom(Op: NewOp, VT)) {
8293 SDValue Quiet0 = Node->getOperand(Num: 0);
8294 SDValue Quiet1 = Node->getOperand(Num: 1);
8295
8296 if (!Node->getFlags().hasNoNaNs()) {
      // Insert FCANONICALIZE nodes if we may need to quiet a signaling NaN to
      // get correct sNaN behavior.
8299 if (!DAG.isKnownNeverSNaN(Op: Quiet0)) {
8300 Quiet0 = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL: dl, VT, Operand: Quiet0,
8301 Flags: Node->getFlags());
8302 }
8303 if (!DAG.isKnownNeverSNaN(Op: Quiet1)) {
8304 Quiet1 = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL: dl, VT, Operand: Quiet1,
8305 Flags: Node->getFlags());
8306 }
8307 }
8308
8309 return DAG.getNode(Opcode: NewOp, DL: dl, VT, N1: Quiet0, N2: Quiet1, Flags: Node->getFlags());
8310 }
8311
  // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM, use those
  // instead if there are no NaNs and there can't be an incompatible zero
  // compare: at least one operand isn't +/-0, or there are no signed zeros.
8315 if ((Node->getFlags().hasNoNaNs() ||
8316 (DAG.isKnownNeverNaN(Op: Node->getOperand(Num: 0)) &&
8317 DAG.isKnownNeverNaN(Op: Node->getOperand(Num: 1)))) &&
8318 (Node->getFlags().hasNoSignedZeros() ||
8319 DAG.isKnownNeverZeroFloat(Op: Node->getOperand(Num: 0)) ||
8320 DAG.isKnownNeverZeroFloat(Op: Node->getOperand(Num: 1)))) {
8321 unsigned IEEE2018Op =
8322 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8323 if (isOperationLegalOrCustom(Op: IEEE2018Op, VT))
8324 return DAG.getNode(Opcode: IEEE2018Op, DL: dl, VT, N1: Node->getOperand(Num: 0),
8325 N2: Node->getOperand(Num: 1), Flags: Node->getFlags());
8326 }
8327
8328 if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
8329 return SelCC;
8330
8331 return SDValue();
8332}
8333
/// Returns a true value if this FPClassTest can be performed with an ordered
/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
/// std::nullopt if it cannot be performed as a compare with 0.
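/// For example, Test = "zero|nan" lowers to an unordered equality compare
/// against +0.0 (SETUEQ in the caller), while a plain "zero" test maps to the
/// ordered SETOEQ form, provided denormal inputs are not flushed to zero.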
8337static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8338 const fltSemantics &Semantics,
8339 const MachineFunction &MF) {
8340 FPClassTest OrderedMask = Test & ~fcNan;
8341 FPClassTest NanTest = Test & fcNan;
8342 bool IsOrdered = NanTest == fcNone;
8343 bool IsUnordered = NanTest == fcNan;
8344
8345 // Skip cases that are testing for only a qnan or snan.
8346 if (!IsOrdered && !IsUnordered)
8347 return std::nullopt;
8348
8349 if (OrderedMask == fcZero &&
8350 MF.getDenormalMode(FPType: Semantics).Input == DenormalMode::IEEE)
8351 return IsOrdered;
8352 if (OrderedMask == (fcZero | fcSubnormal) &&
8353 MF.getDenormalMode(FPType: Semantics).inputsAreZero())
8354 return IsOrdered;
8355 return std::nullopt;
8356}
8357
8358SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
8359 FPClassTest Test, SDNodeFlags Flags,
8360 const SDLoc &DL,
8361 SelectionDAG &DAG) const {
8362 EVT OperandVT = Op.getValueType();
8363 assert(OperandVT.isFloatingPoint());
8364
  // Degenerate cases.
8366 if (Test == fcNone)
8367 return DAG.getBoolConstant(V: false, DL, VT: ResultVT, OpVT: OperandVT);
8368 if ((Test & fcAllFlags) == fcAllFlags)
8369 return DAG.getBoolConstant(V: true, DL, VT: ResultVT, OpVT: OperandVT);
8370
8371 // PPC double double is a pair of doubles, of which the higher part determines
8372 // the value class.
8373 if (OperandVT == MVT::ppcf128) {
8374 Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
8375 DAG.getConstant(1, DL, MVT::i32));
8376 OperandVT = MVT::f64;
8377 }
8378
  // Some checks may be represented as the inversion of a simpler check, for
  // example "inf|normal|subnormal|zero" => !"nan".
8381 bool IsInverted = false;
8382 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test)) {
8383 IsInverted = true;
8384 Test = InvertedCheck;
8385 }
8386
8387 // Floating-point type properties.
8388 EVT ScalarFloatVT = OperandVT.getScalarType();
8389 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(Context&: *DAG.getContext());
8390 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
8391 bool IsF80 = (ScalarFloatVT == MVT::f80);
8392
8393 // Some checks can be implemented using float comparisons, if floating point
8394 // exceptions are ignored.
8395 if (Flags.hasNoFPExcept() &&
8396 isOperationLegalOrCustom(Op: ISD::SETCC, VT: OperandVT.getScalarType())) {
8397 ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
8398 ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;
8399
8400 if (std::optional<bool> IsCmp0 =
8401 isFCmpEqualZero(Test, Semantics, MF: DAG.getMachineFunction());
8402 IsCmp0 && (isCondCodeLegalOrCustom(
8403 CC: *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
8404 VT: OperandVT.getScalarType().getSimpleVT()))) {
8405
      // If denormals were implicitly treated as 0, this would not be
      // equivalent to a compare with 0, since it would also be true for
      // denormals.
8408 return DAG.getSetCC(DL, VT: ResultVT, LHS: Op,
8409 RHS: DAG.getConstantFP(Val: 0.0, DL, VT: OperandVT),
8410 Cond: *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
8411 }
8412
8413 if (Test == fcNan &&
8414 isCondCodeLegalOrCustom(CC: IsInverted ? ISD::SETO : ISD::SETUO,
8415 VT: OperandVT.getScalarType().getSimpleVT())) {
8416 return DAG.getSetCC(DL, VT: ResultVT, LHS: Op, RHS: Op,
8417 Cond: IsInverted ? ISD::SETO : ISD::SETUO);
8418 }
8419
8420 if (Test == fcInf &&
8421 isCondCodeLegalOrCustom(CC: IsInverted ? ISD::SETUNE : ISD::SETOEQ,
8422 VT: OperandVT.getScalarType().getSimpleVT()) &&
8423 isOperationLegalOrCustom(Op: ISD::FABS, VT: OperandVT.getScalarType())) {
8424 // isinf(x) --> fabs(x) == inf
8425 SDValue Abs = DAG.getNode(Opcode: ISD::FABS, DL, VT: OperandVT, Operand: Op);
8426 SDValue Inf =
8427 DAG.getConstantFP(Val: APFloat::getInf(Sem: Semantics), DL, VT: OperandVT);
8428 return DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: Inf,
8429 Cond: IsInverted ? ISD::SETUNE : ISD::SETOEQ);
8430 }
8431 }
8432
8433 // In the general case use integer operations.
8434 unsigned BitSize = OperandVT.getScalarSizeInBits();
8435 EVT IntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: BitSize);
8436 if (OperandVT.isVector())
8437 IntVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: IntVT,
8438 EC: OperandVT.getVectorElementCount());
8439 SDValue OpAsInt = DAG.getBitcast(VT: IntVT, V: Op);
8440
8441 // Various masks.
8442 APInt SignBit = APInt::getSignMask(BitWidth: BitSize);
8443 APInt ValueMask = APInt::getSignedMaxValue(numBits: BitSize); // All bits but sign.
8444 APInt Inf = APFloat::getInf(Sem: Semantics).bitcastToAPInt(); // Exp and int bit.
8445 const unsigned ExplicitIntBitInF80 = 63;
8446 APInt ExpMask = Inf;
8447 if (IsF80)
8448 ExpMask.clearBit(BitPosition: ExplicitIntBitInF80);
8449 APInt AllOneMantissa = APFloat::getLargest(Sem: Semantics).bitcastToAPInt() & ~Inf;
8450 APInt QNaNBitMask =
8451 APInt::getOneBitSet(numBits: BitSize, BitNo: AllOneMantissa.getActiveBits() - 1);
8452 APInt InvertionMask = APInt::getAllOnes(numBits: ResultVT.getScalarSizeInBits());
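  // For f32, for example: SignBit = 0x80000000, ValueMask = 0x7fffffff,
  // Inf = ExpMask = 0x7f800000, AllOneMantissa = 0x007fffff and
  // QNaNBitMask = 0x00400000.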
8453
8454 SDValue ValueMaskV = DAG.getConstant(Val: ValueMask, DL, VT: IntVT);
8455 SDValue SignBitV = DAG.getConstant(Val: SignBit, DL, VT: IntVT);
8456 SDValue ExpMaskV = DAG.getConstant(Val: ExpMask, DL, VT: IntVT);
8457 SDValue ZeroV = DAG.getConstant(Val: 0, DL, VT: IntVT);
8458 SDValue InfV = DAG.getConstant(Val: Inf, DL, VT: IntVT);
8459 SDValue ResultInvertionMask = DAG.getConstant(Val: InvertionMask, DL, VT: ResultVT);
8460
8461 SDValue Res;
8462 const auto appendResult = [&](SDValue PartialRes) {
8463 if (PartialRes) {
8464 if (Res)
8465 Res = DAG.getNode(Opcode: ISD::OR, DL, VT: ResultVT, N1: Res, N2: PartialRes);
8466 else
8467 Res = PartialRes;
8468 }
8469 };
8470
8471 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
8472 const auto getIntBitIsSet = [&]() -> SDValue {
8473 if (!IntBitIsSetV) {
8474 APInt IntBitMask(BitSize, 0);
8475 IntBitMask.setBit(ExplicitIntBitInF80);
8476 SDValue IntBitMaskV = DAG.getConstant(Val: IntBitMask, DL, VT: IntVT);
8477 SDValue IntBitV = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: IntBitMaskV);
8478 IntBitIsSetV = DAG.getSetCC(DL, VT: ResultVT, LHS: IntBitV, RHS: ZeroV, Cond: ISD::SETNE);
8479 }
8480 return IntBitIsSetV;
8481 };
8482
8483 // Split the value into sign bit and absolute value.
8484 SDValue AbsV = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: ValueMaskV);
  SDValue SignV = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt,
                               RHS: DAG.getConstant(Val: 0, DL, VT: IntVT), Cond: ISD::SETLT);
8487
8488 // Tests that involve more than one class should be processed first.
8489 SDValue PartialRes;
8490
8491 if (IsF80)
8492 ; // Detect finite numbers of f80 by checking individual classes because
8493 // they have different settings of the explicit integer bit.
8494 else if ((Test & fcFinite) == fcFinite) {
8495 // finite(V) ==> abs(V) < exp_mask
8496 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ExpMaskV, Cond: ISD::SETLT);
8497 Test &= ~fcFinite;
8498 } else if ((Test & fcFinite) == fcPosFinite) {
8499 // finite(V) && V > 0 ==> V < exp_mask
8500 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: ExpMaskV, Cond: ISD::SETULT);
8501 Test &= ~fcPosFinite;
8502 } else if ((Test & fcFinite) == fcNegFinite) {
8503 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
8504 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ExpMaskV, Cond: ISD::SETLT);
8505 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
8506 Test &= ~fcNegFinite;
8507 }
8508 appendResult(PartialRes);
8509
8510 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
8511 // fcZero | fcSubnormal => test all exponent bits are 0
8512 // TODO: Handle sign bit specific cases
8513 if (PartialCheck == (fcZero | fcSubnormal)) {
8514 SDValue ExpBits = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: ExpMaskV);
8515 SDValue ExpIsZero =
8516 DAG.getSetCC(DL, VT: ResultVT, LHS: ExpBits, RHS: ZeroV, Cond: ISD::SETEQ);
8517 appendResult(ExpIsZero);
8518 Test &= ~PartialCheck & fcAllFlags;
8519 }
8520 }
8521
8522 // Check for individual classes.
8523
8524 if (unsigned PartialCheck = Test & fcZero) {
8525 if (PartialCheck == fcPosZero)
8526 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: ZeroV, Cond: ISD::SETEQ);
8527 else if (PartialCheck == fcZero)
8528 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ZeroV, Cond: ISD::SETEQ);
8529 else // ISD::fcNegZero
8530 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: SignBitV, Cond: ISD::SETEQ);
8531 appendResult(PartialRes);
8532 }
8533
8534 if (unsigned PartialCheck = Test & fcSubnormal) {
8535 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
8536 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
8537 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
8538 SDValue MantissaV = DAG.getConstant(Val: AllOneMantissa, DL, VT: IntVT);
8539 SDValue VMinusOneV =
8540 DAG.getNode(Opcode: ISD::SUB, DL, VT: IntVT, N1: V, N2: DAG.getConstant(Val: 1, DL, VT: IntVT));
8541 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: VMinusOneV, RHS: MantissaV, Cond: ISD::SETULT);
8542 if (PartialCheck == fcNegSubnormal)
8543 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
8544 appendResult(PartialRes);
8545 }
8546
8547 if (unsigned PartialCheck = Test & fcInf) {
8548 if (PartialCheck == fcPosInf)
8549 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: InfV, Cond: ISD::SETEQ);
8550 else if (PartialCheck == fcInf)
8551 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETEQ);
8552 else { // ISD::fcNegInf
8553 APInt NegInf = APFloat::getInf(Sem: Semantics, Negative: true).bitcastToAPInt();
8554 SDValue NegInfV = DAG.getConstant(Val: NegInf, DL, VT: IntVT);
8555 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: NegInfV, Cond: ISD::SETEQ);
8556 }
8557 appendResult(PartialRes);
8558 }
8559
8560 if (unsigned PartialCheck = Test & fcNan) {
8561 APInt InfWithQnanBit = Inf | QNaNBitMask;
8562 SDValue InfWithQnanBitV = DAG.getConstant(Val: InfWithQnanBit, DL, VT: IntVT);
8563 if (PartialCheck == fcNan) {
8564 // isnan(V) ==> abs(V) > int(inf)
8565 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETGT);
8566 if (IsF80) {
        // Recognize unsupported values as NaNs for compatibility with glibc.
        // For such values, (exp(V) == 0) == int_bit.
8569 SDValue ExpBits = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: AbsV, N2: ExpMaskV);
8570 SDValue ExpIsZero =
8571 DAG.getSetCC(DL, VT: ResultVT, LHS: ExpBits, RHS: ZeroV, Cond: ISD::SETEQ);
8572 SDValue IsPseudo =
8573 DAG.getSetCC(DL, VT: ResultVT, LHS: getIntBitIsSet(), RHS: ExpIsZero, Cond: ISD::SETEQ);
8574 PartialRes = DAG.getNode(Opcode: ISD::OR, DL, VT: ResultVT, N1: PartialRes, N2: IsPseudo);
8575 }
8576 } else if (PartialCheck == fcQNan) {
8577 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
8578 PartialRes =
8579 DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfWithQnanBitV, Cond: ISD::SETGE);
8580 } else { // ISD::fcSNan
8581 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
8582 // abs(V) < (unsigned(Inf) | quiet_bit)
8583 SDValue IsNan = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETGT);
8584 SDValue IsNotQnan =
8585 DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfWithQnanBitV, Cond: ISD::SETLT);
8586 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: IsNan, N2: IsNotQnan);
8587 }
8588 appendResult(PartialRes);
8589 }
8590
8591 if (unsigned PartialCheck = Test & fcNormal) {
8592 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
8593 APInt ExpLSB = ExpMask & ~(ExpMask.shl(shiftAmt: 1));
8594 SDValue ExpLSBV = DAG.getConstant(Val: ExpLSB, DL, VT: IntVT);
8595 SDValue ExpMinus1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: IntVT, N1: AbsV, N2: ExpLSBV);
8596 APInt ExpLimit = ExpMask - ExpLSB;
8597 SDValue ExpLimitV = DAG.getConstant(Val: ExpLimit, DL, VT: IntVT);
8598 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: ExpMinus1, RHS: ExpLimitV, Cond: ISD::SETULT);
8599 if (PartialCheck == fcNegNormal)
8600 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
8601 else if (PartialCheck == fcPosNormal) {
8602 SDValue PosSignV =
8603 DAG.getNode(Opcode: ISD::XOR, DL, VT: ResultVT, N1: SignV, N2: ResultInvertionMask);
8604 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: PosSignV);
8605 }
8606 if (IsF80)
8607 PartialRes =
8608 DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: getIntBitIsSet());
8609 appendResult(PartialRes);
8610 }
8611
8612 if (!Res)
8613 return DAG.getConstant(Val: IsInverted, DL, VT: ResultVT);
8614 if (IsInverted)
8615 Res = DAG.getNode(Opcode: ISD::XOR, DL, VT: ResultVT, N1: Res, N2: ResultInvertionMask);
8616 return Res;
8617}
8618
8619// Only expand vector types if we have the appropriate vector bit operations.
8620static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
8621 assert(VT.isVector() && "Expected vector type");
8622 unsigned Len = VT.getScalarSizeInBits();
8623 return TLI.isOperationLegalOrCustom(Op: ISD::ADD, VT) &&
8624 TLI.isOperationLegalOrCustom(Op: ISD::SUB, VT) &&
8625 TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
8626 (Len == 8 || TLI.isOperationLegalOrCustom(Op: ISD::MUL, VT)) &&
8627 TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT);
8628}
8629
8630SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
8631 SDLoc dl(Node);
8632 EVT VT = Node->getValueType(ResNo: 0);
8633 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
8634 SDValue Op = Node->getOperand(Num: 0);
8635 unsigned Len = VT.getScalarSizeInBits();
8636 assert(VT.isInteger() && "CTPOP not implemented for this type.");
8637
8638 // TODO: Add support for irregular type lengths.
8639 if (!(Len <= 128 && Len % 8 == 0))
8640 return SDValue();
8641
8642 // Only expand vector types if we have the appropriate vector bit operations.
8643 if (VT.isVector() && !canExpandVectorCTPOP(TLI: *this, VT))
8644 return SDValue();
8645
8646 // This is the "best" algorithm from
8647 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
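  //
  // Worked example on the single byte 0xDA (0b11011010, popcount 5):
  //   v - ((v >> 1) & 0x55)           -> 0x95 (pair counts 10|01|01|01)
  //   (v & 0x33) + ((v >> 2) & 0x33)  -> 0x32 (nibble counts 3 and 2)
  //   (v + (v >> 4)) & 0x0F           -> 0x05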
8648 SDValue Mask55 =
8649 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x55)), DL: dl, VT);
8650 SDValue Mask33 =
8651 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x33)), DL: dl, VT);
8652 SDValue Mask0F =
8653 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x0F)), DL: dl, VT);
8654
8655 // v = v - ((v >> 1) & 0x55555555...)
8656 Op = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Op,
8657 N2: DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
8658 N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
8659 N2: DAG.getConstant(Val: 1, DL: dl, VT: ShVT)),
8660 N2: Mask55));
8661 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
8662 Op = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op, N2: Mask33),
8663 N2: DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
8664 N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
8665 N2: DAG.getConstant(Val: 2, DL: dl, VT: ShVT)),
8666 N2: Mask33));
8667 // v = (v + (v >> 4)) & 0x0F0F0F0F...
8668 Op = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
8669 N1: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Op,
8670 N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
8671 N2: DAG.getConstant(Val: 4, DL: dl, VT: ShVT))),
8672 N2: Mask0F);
8673
8674 if (Len <= 8)
8675 return Op;
8676
8677 // Avoid the multiply if we only have 2 bytes to add.
8678 // TODO: Only doing this for scalars because vectors weren't as obviously
8679 // improved.
8680 if (Len == 16 && !VT.isVector()) {
8681 // v = (v + (v >> 8)) & 0x00FF;
8682 return DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
8683 N1: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Op,
8684 N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
8685 N2: DAG.getConstant(Val: 8, DL: dl, VT: ShVT))),
8686 N2: DAG.getConstant(Val: 0xFF, DL: dl, VT));
8687 }
8688
8689 // v = (v * 0x01010101...) >> (Len - 8)
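  // Multiplying by the splatted 0x01 byte sums every byte-wise count into the
  // most significant byte (no byte can overflow, since the total is at most
  // 128 for a 128-bit input), and the shift then extracts that byte.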
8690 SDValue Mask01 =
8691 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x01)), DL: dl, VT);
8692 return DAG.getNode(Opcode: ISD::SRL, DL: dl, VT,
8693 N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Op, N2: Mask01),
8694 N2: DAG.getConstant(Val: Len - 8, DL: dl, VT: ShVT));
8695}
8696
8697SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
8698 SDLoc dl(Node);
8699 EVT VT = Node->getValueType(ResNo: 0);
8700 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
8701 SDValue Op = Node->getOperand(Num: 0);
8702 SDValue Mask = Node->getOperand(Num: 1);
8703 SDValue VL = Node->getOperand(Num: 2);
8704 unsigned Len = VT.getScalarSizeInBits();
8705 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
8706
8707 // TODO: Add support for irregular type lengths.
8708 if (!(Len <= 128 && Len % 8 == 0))
8709 return SDValue();
8710
  // This is the same algorithm as expandCTPOP, from
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
8713 SDValue Mask55 =
8714 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x55)), DL: dl, VT);
8715 SDValue Mask33 =
8716 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x33)), DL: dl, VT);
8717 SDValue Mask0F =
8718 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x0F)), DL: dl, VT);
8719
8720 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
8721
8722 // v = v - ((v >> 1) & 0x55555555...)
8723 Tmp1 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT,
8724 N1: DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op,
8725 N2: DAG.getConstant(Val: 1, DL: dl, VT: ShVT), N3: Mask, N4: VL),
8726 N2: Mask55, N3: Mask, N4: VL);
8727 Op = DAG.getNode(Opcode: ISD::VP_SUB, DL: dl, VT, N1: Op, N2: Tmp1, N3: Mask, N4: VL);
8728
8729 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
8730 Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op, N2: Mask33, N3: Mask, N4: VL);
8731 Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT,
8732 N1: DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op,
8733 N2: DAG.getConstant(Val: 2, DL: dl, VT: ShVT), N3: Mask, N4: VL),
8734 N2: Mask33, N3: Mask, N4: VL);
8735 Op = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: VL);
8736
8737 // v = (v + (v >> 4)) & 0x0F0F0F0F...
  Tmp4 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 4, DL: dl, VT: ShVT),
                     N3: Mask, N4: VL);
  Tmp5 = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: Op, N2: Tmp4, N3: Mask, N4: VL);
8741 Op = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp5, N2: Mask0F, N3: Mask, N4: VL);
8742
8743 if (Len <= 8)
8744 return Op;
8745
8746 // v = (v * 0x01010101...) >> (Len - 8)
8747 SDValue Mask01 =
8748 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x01)), DL: dl, VT);
8749 return DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT,
8750 N1: DAG.getNode(Opcode: ISD::VP_MUL, DL: dl, VT, N1: Op, N2: Mask01, N3: Mask, N4: VL),
8751 N2: DAG.getConstant(Val: Len - 8, DL: dl, VT: ShVT), N3: Mask, N4: VL);
8752}
8753
8754SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
8755 SDLoc dl(Node);
8756 EVT VT = Node->getValueType(ResNo: 0);
8757 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
8758 SDValue Op = Node->getOperand(Num: 0);
8759 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
8760
8761 // If the non-ZERO_UNDEF version is supported we can use that instead.
8762 if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
8763 isOperationLegalOrCustom(Op: ISD::CTLZ, VT))
8764 return DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Op);
8765
8766 // If the ZERO_UNDEF version is supported use that and handle the zero case.
8767 if (isOperationLegalOrCustom(Op: ISD::CTLZ_ZERO_UNDEF, VT)) {
8768 EVT SetCCVT =
8769 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
8770 SDValue CTLZ = DAG.getNode(Opcode: ISD::CTLZ_ZERO_UNDEF, DL: dl, VT, Operand: Op);
8771 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
8772 SDValue SrcIsZero = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
8773 return DAG.getSelect(DL: dl, VT, Cond: SrcIsZero,
8774 LHS: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT), RHS: CTLZ);
8775 }
8776
8777 // Only expand vector types if we have the appropriate vector bit operations.
8778 // This includes the operations needed to expand CTPOP if it isn't supported.
8779 if (VT.isVector() && (!isPowerOf2_32(Value: NumBitsPerElt) ||
8780 (!isOperationLegalOrCustom(Op: ISD::CTPOP, VT) &&
8781 !canExpandVectorCTPOP(TLI: *this, VT)) ||
8782 !isOperationLegalOrCustom(Op: ISD::SRL, VT) ||
8783 !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)))
8784 return SDValue();
8785
8786 // for now, we do this:
8787 // x = x | (x >> 1);
8788 // x = x | (x >> 2);
8789 // ...
8790 // x = x | (x >>16);
8791 // x = x | (x >>32); // for 64-bit input
8792 // return popcount(~x);
8793 //
8794 // Ref: "Hacker's Delight" by Henry Warren
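  //
  // For example, with a 16-bit x = 0x0F00 the shifts smear the leading one
  // downwards into 0x0FFF, so popcount(~x) = popcount(0xF000) = 4 = ctlz(x).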
8795 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
8796 SDValue Tmp = DAG.getConstant(Val: 1ULL << i, DL: dl, VT: ShVT);
8797 Op = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Op,
8798 N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: Tmp));
8799 }
8800 Op = DAG.getNOT(DL: dl, Val: Op, VT);
8801 return DAG.getNode(Opcode: ISD::CTPOP, DL: dl, VT, Operand: Op);
8802}
8803
8804SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
8805 SDLoc dl(Node);
8806 EVT VT = Node->getValueType(ResNo: 0);
8807 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
8808 SDValue Op = Node->getOperand(Num: 0);
8809 SDValue Mask = Node->getOperand(Num: 1);
8810 SDValue VL = Node->getOperand(Num: 2);
8811 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
8812
8813 // do this:
8814 // x = x | (x >> 1);
8815 // x = x | (x >> 2);
8816 // ...
8817 // x = x | (x >>16);
8818 // x = x | (x >>32); // for 64-bit input
8819 // return popcount(~x);
8820 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
8821 SDValue Tmp = DAG.getConstant(Val: 1ULL << i, DL: dl, VT: ShVT);
8822 Op = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Op,
8823 N2: DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: Tmp, N3: Mask, N4: VL), N3: Mask,
8824 N4: VL);
8825 }
8826 Op = DAG.getNode(Opcode: ISD::VP_XOR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: -1, DL: dl, VT), N3: Mask,
8827 N4: VL);
8828 return DAG.getNode(Opcode: ISD::VP_CTPOP, DL: dl, VT, N1: Op, N2: Mask, N3: VL);
8829}
8830
8831SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
8832 const SDLoc &DL, EVT VT, SDValue Op,
8833 unsigned BitWidth) const {
8834 if (BitWidth != 32 && BitWidth != 64)
8835 return SDValue();
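  // CTTZ via a de Bruijn multiplication: Op & -Op isolates the lowest set bit
  // 2^i, multiplying the de Bruijn constant by 2^i shifts it left by i, and
  // the top Log2(BitWidth) bits of the product then index a table (built in
  // the loop below) that maps each distinct bit pattern back to i.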
8836 APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
8837 : APInt(64, 0x0218A392CD3D5DBFULL);
8838 const DataLayout &TD = DAG.getDataLayout();
8839 MachinePointerInfo PtrInfo =
8840 MachinePointerInfo::getConstantPool(MF&: DAG.getMachineFunction());
8841 unsigned ShiftAmt = BitWidth - Log2_32(Value: BitWidth);
8842 SDValue Neg = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT), N2: Op);
8843 SDValue Lookup = DAG.getNode(
8844 Opcode: ISD::SRL, DL, VT,
8845 N1: DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op, N2: Neg),
8846 N2: DAG.getConstant(Val: DeBruijn, DL, VT)),
8847 N2: DAG.getConstant(Val: ShiftAmt, DL, VT));
8848 Lookup = DAG.getSExtOrTrunc(Op: Lookup, DL, VT: getPointerTy(DL: TD));
8849
8850 SmallVector<uint8_t> Table(BitWidth, 0);
8851 for (unsigned i = 0; i < BitWidth; i++) {
8852 APInt Shl = DeBruijn.shl(shiftAmt: i);
8853 APInt Lshr = Shl.lshr(shiftAmt: ShiftAmt);
8854 Table[Lshr.getZExtValue()] = i;
8855 }
8856
  // Create a ConstantDataArray in the constant pool.
8858 auto *CA = ConstantDataArray::get(Context&: *DAG.getContext(), Elts&: Table);
8859 SDValue CPIdx = DAG.getConstantPool(C: CA, VT: getPointerTy(DL: TD),
8860 Align: TD.getPrefTypeAlign(Ty: CA->getType()));
8861 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
8862 DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
8863 PtrInfo, MVT::i8);
8864 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
8865 return ExtLoad;
8866
8867 EVT SetCCVT =
8868 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
8869 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
8870 SDValue SrcIsZero = DAG.getSetCC(DL, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
8871 return DAG.getSelect(DL, VT, Cond: SrcIsZero,
8872 LHS: DAG.getConstant(Val: BitWidth, DL, VT), RHS: ExtLoad);
8873}
8874
8875SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
8876 SDLoc dl(Node);
8877 EVT VT = Node->getValueType(ResNo: 0);
8878 SDValue Op = Node->getOperand(Num: 0);
8879 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
8880
8881 // If the non-ZERO_UNDEF version is supported we can use that instead.
8882 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
8883 isOperationLegalOrCustom(Op: ISD::CTTZ, VT))
8884 return DAG.getNode(Opcode: ISD::CTTZ, DL: dl, VT, Operand: Op);
8885
8886 // If the ZERO_UNDEF version is supported use that and handle the zero case.
8887 if (isOperationLegalOrCustom(Op: ISD::CTTZ_ZERO_UNDEF, VT)) {
8888 EVT SetCCVT =
8889 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
8890 SDValue CTTZ = DAG.getNode(Opcode: ISD::CTTZ_ZERO_UNDEF, DL: dl, VT, Operand: Op);
8891 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
8892 SDValue SrcIsZero = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
8893 return DAG.getSelect(DL: dl, VT, Cond: SrcIsZero,
8894 LHS: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT), RHS: CTTZ);
8895 }
8896
8897 // Only expand vector types if we have the appropriate vector bit operations.
8898 // This includes the operations needed to expand CTPOP if it isn't supported.
8899 if (VT.isVector() && (!isPowerOf2_32(Value: NumBitsPerElt) ||
8900 (!isOperationLegalOrCustom(Op: ISD::CTPOP, VT) &&
8901 !isOperationLegalOrCustom(Op: ISD::CTLZ, VT) &&
8902 !canExpandVectorCTPOP(TLI: *this, VT)) ||
8903 !isOperationLegalOrCustom(Op: ISD::SUB, VT) ||
8904 !isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) ||
8905 !isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT)))
8906 return SDValue();
8907
8908 // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
8909 if (!VT.isVector() && isOperationExpand(Op: ISD::CTPOP, VT) &&
8910 !isOperationLegal(Op: ISD::CTLZ, VT))
8911 if (SDValue V = CTTZTableLookup(Node, DAG, DL: dl, VT, Op, BitWidth: NumBitsPerElt))
8912 return V;
8913
8914 // for now, we use: { return popcount(~x & (x - 1)); }
8915 // unless the target has ctlz but not ctpop, in which case we use:
8916 // { return 32 - nlz(~x & (x-1)); }
8917 // Ref: "Hacker's Delight" by Henry Warren
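  //
  // For example, x = 0b01101000: ~x & (x - 1) = 0b10010111 & 0b01100111 =
  // 0b00000111, and popcount(0b00000111) = 3 = cttz(x).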
8918 SDValue Tmp = DAG.getNode(
8919 Opcode: ISD::AND, DL: dl, VT, N1: DAG.getNOT(DL: dl, Val: Op, VT),
8920 N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 1, DL: dl, VT)));
8921
8922 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
8923 if (isOperationLegal(Op: ISD::CTLZ, VT) && !isOperationLegal(Op: ISD::CTPOP, VT)) {
8924 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT),
8925 N2: DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Tmp));
8926 }
8927
8928 return DAG.getNode(Opcode: ISD::CTPOP, DL: dl, VT, Operand: Tmp);
8929}
8930
8931SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
8932 SDValue Op = Node->getOperand(Num: 0);
8933 SDValue Mask = Node->getOperand(Num: 1);
8934 SDValue VL = Node->getOperand(Num: 2);
8935 SDLoc dl(Node);
8936 EVT VT = Node->getValueType(ResNo: 0);
8937
8938 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
8939 SDValue Not = DAG.getNode(Opcode: ISD::VP_XOR, DL: dl, VT, N1: Op,
8940 N2: DAG.getConstant(Val: -1, DL: dl, VT), N3: Mask, N4: VL);
8941 SDValue MinusOne = DAG.getNode(Opcode: ISD::VP_SUB, DL: dl, VT, N1: Op,
8942 N2: DAG.getConstant(Val: 1, DL: dl, VT), N3: Mask, N4: VL);
8943 SDValue Tmp = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Not, N2: MinusOne, N3: Mask, N4: VL);
8944 return DAG.getNode(Opcode: ISD::VP_CTPOP, DL: dl, VT, N1: Tmp, N2: Mask, N3: VL);
8945}
8946
8947SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
8948 bool IsNegative) const {
8949 SDLoc dl(N);
8950 EVT VT = N->getValueType(ResNo: 0);
8951 SDValue Op = N->getOperand(Num: 0);
8952
8953 // abs(x) -> smax(x,sub(0,x))
8954 if (!IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
8955 isOperationLegal(Op: ISD::SMAX, VT)) {
8956 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
8957 return DAG.getNode(Opcode: ISD::SMAX, DL: dl, VT, N1: Op,
8958 N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
8959 }
8960
8961 // abs(x) -> umin(x,sub(0,x))
8962 if (!IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
8963 isOperationLegal(Op: ISD::UMIN, VT)) {
8964 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
8965 Op = DAG.getFreeze(V: Op);
8966 return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT, N1: Op,
8967 N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
8968 }
8969
8970 // 0 - abs(x) -> smin(x, sub(0,x))
8971 if (IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
8972 isOperationLegal(Op: ISD::SMIN, VT)) {
8973 Op = DAG.getFreeze(V: Op);
8974 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
8975 return DAG.getNode(Opcode: ISD::SMIN, DL: dl, VT, N1: Op,
8976 N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
8977 }
8978
8979 // Only expand vector types if we have the appropriate vector operations.
8980 if (VT.isVector() &&
8981 (!isOperationLegalOrCustom(Op: ISD::SRA, VT) ||
8982 (!IsNegative && !isOperationLegalOrCustom(Op: ISD::ADD, VT)) ||
8983 (IsNegative && !isOperationLegalOrCustom(Op: ISD::SUB, VT)) ||
8984 !isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT)))
8985 return SDValue();
8986
8987 Op = DAG.getFreeze(V: Op);
8988 SDValue Shift = DAG.getNode(
8989 Opcode: ISD::SRA, DL: dl, VT, N1: Op,
8990 N2: DAG.getShiftAmountConstant(Val: VT.getScalarSizeInBits() - 1, VT, DL: dl));
8991 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Op, N2: Shift);
8992
8993 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
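  // E.g. for i8 x = -5: Shift = -1, Xor = x ^ -1 = 4, and Xor - Shift =
  // 4 - (-1) = 5 = abs(x).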
8994 if (!IsNegative)
8995 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Xor, N2: Shift);
8996
8997 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
8998 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Shift, N2: Xor);
8999}
9000
9001SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
9002 SDLoc dl(N);
9003 EVT VT = N->getValueType(ResNo: 0);
9004 SDValue LHS = DAG.getFreeze(V: N->getOperand(Num: 0));
9005 SDValue RHS = DAG.getFreeze(V: N->getOperand(Num: 1));
9006 bool IsSigned = N->getOpcode() == ISD::ABDS;
9007
9008 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
9009 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
9010 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
9011 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
9012 if (isOperationLegal(Op: MaxOpc, VT) && isOperationLegal(Op: MinOpc, VT)) {
9013 SDValue Max = DAG.getNode(Opcode: MaxOpc, DL: dl, VT, N1: LHS, N2: RHS);
9014 SDValue Min = DAG.getNode(Opcode: MinOpc, DL: dl, VT, N1: LHS, N2: RHS);
9015 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Max, N2: Min);
9016 }
9017
9018 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
9019 if (!IsSigned && isOperationLegal(Op: ISD::USUBSAT, VT))
9020 return DAG.getNode(Opcode: ISD::OR, DL: dl, VT,
9021 N1: DAG.getNode(Opcode: ISD::USUBSAT, DL: dl, VT, N1: LHS, N2: RHS),
9022 N2: DAG.getNode(Opcode: ISD::USUBSAT, DL: dl, VT, N1: RHS, N2: LHS));
9023
9024 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9025 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9026 EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
9027 ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
9028 SDValue Cmp = DAG.getSetCC(DL: dl, VT: CCVT, LHS, RHS, Cond: CC);
9029 return DAG.getSelect(DL: dl, VT, Cond: Cmp, LHS: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS),
9030 RHS: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: RHS, N2: LHS));
9031}
9032
9033SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
9034 SDLoc dl(N);
9035 EVT VT = N->getValueType(ResNo: 0);
9036 SDValue Op = N->getOperand(Num: 0);
9037
9038 if (!VT.isSimple())
9039 return SDValue();
9040
9041 EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9042 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9043 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9044 default:
9045 return SDValue();
9046 case MVT::i16:
9047 // Use a rotate by 8. This can be further expanded if necessary.
9048 return DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
9049 case MVT::i32:
9050 Tmp4 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
9051 Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
9052 N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT));
9053 Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
9054 Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
9055 Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT));
9056 Tmp1 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
9057 Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp3);
9058 Tmp2 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp1);
9059 return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp2);
9060 case MVT::i64:
9061 Tmp8 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT));
9062 Tmp7 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
9063 N2: DAG.getConstant(Val: 255ULL<<8, DL: dl, VT));
9064 Tmp7 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp7, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT));
9065 Tmp6 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
9066 N2: DAG.getConstant(Val: 255ULL<<16, DL: dl, VT));
9067 Tmp6 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp6, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
9068 Tmp5 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
9069 N2: DAG.getConstant(Val: 255ULL<<24, DL: dl, VT));
9070 Tmp5 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp5, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
9071 Tmp4 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
9072 Tmp4 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp4,
9073 N2: DAG.getConstant(Val: 255ULL<<24, DL: dl, VT));
9074 Tmp3 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
9075 Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp3,
9076 N2: DAG.getConstant(Val: 255ULL<<16, DL: dl, VT));
9077 Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT));
9078 Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2,
9079 N2: DAG.getConstant(Val: 255ULL<<8, DL: dl, VT));
9080 Tmp1 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT));
9081 Tmp8 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp7);
9082 Tmp6 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp6, N2: Tmp5);
9083 Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp3);
9084 Tmp2 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp1);
9085 Tmp8 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp6);
9086 Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp2);
9087 return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp4);
9088 }
9089}
9090
9091SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
9092 SDLoc dl(N);
9093 EVT VT = N->getValueType(ResNo: 0);
9094 SDValue Op = N->getOperand(Num: 0);
9095 SDValue Mask = N->getOperand(Num: 1);
9096 SDValue EVL = N->getOperand(Num: 2);
9097
9098 if (!VT.isSimple())
9099 return SDValue();
9100
9101 EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9102 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9103 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9104 default:
9105 return SDValue();
9106 case MVT::i16:
9107 Tmp1 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
9108 N3: Mask, N4: EVL);
9109 Tmp2 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
9110 N3: Mask, N4: EVL);
9111 return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp1, N2: Tmp2, N3: Mask, N4: EVL);
9112 case MVT::i32:
9113 Tmp4 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
9114 N3: Mask, N4: EVL);
9115 Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT),
9116 N3: Mask, N4: EVL);
9117 Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
9118 N3: Mask, N4: EVL);
9119 Tmp2 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
9120 N3: Mask, N4: EVL);
9121 Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
9122 N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT), N3: Mask, N4: EVL);
9123 Tmp1 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
9124 N3: Mask, N4: EVL);
9125 Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp3, N3: Mask, N4: EVL);
9126 Tmp2 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp1, N3: Mask, N4: EVL);
9127 return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp2, N3: Mask, N4: EVL);
9128 case MVT::i64:
9129 Tmp8 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT),
9130 N3: Mask, N4: EVL);
9131 Tmp7 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
9132 N2: DAG.getConstant(Val: 255ULL << 8, DL: dl, VT), N3: Mask, N4: EVL);
9133 Tmp7 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp7, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT),
9134 N3: Mask, N4: EVL);
9135 Tmp6 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
9136 N2: DAG.getConstant(Val: 255ULL << 16, DL: dl, VT), N3: Mask, N4: EVL);
9137 Tmp6 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp6, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
9138 N3: Mask, N4: EVL);
9139 Tmp5 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
9140 N2: DAG.getConstant(Val: 255ULL << 24, DL: dl, VT), N3: Mask, N4: EVL);
9141 Tmp5 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp5, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
9142 N3: Mask, N4: EVL);
9143 Tmp4 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
9144 N3: Mask, N4: EVL);
9145 Tmp4 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp4,
9146 N2: DAG.getConstant(Val: 255ULL << 24, DL: dl, VT), N3: Mask, N4: EVL);
9147 Tmp3 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
9148 N3: Mask, N4: EVL);
9149 Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp3,
9150 N2: DAG.getConstant(Val: 255ULL << 16, DL: dl, VT), N3: Mask, N4: EVL);
9151 Tmp2 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT),
9152 N3: Mask, N4: EVL);
9153 Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
9154 N2: DAG.getConstant(Val: 255ULL << 8, DL: dl, VT), N3: Mask, N4: EVL);
9155 Tmp1 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT),
9156 N3: Mask, N4: EVL);
9157 Tmp8 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp7, N3: Mask, N4: EVL);
9158 Tmp6 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp6, N2: Tmp5, N3: Mask, N4: EVL);
9159 Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp3, N3: Mask, N4: EVL);
9160 Tmp2 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp1, N3: Mask, N4: EVL);
9161 Tmp8 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp6, N3: Mask, N4: EVL);
9162 Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp2, N3: Mask, N4: EVL);
9163 return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp4, N3: Mask, N4: EVL);
9164 }
9165}
9166
9167SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
9168 SDLoc dl(N);
9169 EVT VT = N->getValueType(ResNo: 0);
9170 SDValue Op = N->getOperand(Num: 0);
9171 EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9172 unsigned Sz = VT.getScalarSizeInBits();
9173
9174 SDValue Tmp, Tmp2, Tmp3;
9175
  // If we can, perform BSWAP first, then mask+swap the i4 pairs, then the i2
  // pairs and finally the i1 pairs.
9178 // TODO: We can easily support i4/i2 legal types if any target ever does.
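  //
  // E.g. reversing the byte 0xB1 (0b10110001): the i4 swap gives 0x1B, the
  // i2 swap gives 0x4E, and the i1 swap gives 0x8D (0b10001101), which is
  // 0xB1 with its bits reversed.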
9179 if (Sz >= 8 && isPowerOf2_32(Value: Sz)) {
9180 // Create the masks - repeating the pattern every byte.
9181 APInt Mask4 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x0F));
9182 APInt Mask2 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x33));
9183 APInt Mask1 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x55));
9184
9185 // BSWAP if the type is wider than a single byte.
9186 Tmp = (Sz > 8 ? DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT, Operand: Op) : Op);
9187
9188 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
9189 Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT));
9190 Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask4, DL: dl, VT));
9191 Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask4, DL: dl, VT));
9192 Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT));
9193 Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
9194
9195 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
9196 Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT));
9197 Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask2, DL: dl, VT));
9198 Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask2, DL: dl, VT));
9199 Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT));
9200 Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
9201
9202 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
9203 Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT));
9204 Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask1, DL: dl, VT));
9205 Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask1, DL: dl, VT));
9206 Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT));
9207 Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
9208 return Tmp;
9209 }
9210
9211 Tmp = DAG.getConstant(Val: 0, DL: dl, VT);
9212 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
9213 if (I < J)
9214 Tmp2 =
9215 DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: J - I, DL: dl, VT: SHVT));
9216 else
9217 Tmp2 =
9218 DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: I - J, DL: dl, VT: SHVT));
9219
9220 APInt Shift = APInt::getOneBitSet(numBits: Sz, BitNo: J);
9221 Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Shift, DL: dl, VT));
9222 Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp, N2: Tmp2);
9223 }
9224
9225 return Tmp;
9226}
9227
9228SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
9229 assert(N->getOpcode() == ISD::VP_BITREVERSE);
9230
9231 SDLoc dl(N);
9232 EVT VT = N->getValueType(ResNo: 0);
9233 SDValue Op = N->getOperand(Num: 0);
9234 SDValue Mask = N->getOperand(Num: 1);
9235 SDValue EVL = N->getOperand(Num: 2);
9236 EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9237 unsigned Sz = VT.getScalarSizeInBits();
9238
9239 SDValue Tmp, Tmp2, Tmp3;
9240
  // If we can, perform BSWAP first, then mask+swap the i4 pairs, then the i2
  // pairs and finally the i1 pairs.
9243 // TODO: We can easily support i4/i2 legal types if any target ever does.
9244 if (Sz >= 8 && isPowerOf2_32(Value: Sz)) {
9245 // Create the masks - repeating the pattern every byte.
9246 APInt Mask4 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x0F));
9247 APInt Mask2 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x33));
9248 APInt Mask1 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x55));
9249
9250 // BSWAP if the type is wider than a single byte.
9251 Tmp = (Sz > 8 ? DAG.getNode(Opcode: ISD::VP_BSWAP, DL: dl, VT, N1: Op, N2: Mask, N3: EVL) : Op);
9252
9253 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
9254 Tmp2 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT),
9255 N3: Mask, N4: EVL);
9256 Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
9257 N2: DAG.getConstant(Val: Mask4, DL: dl, VT), N3: Mask, N4: EVL);
9258 Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask4, DL: dl, VT),
9259 N3: Mask, N4: EVL);
9260 Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT),
9261 N3: Mask, N4: EVL);
9262 Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);
9263
9264 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
9265 Tmp2 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT),
9266 N3: Mask, N4: EVL);
9267 Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
9268 N2: DAG.getConstant(Val: Mask2, DL: dl, VT), N3: Mask, N4: EVL);
9269 Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask2, DL: dl, VT),
9270 N3: Mask, N4: EVL);
9271 Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT),
9272 N3: Mask, N4: EVL);
9273 Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);
9274
9275 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
9276 Tmp2 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT),
9277 N3: Mask, N4: EVL);
9278 Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
9279 N2: DAG.getConstant(Val: Mask1, DL: dl, VT), N3: Mask, N4: EVL);
9280 Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask1, DL: dl, VT),
9281 N3: Mask, N4: EVL);
9282 Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT),
9283 N3: Mask, N4: EVL);
9284 Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);
9285 return Tmp;
9286 }
9287 return SDValue();
9288}
9289
9290std::pair<SDValue, SDValue>
9291TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
9292 SelectionDAG &DAG) const {
9293 SDLoc SL(LD);
9294 SDValue Chain = LD->getChain();
9295 SDValue BasePTR = LD->getBasePtr();
9296 EVT SrcVT = LD->getMemoryVT();
9297 EVT DstVT = LD->getValueType(ResNo: 0);
9298 ISD::LoadExtType ExtType = LD->getExtensionType();
9299
9300 if (SrcVT.isScalableVector())
9301 report_fatal_error(reason: "Cannot scalarize scalable vector loads");
9302
9303 unsigned NumElem = SrcVT.getVectorNumElements();
9304
9305 EVT SrcEltVT = SrcVT.getScalarType();
9306 EVT DstEltVT = DstVT.getScalarType();
9307
9308 // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depends on it, e.g. in the
9310 // handling of a bitcast of a vector type to int, which may be done with a
9311 // vector store followed by an integer load. A vector that does not have
9312 // elements that are byte-sized must therefore be stored as an integer
9313 // built out of the extracted vector elements.
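  //
  // For example, a <4 x i1> load becomes an i8 (the store size) extending
  // load, after which element Idx is recovered as trunc((Load >> Idx) & 1)
  // on little-endian targets.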
9314 if (!SrcEltVT.isByteSized()) {
9315 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
9316 EVT LoadVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumLoadBits);
9317
9318 unsigned NumSrcBits = SrcVT.getSizeInBits();
9319 EVT SrcIntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumSrcBits);
9320
9321 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
9322 SDValue SrcEltBitMask = DAG.getConstant(
9323 Val: APInt::getLowBitsSet(numBits: NumLoadBits, loBitsSet: SrcEltBits), DL: SL, VT: LoadVT);
9324
    // Load the whole vector and avoid masking off the top bits, as masking
    // makes the codegen worse.
9327 SDValue Load =
9328 DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl: SL, VT: LoadVT, Chain, Ptr: BasePTR,
9329 PtrInfo: LD->getPointerInfo(), MemVT: SrcIntVT, Alignment: LD->getOriginalAlign(),
9330 MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
9331
9332 SmallVector<SDValue, 8> Vals;
9333 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9334 unsigned ShiftIntoIdx =
9335 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
9336 SDValue ShiftAmount =
9337 DAG.getShiftAmountConstant(Val: ShiftIntoIdx * SrcEltVT.getSizeInBits(),
9338 VT: LoadVT, DL: SL, /*LegalTypes=*/false);
9339 SDValue ShiftedElt = DAG.getNode(Opcode: ISD::SRL, DL: SL, VT: LoadVT, N1: Load, N2: ShiftAmount);
9340 SDValue Elt =
9341 DAG.getNode(Opcode: ISD::AND, DL: SL, VT: LoadVT, N1: ShiftedElt, N2: SrcEltBitMask);
9342 SDValue Scalar = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SL, VT: SrcEltVT, Operand: Elt);
9343
9344 if (ExtType != ISD::NON_EXTLOAD) {
9345 unsigned ExtendOp = ISD::getExtForLoadExtType(IsFP: false, ExtType);
9346 Scalar = DAG.getNode(Opcode: ExtendOp, DL: SL, VT: DstEltVT, Operand: Scalar);
9347 }
9348
9349 Vals.push_back(Elt: Scalar);
9350 }
9351
9352 SDValue Value = DAG.getBuildVector(VT: DstVT, DL: SL, Ops: Vals);
9353 return std::make_pair(x&: Value, y: Load.getValue(R: 1));
9354 }
9355
  assert(SrcEltVT.isByteSized());
  unsigned Stride = SrcEltVT.getSizeInBits() / 8;
9358
9359 SmallVector<SDValue, 8> Vals;
9360 SmallVector<SDValue, 8> LoadChains;
9361
9362 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9363 SDValue ScalarLoad =
9364 DAG.getExtLoad(ExtType, dl: SL, VT: DstEltVT, Chain, Ptr: BasePTR,
9365 PtrInfo: LD->getPointerInfo().getWithOffset(O: Idx * Stride),
9366 MemVT: SrcEltVT, Alignment: LD->getOriginalAlign(),
9367 MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
9368
9369 BasePTR = DAG.getObjectPtrOffset(SL, Ptr: BasePTR, Offset: TypeSize::getFixed(ExactSize: Stride));
9370
9371 Vals.push_back(Elt: ScalarLoad.getValue(R: 0));
9372 LoadChains.push_back(Elt: ScalarLoad.getValue(R: 1));
9373 }
9374
9375 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
9376 SDValue Value = DAG.getBuildVector(VT: DstVT, DL: SL, Ops: Vals);
9377
9378 return std::make_pair(x&: Value, y&: NewChain);
9379}
9380
9381SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
9382 SelectionDAG &DAG) const {
9383 SDLoc SL(ST);
9384
9385 SDValue Chain = ST->getChain();
9386 SDValue BasePtr = ST->getBasePtr();
9387 SDValue Value = ST->getValue();
9388 EVT StVT = ST->getMemoryVT();
9389
9390 if (StVT.isScalableVector())
9391 report_fatal_error(reason: "Cannot scalarize scalable vector stores");
9392
9393 // The type of the data we want to save
9394 EVT RegVT = Value.getValueType();
9395 EVT RegSclVT = RegVT.getScalarType();
9396
9397 // The type of data as saved in memory.
9398 EVT MemSclVT = StVT.getScalarType();
9399
9400 unsigned NumElem = StVT.getVectorNumElements();
9401
9402 // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code paths depend on it, e.g. in the
9404 // handling of a bitcast of a vector type to int, which may be done with a
9405 // vector store followed by an integer load. A vector that does not have
9406 // elements that are byte-sized must therefore be stored as an integer
9407 // built out of the extracted vector elements.
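  // Illustrative example: storing a little-endian v4i1 builds an i4 value in
  // which bit Idx holds element Idx, i.e.
  //   CurrVal |= ZERO_EXTEND(TRUNCATE(Elt[Idx])) << Idx,
  // and that integer is then written with a single store.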
9408 if (!MemSclVT.isByteSized()) {
9409 unsigned NumBits = StVT.getSizeInBits();
9410 EVT IntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumBits);
9411
9412 SDValue CurrVal = DAG.getConstant(Val: 0, DL: SL, VT: IntVT);
9413
9414 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9415 SDValue Elt = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SL, VT: RegSclVT, N1: Value,
9416 N2: DAG.getVectorIdxConstant(Val: Idx, DL: SL));
9417 SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SL, VT: MemSclVT, Operand: Elt);
9418 SDValue ExtElt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SL, VT: IntVT, Operand: Trunc);
9419 unsigned ShiftIntoIdx =
9420 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
9421 SDValue ShiftAmount =
9422 DAG.getConstant(Val: ShiftIntoIdx * MemSclVT.getSizeInBits(), DL: SL, VT: IntVT);
9423 SDValue ShiftedElt =
9424 DAG.getNode(Opcode: ISD::SHL, DL: SL, VT: IntVT, N1: ExtElt, N2: ShiftAmount);
9425 CurrVal = DAG.getNode(Opcode: ISD::OR, DL: SL, VT: IntVT, N1: CurrVal, N2: ShiftedElt);
9426 }
9427
9428 return DAG.getStore(Chain, dl: SL, Val: CurrVal, Ptr: BasePtr, PtrInfo: ST->getPointerInfo(),
9429 Alignment: ST->getOriginalAlign(), MMOFlags: ST->getMemOperand()->getFlags(),
9430 AAInfo: ST->getAAInfo());
9431 }
9432
9433 // Store Stride in bytes
9434 unsigned Stride = MemSclVT.getSizeInBits() / 8;
9435 assert(Stride && "Zero stride!");
9436 // Extract each of the elements from the original vector and save them into
9437 // memory individually.
9438 SmallVector<SDValue, 8> Stores;
9439 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9440 SDValue Elt = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SL, VT: RegSclVT, N1: Value,
9441 N2: DAG.getVectorIdxConstant(Val: Idx, DL: SL));
9442
9443 SDValue Ptr =
9444 DAG.getObjectPtrOffset(SL, Ptr: BasePtr, Offset: TypeSize::getFixed(ExactSize: Idx * Stride));
9445
9446 // This scalar TruncStore may be illegal, but we legalize it later.
9447 SDValue Store = DAG.getTruncStore(
9448 Chain, dl: SL, Val: Elt, Ptr, PtrInfo: ST->getPointerInfo().getWithOffset(O: Idx * Stride),
9449 SVT: MemSclVT, Alignment: ST->getOriginalAlign(), MMOFlags: ST->getMemOperand()->getFlags(),
9450 AAInfo: ST->getAAInfo());
9451
9452 Stores.push_back(Elt: Store);
9453 }
9454
9455 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
9456}
9457
9458std::pair<SDValue, SDValue>
9459TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
9460 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
9461 "unaligned indexed loads not implemented!");
9462 SDValue Chain = LD->getChain();
9463 SDValue Ptr = LD->getBasePtr();
9464 EVT VT = LD->getValueType(ResNo: 0);
9465 EVT LoadedVT = LD->getMemoryVT();
9466 SDLoc dl(LD);
9467 auto &MF = DAG.getMachineFunction();
9468
9469 if (VT.isFloatingPoint() || VT.isVector()) {
9470 EVT intVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: LoadedVT.getSizeInBits());
9471 if (isTypeLegal(VT: intVT) && isTypeLegal(VT: LoadedVT)) {
9472 if (!isOperationLegalOrCustom(Op: ISD::LOAD, VT: intVT) &&
9473 LoadedVT.isVector()) {
9474 // Scalarize the load and let the individual components be handled.
9475 return scalarizeVectorLoad(LD, DAG);
9476 }
9477
9478 // Expand to a (misaligned) integer load of the same size,
9479 // then bitconvert to floating point or vector.
9480 SDValue newLoad = DAG.getLoad(VT: intVT, dl, Chain, Ptr,
9481 MMO: LD->getMemOperand());
9482 SDValue Result = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: LoadedVT, Operand: newLoad);
9483 if (LoadedVT != VT)
9484 Result = DAG.getNode(Opcode: VT.isFloatingPoint() ? ISD::FP_EXTEND :
9485 ISD::ANY_EXTEND, DL: dl, VT, Operand: Result);
9486
9487 return std::make_pair(x&: Result, y: newLoad.getValue(R: 1));
9488 }
9489
    // Copy the value to an (aligned) stack slot using (unaligned) integer
    // loads and stores, then do an (aligned) load from the stack slot.
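    // Illustrative example: an unaligned f64 load on a target with 32-bit
    // registers becomes two (unaligned) i32 loads, two i32 stores into the
    // aligned stack slot, and one final aligned f64 load from that slot.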
9492 MVT RegVT = getRegisterType(Context&: *DAG.getContext(), VT: intVT);
9493 unsigned LoadedBytes = LoadedVT.getStoreSize();
9494 unsigned RegBytes = RegVT.getSizeInBits() / 8;
9495 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
9496
9497 // Make sure the stack slot is also aligned for the register type.
9498 SDValue StackBase = DAG.CreateStackTemporary(VT1: LoadedVT, VT2: RegVT);
9499 auto FrameIndex = cast<FrameIndexSDNode>(Val: StackBase.getNode())->getIndex();
9500 SmallVector<SDValue, 8> Stores;
9501 SDValue StackPtr = StackBase;
9502 unsigned Offset = 0;
9503
9504 EVT PtrVT = Ptr.getValueType();
9505 EVT StackPtrVT = StackPtr.getValueType();
9506
9507 SDValue PtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: PtrVT);
9508 SDValue StackPtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: StackPtrVT);
9509
    // Do all but the last copy using the full register width.
9511 for (unsigned i = 1; i < NumRegs; i++) {
9512 // Load one integer register's worth from the original location.
9513 SDValue Load = DAG.getLoad(
9514 VT: RegVT, dl, Chain, Ptr, PtrInfo: LD->getPointerInfo().getWithOffset(O: Offset),
9515 Alignment: LD->getOriginalAlign(), MMOFlags: LD->getMemOperand()->getFlags(),
9516 AAInfo: LD->getAAInfo());
9517 // Follow the load with a store to the stack slot. Remember the store.
9518 Stores.push_back(Elt: DAG.getStore(
9519 Chain: Load.getValue(R: 1), dl, Val: Load, Ptr: StackPtr,
9520 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset)));
9521 // Increment the pointers.
9522 Offset += RegBytes;
9523
9524 Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: PtrIncrement);
9525 StackPtr = DAG.getObjectPtrOffset(SL: dl, Ptr: StackPtr, Offset: StackPtrIncrement);
9526 }
9527
9528 // The last copy may be partial. Do an extending load.
9529 EVT MemVT = EVT::getIntegerVT(Context&: *DAG.getContext(),
9530 BitWidth: 8 * (LoadedBytes - Offset));
9531 SDValue Load =
9532 DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl, VT: RegVT, Chain, Ptr,
9533 PtrInfo: LD->getPointerInfo().getWithOffset(O: Offset), MemVT,
9534 Alignment: LD->getOriginalAlign(), MMOFlags: LD->getMemOperand()->getFlags(),
9535 AAInfo: LD->getAAInfo());
9536 // Follow the load with a store to the stack slot. Remember the store.
9537 // On big-endian machines this requires a truncating store to ensure
9538 // that the bits end up in the right place.
9539 Stores.push_back(Elt: DAG.getTruncStore(
9540 Chain: Load.getValue(R: 1), dl, Val: Load, Ptr: StackPtr,
9541 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset), SVT: MemVT));
9542
9543 // The order of the stores doesn't matter - say it with a TokenFactor.
9544 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
9545
9546 // Finally, perform the original load only redirected to the stack slot.
9547 Load = DAG.getExtLoad(ExtType: LD->getExtensionType(), dl, VT, Chain: TF, Ptr: StackBase,
9548 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset: 0),
9549 MemVT: LoadedVT);
9550
9551 // Callers expect a MERGE_VALUES node.
9552 return std::make_pair(x&: Load, y&: TF);
9553 }
9554
9555 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
9556 "Unaligned load of unsupported type.");
9557
9558 // Compute the new VT that is half the size of the old one. This is an
9559 // integer MVT.
9560 unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT =
      EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumBits / 2);
  NumBits >>= 1;
9564
9565 Align Alignment = LD->getOriginalAlign();
9566 unsigned IncrementSize = NumBits / 8;
9567 ISD::LoadExtType HiExtType = LD->getExtensionType();
9568
9569 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
9570 if (HiExtType == ISD::NON_EXTLOAD)
9571 HiExtType = ISD::ZEXTLOAD;
9572
9573 // Load the value in two parts
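  // Illustrative example: an unaligned i32 load is split into two halfword
  // loads; on little-endian, Lo = zextload i16 [Ptr] and Hi = load i16
  // [Ptr + 2], combined below as (Hi << 16) | Lo.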
9574 SDValue Lo, Hi;
9575 if (DAG.getDataLayout().isLittleEndian()) {
9576 Lo = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT, Chain, Ptr, PtrInfo: LD->getPointerInfo(),
9577 MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
9578 AAInfo: LD->getAAInfo());
9579
9580 Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
9581 Hi = DAG.getExtLoad(ExtType: HiExtType, dl, VT, Chain, Ptr,
9582 PtrInfo: LD->getPointerInfo().getWithOffset(O: IncrementSize),
9583 MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
9584 AAInfo: LD->getAAInfo());
9585 } else {
9586 Hi = DAG.getExtLoad(ExtType: HiExtType, dl, VT, Chain, Ptr, PtrInfo: LD->getPointerInfo(),
9587 MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
9588 AAInfo: LD->getAAInfo());
9589
9590 Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
9591 Lo = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
9592 PtrInfo: LD->getPointerInfo().getWithOffset(O: IncrementSize),
9593 MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
9594 AAInfo: LD->getAAInfo());
9595 }
9596
  // Aggregate the two parts.
9598 SDValue ShiftAmount = DAG.getShiftAmountConstant(Val: NumBits, VT, DL: dl);
9599 SDValue Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Hi, N2: ShiftAmount);
9600 Result = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Result, N2: Lo);
9601
9602 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
9603 Hi.getValue(1));
9604
9605 return std::make_pair(x&: Result, y&: TF);
9606}
9607
9608SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
9609 SelectionDAG &DAG) const {
9610 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
9611 "unaligned indexed stores not implemented!");
9612 SDValue Chain = ST->getChain();
9613 SDValue Ptr = ST->getBasePtr();
9614 SDValue Val = ST->getValue();
9615 EVT VT = Val.getValueType();
9616 Align Alignment = ST->getOriginalAlign();
9617 auto &MF = DAG.getMachineFunction();
9618 EVT StoreMemVT = ST->getMemoryVT();
9619
9620 SDLoc dl(ST);
9621 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
9622 EVT intVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: VT.getSizeInBits());
9623 if (isTypeLegal(VT: intVT)) {
9624 if (!isOperationLegalOrCustom(Op: ISD::STORE, VT: intVT) &&
9625 StoreMemVT.isVector()) {
9626 // Scalarize the store and let the individual components be handled.
9627 SDValue Result = scalarizeVectorStore(ST, DAG);
9628 return Result;
9629 }
9630 // Expand to a bitconvert of the value to the integer type of the
9631 // same size, then a (misaligned) int store.
9632 // FIXME: Does not handle truncating floating point stores!
9633 SDValue Result = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: intVT, Operand: Val);
9634 Result = DAG.getStore(Chain, dl, Val: Result, Ptr, PtrInfo: ST->getPointerInfo(),
9635 Alignment, MMOFlags: ST->getMemOperand()->getFlags());
9636 return Result;
9637 }
    // Do an (aligned) store to a stack slot, then copy from the stack slot
9639 // to the final destination using (unaligned) integer loads and stores.
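    // Illustrative example: an unaligned f64 store on a target with 32-bit
    // registers first stores the f64 to an aligned stack slot, then copies it
    // out with two i32 loads and two (unaligned) i32 stores.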
9640 MVT RegVT = getRegisterType(
9641 Context&: *DAG.getContext(),
9642 VT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: StoreMemVT.getSizeInBits()));
9643 EVT PtrVT = Ptr.getValueType();
9644 unsigned StoredBytes = StoreMemVT.getStoreSize();
9645 unsigned RegBytes = RegVT.getSizeInBits() / 8;
9646 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
9647
9648 // Make sure the stack slot is also aligned for the register type.
9649 SDValue StackPtr = DAG.CreateStackTemporary(VT1: StoreMemVT, VT2: RegVT);
9650 auto FrameIndex = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
9651
9652 // Perform the original store, only redirected to the stack slot.
9653 SDValue Store = DAG.getTruncStore(
9654 Chain, dl, Val, Ptr: StackPtr,
9655 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset: 0), SVT: StoreMemVT);
9656
9657 EVT StackPtrVT = StackPtr.getValueType();
9658
9659 SDValue PtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: PtrVT);
9660 SDValue StackPtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: StackPtrVT);
9661 SmallVector<SDValue, 8> Stores;
9662 unsigned Offset = 0;
9663
    // Do all but the last copy using the full register width.
9665 for (unsigned i = 1; i < NumRegs; i++) {
9666 // Load one integer register's worth from the stack slot.
9667 SDValue Load = DAG.getLoad(
9668 VT: RegVT, dl, Chain: Store, Ptr: StackPtr,
9669 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset));
9670 // Store it to the final location. Remember the store.
9671 Stores.push_back(Elt: DAG.getStore(Chain: Load.getValue(R: 1), dl, Val: Load, Ptr,
9672 PtrInfo: ST->getPointerInfo().getWithOffset(O: Offset),
9673 Alignment: ST->getOriginalAlign(),
9674 MMOFlags: ST->getMemOperand()->getFlags()));
9675 // Increment the pointers.
9676 Offset += RegBytes;
9677 StackPtr = DAG.getObjectPtrOffset(SL: dl, Ptr: StackPtr, Offset: StackPtrIncrement);
9678 Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: PtrIncrement);
9679 }
9680
9681 // The last store may be partial. Do a truncating store. On big-endian
9682 // machines this requires an extending load from the stack slot to ensure
9683 // that the bits are in the right place.
9684 EVT LoadMemVT =
9685 EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: 8 * (StoredBytes - Offset));
9686
9687 // Load from the stack slot.
9688 SDValue Load = DAG.getExtLoad(
9689 ExtType: ISD::EXTLOAD, dl, VT: RegVT, Chain: Store, Ptr: StackPtr,
9690 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset), MemVT: LoadMemVT);
9691
9692 Stores.push_back(
9693 Elt: DAG.getTruncStore(Chain: Load.getValue(R: 1), dl, Val: Load, Ptr,
9694 PtrInfo: ST->getPointerInfo().getWithOffset(O: Offset), SVT: LoadMemVT,
9695 Alignment: ST->getOriginalAlign(),
9696 MMOFlags: ST->getMemOperand()->getFlags(), AAInfo: ST->getAAInfo()));
9697 // The order of the stores doesn't matter - say it with a TokenFactor.
9698 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
9699 return Result;
9700 }
9701
9702 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
9703 "Unaligned store of unknown type.");
9704 // Get the half-size VT
9705 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(Context&: *DAG.getContext());
9706 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
9707 unsigned IncrementSize = NumBits / 8;
9708
9709 // Divide the stored value in two parts.
9710 SDValue ShiftAmount =
9711 DAG.getShiftAmountConstant(Val: NumBits, VT: Val.getValueType(), DL: dl);
9712 SDValue Lo = Val;
9713 // If Val is a constant, replace the upper bits with 0. The SRL will constant
9714 // fold and not use the upper bits. A smaller constant may be easier to
9715 // materialize.
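  // Illustrative example: storing the i32 constant 0x12345678 as two i16
  // halves masks Lo down to 0x5678 while the SRL folds Hi to 0x1234, so
  // neither half needs the full 32-bit constant materialized.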
9716 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Lo); C && !C->isOpaque())
9717 Lo = DAG.getNode(
9718 Opcode: ISD::AND, DL: dl, VT, N1: Lo,
9719 N2: DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VT.getSizeInBits(), loBitsSet: NumBits), DL: dl,
9720 VT));
9721 SDValue Hi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Val, N2: ShiftAmount);
9722
9723 // Store the two parts
9724 SDValue Store1, Store2;
9725 Store1 = DAG.getTruncStore(Chain, dl,
9726 Val: DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
9727 Ptr, PtrInfo: ST->getPointerInfo(), SVT: NewStoredVT, Alignment,
9728 MMOFlags: ST->getMemOperand()->getFlags());
9729
9730 Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
9731 Store2 = DAG.getTruncStore(
9732 Chain, dl, Val: DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
9733 PtrInfo: ST->getPointerInfo().getWithOffset(O: IncrementSize), SVT: NewStoredVT, Alignment,
9734 MMOFlags: ST->getMemOperand()->getFlags(), AAInfo: ST->getAAInfo());
9735
9736 SDValue Result =
9737 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
9738 return Result;
9739}
9740
9741SDValue
9742TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
9743 const SDLoc &DL, EVT DataVT,
9744 SelectionDAG &DAG,
9745 bool IsCompressedMemory) const {
9746 SDValue Increment;
9747 EVT AddrVT = Addr.getValueType();
9748 EVT MaskVT = Mask.getValueType();
9749 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
9750 "Incompatible types of Data and Mask");
9751 if (IsCompressedMemory) {
9752 if (DataVT.isScalableVector())
9753 report_fatal_error(
9754 reason: "Cannot currently handle compressed memory with scalable vectors");
    // Increment the pointer according to the number of '1's in the mask.
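    // Illustrative example: a compressed v4i32 store with mask 0b0101
    // advances the address by popcount(0b0101) * 4 = 8 bytes.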
9756 EVT MaskIntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: MaskVT.getSizeInBits());
9757 SDValue MaskInIntReg = DAG.getBitcast(VT: MaskIntVT, V: Mask);
9758 if (MaskIntVT.getSizeInBits() < 32) {
9759 MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
9760 MaskIntVT = MVT::i32;
9761 }
9762
9763 // Count '1's with POPCNT.
9764 Increment = DAG.getNode(Opcode: ISD::CTPOP, DL, VT: MaskIntVT, Operand: MaskInIntReg);
9765 Increment = DAG.getZExtOrTrunc(Op: Increment, DL, VT: AddrVT);
9766 // Scale is an element size in bytes.
9767 SDValue Scale = DAG.getConstant(Val: DataVT.getScalarSizeInBits() / 8, DL,
9768 VT: AddrVT);
9769 Increment = DAG.getNode(Opcode: ISD::MUL, DL, VT: AddrVT, N1: Increment, N2: Scale);
9770 } else if (DataVT.isScalableVector()) {
9771 Increment = DAG.getVScale(DL, VT: AddrVT,
9772 MulImm: APInt(AddrVT.getFixedSizeInBits(),
9773 DataVT.getStoreSize().getKnownMinValue()));
9774 } else
9775 Increment = DAG.getConstant(Val: DataVT.getStoreSize(), DL, VT: AddrVT);
9776
9777 return DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: Addr, N2: Increment);
9778}
9779
9780static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
9781 EVT VecVT, const SDLoc &dl,
9782 ElementCount SubEC) {
9783 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
9784 "Cannot index a scalable vector within a fixed-width vector");
9785
9786 unsigned NElts = VecVT.getVectorMinNumElements();
9787 unsigned NumSubElts = SubEC.getKnownMinValue();
9788 EVT IdxVT = Idx.getValueType();
9789
9790 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
    // If this is a constant index and we know that the index plus the number
    // of elements in the subvector minus one is less than the minimum number
    // of elements, then it's safe to return Idx.
9794 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Val&: Idx))
9795 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
9796 return Idx;
9797 SDValue VS =
9798 DAG.getVScale(DL: dl, VT: IdxVT, MulImm: APInt(IdxVT.getFixedSizeInBits(), NElts));
9799 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
9800 SDValue Sub = DAG.getNode(Opcode: SubOpcode, DL: dl, VT: IdxVT, N1: VS,
9801 N2: DAG.getConstant(Val: NumSubElts, DL: dl, VT: IdxVT));
9802 return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT: IdxVT, N1: Idx, N2: Sub);
9803 }
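  // For a single-element subvector of a power-of-two-length vector the clamp
  // reduces to a mask; e.g. indexing into v8i32 clamps Idx to Idx & 7.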
9804 if (isPowerOf2_32(Value: NElts) && NumSubElts == 1) {
9805 APInt Imm = APInt::getLowBitsSet(numBits: IdxVT.getSizeInBits(), loBitsSet: Log2_32(Value: NElts));
9806 return DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IdxVT, N1: Idx,
9807 N2: DAG.getConstant(Val: Imm, DL: dl, VT: IdxVT));
9808 }
9809 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
9810 return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT: IdxVT, N1: Idx,
9811 N2: DAG.getConstant(Val: MaxIndex, DL: dl, VT: IdxVT));
9812}
9813
9814SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
9815 SDValue VecPtr, EVT VecVT,
9816 SDValue Index) const {
9817 return getVectorSubVecPointer(
9818 DAG, VecPtr, VecVT,
9819 SubVecVT: EVT::getVectorVT(Context&: *DAG.getContext(), VT: VecVT.getVectorElementType(), NumElements: 1),
9820 Index);
9821}
9822
9823SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
9824 SDValue VecPtr, EVT VecVT,
9825 EVT SubVecVT,
9826 SDValue Index) const {
9827 SDLoc dl(Index);
9828 // Make sure the index type is big enough to compute in.
9829 Index = DAG.getZExtOrTrunc(Op: Index, DL: dl, VT: VecPtr.getValueType());
9830
9831 EVT EltVT = VecVT.getVectorElementType();
9832
9833 // Calculate the element offset and add it to the pointer.
9834 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
9835 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
9836 "Converting bits to bytes lost precision");
9837 assert(SubVecVT.getVectorElementType() == EltVT &&
9838 "Sub-vector must be a vector with matching element type");
9839 Index = clampDynamicVectorIndex(DAG, Idx: Index, VecVT, dl,
9840 SubEC: SubVecVT.getVectorElementCount());
9841
9842 EVT IdxVT = Index.getValueType();
9843 if (SubVecVT.isScalableVector())
9844 Index =
9845 DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IdxVT, N1: Index,
9846 N2: DAG.getVScale(DL: dl, VT: IdxVT, MulImm: APInt(IdxVT.getSizeInBits(), 1)));
9847
9848 Index = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IdxVT, N1: Index,
9849 N2: DAG.getConstant(Val: EltSize, DL: dl, VT: IdxVT));
9850 return DAG.getMemBasePlusOffset(Base: VecPtr, Offset: Index, DL: dl);
9851}
9852
9853//===----------------------------------------------------------------------===//
9854// Implementation of Emulated TLS Model
9855//===----------------------------------------------------------------------===//
9856
9857SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
9858 SelectionDAG &DAG) const {
  // Access to the address of TLS variable xyz is lowered to a function call:
9860 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
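  // For example, a reference to TLS variable xyz becomes a call to
  // __emutls_get_address(&__emutls_v.xyz), which returns the address of the
  // current thread's copy of xyz.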
9861 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
9862 PointerType *VoidPtrType = PointerType::get(C&: *DAG.getContext(), AddressSpace: 0);
9863 SDLoc dl(GA);
9864
9865 ArgListTy Args;
9866 ArgListEntry Entry;
9867 std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
9868 Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
9869 StringRef EmuTlsVarName(NameString);
9870 GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(Name: EmuTlsVarName);
  assert(EmuTlsVar && "Cannot find EmuTlsVar");
9872 Entry.Node = DAG.getGlobalAddress(GV: EmuTlsVar, DL: dl, VT: PtrVT);
9873 Entry.Ty = VoidPtrType;
9874 Args.push_back(x: Entry);
9875
9876 SDValue EmuTlsGetAddr = DAG.getExternalSymbol(Sym: "__emutls_get_address", VT: PtrVT);
9877
9878 TargetLowering::CallLoweringInfo CLI(DAG);
9879 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
9880 CLI.setLibCallee(CC: CallingConv::C, ResultType: VoidPtrType, Target: EmuTlsGetAddr, ArgsList: std::move(Args));
9881 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
9882
  // TLSADDR will be codegen'ed as a call. Inform MFI that the function has
  // calls. At least for X86 targets; maybe good for other targets too?
9885 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setAdjustsStack(true); // Is this needed only for X86 targets?
9887 MFI.setHasCalls(true);
9888
9889 assert((GA->getOffset() == 0) &&
9890 "Emulated TLS must have zero offset in GlobalAddressSDNode");
9891 return CallResult.first;
9892}
9893
9894SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
9895 SelectionDAG &DAG) const {
9896 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
9897 if (!isCtlzFast())
9898 return SDValue();
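  // The expansion below relies on ctlz(x) == BitWidth iff x == 0, so
  // (ctlz(x) >> log2(BitWidth)) is 1 exactly when x == 0; e.g. for i32,
  // x == 0 gives ctlz = 32 and 32 >> 5 == 1, while any nonzero x gives
  // ctlz <= 31 and therefore 0 after the shift.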
9899 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get();
9900 SDLoc dl(Op);
9901 if (isNullConstant(V: Op.getOperand(i: 1)) && CC == ISD::SETEQ) {
9902 EVT VT = Op.getOperand(i: 0).getValueType();
9903 SDValue Zext = Op.getOperand(i: 0);
9904 if (VT.bitsLT(MVT::i32)) {
9905 VT = MVT::i32;
9906 Zext = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Op.getOperand(i: 0));
9907 }
9908 unsigned Log2b = Log2_32(Value: VT.getSizeInBits());
9909 SDValue Clz = DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Zext);
9910 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
9911 DAG.getConstant(Log2b, dl, MVT::i32));
9912 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
9913 }
9914 return SDValue();
9915}
9916
9917SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
9918 SDValue Op0 = Node->getOperand(Num: 0);
9919 SDValue Op1 = Node->getOperand(Num: 1);
9920 EVT VT = Op0.getValueType();
9921 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
9922 unsigned Opcode = Node->getOpcode();
9923 SDLoc DL(Node);
9924
9925 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
9926 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(V: Op1, AllowUndefs: true) && BoolVT == VT &&
9927 getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
9928 Op0 = DAG.getFreeze(V: Op0);
9929 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
9930 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0,
9931 N2: DAG.getSetCC(DL, VT, LHS: Op0, RHS: Zero, Cond: ISD::SETEQ));
9932 }
9933
9934 // umin(x,y) -> sub(x,usubsat(x,y))
9935 // TODO: Missing freeze(Op0)?
9936 if (Opcode == ISD::UMIN && isOperationLegal(Op: ISD::SUB, VT) &&
9937 isOperationLegal(Op: ISD::USUBSAT, VT)) {
9938 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0,
9939 N2: DAG.getNode(Opcode: ISD::USUBSAT, DL, VT, N1: Op0, N2: Op1));
9940 }
9941
9942 // umax(x,y) -> add(x,usubsat(y,x))
9943 // TODO: Missing freeze(Op0)?
9944 if (Opcode == ISD::UMAX && isOperationLegal(Op: ISD::ADD, VT) &&
9945 isOperationLegal(Op: ISD::USUBSAT, VT)) {
9946 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0,
9947 N2: DAG.getNode(Opcode: ISD::USUBSAT, DL, VT, N1: Op1, N2: Op0));
9948 }
9949
9950 // FIXME: Should really try to split the vector in case it's legal on a
9951 // subvector.
9952 if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
9953 return DAG.UnrollVectorOp(N: Node);
9954
9955 // Attempt to find an existing SETCC node that we can reuse.
9956 // TODO: Do we need a generic doesSETCCNodeExist?
9957 // TODO: Missing freeze(Op0)/freeze(Op1)?
9958 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
9959 ISD::CondCode PrefCommuteCC,
9960 ISD::CondCode AltCommuteCC) {
9961 SDVTList BoolVTList = DAG.getVTList(VT: BoolVT);
9962 for (ISD::CondCode CC : {PrefCC, AltCC}) {
9963 if (DAG.doesNodeExist(Opcode: ISD::SETCC, VTList: BoolVTList,
9964 Ops: {Op0, Op1, DAG.getCondCode(Cond: CC)})) {
9965 SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: CC);
9966 return DAG.getSelect(DL, VT, Cond, LHS: Op0, RHS: Op1);
9967 }
9968 }
9969 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
9970 if (DAG.doesNodeExist(Opcode: ISD::SETCC, VTList: BoolVTList,
9971 Ops: {Op0, Op1, DAG.getCondCode(Cond: CC)})) {
9972 SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: CC);
9973 return DAG.getSelect(DL, VT, Cond, LHS: Op1, RHS: Op0);
9974 }
9975 }
9976 SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: PrefCC);
9977 return DAG.getSelect(DL, VT, Cond, LHS: Op0, RHS: Op1);
9978 };
9979
9980 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
9981 // -> Y = (A < B) ? B : A
9982 // -> Y = (A >= B) ? A : B
9983 // -> Y = (A <= B) ? B : A
9984 switch (Opcode) {
9985 case ISD::SMAX:
9986 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
9987 case ISD::SMIN:
9988 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
9989 case ISD::UMAX:
9990 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
9991 case ISD::UMIN:
9992 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
9993 }
9994
9995 llvm_unreachable("How did we get here?");
9996}
9997
9998SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
9999 unsigned Opcode = Node->getOpcode();
10000 SDValue LHS = Node->getOperand(Num: 0);
10001 SDValue RHS = Node->getOperand(Num: 1);
10002 EVT VT = LHS.getValueType();
10003 SDLoc dl(Node);
10004
10005 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10006 assert(VT.isInteger() && "Expected operands to be integers");
10007
10008 // usub.sat(a, b) -> umax(a, b) - b
10009 if (Opcode == ISD::USUBSAT && isOperationLegal(Op: ISD::UMAX, VT)) {
10010 SDValue Max = DAG.getNode(Opcode: ISD::UMAX, DL: dl, VT, N1: LHS, N2: RHS);
10011 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Max, N2: RHS);
10012 }
10013
10014 // uadd.sat(a, b) -> umin(a, ~b) + b
10015 if (Opcode == ISD::UADDSAT && isOperationLegal(Op: ISD::UMIN, VT)) {
10016 SDValue InvRHS = DAG.getNOT(DL: dl, Val: RHS, VT);
10017 SDValue Min = DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT, N1: LHS, N2: InvRHS);
10018 return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Min, N2: RHS);
10019 }
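  // Worked i8 example of the identity above: uadd.sat(200, 100) computes
  // umin(200, ~100 = 155) = 155 and then 155 + 100 = 255 (saturated), while
  // the non-overflowing uadd.sat(10, 20) computes umin(10, 235) = 10 and then
  // 10 + 20 = 30. The umin clamps LHS so the following ADD cannot wrap.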
10020
10021 unsigned OverflowOp;
10022 switch (Opcode) {
10023 case ISD::SADDSAT:
10024 OverflowOp = ISD::SADDO;
10025 break;
10026 case ISD::UADDSAT:
10027 OverflowOp = ISD::UADDO;
10028 break;
10029 case ISD::SSUBSAT:
10030 OverflowOp = ISD::SSUBO;
10031 break;
10032 case ISD::USUBSAT:
10033 OverflowOp = ISD::USUBO;
10034 break;
10035 default:
10036 llvm_unreachable("Expected method to receive signed or unsigned saturation "
10037 "addition or subtraction node.");
10038 }
10039
10040 // FIXME: Should really try to split the vector in case it's legal on a
10041 // subvector.
10042 if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
10043 return DAG.UnrollVectorOp(N: Node);
10044
10045 unsigned BitWidth = LHS.getScalarValueSizeInBits();
10046 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10047 SDValue Result = DAG.getNode(Opcode: OverflowOp, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
10048 SDValue SumDiff = Result.getValue(R: 0);
10049 SDValue Overflow = Result.getValue(R: 1);
10050 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
10051 SDValue AllOnes = DAG.getAllOnesConstant(DL: dl, VT);
10052
10053 if (Opcode == ISD::UADDSAT) {
10054 if (getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
10055 // (LHS + RHS) | OverflowMask
10056 SDValue OverflowMask = DAG.getSExtOrTrunc(Op: Overflow, DL: dl, VT);
10057 return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: SumDiff, N2: OverflowMask);
10058 }
10059 // Overflow ? 0xffff.... : (LHS + RHS)
10060 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: AllOnes, RHS: SumDiff);
10061 }
10062
10063 if (Opcode == ISD::USUBSAT) {
10064 if (getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
10065 // (LHS - RHS) & ~OverflowMask
10066 SDValue OverflowMask = DAG.getSExtOrTrunc(Op: Overflow, DL: dl, VT);
10067 SDValue Not = DAG.getNOT(DL: dl, Val: OverflowMask, VT);
10068 return DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: SumDiff, N2: Not);
10069 }
10070 // Overflow ? 0 : (LHS - RHS)
10071 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Zero, RHS: SumDiff);
10072 }
10073
10074 if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
10075 APInt MinVal = APInt::getSignedMinValue(numBits: BitWidth);
10076 APInt MaxVal = APInt::getSignedMaxValue(numBits: BitWidth);
10077
10078 KnownBits KnownLHS = DAG.computeKnownBits(Op: LHS);
10079 KnownBits KnownRHS = DAG.computeKnownBits(Op: RHS);
10080
10081 // If either of the operand signs are known, then they are guaranteed to
10082 // only saturate in one direction. If non-negative they will saturate
10083 // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
10084 //
10085 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
10086 // sign of 'y' has to be flipped.
10087
10088 bool LHSIsNonNegative = KnownLHS.isNonNegative();
10089 bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
10090 : KnownRHS.isNegative();
10091 if (LHSIsNonNegative || RHSIsNonNegative) {
10092 SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
10093 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMax, RHS: SumDiff);
10094 }
10095
10096 bool LHSIsNegative = KnownLHS.isNegative();
10097 bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
10098 : KnownRHS.isNonNegative();
10099 if (LHSIsNegative || RHSIsNegative) {
10100 SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
10101 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMin, RHS: SumDiff);
10102 }
10103 }
10104
  // Overflow ? (SumDiff >>s (BW - 1)) ^ MinVal : SumDiff
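  // On overflow, SumDiff's sign is flipped relative to the infinitely wide
  // result, so the arithmetic shift yields all-ones when the true result was
  // positive (and XOR with MinVal gives MaxVal), and all-zeros when it was
  // negative (XOR gives MinVal).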
10106 APInt MinVal = APInt::getSignedMinValue(numBits: BitWidth);
10107 SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
10108 SDValue Shift = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: SumDiff,
10109 N2: DAG.getConstant(Val: BitWidth - 1, DL: dl, VT));
10110 Result = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Shift, N2: SatMin);
10111 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Result, RHS: SumDiff);
10112}
10113
10114SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
10115 unsigned Opcode = Node->getOpcode();
10116 bool IsSigned = Opcode == ISD::SSHLSAT;
10117 SDValue LHS = Node->getOperand(Num: 0);
10118 SDValue RHS = Node->getOperand(Num: 1);
10119 EVT VT = LHS.getValueType();
10120 SDLoc dl(Node);
10121
10122 assert((Node->getOpcode() == ISD::SSHLSAT ||
10123 Node->getOpcode() == ISD::USHLSAT) &&
10124 "Expected a SHLSAT opcode");
10125 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10126 assert(VT.isInteger() && "Expected operands to be integers");
10127
10128 if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
10129 return DAG.UnrollVectorOp(N: Node);
10130
10131 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
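  // Worked i8 example: ushl.sat(0x40, 2) computes Result = 0x00 (the shift
  // wraps), and since (0x00 >> 2) != 0x40 the value saturates to 0xFF.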
10132
10133 unsigned BW = VT.getScalarSizeInBits();
10134 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10135 SDValue Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS, N2: RHS);
10136 SDValue Orig =
10137 DAG.getNode(Opcode: IsSigned ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: Result, N2: RHS);
10138
10139 SDValue SatVal;
10140 if (IsSigned) {
10141 SDValue SatMin = DAG.getConstant(Val: APInt::getSignedMinValue(numBits: BW), DL: dl, VT);
10142 SDValue SatMax = DAG.getConstant(Val: APInt::getSignedMaxValue(numBits: BW), DL: dl, VT);
10143 SDValue Cond =
10144 DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: DAG.getConstant(Val: 0, DL: dl, VT), Cond: ISD::SETLT);
10145 SatVal = DAG.getSelect(DL: dl, VT, Cond, LHS: SatMin, RHS: SatMax);
10146 } else {
10147 SatVal = DAG.getConstant(Val: APInt::getMaxValue(numBits: BW), DL: dl, VT);
10148 }
10149 SDValue Cond = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: Orig, Cond: ISD::SETNE);
10150 return DAG.getSelect(DL: dl, VT, Cond, LHS: SatVal, RHS: Result);
10151}
10152
10153void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
10154 bool Signed, EVT WideVT,
10155 const SDValue LL, const SDValue LH,
10156 const SDValue RL, const SDValue RH,
10157 SDValue &Lo, SDValue &Hi) const {
10158 // We can fall back to a libcall with an illegal type for the MUL if we
10159 // have a libcall big enough.
10160 // Also, we can fall back to a division in some cases, but that's a big
10161 // performance hit in the general case.
10162 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
10163 if (WideVT == MVT::i16)
10164 LC = RTLIB::MUL_I16;
10165 else if (WideVT == MVT::i32)
10166 LC = RTLIB::MUL_I32;
10167 else if (WideVT == MVT::i64)
10168 LC = RTLIB::MUL_I64;
10169 else if (WideVT == MVT::i128)
10170 LC = RTLIB::MUL_I128;
10171
10172 if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(Call: LC)) {
10173 // We'll expand the multiplication by brute force because we have no other
10174 // options. This is a trivially-generalized version of the code from
10175 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
10176 // 4.3.1).
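    // Writing LL = LLH * 2^h + LLL and RL = RLH * 2^h + RLL with h = HalfBits,
    // the low product expands to
    //   LL * RL = LLH*RLH * 2^(2h) + (LLH*RLL + LLL*RLH) * 2^h + LLL*RLL,
    // so the half-width partial products below build Lo while the carries out
    // of the low word accumulate into W; the cross terms RH*LL and RL*LH are
    // then added into Hi.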
10177 EVT VT = LL.getValueType();
10178 unsigned Bits = VT.getSizeInBits();
10179 unsigned HalfBits = Bits >> 1;
10180 SDValue Mask =
10181 DAG.getConstant(Val: APInt::getLowBitsSet(numBits: Bits, loBitsSet: HalfBits), DL: dl, VT);
10182 SDValue LLL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: LL, N2: Mask);
10183 SDValue RLL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: RL, N2: Mask);
10184
10185 SDValue T = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LLL, N2: RLL);
10186 SDValue TL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: T, N2: Mask);
10187
10188 SDValue Shift = DAG.getShiftAmountConstant(Val: HalfBits, VT, DL: dl);
10189 SDValue TH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: T, N2: Shift);
10190 SDValue LLH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: LL, N2: Shift);
10191 SDValue RLH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: RL, N2: Shift);
10192
10193 SDValue U = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT,
10194 N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LLH, N2: RLL), N2: TH);
10195 SDValue UL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: U, N2: Mask);
10196 SDValue UH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: U, N2: Shift);
10197
10198 SDValue V = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT,
10199 N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LLL, N2: RLH), N2: UL);
10200 SDValue VH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: V, N2: Shift);
10201
10202 SDValue W =
10203 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LLH, N2: RLH),
10204 N2: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: UH, N2: VH));
10205 Lo = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: TL,
10206 N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: V, N2: Shift));
10207
10208 Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: W,
10209 N2: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT,
10210 N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: RH, N2: LL),
10211 N2: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: RL, N2: LH)));
10212 } else {
10213 // Attempt a libcall.
10214 SDValue Ret;
10215 TargetLowering::MakeLibCallOptions CallOptions;
10216 CallOptions.setSExt(Signed);
10217 CallOptions.setIsPostTypeLegalization(true);
10218 if (shouldSplitFunctionArgumentsAsLittleEndian(DL: DAG.getDataLayout())) {
10219 // Halves of WideVT are packed into registers in different order
10220 // depending on platform endianness. This is usually handled by
10221 // the C calling convention, but we can't defer to it in
10222 // the legalizer.
10223 SDValue Args[] = {LL, LH, RL, RH};
10224 Ret = makeLibCall(DAG, LC, RetVT: WideVT, Ops: Args, CallOptions, dl).first;
10225 } else {
10226 SDValue Args[] = {LH, LL, RH, RL};
10227 Ret = makeLibCall(DAG, LC, RetVT: WideVT, Ops: Args, CallOptions, dl).first;
10228 }
10229 assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
10230 "Ret value is a collection of constituent nodes holding result.");
10231 if (DAG.getDataLayout().isLittleEndian()) {
10232 // Same as above.
10233 Lo = Ret.getOperand(i: 0);
10234 Hi = Ret.getOperand(i: 1);
10235 } else {
10236 Lo = Ret.getOperand(i: 1);
10237 Hi = Ret.getOperand(i: 0);
10238 }
10239 }
10240}
10241
10242void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
10243 bool Signed, const SDValue LHS,
10244 const SDValue RHS, SDValue &Lo,
10245 SDValue &Hi) const {
10246 EVT VT = LHS.getValueType();
10247 assert(RHS.getValueType() == VT && "Mismatching operand types");
10248
10249 SDValue HiLHS;
10250 SDValue HiRHS;
10251 if (Signed) {
    // The high part is obtained by SRA'ing all but one of the bits of the low
    // part.
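    // For example, for i32 a negative LHS gives HiLHS = LHS >> 31 (all ones),
    // i.e. exactly the high word of LHS sign-extended to the wide type.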
10254 unsigned LoSize = VT.getFixedSizeInBits();
10255 HiLHS = DAG.getNode(
10256 Opcode: ISD::SRA, DL: dl, VT, N1: LHS,
10257 N2: DAG.getConstant(Val: LoSize - 1, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
10258 HiRHS = DAG.getNode(
10259 Opcode: ISD::SRA, DL: dl, VT, N1: RHS,
10260 N2: DAG.getConstant(Val: LoSize - 1, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
10261 } else {
10262 HiLHS = DAG.getConstant(Val: 0, DL: dl, VT);
10263 HiRHS = DAG.getConstant(Val: 0, DL: dl, VT);
10264 }
10265 EVT WideVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: VT.getSizeInBits() * 2);
10266 forceExpandWideMUL(DAG, dl, Signed, WideVT, LL: LHS, LH: HiLHS, RL: RHS, RH: HiRHS, Lo, Hi);
10267}
10268
10269SDValue
10270TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
10271 assert((Node->getOpcode() == ISD::SMULFIX ||
10272 Node->getOpcode() == ISD::UMULFIX ||
10273 Node->getOpcode() == ISD::SMULFIXSAT ||
10274 Node->getOpcode() == ISD::UMULFIXSAT) &&
10275 "Expected a fixed point multiplication opcode");
10276
10277 SDLoc dl(Node);
10278 SDValue LHS = Node->getOperand(Num: 0);
10279 SDValue RHS = Node->getOperand(Num: 1);
10280 EVT VT = LHS.getValueType();
10281 unsigned Scale = Node->getConstantOperandVal(Num: 2);
10282 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
10283 Node->getOpcode() == ISD::UMULFIXSAT);
10284 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
10285 Node->getOpcode() == ISD::SMULFIXSAT);
10286 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10287 unsigned VTSize = VT.getScalarSizeInBits();
10288
10289 if (!Scale) {
10290 // [us]mul.fix(a, b, 0) -> mul(a, b)
10291 if (!Saturating) {
10292 if (isOperationLegalOrCustom(Op: ISD::MUL, VT))
10293 return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
10294 } else if (Signed && isOperationLegalOrCustom(Op: ISD::SMULO, VT)) {
10295 SDValue Result =
10296 DAG.getNode(Opcode: ISD::SMULO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
10297 SDValue Product = Result.getValue(R: 0);
10298 SDValue Overflow = Result.getValue(R: 1);
10299 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
10300
10301 APInt MinVal = APInt::getSignedMinValue(numBits: VTSize);
10302 APInt MaxVal = APInt::getSignedMaxValue(numBits: VTSize);
10303 SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
10304 SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
10305 // Xor the inputs, if resulting sign bit is 0 the product will be
10306 // positive, else negative.
10307 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: LHS, N2: RHS);
10308 SDValue ProdNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Xor, RHS: Zero, Cond: ISD::SETLT);
10309 Result = DAG.getSelect(DL: dl, VT, Cond: ProdNeg, LHS: SatMin, RHS: SatMax);
10310 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Result, RHS: Product);
10311 } else if (!Signed && isOperationLegalOrCustom(Op: ISD::UMULO, VT)) {
10312 SDValue Result =
10313 DAG.getNode(Opcode: ISD::UMULO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
10314 SDValue Product = Result.getValue(R: 0);
10315 SDValue Overflow = Result.getValue(R: 1);
10316
10317 APInt MaxVal = APInt::getMaxValue(numBits: VTSize);
10318 SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
10319 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMax, RHS: Product);
10320 }
10321 }
10322
10323 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
10324 "Expected scale to be less than the number of bits if signed or at "
10325 "most the number of bits if unsigned.");
10326 assert(LHS.getValueType() == RHS.getValueType() &&
10327 "Expected both operands to be the same type");
10328
10329 // Get the upper and lower bits of the result.
10330 SDValue Lo, Hi;
10331 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10332 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
10333 if (isOperationLegalOrCustom(Op: LoHiOp, VT)) {
10334 SDValue Result = DAG.getNode(Opcode: LoHiOp, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: LHS, N2: RHS);
10335 Lo = Result.getValue(R: 0);
10336 Hi = Result.getValue(R: 1);
10337 } else if (isOperationLegalOrCustom(Op: HiOp, VT)) {
10338 Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
10339 Hi = DAG.getNode(Opcode: HiOp, DL: dl, VT, N1: LHS, N2: RHS);
10340 } else if (VT.isVector()) {
10341 return SDValue();
10342 } else {
10343 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
10344 }
10345
10346 if (Scale == VTSize)
10347 // Result is just the top half since we'd be shifting by the width of the
    // operand. Overflow is impossible, so this works for both UMULFIX and
10349 // UMULFIXSAT.
10350 return Hi;
10351
10352 // The result will need to be shifted right by the scale since both operands
10353 // are scaled. The result is given to us in 2 halves, so we only want part of
10354 // both in the result.
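  // For example, an i8 [us]mul.fix with Scale == 4 takes the middle 8 bits of
  // the 16-bit product: fshr(Hi, Lo, 4) == (Hi << 4) | (Lo >> 4).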
10355 SDValue Result = DAG.getNode(Opcode: ISD::FSHR, DL: dl, VT, N1: Hi, N2: Lo,
10356 N3: DAG.getShiftAmountConstant(Val: Scale, VT, DL: dl));
10357 if (!Saturating)
10358 return Result;
10359
10360 if (!Signed) {
10361 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
10362 // widened multiplication) aren't all zeroes.
10363
10364 // Saturate to max if ((Hi >> Scale) != 0),
10365 // which is the same as if (Hi > ((1 << Scale) - 1))
10366 APInt MaxVal = APInt::getMaxValue(numBits: VTSize);
10367 SDValue LowMask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VTSize, loBitsSet: Scale),
10368 DL: dl, VT);
10369 Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: LowMask,
10370 True: DAG.getConstant(Val: MaxVal, DL: dl, VT), False: Result,
10371 Cond: ISD::SETUGT);
10372
10373 return Result;
10374 }
10375
10376 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
10377 // widened multiplication) aren't all ones or all zeroes.
10378
10379 SDValue SatMin = DAG.getConstant(Val: APInt::getSignedMinValue(numBits: VTSize), DL: dl, VT);
10380 SDValue SatMax = DAG.getConstant(Val: APInt::getSignedMaxValue(numBits: VTSize), DL: dl, VT);
10381
10382 if (Scale == 0) {
10383 SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Lo,
10384 N2: DAG.getShiftAmountConstant(Val: VTSize - 1, VT, DL: dl));
10385 SDValue Overflow = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Hi, RHS: Sign, Cond: ISD::SETNE);
    // Saturate to SatMin if the wide product is negative, and to SatMax if it
    // is positive ...
10388 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
10389 SDValue ResultIfOverflow = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: Zero, True: SatMin, False: SatMax,
10390 Cond: ISD::SETLT);
10391 // ... but only if we overflowed.
10392 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: ResultIfOverflow, RHS: Result);
10393 }
10394
  // We handled Scale == 0 above, so all the bits to examine are in Hi.
10396
10397 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
10398 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
10399 SDValue LowMask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VTSize, loBitsSet: Scale - 1),
10400 DL: dl, VT);
10401 Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: LowMask, True: SatMax, False: Result, Cond: ISD::SETGT);
  // Saturate to min if ((Hi >> (Scale - 1)) < -1),
  // which is the same as if (Hi < (-1 << (Scale - 1))).
10404 SDValue HighMask =
10405 DAG.getConstant(Val: APInt::getHighBitsSet(numBits: VTSize, hiBitsSet: VTSize - Scale + 1),
10406 DL: dl, VT);
10407 Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: HighMask, True: SatMin, False: Result, Cond: ISD::SETLT);
10408 return Result;
10409}
10410
10411SDValue
10412TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
10413 SDValue LHS, SDValue RHS,
10414 unsigned Scale, SelectionDAG &DAG) const {
10415 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
10416 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
10417 "Expected a fixed point division opcode");
10418
10419 EVT VT = LHS.getValueType();
10420 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
10421 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
10422 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10423
10424 // If there is enough room in the type to upscale the LHS or downscale the
10425 // RHS before the division, we can perform it in this type without having to
10426 // resize. For signed operations, the LHS headroom is the number of
10427 // redundant sign bits, and for unsigned ones it is the number of zeroes.
10428 // The headroom for the RHS is the number of trailing zeroes.
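  // For example, an i8 udiv.fix with Scale == 4 whose LHS is known to have at
  // least four leading zeroes can shift the LHS up by 4 and emit a plain UDIV
  // in the original type, with no need for a wider division.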
10429 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(Op: LHS) - 1
10430 : DAG.computeKnownBits(Op: LHS).countMinLeadingZeros();
10431 unsigned RHSTrail = DAG.computeKnownBits(Op: RHS).countMinTrailingZeros();
10432
10433 // For signed saturating operations, we need to be able to detect true integer
10434 // division overflow; that is, when you have MIN / -EPS. However, this
  // is undefined behavior, and if we emit divisions that could take such
10436 // values it may cause undesired behavior (arithmetic exceptions on x86, for
10437 // example).
10438 // Avoid this by requiring an extra bit so that we never get this case.
10439 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
10440 // signed saturating division, we need to emit a whopping 32-bit division.
10441 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
10442 return SDValue();
10443
10444 unsigned LHSShift = std::min(a: LHSLead, b: Scale);
10445 unsigned RHSShift = Scale - LHSShift;
10446
10447 // At this point, we know that if we shift the LHS up by LHSShift and the
10448 // RHS down by RHSShift, we can emit a regular division with a final scaling
10449 // factor of Scale.
10450
10451 if (LHSShift)
10452 LHS = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS,
10453 N2: DAG.getShiftAmountConstant(Val: LHSShift, VT, DL: dl));
10454 if (RHSShift)
10455 RHS = DAG.getNode(Opcode: Signed ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: RHS,
10456 N2: DAG.getShiftAmountConstant(Val: RHSShift, VT, DL: dl));
10457
10458 SDValue Quot;
10459 if (Signed) {
10460 // For signed operations, if the resulting quotient is negative and the
10461 // remainder is nonzero, subtract 1 from the quotient to round towards
10462 // negative infinity.
10463 SDValue Rem;
10464 // FIXME: Ideally we would always produce an SDIVREM here, but if the
10465 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
10466 // we couldn't just form a libcall, but the type legalizer doesn't do it.
10467 if (isTypeLegal(VT) &&
10468 isOperationLegalOrCustom(Op: ISD::SDIVREM, VT)) {
10469 Quot = DAG.getNode(Opcode: ISD::SDIVREM, DL: dl,
10470 VTList: DAG.getVTList(VT1: VT, VT2: VT),
10471 N1: LHS, N2: RHS);
10472 Rem = Quot.getValue(R: 1);
10473 Quot = Quot.getValue(R: 0);
10474 } else {
10475 Quot = DAG.getNode(Opcode: ISD::SDIV, DL: dl, VT,
10476 N1: LHS, N2: RHS);
10477 Rem = DAG.getNode(Opcode: ISD::SREM, DL: dl, VT,
10478 N1: LHS, N2: RHS);
10479 }
10480 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
10481 SDValue RemNonZero = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Rem, RHS: Zero, Cond: ISD::SETNE);
10482 SDValue LHSNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: Zero, Cond: ISD::SETLT);
10483 SDValue RHSNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: RHS, RHS: Zero, Cond: ISD::SETLT);
10484 SDValue QuotNeg = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: BoolVT, N1: LHSNeg, N2: RHSNeg);
10485 SDValue Sub1 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Quot,
10486 N2: DAG.getConstant(Val: 1, DL: dl, VT));
10487 Quot = DAG.getSelect(DL: dl, VT,
10488 Cond: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: BoolVT, N1: RemNonZero, N2: QuotNeg),
10489 LHS: Sub1, RHS: Quot);
10490 } else
10491 Quot = DAG.getNode(Opcode: ISD::UDIV, DL: dl, VT,
10492 N1: LHS, N2: RHS);
10493
10494 return Quot;
10495}
10496
10497void TargetLowering::expandUADDSUBO(
10498 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
10499 SDLoc dl(Node);
10500 SDValue LHS = Node->getOperand(Num: 0);
10501 SDValue RHS = Node->getOperand(Num: 1);
10502 bool IsAdd = Node->getOpcode() == ISD::UADDO;
10503
10504 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
10505 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
10506 if (isOperationLegalOrCustom(Op: OpcCarry, VT: Node->getValueType(ResNo: 0))) {
10507 SDValue CarryIn = DAG.getConstant(Val: 0, DL: dl, VT: Node->getValueType(ResNo: 1));
10508 SDValue NodeCarry = DAG.getNode(Opcode: OpcCarry, DL: dl, VTList: Node->getVTList(),
10509 Ops: { LHS, RHS, CarryIn });
10510 Result = SDValue(NodeCarry.getNode(), 0);
10511 Overflow = SDValue(NodeCarry.getNode(), 1);
10512 return;
10513 }
10514
10515 Result = DAG.getNode(Opcode: IsAdd ? ISD::ADD : ISD::SUB, DL: dl,
10516 VT: LHS.getValueType(), N1: LHS, N2: RHS);
10517
10518 EVT ResultType = Node->getValueType(ResNo: 1);
10519 EVT SetCCType = getSetCCResultType(
10520 DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: 0));
10521 SDValue SetCC;
10522 if (IsAdd && isOneConstant(V: RHS)) {
    // Special case: uaddo X, 1 overflows if X+1 is 0. This potentially reduces
    // the live range of X. We assume comparing with 0 is cheap.
10525 // The general case (X + C) < C is not necessarily beneficial. Although we
10526 // reduce the live range of X, we may introduce the materialization of
10527 // constant C.
10528 SetCC =
10529 DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Result,
10530 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Node->getValueType(ResNo: 0)), Cond: ISD::SETEQ);
10531 } else if (IsAdd && isAllOnesConstant(V: RHS)) {
10532 // Special case: uaddo X, -1 overflows if X != 0.
10533 SetCC =
10534 DAG.getSetCC(DL: dl, VT: SetCCType, LHS,
10535 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Node->getValueType(ResNo: 0)), Cond: ISD::SETNE);
10536 } else {
10537 ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
10538 SetCC = DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Result, RHS: LHS, Cond: CC);
10539 }
10540 Overflow = DAG.getBoolExtOrTrunc(Op: SetCC, SL: dl, VT: ResultType, OpVT: ResultType);
10541}
10542
void TargetLowering::expandSADDSUBO(
    SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool IsAdd = Node->getOpcode() == ISD::SADDO;

  Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
                       LHS.getValueType(), LHS, RHS);

  EVT ResultType = Node->getValueType(1);
  EVT OType = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));

  // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
  unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
  if (isOperationLegal(OpcSat, LHS.getValueType())) {
    SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
    SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
    Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
    return;
  }

  SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());

  // For an addition, the result should be less than one of the operands (LHS)
  // if and only if the other operand (RHS) is negative, otherwise there will
  // be overflow.
  // For a subtraction, the result should be less than one of the operands
  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
  // otherwise there will be overflow.
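  // For example, with i8 operands, 100 + 28 wraps to -128: RHS is positive
  // (so the result should not be less than LHS), yet -128 < 100, and the XOR
  // below flags the mismatch as overflow.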
  SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
  SDValue ConditionRHS =
      DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);

  Overflow = DAG.getBoolExtOrTrunc(
      DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
      ResultType, ResultType);
}

bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
                                SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool isSigned = Node->getOpcode() == ISD::SMULO;

  // For power-of-two multiplications we can use a simpler shift expansion.
  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
    const APInt &C = RHSC->getAPIntValue();
    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
    if (C.isPowerOf2()) {
      // smulo(x, signed_min) is the same as umulo(x, signed_min).
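      // The shift round-trip below recovers X exactly when the product fits
      // in VT; an arithmetic shift checks the signed case, a logical shift
      // the unsigned (and signed_min) case, where only X in {0, 1} fits.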
      bool UseArithShift = isSigned && !C.isMinSignedValue();
      SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
      Overflow = DAG.getSetCC(dl, SetCCVT,
                              DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                                          dl, VT, Result, ShiftAmt),
                              LHS, ISD::SETNE);
      return true;
    }
  }

  EVT WideVT =
      EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
  if (VT.isVector())
    WideVT =
        EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());

  SDValue BottomHalf;
  SDValue TopHalf;
  static const unsigned Ops[2][3] =
      { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
        { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
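  // Prefer a same-width high-half multiply, then a lo/hi multiply pair, then
  // a double-width multiply; fall back to a manual expansion only when none
  // of those is available.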
  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
    BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                             RHS);
    TopHalf = BottomHalf.getValue(1);
  } else if (isTypeLegal(WideVT)) {
    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
    SDValue ShiftAmt =
        DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
  } else {
    if (VT.isVector())
      return false;

    forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
  }

  Result = BottomHalf;
  if (isSigned) {
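    // A signed product fits in VT iff the top half is the sign-extension of
    // the bottom half, i.e. every bit of TopHalf equals BottomHalf's sign bit.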
    SDValue ShiftAmt = DAG.getShiftAmountConstant(
        VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
  } else {
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                            DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(1);
  if (RType.bitsLT(Overflow.getValueType()))
    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}

SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
  SDValue Op = Node->getOperand(0);
  EVT VT = Op.getValueType();

  if (VT.isScalableVector())
    report_fatal_error(
        "Expanding reductions for scalable vectors is undefined.");

  // Try to use a shuffle reduction for power of two vectors.
  if (VT.isPow2VectorType()) {
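    // Halve the vector repeatedly, e.g. v8i32 -> v4i32 -> v2i32, combining
    // the two halves elementwise each step, giving a log2(N)-deep tree
    // instead of a linear chain of scalar ops.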
    while (VT.getVectorNumElements() > 1) {
      EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
      if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
        break;

      SDValue Lo, Hi;
      std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
      Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
      VT = HalfVT;
    }
  }

  EVT EltVT = VT.getVectorElementType();
  unsigned NumElts = VT.getVectorNumElements();

  SmallVector<SDValue, 8> Ops;
  DAG.ExtractVectorElements(Op, Ops, 0, NumElts);

  SDValue Res = Ops[0];
  for (unsigned i = 1; i < NumElts; i++)
    Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());

  // Result type may be wider than element type.
  if (EltVT != Node->getValueType(0))
    Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
  return Res;
}

SDValue TargetLowering::expandVecReduceSeq(SDNode *Node,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Node);
  SDValue AccOp = Node->getOperand(0);
  SDValue VecOp = Node->getOperand(1);
  SDNodeFlags Flags = Node->getFlags();

  EVT VT = VecOp.getValueType();
  EVT EltVT = VT.getVectorElementType();

  if (VT.isScalableVector())
    report_fatal_error(
        "Expanding reductions for scalable vectors is undefined.");

  unsigned NumElts = VT.getVectorNumElements();

  SmallVector<SDValue, 8> Ops;
  DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);

  unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());

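  // Sequential reductions are ordered, so emit a linear chain of scalar ops
  // seeded with the accumulator rather than a reassociated tree.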
  SDValue Res = AccOp;
  for (unsigned i = 0; i < NumElts; i++)
    Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);

  return Res;
}

bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
                               SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(0);
  SDLoc dl(Node);
  bool isSigned = Node->getOpcode() == ISD::SREM;
  unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
  SDValue Dividend = Node->getOperand(0);
  SDValue Divisor = Node->getOperand(1);
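  // Prefer a combined divrem node and take its remainder result; otherwise
  // rebuild the remainder from a plain divide.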
  if (isOperationLegalOrCustom(DivRemOpc, VT)) {
    SDVTList VTs = DAG.getVTList(VT, VT);
    Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
    return true;
  }
  if (isOperationLegalOrCustom(DivOpc, VT)) {
    // X % Y -> X-X/Y*Y
    SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
    Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
    return true;
  }
  return false;
}

SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
                                            SelectionDAG &DAG) const {
  bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
  SDLoc dl(SDValue(Node, 0));
  SDValue Src = Node->getOperand(0);

  // DstVT is the result type, while SatVT is the size to which we saturate.
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
  unsigned SatWidth = SatVT.getScalarSizeInBits();
  unsigned DstWidth = DstVT.getScalarSizeInBits();
  assert(SatWidth <= DstWidth &&
         "Expected saturation width no wider than result width");

  // Determine minimum and maximum integer values and their corresponding
  // floating-point values.
  APInt MinInt, MaxInt;
  if (IsSigned) {
    MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
    MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
  } else {
    MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
    MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
  }

  // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
  // libcall emission cannot handle this. Large result types will fail.
  if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
    SrcVT = Src.getValueType();
  }

  APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
  APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));

  APFloat::opStatus MinStatus =
      MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
  APFloat::opStatus MaxStatus =
      MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
                             !(MaxStatus & APFloat::opStatus::opInexact);
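  // If a bound rounded during the int->FP conversion, clamping in the FP
  // domain could let values just outside [MinInt, MaxInt] reach the
  // conversion, so the min+max fast path below requires exact bounds.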

  SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
  SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);

  // If the integer bounds are exactly representable as floats and min/max are
  // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
  // of comparisons and selects.
  bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
                     isOperationLegal(ISD::FMAXNUM, SrcVT);
  if (AreExactFloatBounds && MinMaxLegal) {
    SDValue Clamped = Src;

    // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
    Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
    // Clamp by MaxFloat from above. NaN cannot occur.
    Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
    // Convert clamped value to integer.
    SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
                                  dl, DstVT, Clamped);

    // In the unsigned case we're done, because we mapped NaN to MinFloat,
    // which will cast to zero.
    if (!IsSigned)
      return FpToInt;

    // Otherwise, select 0 if Src is NaN.
    SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
    SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
    return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
  }

  SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
  SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);

  // Result of direct conversion. The assumption here is that the operation is
  // non-trapping and it's fine to apply it to an out-of-range value if we
  // select it away later.
  SDValue FpToInt =
      DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);

  SDValue Select = FpToInt;

  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);

  // If Src ULT MinFloat, select MinInt. In particular, this also selects
  // MinInt if Src is NaN.
  SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
  Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
  // If Src OGT MaxFloat, select MaxInt.
  SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
  Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);

  // In the unsigned case we are done, because we mapped NaN to MinInt, which
  // is already zero.
  if (!IsSigned)
    return Select;

  // Otherwise, select 0 if Src is NaN.
  SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
  SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
  return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
}

SDValue TargetLowering::expandVectorSplice(SDNode *Node,
                                           SelectionDAG &DAG) const {
  assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
  assert(Node->getValueType(0).isScalableVector() &&
         "Fixed length vector types expected to use SHUFFLE_VECTOR!");

  EVT VT = Node->getValueType(0);
  SDValue V1 = Node->getOperand(0);
  SDValue V2 = Node->getOperand(1);
  int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
  SDLoc DL(Node);

  // Expand through memory as follows:
  //   Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
  //   Store V1, Ptr
  //   Store V2, Ptr + sizeof(V1)
  //   If (Imm < 0)
  //     TrailingElts = -Imm
  //     Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
  //   else
  //     Ptr = Ptr + (Imm * sizeof(VT.Elt))
  //   Res = Load Ptr

  Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);

  EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
                               VT.getVectorElementCount() * 2);
  SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
  EVT PtrVT = StackPtr.getValueType();
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

  // Store the lo part of CONCAT_VECTORS(V1, V2).
  SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
  // Store the hi part of CONCAT_VECTORS(V1, V2).
  SDValue OffsetToV2 = DAG.getVScale(
      DL, PtrVT,
      APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue()));
  SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
  SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);

  if (Imm >= 0) {
    // Load back the required element. getVectorElementPointer takes care of
    // clamping the index if it's out-of-bounds.
    StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
    // Load the spliced result.
    return DAG.getLoad(VT, DL, StoreV2, StackPtr,
                       MachinePointerInfo::getUnknownStack(MF));
  }

  uint64_t TrailingElts = -Imm;

  // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
  TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
  SDValue TrailingBytes =
      DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);

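  // For scalable vectors the compile-time element count is only a minimum,
  // so when TrailingElts could exceed the runtime length, clamp the byte
  // offset against the runtime size of V1 computed from vscale.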
  if (TrailingElts > VT.getVectorMinNumElements()) {
    SDValue VLBytes =
        DAG.getVScale(DL, PtrVT,
                      APInt(PtrVT.getFixedSizeInBits(),
                            VT.getStoreSize().getKnownMinValue()));
    TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
  }

  // Calculate the start address of the spliced result.
  StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);

  // Load the spliced result.
  return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
                     MachinePointerInfo::getUnknownStack(MF));
}

bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
                                           SDValue &LHS, SDValue &RHS,
                                           SDValue &CC, SDValue Mask,
                                           SDValue EVL, bool &NeedInvert,
                                           const SDLoc &dl, SDValue &Chain,
                                           bool IsSignaling) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  MVT OpVT = LHS.getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
  NeedInvert = false;
  assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
  bool IsNonVP = !EVL;
  switch (TLI.getCondCodeAction(CCCode, OpVT)) {
  default:
    llvm_unreachable("Unknown condition code action!");
  case TargetLowering::Legal:
    // Nothing to do.
    break;
  case TargetLowering::Expand: {
    ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
    if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(InvCC);
      return true;
    }
    // Swapping operands didn't work. Try inverting the condition.
    bool NeedSwap = false;
    InvCC = getSetCCInverse(CCCode, OpVT);
    if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      // If inverting the condition is not enough, try swapping operands
      // on top of it.
      InvCC = ISD::getSetCCSwappedOperands(InvCC);
      NeedSwap = true;
    }
    if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      CC = DAG.getCondCode(InvCC);
      NeedInvert = true;
      if (NeedSwap)
        std::swap(LHS, RHS);
      return true;
    }

    ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
    unsigned Opc = 0;
    switch (CCCode) {
    default:
      llvm_unreachable("Don't know how to expand this condition!");
    case ISD::SETUO:
      if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
        CC1 = ISD::SETUNE;
        CC2 = ISD::SETUNE;
        Opc = ISD::OR;
        break;
      }
      assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETUO is expanded, SETOEQ or SETUNE must be legal!");
      NeedInvert = true;
      [[fallthrough]];
    case ISD::SETO:
      assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETO is expanded, SETOEQ must be legal!");
      CC1 = ISD::SETOEQ;
      CC2 = ISD::SETOEQ;
      Opc = ISD::AND;
      break;
    case ISD::SETONE:
    case ISD::SETUEQ:
      // If the SETUO or SETO CC isn't legal, we might be able to use
      // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
      // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
      // the operands.
      CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
      if (!TLI.isCondCodeLegal(CC2, OpVT) &&
          (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
           TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
        CC1 = ISD::SETOGT;
        CC2 = ISD::SETOLT;
        Opc = ISD::OR;
        NeedInvert = ((unsigned)CCCode & 0x8U);
        break;
      }
      [[fallthrough]];
    case ISD::SETOEQ:
    case ISD::SETOGT:
    case ISD::SETOGE:
    case ISD::SETOLT:
    case ISD::SETOLE:
    case ISD::SETUNE:
    case ISD::SETUGT:
    case ISD::SETUGE:
    case ISD::SETULT:
    case ISD::SETULE:
      // If we are floating point, assign and break, otherwise fall through.
      if (!OpVT.isInteger()) {
        // We can use the 4th bit to tell if we are the unordered
        // or ordered version of the opcode.
        CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
        Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
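        // Mask the low three bits (the bare relation) and set bit 4 to get
        // the integer-style "don't care about NaNs" form of the same
        // comparison, e.g. SETOLT/SETULT -> SETLT.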
        CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
        break;
      }
      // Fall through if we are an unsigned integer.
      [[fallthrough]];
    case ISD::SETLE:
    case ISD::SETGT:
    case ISD::SETGE:
    case ISD::SETLT:
    case ISD::SETNE:
    case ISD::SETEQ:
      // If all combinations of inverting the condition and swapping operands
      // didn't work then we have no means to expand the condition.
      llvm_unreachable("Don't know how to expand this condition!");
    }

    SDValue SetCC1, SetCC2;
    if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
      // If we aren't the ordered or unordered operation,
      // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
      }
    } else {
      // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS).
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
      }
    }
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
                          SetCC2.getValue(1));
    if (IsNonVP)
      LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
    else {
      // Transform the binary opcode to the VP equivalent.
      assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
      Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
      LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
    }
    RHS = SDValue();
    CC = SDValue();
    return true;
  }
  }
  return false;
}

