InstCombineCalls.cpp source code [llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp]

1	//===- InstCombineCalls.cpp -----------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the visitCall, visitInvoke, and visitCallBr functions.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "InstCombineInternal.h"
14	#include "llvm/ADT/APFloat.h"
15	#include "llvm/ADT/APInt.h"
16	#include "llvm/ADT/APSInt.h"
17	#include "llvm/ADT/ArrayRef.h"
18	#include "llvm/ADT/STLFunctionalExtras.h"
19	#include "llvm/ADT/SmallBitVector.h"
20	#include "llvm/ADT/SmallVector.h"
21	#include "llvm/ADT/Statistic.h"
22	#include "llvm/Analysis/AliasAnalysis.h"
23	#include "llvm/Analysis/AssumeBundleQueries.h"
24	#include "llvm/Analysis/AssumptionCache.h"
25	#include "llvm/Analysis/InstructionSimplify.h"
26	#include "llvm/Analysis/Loads.h"
27	#include "llvm/Analysis/MemoryBuiltins.h"
28	#include "llvm/Analysis/ValueTracking.h"
29	#include "llvm/Analysis/VectorUtils.h"
30	#include "llvm/IR/AttributeMask.h"
31	#include "llvm/IR/Attributes.h"
32	#include "llvm/IR/BasicBlock.h"
33	#include "llvm/IR/Constant.h"
34	#include "llvm/IR/Constants.h"
35	#include "llvm/IR/DataLayout.h"
36	#include "llvm/IR/DebugInfo.h"
37	#include "llvm/IR/DerivedTypes.h"
38	#include "llvm/IR/Function.h"
39	#include "llvm/IR/GlobalVariable.h"
40	#include "llvm/IR/InlineAsm.h"
41	#include "llvm/IR/InstrTypes.h"
42	#include "llvm/IR/Instruction.h"
43	#include "llvm/IR/Instructions.h"
44	#include "llvm/IR/IntrinsicInst.h"
45	#include "llvm/IR/Intrinsics.h"
46	#include "llvm/IR/IntrinsicsAArch64.h"
47	#include "llvm/IR/IntrinsicsAMDGPU.h"
48	#include "llvm/IR/IntrinsicsARM.h"
49	#include "llvm/IR/IntrinsicsHexagon.h"
50	#include "llvm/IR/LLVMContext.h"
51	#include "llvm/IR/Metadata.h"
52	#include "llvm/IR/PatternMatch.h"
53	#include "llvm/IR/Statepoint.h"
54	#include "llvm/IR/Type.h"
55	#include "llvm/IR/User.h"
56	#include "llvm/IR/Value.h"
57	#include "llvm/IR/ValueHandle.h"
58	#include "llvm/Support/AtomicOrdering.h"
59	#include "llvm/Support/Casting.h"
60	#include "llvm/Support/CommandLine.h"
61	#include "llvm/Support/Compiler.h"
62	#include "llvm/Support/Debug.h"
63	#include "llvm/Support/ErrorHandling.h"
64	#include "llvm/Support/KnownBits.h"
65	#include "llvm/Support/MathExtras.h"
66	#include "llvm/Support/raw_ostream.h"
67	#include "llvm/Transforms/InstCombine/InstCombiner.h"
68	#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
69	#include "llvm/Transforms/Utils/Local.h"
70	#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
71	#include <algorithm>
72	#include <cassert>
73	#include <cstdint>
74	#include <optional>
75	#include <utility>
76	#include <vector>
77
78	#define DEBUG_TYPE "instcombine"
79	#include "llvm/Transforms/Utils/InstructionWorklist.h"
80
81	using namespace llvm;
82	using namespace PatternMatch;
83
84	STATISTIC(NumSimplified, "Number of library calls simplified");
85
86	static cl::opt<unsigned> GuardWideningWindow(
87	"instcombine-guard-widening-window",
88	cl::init(Val: `3`),
89	cl::desc ("How wide an instruction window to bypass looking for "
90	"another guard"));
91
92	/// Return the specified type promoted as it would be to pass though a va_arg
93	/// area.
94	static Type getPromotedType(Type Ty) {
95	if (IntegerType* ITy = dyn_cast<IntegerType>(Val: Ty)) {
96	if (ITy->getBitWidth() < `32`)
97	return Type::getInt32Ty(C&: Ty->getContext());
98	}
99	return Ty;
100	}
101
102	/// Recognize a memcpy/memmove from a trivially otherwise unused alloca.
103	/// TODO: This should probably be integrated with visitAllocSites, but that
104	/// requires a deeper change to allow either unread or unwritten objects.
105	static bool hasUndefSource(AnyMemTransferInst *MI) {
106	auto *Src = MI->getRawSource();
107	while (isa<GetElementPtrInst>(Val: Src) \|\| isa<BitCastInst>(Val: Src)) {
108	if (!Src->hasOneUse())
109	return false;
110	Src = cast<Instruction>(Val: Src)->getOperand(i: `0`);
111	}
112	return isa<AllocaInst>(Val: Src) && Src->hasOneUse();
113	}
114
115	Instruction InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst MI) {
116	Align DstAlign = getKnownAlignment(V: MI->getRawDest(), DL, CxtI: MI, AC: &AC, DT: &DT);
117	MaybeAlign CopyDstAlign = MI->getDestAlign();
118	if (!CopyDstAlign \|\| *CopyDstAlign < DstAlign) {
119	MI->setDestAlignment(DstAlign);
120	return MI;
121	}
122
123	Align SrcAlign = getKnownAlignment(V: MI->getRawSource(), DL, CxtI: MI, AC: &AC, DT: &DT);
124	MaybeAlign CopySrcAlign = MI->getSourceAlign();
125	if (!CopySrcAlign \|\| *CopySrcAlign < SrcAlign) {
126	MI->setSourceAlignment(SrcAlign);
127	return MI;
128	}
129
130	// If we have a store to a location which is known constant, we can conclude
131	// that the store must be storing the constant value (else the memory
132	// wouldn't be constant), and this must be a noop.
133	if (!isModSet(MRI: AA->getModRefInfoMask(P: MI->getDest()))) {
134	// Set the size of the copy to 0, it will be deleted on the next iteration.
135	MI->setLength(Constant::getNullValue(Ty: MI->getLength()->getType()));
136	return MI;
137	}
138
139	// If the source is provably undef, the memcpy/memmove doesn't do anything
140	// (unless the transfer is volatile).
141	if (hasUndefSource(MI) && !MI->isVolatile()) {
142	// Set the size of the copy to 0, it will be deleted on the next iteration.
143	MI->setLength(Constant::getNullValue(Ty: MI->getLength()->getType()));
144	return MI;
145	}
146
147	// If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
148	// load/store.
149	ConstantInt *MemOpLength = dyn_cast<ConstantInt>(Val: MI->getLength());
150	if (!MemOpLength) return nullptr;
151
152	// Source and destination pointer types are always "i8" for intrinsic. See*
153	// if the size is something we can handle with a single primitive load/store.
154	// A single load+store correctly handles overlapping memory in the memmove
155	// case.
156	uint64_t Size = MemOpLength->getLimitedValue();
157	assert(Size && "0-sized memory transferring should be removed already.");
158
159	if (Size > `8` \|\| (Size&(Size-`1`)))
160	return nullptr; // If not 1/2/4/8 bytes, exit.
161
162	// If it is an atomic and alignment is less than the size then we will
163	// introduce the unaligned memory access which will be later transformed
164	// into libcall in CodeGen. This is not evident performance gain so disable
165	// it now.
166	if (isa<AtomicMemTransferInst>(Val: MI))
167	if (CopyDstAlign < Size \|\| CopySrcAlign < Size)
168	return nullptr;
169
170	// Use an integer load+store unless we can find something better.
171	IntegerType* IntType = IntegerType::get(C&: MI->getContext(), NumBits: Size<<`3`);
172
173	// If the memcpy has metadata describing the members, see if we can get the
174	// TBAA tag describing our copy.
175	AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(AccessSize: Size);
176
177	Value *Src = MI->getArgOperand(i: `1`);
178	Value *Dest = MI->getArgOperand(i: `0`);
179	LoadInst *L = Builder.CreateLoad(Ty: IntType, Ptr: Src);
180	// Alignment from the mem intrinsic will be better, so use it.
181	L->setAlignment(*CopySrcAlign);
182	L->setAAMetadata(AACopyMD);
183	MDNode *LoopMemParallelMD =
184	MI->getMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access);
185	if (LoopMemParallelMD)
186	L->setMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access, Node: LoopMemParallelMD);
187	MDNode *AccessGroupMD = MI->getMetadata(KindID: LLVMContext::MD_access_group);
188	if (AccessGroupMD)
189	L->setMetadata(KindID: LLVMContext::MD_access_group, Node: AccessGroupMD);
190
191	StoreInst *S = Builder.CreateStore(Val: L, Ptr: Dest);
192	// Alignment from the mem intrinsic will be better, so use it.
193	S->setAlignment(*CopyDstAlign);
194	S->setAAMetadata(AACopyMD);
195	if (LoopMemParallelMD)
196	S->setMetadata(KindID: LLVMContext::MD_mem_parallel_loop_access, Node: LoopMemParallelMD);
197	if (AccessGroupMD)
198	S->setMetadata(KindID: LLVMContext::MD_access_group, Node: AccessGroupMD);
199	S->copyMetadata(SrcInst: *MI, WL: LLVMContext::MD_DIAssignID);
200
201	if (auto *MT = dyn_cast<MemTransferInst>(Val: MI)) {
202	// non-atomics can be volatile
203	L->setVolatile(MT->isVolatile());
204	S->setVolatile(MT->isVolatile());
205	}
206	if (isa<AtomicMemTransferInst>(Val: MI)) {
207	// atomics have to be unordered
208	L->setOrdering(AtomicOrdering::Unordered);
209	S->setOrdering(AtomicOrdering::Unordered);
210	}
211
212	// Set the size of the copy to 0, it will be deleted on the next iteration.
213	MI->setLength(Constant::getNullValue(Ty: MemOpLength->getType()));
214	return MI;
215	}
216
217	Instruction InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst MI) {
218	const Align KnownAlignment =
219	getKnownAlignment(V: MI->getDest(), DL, CxtI: MI, AC: &AC, DT: &DT);
220	MaybeAlign MemSetAlign = MI->getDestAlign();
221	if (!MemSetAlign \|\| *MemSetAlign < KnownAlignment) {
222	MI->setDestAlignment(KnownAlignment);
223	return MI;
224	}
225
226	// If we have a store to a location which is known constant, we can conclude
227	// that the store must be storing the constant value (else the memory
228	// wouldn't be constant), and this must be a noop.
229	if (!isModSet(MRI: AA->getModRefInfoMask(P: MI->getDest()))) {
230	// Set the size of the copy to 0, it will be deleted on the next iteration.
231	MI->setLength(Constant::getNullValue(Ty: MI->getLength()->getType()));
232	return MI;
233	}
234
235	// Remove memset with an undef value.
236	// FIXME: This is technically incorrect because it might overwrite a poison
237	// value. Change to PoisonValue once #52930 is resolved.
238	if (isa<UndefValue>(Val: MI->getValue())) {
239	// Set the size of the copy to 0, it will be deleted on the next iteration.
240	MI->setLength(Constant::getNullValue(Ty: MI->getLength()->getType()));
241	return MI;
242	}
243
244	// Extract the length and alignment and fill if they are constant.
245	ConstantInt *LenC = dyn_cast<ConstantInt>(Val: MI->getLength());
246	ConstantInt *FillC = dyn_cast<ConstantInt>(Val: MI->getValue());
247	if (!LenC \|\| !FillC \|\| !FillC->getType()->isIntegerTy(Bitwidth: `8`))
248	return nullptr;
249	const uint64_t Len = LenC->getLimitedValue();
250	assert(Len && "0-sized memory setting should be removed already.");
251	const Align Alignment = MI->getDestAlign().valueOrOne();
252
253	// If it is an atomic and alignment is less than the size then we will
254	// introduce the unaligned memory access which will be later transformed
255	// into libcall in CodeGen. This is not evident performance gain so disable
256	// it now.
257	if (isa<AtomicMemSetInst>(Val: MI))
258	if (Alignment < Len)
259	return nullptr;
260
261	// memset(s,c,n) -> store s, c (for n=1,2,4,8)
262	if (Len <= `8` && isPowerOf2_32(Value: (uint32_t)Len)) {
263	Type ITy = IntegerType::get(C&: MI->getContext(), NumBits: Len`8`); // n=1 -> i8.
264
265	Value *Dest = MI->getDest();
266
267	// Extract the fill value and store.
268	const uint64_t Fill = FillC->getZExtValue()*`0x0101010101010101ULL`;
269	Constant *FillVal = ConstantInt::get(Ty: ITy, V: Fill);
270	StoreInst *S = Builder.CreateStore(Val: FillVal, Ptr: Dest, isVolatile: MI->isVolatile());
271	S->copyMetadata(SrcInst: *MI, WL: LLVMContext::MD_DIAssignID);
272	auto replaceOpForAssignmentMarkers = [FillC, FillVal](auto *DbgAssign) {
273	if (llvm::is_contained(DbgAssign->location_ops(), FillC))
274	DbgAssign->replaceVariableLocationOp(FillC, FillVal);
275	};
276	for_each(Range: at::getAssignmentMarkers(Inst: S), F: replaceOpForAssignmentMarkers);
277	for_each(Range: at::getDVRAssignmentMarkers(Inst: S), F: replaceOpForAssignmentMarkers);
278
279	S->setAlignment(Alignment);
280	if (isa<AtomicMemSetInst>(Val: MI))
281	S->setOrdering(AtomicOrdering::Unordered);
282
283	// Set the size of the copy to 0, it will be deleted on the next iteration.
284	MI->setLength(Constant::getNullValue(Ty: LenC->getType()));
285	return MI;
286	}
287
288	return nullptr;
289	}
290
291	// TODO, Obvious Missing Transforms:
292	// Narrow width by halfs excluding zero/undef lanes*
293	Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
294	Value *LoadPtr = II.getArgOperand(i: `0`);
295	const Align Alignment =
296	cast<ConstantInt>(Val: II.getArgOperand(i: `1`))->getAlignValue();
297
298	// If the mask is all ones or undefs, this is a plain vector load of the 1st
299	// argument.
300	if (maskIsAllOneOrUndef(Mask: II.getArgOperand(i: `2`))) {
301	LoadInst *L = Builder.CreateAlignedLoad(Ty: II.getType(), Ptr: LoadPtr, Align: Alignment,
302	Name: "unmaskedload");
303	L->copyMetadata(SrcInst: II);
304	return L;
305	}
306
307	// If we can unconditionally load from this address, replace with a
308	// load/select idiom. TODO: use DT for context sensitive query
309	if (isDereferenceablePointer(V: LoadPtr, Ty: II.getType(),
310	DL: II.getModule()->getDataLayout(), CtxI: &II, AC: &AC)) {
311	LoadInst *LI = Builder.CreateAlignedLoad(Ty: II.getType(), Ptr: LoadPtr, Align: Alignment,
312	Name: "unmaskedload");
313	LI->copyMetadata(SrcInst: II);
314	return Builder.CreateSelect(C: II.getArgOperand(i: `2`), True: LI, False: II.getArgOperand(i: `3`));
315	}
316
317	return nullptr;
318	}
319
320	// TODO, Obvious Missing Transforms:
321	// Single constant active lane -> store*
322	// Narrow width by halfs excluding zero/undef lanes*
323	Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
324	auto *ConstMask = dyn_cast<Constant>(Val: II.getArgOperand(i: `3`));
325	if (!ConstMask)
326	return nullptr;
327
328	// If the mask is all zeros, this instruction does nothing.
329	if (ConstMask->isNullValue())
330	return eraseInstFromFunction(I&: II);
331
332	// If the mask is all ones, this is a plain vector store of the 1st argument.
333	if (ConstMask->isAllOnesValue()) {
334	Value *StorePtr = II.getArgOperand(i: `1`);
335	Align Alignment = cast<ConstantInt>(Val: II.getArgOperand(i: `2`))->getAlignValue();
336	StoreInst *S =
337	new StoreInst (II.getArgOperand(i: `0`), StorePtr, false, Alignment);
338	S->copyMetadata(SrcInst: II);
339	return S;
340	}
341
342	if (isa<ScalableVectorType>(Val: ConstMask->getType()))
343	return nullptr;
344
345	// Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
346	APInt DemandedElts = possiblyDemandedEltsInMask(Mask: ConstMask);
347	APInt PoisonElts(DemandedElts.getBitWidth(), `0`);
348	if (Value *V = SimplifyDemandedVectorElts(V: II.getOperand(i_nocapture: `0`), DemandedElts,
349	PoisonElts))
350	return replaceOperand(I&: II, OpNum: `0`, V);
351
352	return nullptr;
353	}
354
355	// TODO, Obvious Missing Transforms:
356	// Single constant active lane load -> load*
357	// Dereferenceable address & few lanes -> scalarize speculative load/selects*
358	// Adjacent vector addresses -> masked.load*
359	// Narrow width by halfs excluding zero/undef lanes*
360	// Vector incrementing address -> vector masked load*
361	Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
362	auto *ConstMask = dyn_cast<Constant>(Val: II.getArgOperand(i: `2`));
363	if (!ConstMask)
364	return nullptr;
365
366	// Vector splat address w/known mask -> scalar load
367	// Fold the gather to load the source vector first lane
368	// because it is reloading the same value each time
369	if (ConstMask->isAllOnesValue())
370	if (auto *SplatPtr = getSplatValue(V: II.getArgOperand(i: `0`))) {
371	auto *VecTy = cast<VectorType>(Val: II.getType());
372	const Align Alignment =
373	cast<ConstantInt>(Val: II.getArgOperand(i: `1`))->getAlignValue();
374	LoadInst *L = Builder.CreateAlignedLoad(Ty: VecTy->getElementType(), Ptr: SplatPtr,
375	Align: Alignment, Name: "load.scalar");
376	Value *Shuf =
377	Builder.CreateVectorSplat(EC: VecTy->getElementCount(), V: L, Name: "broadcast");
378	return replaceInstUsesWith(I&: II, V: cast<Instruction>(Val: Shuf));
379	}
380
381	return nullptr;
382	}
383
384	// TODO, Obvious Missing Transforms:
385	// Single constant active lane -> store*
386	// Adjacent vector addresses -> masked.store*
387	// Narrow store width by halfs excluding zero/undef lanes*
388	// Vector incrementing address -> vector masked store*
389	Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
390	auto *ConstMask = dyn_cast<Constant>(Val: II.getArgOperand(i: `3`));
391	if (!ConstMask)
392	return nullptr;
393
394	// If the mask is all zeros, a scatter does nothing.
395	if (ConstMask->isNullValue())
396	return eraseInstFromFunction(I&: II);
397
398	// Vector splat address -> scalar store
399	if (auto *SplatPtr = getSplatValue(V: II.getArgOperand(i: `1`))) {
400	// scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr
401	if (auto *SplatValue = getSplatValue(V: II.getArgOperand(i: `0`))) {
402	if (maskContainsAllOneOrUndef(Mask: ConstMask)) {
403	Align Alignment =
404	cast<ConstantInt>(Val: II.getArgOperand(i: `2`))->getAlignValue();
405	StoreInst S = new* StoreInst (SplatValue, SplatPtr, /IsVolatile=/false,
406	Alignment);
407	S->copyMetadata(SrcInst: II);
408	return S;
409	}
410	}
411	// scatter(vector, splat(ptr), splat(true)) -> store extract(vector,
412	// lastlane), ptr
413	if (ConstMask->isAllOnesValue()) {
414	Align Alignment = cast<ConstantInt>(Val: II.getArgOperand(i: `2`))->getAlignValue();
415	VectorType *WideLoadTy = cast<VectorType>(Val: II.getArgOperand(i: `1`)->getType());
416	ElementCount VF = WideLoadTy->getElementCount();
417	Value *RunTimeVF = Builder.CreateElementCount(DstType: Builder.getInt32Ty(), EC: VF);
418	Value *LastLane = Builder.CreateSub(LHS: RunTimeVF, RHS: Builder.getInt32(C: `1`));
419	Value *Extract =
420	Builder.CreateExtractElement(Vec: II.getArgOperand(i: `0`), Idx: LastLane);
421	StoreInst *S =
422	new StoreInst (Extract, SplatPtr, /IsVolatile=/false, Alignment);
423	S->copyMetadata(SrcInst: II);
424	return S;
425	}
426	}
427	if (isa<ScalableVectorType>(Val: ConstMask->getType()))
428	return nullptr;
429
430	// Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
431	APInt DemandedElts = possiblyDemandedEltsInMask(Mask: ConstMask);
432	APInt PoisonElts(DemandedElts.getBitWidth(), `0`);
433	if (Value *V = SimplifyDemandedVectorElts(V: II.getOperand(i_nocapture: `0`), DemandedElts,
434	PoisonElts))
435	return replaceOperand(I&: II, OpNum: `0`, V);
436	if (Value *V = SimplifyDemandedVectorElts(V: II.getOperand(i_nocapture: `1`), DemandedElts,
437	PoisonElts))
438	return replaceOperand(I&: II, OpNum: `1`, V);
439
440	return nullptr;
441	}
442
443	/// This function transforms launder.invariant.group and strip.invariant.group
444	/// like:
445	/// launder(launder(%x)) -> launder(%x) (the result is not the argument)
446	/// launder(strip(%x)) -> launder(%x)
447	/// strip(strip(%x)) -> strip(%x) (the result is not the argument)
448	/// strip(launder(%x)) -> strip(%x)
449	/// This is legal because it preserves the most recent information about
450	/// the presence or absence of invariant.group.
451	static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
452	InstCombinerImpl &IC) {
453	auto *Arg = II.getArgOperand(i: `0`);
454	auto *StrippedArg = Arg->stripPointerCasts();
455	auto *StrippedInvariantGroupsArg = StrippedArg;
456	while (auto *Intr = dyn_cast<IntrinsicInst>(Val: StrippedInvariantGroupsArg)) {
457	if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group &&
458	Intr->getIntrinsicID() != Intrinsic::strip_invariant_group)
459	break;
460	StrippedInvariantGroupsArg = Intr->getArgOperand(i: `0`)->stripPointerCasts();
461	}
462	if (StrippedArg == StrippedInvariantGroupsArg)
463	return nullptr; // No launders/strips to remove.
464
465	Value Result = nullptr*;
466
467	if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
468	Result = IC.Builder.CreateLaunderInvariantGroup(Ptr: StrippedInvariantGroupsArg);
469	else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
470	Result = IC.Builder.CreateStripInvariantGroup(Ptr: StrippedInvariantGroupsArg);
471	else
472	llvm_unreachable(
473	"simplifyInvariantGroupIntrinsic only handles launder and strip");
474	if (Result->getType()->getPointerAddressSpace() !=
475	II.getType()->getPointerAddressSpace())
476	Result = IC.Builder.CreateAddrSpaceCast(V: Result, DestTy: II.getType());
477
478	return cast<Instruction>(Val: Result);
479	}
480
481	static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
482	assert((II.getIntrinsicID() == Intrinsic::cttz \|\|
483	II.getIntrinsicID() == Intrinsic::ctlz) &&
484	"Expected cttz or ctlz intrinsic");
485	bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
486	Value *Op0 = II.getArgOperand(i: `0`);
487	Value *Op1 = II.getArgOperand(i: `1`);
488	Value *X;
489	// ctlz(bitreverse(x)) -> cttz(x)
490	// cttz(bitreverse(x)) -> ctlz(x)
491	if (match(V: Op0, P: m_BitReverse(Op0: m_Value(V&: X)))) {
492	Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
493	Function *F = Intrinsic::getDeclaration(M: II.getModule(), id: ID, Tys: II.getType());
494	return CallInst::Create(Func: F, Args: {X, II.getArgOperand(i: `1`)});
495	}
496
497	if (II.getType()->isIntOrIntVectorTy(BitWidth: `1`)) {
498	// ctlz/cttz i1 Op0 --> not Op0
499	if (match(V: Op1, P: m_Zero()))
500	return BinaryOperator::CreateNot(Op: Op0);
501	// If zero is poison, then the input can be assumed to be "true", so the
502	// instruction simplifies to "false".
503	assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1");
504	return IC.replaceInstUsesWith(I&: II, V: ConstantInt::getNullValue(Ty: II.getType()));
505	}
506
507	// If ctlz/cttz is only used as a shift amount, set is_zero_poison to true.
508	if (II.hasOneUse() && match(V: Op1, P: m_Zero()) &&
509	match(V: II.user_back(), P: m_Shift(L: m_Value(), R: m_Specific(V: &II))))
510	return IC.replaceOperand(I&: II, OpNum: `1`, V: IC.Builder.getTrue());
511
512	Constant *C;
513
514	if (IsTZ) {
515	// cttz(-x) -> cttz(x)
516	if (match(V: Op0, P: m_Neg(V: m_Value(V&: X))))
517	return IC.replaceOperand(I&: II, OpNum: `0`, V: X);
518
519	// cttz(-x & x) -> cttz(x)
520	if (match(V: Op0, P: m_c_And(L: m_Neg(V: m_Value(V&: X)), R: m_Deferred(V: X))))
521	return IC.replaceOperand(I&: II, OpNum: `0`, V: X);
522
523	// cttz(sext(x)) -> cttz(zext(x))
524	if (match(V: Op0, P: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: X))))) {
525	auto *Zext = IC.Builder.CreateZExt(V: X, DestTy: II.getType());
526	auto *CttzZext =
527	IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, Zext, Op1);
528	return IC.replaceInstUsesWith(I&: II, V: CttzZext);
529	}
530
531	// Zext doesn't change the number of trailing zeros, so narrow:
532	// cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'.
533	if (match(V: Op0, P: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X)))) && match(V: Op1, P: m_One())) {
534	auto *Cttz = IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, X,
535	IC.Builder.getTrue());
536	auto *ZextCttz = IC.Builder.CreateZExt(V: Cttz, DestTy: II.getType());
537	return IC.replaceInstUsesWith(I&: II, V: ZextCttz);
538	}
539
540	// cttz(abs(x)) -> cttz(x)
541	// cttz(nabs(x)) -> cttz(x)
542	Value *Y;
543	SelectPatternFlavor SPF = matchSelectPattern(V: Op0, LHS&: X, RHS&: Y).Flavor;
544	if (SPF == SPF_ABS \|\| SPF == SPF_NABS)
545	return IC.replaceOperand(I&: II, OpNum: `0`, V: X);
546
547	if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(V&: X))))
548	return IC.replaceOperand(I&: II, OpNum: `0`, V: X);
549
550	// cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val)
551	if (match(V: Op0, P: m_Shl(L: m_ImmConstant(C), R: m_Value(V&: X))) &&
552	match(V: Op1, P: m_One())) {
553	Value *ConstCttz =
554	IC.Builder.CreateBinaryIntrinsic(Intrinsic::ID: cttz, LHS: C, RHS: Op1);
555	return BinaryOperator::CreateAdd(V1: ConstCttz, V2: X);
556	}
557
558	// cttz(lshr exact (%const, %val), 1) --> sub(cttz(%const, 1), %val)
559	if (match(V: Op0, P: m_Exact(SubPattern: m_LShr(L: m_ImmConstant(C), R: m_Value(V&: X)))) &&
560	match(V: Op1, P: m_One())) {
561	Value *ConstCttz =
562	IC.Builder.CreateBinaryIntrinsic(Intrinsic::ID: cttz, LHS: C, RHS: Op1);
563	return BinaryOperator::CreateSub(V1: ConstCttz, V2: X);
564	}
565	} else {
566	// ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val)
567	if (match(V: Op0, P: m_LShr(L: m_ImmConstant(C), R: m_Value(V&: X))) &&
568	match(V: Op1, P: m_One())) {
569	Value *ConstCtlz =
570	IC.Builder.CreateBinaryIntrinsic(Intrinsic::ID: ctlz, LHS: C, RHS: Op1);
571	return BinaryOperator::CreateAdd(V1: ConstCtlz, V2: X);
572	}
573
574	// ctlz(shl nuw (%const, %val), 1) --> sub(ctlz(%const, 1), %val)
575	if (match(V: Op0, P: m_NUWShl(L: m_ImmConstant(C), R: m_Value(V&: X))) &&
576	match(V: Op1, P: m_One())) {
577	Value *ConstCtlz =
578	IC.Builder.CreateBinaryIntrinsic(Intrinsic::ID: ctlz, LHS: C, RHS: Op1);
579	return BinaryOperator::CreateSub(V1: ConstCtlz, V2: X);
580	}
581	}
582
583	KnownBits Known = IC.computeKnownBits(V: Op0, Depth: `0`, CxtI: &II);
584
585	// Create a mask for bits above (ctlz) or below (cttz) the first known one.
586	unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
587	: Known.countMaxLeadingZeros();
588	unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
589	: Known.countMinLeadingZeros();
590
591	// If all bits above (ctlz) or below (cttz) the first known one are known
592	// zero, this value is constant.
593	// FIXME: This should be in InstSimplify because we're replacing an
594	// instruction with a constant.
595	if (PossibleZeros == DefiniteZeros) {
596	auto *C = ConstantInt::get(Ty: Op0->getType(), V: DefiniteZeros);
597	return IC.replaceInstUsesWith(I&: II, V: C);
598	}
599
600	// If the input to cttz/ctlz is known to be non-zero,
601	// then change the 'ZeroIsPoison' parameter to 'true'
602	// because we know the zero behavior can't affect the result.
603	if (!Known.One.isZero() \|\|
604	isKnownNonZero(V: Op0, Q: IC.getSimplifyQuery().getWithInstruction(I: &II))) {
605	if (!match(V: II.getArgOperand(i: `1`), P: m_One()))
606	return IC.replaceOperand(I&: II, OpNum: `1`, V: IC.Builder.getTrue());
607	}
608
609	// Add range attribute since known bits can't completely reflect what we know.
610	unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
611	if (BitWidth != `1` && !II.hasRetAttr(Attribute::Range) &&
612	!II.getMetadata(KindID: LLVMContext::MD_range)) {
613	ConstantRange Range(APInt (BitWidth, DefiniteZeros),
614	APInt (BitWidth, PossibleZeros + `1`));
615	II.addRangeRetAttr(CR: Range);
616	return &II;
617	}
618
619	return nullptr;
620	}
621
622	static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
623	assert(II.getIntrinsicID() == Intrinsic::ctpop &&
624	"Expected ctpop intrinsic");
625	Type *Ty = II.getType();
626	unsigned BitWidth = Ty->getScalarSizeInBits();
627	Value *Op0 = II.getArgOperand(i: `0`);
628	Value X, Y;
629
630	// ctpop(bitreverse(x)) -> ctpop(x)
631	// ctpop(bswap(x)) -> ctpop(x)
632	if (match(V: Op0, P: m_BitReverse(Op0: m_Value(V&: X))) \|\| match(V: Op0, P: m_BSwap(Op0: m_Value(V&: X))))
633	return IC.replaceOperand(I&: II, OpNum: `0`, V: X);
634
635	// ctpop(rot(x)) -> ctpop(x)
636	if ((match(V: Op0, P: m_FShl(Op0: m_Value(V&: X), Op1: m_Value(V&: Y), Op2: m_Value())) \|\|
637	match(V: Op0, P: m_FShr(Op0: m_Value(V&: X), Op1: m_Value(V&: Y), Op2: m_Value()))) &&
638	X == Y)
639	return IC.replaceOperand(I&: II, OpNum: `0`, V: X);
640
641	// ctpop(x \| -x) -> bitwidth - cttz(x, false)
642	if (Op0->hasOneUse() &&
643	match(V: Op0, P: m_c_Or(L: m_Value(V&: X), R: m_Neg(V: m_Deferred(V: X))))) {
644	Function *F =
645	Intrinsic::getDeclaration(M: II.getModule(), Intrinsic::id: cttz, Tys: Ty);
646	auto *Cttz = IC.Builder.CreateCall(Callee: F, Args: {X, IC.Builder.getFalse()});
647	auto *Bw = ConstantInt::get(Ty, V: APInt (BitWidth, BitWidth));
648	return IC.replaceInstUsesWith(I&: II, V: IC.Builder.CreateSub(LHS: Bw, RHS: Cttz));
649	}
650
651	// ctpop(~x & (x - 1)) -> cttz(x, false)
652	if (match(V: Op0,
653	P: m_c_And(L: m_Not(V: m_Value(V&: X)), R: m_Add(L: m_Deferred(V: X), R: m_AllOnes())))) {
654	Function *F =
655	Intrinsic::getDeclaration(M: II.getModule(), Intrinsic::id: cttz, Tys: Ty);
656	return CallInst::Create(Func: F, Args: {X, IC.Builder.getFalse()});
657	}
658
659	// Zext doesn't change the number of set bits, so narrow:
660	// ctpop (zext X) --> zext (ctpop X)
661	if (match(V: Op0, P: m_OneUse(SubPattern: m_ZExt(Op: m_Value(V&: X))))) {
662	Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(Intrinsic::ID: ctpop, V: X);
663	return CastInst::Create(Instruction::ZExt, S: NarrowPop, Ty);
664	}
665
666	KnownBits Known(BitWidth);
667	IC.computeKnownBits(V: Op0, Known, Depth: `0`, CxtI: &II);
668
669	// If all bits are zero except for exactly one fixed bit, then the result
670	// must be 0 or 1, and we can get that answer by shifting to LSB:
671	// ctpop (X & 32) --> (X & 32) >> 5
672	// TODO: Investigate removing this as its likely unnecessary given the below
673	// `isKnownToBeAPowerOfTwo` check.
674	if ((~Known.Zero).isPowerOf2())
675	return BinaryOperator::CreateLShr(
676	V1: Op0, V2: ConstantInt::get(Ty, V: (~Known.Zero).exactLogBase2()));
677
678	// More generally we can also handle non-constant power of 2 patterns such as
679	// shl/shr(Pow2, X), (X & -X), etc... by transforming:
680	// ctpop(Pow2OrZero) --> icmp ne X, 0
681	if (IC.isKnownToBeAPowerOfTwo(V: Op0, / OrZero / true))
682	return CastInst::Create(Instruction::ZExt,
683	S: IC.Builder.CreateICmp(P: ICmpInst::ICMP_NE, LHS: Op0,
684	RHS: Constant::getNullValue(Ty)),
685	Ty);
686
687	// Add range attribute since known bits can't completely reflect what we know.
688	if (BitWidth != `1` && !II.hasRetAttr(Attribute::Range) &&
689	!II.getMetadata(KindID: LLVMContext::MD_range)) {
690	ConstantRange Range(APInt (BitWidth, Known.countMinPopulation()),
691	APInt (BitWidth, Known.countMaxPopulation() + `1`));
692	II.addRangeRetAttr(CR: Range);
693	return &II;
694	}
695
696	return nullptr;
697	}
698
699	/// Convert a table lookup to shufflevector if the mask is constant.
700	/// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
701	/// which case we could lower the shufflevector with rev64 instructions
702	/// as it's actually a byte reverse.
703	static Value simplifyNeonTbl1(const* IntrinsicInst &II,
704	InstCombiner::BuilderTy &Builder) {
705	// Bail out if the mask is not a constant.
706	auto *C = dyn_cast<Constant>(Val: II.getArgOperand(i: `1`));
707	if (!C)
708	return nullptr;
709
710	auto *VecTy = cast<FixedVectorType>(Val: II.getType());
711	unsigned NumElts = VecTy->getNumElements();
712
713	// Only perform this transformation for <8 x i8> vector types.
714	if (!VecTy->getElementType()->isIntegerTy(Bitwidth: `8`) \|\| NumElts != `8`)
715	return nullptr;
716
717	int Indexes[`8`];
718
719	for (unsigned I = `0`; I < NumElts; ++I) {
720	Constant *COp = C->getAggregateElement(Elt: I);
721
722	if (!COp \|\| !isa<ConstantInt>(Val: COp))
723	return nullptr;
724
725	Indexes[I] = cast<ConstantInt>(Val: COp)->getLimitedValue();
726
727	// Make sure the mask indices are in range.
728	if ((unsigned)Indexes[I] >= NumElts)
729	return nullptr;
730	}
731
732	auto *V1 = II.getArgOperand(i: `0`);
733	auto *V2 = Constant::getNullValue(Ty: V1->getType());
734	return Builder.CreateShuffleVector(V1, V2, Mask: ArrayRef(Indexes));
735	}
736
737	// Returns true iff the 2 intrinsics have the same operands, limiting the
738	// comparison to the first NumOperands.
739	static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
740	unsigned NumOperands) {
741	assert(I.arg_size() >= NumOperands && "Not enough operands");
742	assert(E.arg_size() >= NumOperands && "Not enough operands");
743	for (unsigned i = `0`; i < NumOperands; i++)
744	if (I.getArgOperand(i) != E.getArgOperand(i))
745	return false;
746	return true;
747	}
748
749	// Remove trivially empty start/end intrinsic ranges, i.e. a start
750	// immediately followed by an end (ignoring debuginfo or other
751	// start/end intrinsics in between). As this handles only the most trivial
752	// cases, tracking the nesting level is not needed:
753	//
754	// call @llvm.foo.start(i1 0)
755	// call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed
756	// call @llvm.foo.end(i1 0)
757	// call @llvm.foo.end(i1 0) ; &I
758	static bool
759	removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC,
760	std::function<bool(const IntrinsicInst &)> IsStart) {
761	// We start from the end intrinsic and scan backwards, so that InstCombine
762	// has already processed (and potentially removed) all the instructions
763	// before the end intrinsic.
764	BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend());
765	for (; BI != BE; ++BI) {
766	if (auto I = dyn_cast<IntrinsicInst>(Val: &BI)) {
767	if (I->isDebugOrPseudoInst() \|\|
768	I->getIntrinsicID() == EndI.getIntrinsicID())
769	continue;
770	if (IsStart (*I)) {
771	if (haveSameOperands(I: EndI, E: *I, NumOperands: EndI.arg_size())) {
772	IC.eraseInstFromFunction(I&: *I);
773	IC.eraseInstFromFunction(I&: EndI);
774	return true;
775	}
776	// Skip start intrinsics that don't pair with this end intrinsic.
777	continue;
778	}
779	}
780	break;
781	}
782
783	return false;
784	}
785
786	Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) {
787	removeTriviallyEmptyRange(I, *this, [](const IntrinsicInst &I) {
788	return I.getIntrinsicID() == Intrinsic::vastart \|\|
789	I.getIntrinsicID() == Intrinsic::vacopy;
790	});
791	return nullptr;
792	}
793
794	static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) {
795	assert(Call.arg_size() > `1` && "Need at least 2 args to swap");
796	Value Arg0 = Call.getArgOperand(i: `0`), Arg1 = Call.getArgOperand(i: `1`);
797	if (isa<Constant>(Val: Arg0) && !isa<Constant>(Val: Arg1)) {
798	Call.setArgOperand(i: `0`, v: Arg1);
799	Call.setArgOperand(i: `1`, v: Arg0);
800	return &Call;
801	}
802	return nullptr;
803	}
804
805	/// Creates a result tuple for an overflow intrinsic \p II with a given
806	/// \p Result and a constant \p Overflow value.
807	static Instruction createOverflowTuple(IntrinsicInst II, Value *Result,
808	Constant *Overflow) {
809	Constant *V[] = {PoisonValue::get(T: Result->getType()), Overflow};
810	StructType *ST = cast<StructType>(Val: II->getType());
811	Constant *Struct = ConstantStruct::get(T: ST, V);
812	return InsertValueInst::Create(Agg: Struct, Val: Result, Idxs: `0`);
813	}
814
815	Instruction *
816	InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
817	WithOverflowInst *WO = cast<WithOverflowInst>(Val: II);
818	Value OperationResult = nullptr*;
819	Constant OverflowResult = nullptr*;
820	if (OptimizeOverflowCheck(BinaryOp: WO->getBinaryOp(), IsSigned: WO->isSigned(), LHS: WO->getLHS(),
821	RHS: WO->getRHS(), CtxI&: *WO, OperationResult, OverflowResult))
822	return createOverflowTuple(II: WO, Result: OperationResult, Overflow: OverflowResult);
823	return nullptr;
824	}
825
826	static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
827	Ty = Ty->getScalarType();
828	return F.getDenormalMode(FPType: Ty->getFltSemantics()).Input == DenormalMode::IEEE;
829	}
830
831	static bool inputDenormalIsDAZ(const Function &F, const Type *Ty) {
832	Ty = Ty->getScalarType();
833	return F.getDenormalMode(FPType: Ty->getFltSemantics()).inputsAreZero();
834	}
835
836	/// \returns the compare predicate type if the test performed by
837	/// llvm.is.fpclass(x, \p Mask) is equivalent to fcmp o__ x, 0.0 with the
838	/// floating-point environment assumed for \p F for type \p Ty
839	static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask,
840	const Function &F, Type *Ty) {
841	switch (static_cast<unsigned>(Mask)) {
842	case fcZero:
843	if (inputDenormalIsIEEE(F, Ty))
844	return FCmpInst::FCMP_OEQ;
845	break;
846	case fcZero \| fcSubnormal:
847	if (inputDenormalIsDAZ(F, Ty))
848	return FCmpInst::FCMP_OEQ;
849	break;
850	case fcPositive \| fcNegZero:
851	if (inputDenormalIsIEEE(F, Ty))
852	return FCmpInst::FCMP_OGE;
853	break;
854	case fcPositive \| fcNegZero \| fcNegSubnormal:
855	if (inputDenormalIsDAZ(F, Ty))
856	return FCmpInst::FCMP_OGE;
857	break;
858	case fcPosSubnormal \| fcPosNormal \| fcPosInf:
859	if (inputDenormalIsIEEE(F, Ty))
860	return FCmpInst::FCMP_OGT;
861	break;
862	case fcNegative \| fcPosZero:
863	if (inputDenormalIsIEEE(F, Ty))
864	return FCmpInst::FCMP_OLE;
865	break;
866	case fcNegative \| fcPosZero \| fcPosSubnormal:
867	if (inputDenormalIsDAZ(F, Ty))
868	return FCmpInst::FCMP_OLE;
869	break;
870	case fcNegSubnormal \| fcNegNormal \| fcNegInf:
871	if (inputDenormalIsIEEE(F, Ty))
872	return FCmpInst::FCMP_OLT;
873	break;
874	case fcPosNormal \| fcPosInf:
875	if (inputDenormalIsDAZ(F, Ty))
876	return FCmpInst::FCMP_OGT;
877	break;
878	case fcNegNormal \| fcNegInf:
879	if (inputDenormalIsDAZ(F, Ty))
880	return FCmpInst::FCMP_OLT;
881	break;
882	case ~fcZero & ~fcNan:
883	if (inputDenormalIsIEEE(F, Ty))
884	return FCmpInst::FCMP_ONE;
885	break;
886	case ~(fcZero \| fcSubnormal) & ~fcNan:
887	if (inputDenormalIsDAZ(F, Ty))
888	return FCmpInst::FCMP_ONE;
889	break;
890	default:
891	break;
892	}
893
894	return FCmpInst::BAD_FCMP_PREDICATE;
895	}
896
897	Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) {
898	Value *Src0 = II.getArgOperand(i: `0`);
899	Value *Src1 = II.getArgOperand(i: `1`);
900	const ConstantInt *CMask = cast<ConstantInt>(Val: Src1);
901	FPClassTest Mask = static_cast<FPClassTest>(CMask->getZExtValue());
902	const bool IsUnordered = (Mask & fcNan) == fcNan;
903	const bool IsOrdered = (Mask & fcNan) == fcNone;
904	const FPClassTest OrderedMask = Mask & ~fcNan;
905	const FPClassTest OrderedInvertedMask = ~OrderedMask & ~fcNan;
906
907	const bool IsStrict =
908	II.getFunction()->getAttributes().hasFnAttr(Attribute::StrictFP);
909
910	Value *FNegSrc;
911	if (match(V: Src0, P: m_FNeg(X: m_Value(V&: FNegSrc)))) {
912	// is.fpclass (fneg x), mask -> is.fpclass x, (fneg mask)
913
914	II.setArgOperand(i: `1`, v: ConstantInt::get(Ty: Src1->getType(), V: fneg(Mask)));
915	return replaceOperand(I&: II, OpNum: `0`, V: FNegSrc);
916	}
917
918	Value *FAbsSrc;
919	if (match(V: Src0, P: m_FAbs(Op0: m_Value(V&: FAbsSrc)))) {
920	II.setArgOperand(i: `1`, v: ConstantInt::get(Ty: Src1->getType(), V: inverse_fabs(Mask)));
921	return replaceOperand(I&: II, OpNum: `0`, V: FAbsSrc);
922	}
923
924	if ((OrderedMask == fcInf \|\| OrderedInvertedMask == fcInf) &&
925	(IsOrdered \|\| IsUnordered) && !IsStrict) {
926	// is.fpclass(x, fcInf) -> fcmp oeq fabs(x), +inf
927	// is.fpclass(x, ~fcInf) -> fcmp one fabs(x), +inf
928	// is.fpclass(x, fcInf\|fcNan) -> fcmp ueq fabs(x), +inf
929	// is.fpclass(x, ~(fcInf\|fcNan)) -> fcmp une fabs(x), +inf
930	Constant *Inf = ConstantFP::getInfinity(Ty: Src0->getType());
931	FCmpInst::Predicate Pred =
932	IsUnordered ? FCmpInst::FCMP_UEQ : FCmpInst::FCMP_OEQ;
933	if (OrderedInvertedMask == fcInf)
934	Pred = IsUnordered ? FCmpInst::FCMP_UNE : FCmpInst::FCMP_ONE;
935
936	Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::ID: fabs, V: Src0);
937	Value *CmpInf = Builder.CreateFCmp(P: Pred, LHS: Fabs, RHS: Inf);
938	CmpInf->takeName(V: &II);
939	return replaceInstUsesWith(I&: II, V: CmpInf);
940	}
941
942	if ((OrderedMask == fcPosInf \|\| OrderedMask == fcNegInf) &&
943	(IsOrdered \|\| IsUnordered) && !IsStrict) {
944	// is.fpclass(x, fcPosInf) -> fcmp oeq x, +inf
945	// is.fpclass(x, fcNegInf) -> fcmp oeq x, -inf
946	// is.fpclass(x, fcPosInf\|fcNan) -> fcmp ueq x, +inf
947	// is.fpclass(x, fcNegInf\|fcNan) -> fcmp ueq x, -inf
948	Constant *Inf =
949	ConstantFP::getInfinity(Ty: Src0->getType(), Negative: OrderedMask == fcNegInf);
950	Value *EqInf = IsUnordered ? Builder.CreateFCmpUEQ(LHS: Src0, RHS: Inf)
951	: Builder.CreateFCmpOEQ(LHS: Src0, RHS: Inf);
952
953	EqInf->takeName(V: &II);
954	return replaceInstUsesWith(I&: II, V: EqInf);
955	}
956
957	if ((OrderedInvertedMask == fcPosInf \|\| OrderedInvertedMask == fcNegInf) &&
958	(IsOrdered \|\| IsUnordered) && !IsStrict) {
959	// is.fpclass(x, ~fcPosInf) -> fcmp one x, +inf
960	// is.fpclass(x, ~fcNegInf) -> fcmp one x, -inf
961	// is.fpclass(x, ~fcPosInf\|fcNan) -> fcmp une x, +inf
962	// is.fpclass(x, ~fcNegInf\|fcNan) -> fcmp une x, -inf
963	Constant *Inf = ConstantFP::getInfinity(Ty: Src0->getType(),
964	Negative: OrderedInvertedMask == fcNegInf);
965	Value *NeInf = IsUnordered ? Builder.CreateFCmpUNE(LHS: Src0, RHS: Inf)
966	: Builder.CreateFCmpONE(LHS: Src0, RHS: Inf);
967	NeInf->takeName(V: &II);
968	return replaceInstUsesWith(I&: II, V: NeInf);
969	}
970
971	if (Mask == fcNan && !IsStrict) {
972	// Equivalent of isnan. Replace with standard fcmp if we don't care about FP
973	// exceptions.
974	Value *IsNan =
975	Builder.CreateFCmpUNO(LHS: Src0, RHS: ConstantFP::getZero(Ty: Src0->getType()));
976	IsNan->takeName(V: &II);
977	return replaceInstUsesWith(I&: II, V: IsNan);
978	}
979
980	if (Mask == (~fcNan & fcAllFlags) && !IsStrict) {
981	// Equivalent of !isnan. Replace with standard fcmp.
982	Value *FCmp =
983	Builder.CreateFCmpORD(LHS: Src0, RHS: ConstantFP::getZero(Ty: Src0->getType()));
984	FCmp->takeName(V: &II);
985	return replaceInstUsesWith(I&: II, V: FCmp);
986	}
987
988	FCmpInst::Predicate PredType = FCmpInst::BAD_FCMP_PREDICATE;
989
990	// Try to replace with an fcmp with 0
991	//
992	// is.fpclass(x, fcZero) -> fcmp oeq x, 0.0
993	// is.fpclass(x, fcZero \| fcNan) -> fcmp ueq x, 0.0
994	// is.fpclass(x, ~fcZero & ~fcNan) -> fcmp one x, 0.0
995	// is.fpclass(x, ~fcZero) -> fcmp une x, 0.0
996	//
997	// is.fpclass(x, fcPosSubnormal \| fcPosNormal \| fcPosInf) -> fcmp ogt x, 0.0
998	// is.fpclass(x, fcPositive \| fcNegZero) -> fcmp oge x, 0.0
999	//
1000	// is.fpclass(x, fcNegSubnormal \| fcNegNormal \| fcNegInf) -> fcmp olt x, 0.0
1001	// is.fpclass(x, fcNegative \| fcPosZero) -> fcmp ole x, 0.0
1002	//
1003	if (!IsStrict && (IsOrdered \|\| IsUnordered) &&
1004	(PredType = fpclassTestIsFCmp0(Mask: OrderedMask, F: *II.getFunction(),
1005	Ty: Src0->getType())) !=
1006	FCmpInst::BAD_FCMP_PREDICATE) {
1007	Constant *Zero = ConstantFP::getZero(Ty: Src0->getType());
1008	// Equivalent of == 0.
1009	Value *FCmp = Builder.CreateFCmp(
1010	P: IsUnordered ? FCmpInst::getUnorderedPredicate(Pred: PredType) : PredType,
1011	LHS: Src0, RHS: Zero);
1012
1013	FCmp->takeName(V: &II);
1014	return replaceInstUsesWith(I&: II, V: FCmp);
1015	}
1016
1017	KnownFPClass Known = computeKnownFPClass(Val: Src0, Interested: Mask, CtxI: &II);
1018
1019	// Clear test bits we know must be false from the source value.
1020	// fp_class (nnan x), qnan\|snan\|other -> fp_class (nnan x), other
1021	// fp_class (ninf x), ninf\|pinf\|other -> fp_class (ninf x), other
1022	if ((Mask & Known.KnownFPClasses) != Mask) {
1023	II.setArgOperand(
1024	i: `1`, v: ConstantInt::get(Ty: Src1->getType(), V: Mask & Known.KnownFPClasses));
1025	return &II;
1026	}
1027
1028	// If none of the tests which can return false are possible, fold to true.
1029	// fp_class (nnan x), ~(qnan\|snan) -> true
1030	// fp_class (ninf x), ~(ninf\|pinf) -> true
1031	if (Mask == Known.KnownFPClasses)
1032	return replaceInstUsesWith(I&: II, V: ConstantInt::get(Ty: II.getType(), V: true));
1033
1034	return nullptr;
1035	}
1036
1037	static std::optional<bool> getKnownSign(Value Op, Instruction CxtI,
1038	const DataLayout &DL, AssumptionCache *AC,
1039	DominatorTree *DT) {
1040	KnownBits Known = computeKnownBits(V: Op, DL, Depth: `0`, AC, CxtI, DT);
1041	if (Known.isNonNegative())
1042	return false;
1043	if (Known.isNegative())
1044	return true;
1045
1046	Value X, Y;
1047	if (match(V: Op, P: m_NSWSub(L: m_Value(V&: X), R: m_Value(V&: Y))))
1048	return isImpliedByDomCondition(Pred: ICmpInst::ICMP_SLT, LHS: X, RHS: Y, ContextI: CxtI, DL);
1049
1050	return isImpliedByDomCondition(
1051	Pred: ICmpInst::ICMP_SLT, LHS: Op, RHS: Constant::getNullValue(Ty: Op->getType()), ContextI: CxtI, DL);
1052	}
1053
1054	static std::optional<bool> getKnownSignOrZero(Value Op, Instruction CxtI,
1055	const DataLayout &DL,
1056	AssumptionCache *AC,
1057	DominatorTree *DT) {
1058	if (std::optional<bool> Sign = getKnownSign(Op, CxtI, DL, AC, DT))
1059	return Sign;
1060
1061	Value X, Y;
1062	if (match(V: Op, P: m_NSWSub(L: m_Value(V&: X), R: m_Value(V&: Y))))
1063	return isImpliedByDomCondition(Pred: ICmpInst::ICMP_SLE, LHS: X, RHS: Y, ContextI: CxtI, DL);
1064
1065	return std::nullopt;
1066	}
1067
1068	/// Return true if two values \p Op0 and \p Op1 are known to have the same sign.
1069	static bool signBitMustBeTheSame(Value Op0, Value Op1, Instruction *CxtI,
1070	const DataLayout &DL, AssumptionCache *AC,
1071	DominatorTree *DT) {
1072	std::optional<bool> Known1 = getKnownSign(Op: Op1, CxtI, DL, AC, DT);
1073	if (!Known1)
1074	return false;
1075	std::optional<bool> Known0 = getKnownSign(Op: Op0, CxtI, DL, AC, DT);
1076	if (!Known0)
1077	return false;
1078	return Known0 == Known1;
1079	}
1080
1081	/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
1082	/// can trigger other combines.
1083	static Instruction moveAddAfterMinMax(IntrinsicInst II,
1084	InstCombiner::BuilderTy &Builder) {
1085	Intrinsic::ID MinMaxID = II->getIntrinsicID();
1086	assert((MinMaxID == Intrinsic::smax \|\| MinMaxID == Intrinsic::smin \|\|
1087	MinMaxID == Intrinsic::umax \|\| MinMaxID == Intrinsic::umin) &&
1088	"Expected a min or max intrinsic");
1089
1090	// TODO: Match vectors with undef elements, but undef may not propagate.
1091	Value Op0 = II->getArgOperand(i: `0`), Op1 = II->getArgOperand(i: `1`);
1092	Value *X;
1093	const APInt C0, C1;
1094	if (!match(V: Op0, P: m_OneUse(SubPattern: m_Add(L: m_Value(V&: X), R: m_APInt(Res&: C0)))) \|\|
1095	!match(V: Op1, P: m_APInt(Res&: C1)))
1096	return nullptr;
1097
1098	// Check for necessary no-wrap and overflow constraints.
1099	bool IsSigned = MinMaxID == Intrinsic::smax \|\| MinMaxID == Intrinsic::smin;
1100	auto *Add = cast<BinaryOperator>(Val: Op0);
1101	if ((IsSigned && !Add->hasNoSignedWrap()) \|\|
1102	(!IsSigned && !Add->hasNoUnsignedWrap()))
1103	return nullptr;
1104
1105	// If the constant difference overflows, then instsimplify should reduce the
1106	// min/max to the add or C1.
1107	bool Overflow;
1108	APInt CDiff =
1109	IsSigned ? C1->ssub_ov(RHS: C0, Overflow) : C1->usub_ov(RHS: C0, Overflow);
1110	assert(!Overflow && "Expected simplify of min/max");
1111
1112	// min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
1113	// Note: the "mismatched" no-overflow setting does not propagate.
1114	Constant *NewMinMaxC = ConstantInt::get(Ty: II->getType(), V: CDiff);
1115	Value *NewMinMax = Builder.CreateBinaryIntrinsic(ID: MinMaxID, LHS: X, RHS: NewMinMaxC);
1116	return IsSigned ? BinaryOperator::CreateNSWAdd(V1: NewMinMax, V2: Add->getOperand(i_nocapture: `1`))
1117	: BinaryOperator::CreateNUWAdd(V1: NewMinMax, V2: Add->getOperand(i_nocapture: `1`));
1118	}
1119	/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
1120	Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
1121	Type *Ty = MinMax1.getType();
1122
1123	// We are looking for a tree of:
1124	// max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
1125	// Where the min and max could be reversed
1126	Instruction *MinMax2;
1127	BinaryOperator *AddSub;
1128	const APInt MinValue, MaxValue;
1129	if (match(V: &MinMax1, P: m_SMin(L: m_Instruction(I&: MinMax2), R: m_APInt(Res&: MaxValue)))) {
1130	if (!match(V: MinMax2, P: m_SMax(L: m_BinOp(I&: AddSub), R: m_APInt(Res&: MinValue))))
1131	return nullptr;
1132	} else if (match(V: &MinMax1,
1133	P: m_SMax(L: m_Instruction(I&: MinMax2), R: m_APInt(Res&: MinValue)))) {
1134	if (!match(V: MinMax2, P: m_SMin(L: m_BinOp(I&: AddSub), R: m_APInt(Res&: MaxValue))))
1135	return nullptr;
1136	} else
1137	return nullptr;
1138
1139	// Check that the constants clamp a saturate, and that the new type would be
1140	// sensible to convert to.
1141	if (!(MaxValue + `1`).isPowerOf2() \|\| -MinValue != *MaxValue + `1`)
1142	return nullptr;
1143	// In what bitwidth can this be treated as saturating arithmetics?
1144	unsigned NewBitWidth = (*MaxValue + `1`).logBase2() + `1`;
1145	// FIXME: This isn't quite right for vectors, but using the scalar type is a
1146	// good first approximation for what should be done there.
1147	if (!shouldChangeType(FromBitWidth: Ty->getScalarType()->getIntegerBitWidth(), ToBitWidth: NewBitWidth))
1148	return nullptr;
1149
1150	// Also make sure that the inner min/max and the add/sub have one use.
1151	if (!MinMax2->hasOneUse() \|\| !AddSub->hasOneUse())
1152	return nullptr;
1153
1154	// Create the new type (which can be a vector type)
1155	Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);
1156
1157	Intrinsic::ID IntrinsicID;
1158	if (AddSub->getOpcode() == Instruction::Add)
1159	IntrinsicID = Intrinsic::sadd_sat;
1160	else if (AddSub->getOpcode() == Instruction::Sub)
1161	IntrinsicID = Intrinsic::ssub_sat;
1162	else
1163	return nullptr;
1164
1165	// The two operands of the add/sub must be nsw-truncatable to the NewTy. This
1166	// is usually achieved via a sext from a smaller type.
1167	if (ComputeMaxSignificantBits(Op: AddSub->getOperand(i_nocapture: `0`), Depth: `0`, CxtI: AddSub) >
1168	NewBitWidth \|\|
1169	ComputeMaxSignificantBits(Op: AddSub->getOperand(i_nocapture: `1`), Depth: `0`, CxtI: AddSub) > NewBitWidth)
1170	return nullptr;
1171
1172	// Finally create and return the sat intrinsic, truncated to the new type
1173	Function *F = Intrinsic::getDeclaration(M: MinMax1.getModule(), id: IntrinsicID, Tys: NewTy);
1174	Value *AT = Builder.CreateTrunc(V: AddSub->getOperand(i_nocapture: `0`), DestTy: NewTy);
1175	Value *BT = Builder.CreateTrunc(V: AddSub->getOperand(i_nocapture: `1`), DestTy: NewTy);
1176	Value *Sat = Builder.CreateCall(Callee: F, Args: {AT, BT});
1177	return CastInst::Create(Instruction::SExt, S: Sat, Ty);
1178	}
1179
1180
1181	/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
1182	/// can only be one of two possible constant values -- turn that into a select
1183	/// of constants.
1184	static Instruction foldClampRangeOfTwo(IntrinsicInst II,
1185	InstCombiner::BuilderTy &Builder) {
1186	Value I0 = II->getArgOperand(i: `0`), I1 = II->getArgOperand(i: `1`);
1187	Value *X;
1188	const APInt C0, C1;
1189	if (!match(V: I1, P: m_APInt(Res&: C1)) \|\| !I0->hasOneUse())
1190	return nullptr;
1191
1192	CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
1193	switch (II->getIntrinsicID()) {
1194	case Intrinsic::smax:
1195	if (match(V: I0, P: m_SMin(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && C0 == C1 + `1`)
1196	Pred = ICmpInst::ICMP_SGT;
1197	break;
1198	case Intrinsic::smin:
1199	if (match(V: I0, P: m_SMax(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && C1 == C0 + `1`)
1200	Pred = ICmpInst::ICMP_SLT;
1201	break;
1202	case Intrinsic::umax:
1203	if (match(V: I0, P: m_UMin(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && C0 == C1 + `1`)
1204	Pred = ICmpInst::ICMP_UGT;
1205	break;
1206	case Intrinsic::umin:
1207	if (match(V: I0, P: m_UMax(L: m_Value(V&: X), R: m_APInt(Res&: C0))) && C1 == C0 + `1`)
1208	Pred = ICmpInst::ICMP_ULT;
1209	break;
1210	default:
1211	llvm_unreachable("Expected min/max intrinsic");
1212	}
1213	if (Pred == CmpInst::BAD_ICMP_PREDICATE)
1214	return nullptr;
1215
1216	// max (min X, 42), 41 --> X > 41 ? 42 : 41
1217	// min (max X, 42), 43 --> X < 43 ? 42 : 43
1218	Value *Cmp = Builder.CreateICmp(P: Pred, LHS: X, RHS: I1);
1219	return SelectInst::Create(C: Cmp, S1: ConstantInt::get(Ty: II->getType(), V: *C0), S2: I1);
1220	}
1221
1222	/// If this min/max has a constant operand and an operand that is a matching
1223	/// min/max with a constant operand, constant-fold the 2 constant operands.
1224	static Value reassociateMinMaxWithConstants(IntrinsicInst II,
1225	IRBuilderBase &Builder,
1226	const SimplifyQuery &SQ) {
1227	Intrinsic::ID MinMaxID = II->getIntrinsicID();
1228	auto *LHS = dyn_cast<MinMaxIntrinsic>(Val: II->getArgOperand(i: `0`));
1229	if (!LHS)
1230	return nullptr;
1231
1232	Constant C0, C1;
1233	if (!match(V: LHS->getArgOperand(i: `1`), P: m_ImmConstant(C&: C0)) \|\|
1234	!match(V: II->getArgOperand(i: `1`), P: m_ImmConstant(C&: C1)))
1235	return nullptr;
1236
1237	// max (max X, C0), C1 --> max X, (max C0, C1)
1238	// min (min X, C0), C1 --> min X, (min C0, C1)
1239	// umax (smax X, nneg C0), nneg C1 --> smax X, (umax C0, C1)
1240	// smin (umin X, nneg C0), nneg C1 --> umin X, (smin C0, C1)
1241	Intrinsic::ID InnerMinMaxID = LHS->getIntrinsicID();
1242	if (InnerMinMaxID != MinMaxID &&
1243	!(((MinMaxID == Intrinsic::umax && InnerMinMaxID == Intrinsic::smax) \|\|
1244	(MinMaxID == Intrinsic::smin && InnerMinMaxID == Intrinsic::umin)) &&
1245	isKnownNonNegative(V: C0, SQ) && isKnownNonNegative(V: C1, SQ)))
1246	return nullptr;
1247
1248	ICmpInst::Predicate Pred = MinMaxIntrinsic::getPredicate(ID: MinMaxID);
1249	Value *CondC = Builder.CreateICmp(P: Pred, LHS: C0, RHS: C1);
1250	Value *NewC = Builder.CreateSelect(C: CondC, True: C0, False: C1);
1251	return Builder.CreateIntrinsic(ID: InnerMinMaxID, Types: II->getType(),
1252	Args: {LHS->getArgOperand(i: `0`), NewC});
1253	}
1254
1255	/// If this min/max has a matching min/max operand with a constant, try to push
1256	/// the constant operand into this instruction. This can enable more folds.
1257	static Instruction *
1258	reassociateMinMaxWithConstantInOperand(IntrinsicInst *II,
1259	InstCombiner::BuilderTy &Builder) {
1260	// Match and capture a min/max operand candidate.
1261	Value X, Y;
1262	Constant *C;
1263	Instruction *Inner;
1264	if (!match(V: II, P: m_c_MaxOrMin(L: m_OneUse(SubPattern: m_CombineAnd(
1265	L: m_Instruction(I&: Inner),
1266	R: m_MaxOrMin(L: m_Value(V&: X), R: m_ImmConstant(C)))),
1267	R: m_Value(V&: Y))))
1268	return nullptr;
1269
1270	// The inner op must match. Check for constants to avoid infinite loops.
1271	Intrinsic::ID MinMaxID = II->getIntrinsicID();
1272	auto *InnerMM = dyn_cast<IntrinsicInst>(Val: Inner);
1273	if (!InnerMM \|\| InnerMM->getIntrinsicID() != MinMaxID \|\|
1274	match(V: X, P: m_ImmConstant()) \|\| match(V: Y, P: m_ImmConstant()))
1275	return nullptr;
1276
1277	// max (max X, C), Y --> max (max X, Y), C
1278	Function *MinMax =
1279	Intrinsic::getDeclaration(M: II->getModule(), id: MinMaxID, Tys: II->getType());
1280	Value *NewInner = Builder.CreateBinaryIntrinsic(ID: MinMaxID, LHS: X, RHS: Y);
1281	NewInner->takeName(V: Inner);
1282	return CallInst::Create(Func: MinMax, Args: {NewInner, C});
1283	}
1284
1285	/// Reduce a sequence of min/max intrinsics with a common operand.
1286	static Instruction factorizeMinMaxTree(IntrinsicInst II) {
1287	// Match 3 of the same min/max ops. Example: umin(umin(), umin()).
1288	auto *LHS = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: `0`));
1289	auto *RHS = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: `1`));
1290	Intrinsic::ID MinMaxID = II->getIntrinsicID();
1291	if (!LHS \|\| !RHS \|\| LHS->getIntrinsicID() != MinMaxID \|\|
1292	RHS->getIntrinsicID() != MinMaxID \|\|
1293	(!LHS->hasOneUse() && !RHS->hasOneUse()))
1294	return nullptr;
1295
1296	Value *A = LHS->getArgOperand(i: `0`);
1297	Value *B = LHS->getArgOperand(i: `1`);
1298	Value *C = RHS->getArgOperand(i: `0`);
1299	Value *D = RHS->getArgOperand(i: `1`);
1300
1301	// Look for a common operand.
1302	Value MinMaxOp = nullptr*;
1303	Value ThirdOp = nullptr*;
1304	if (LHS->hasOneUse()) {
1305	// If the LHS is only used in this chain and the RHS is used outside of it,
1306	// reuse the RHS min/max because that will eliminate the LHS.
1307	if (D == A \|\| C == A) {
1308	// min(min(a, b), min(c, a)) --> min(min(c, a), b)
1309	// min(min(a, b), min(a, d)) --> min(min(a, d), b)
1310	MinMaxOp = RHS;
1311	ThirdOp = B;
1312	} else if (D == B \|\| C == B) {
1313	// min(min(a, b), min(c, b)) --> min(min(c, b), a)
1314	// min(min(a, b), min(b, d)) --> min(min(b, d), a)
1315	MinMaxOp = RHS;
1316	ThirdOp = A;
1317	}
1318	} else {
1319	assert(RHS->hasOneUse() && "Expected one-use operand");
1320	// Reuse the LHS. This will eliminate the RHS.
1321	if (D == A \|\| D == B) {
1322	// min(min(a, b), min(c, a)) --> min(min(a, b), c)
1323	// min(min(a, b), min(c, b)) --> min(min(a, b), c)
1324	MinMaxOp = LHS;
1325	ThirdOp = C;
1326	} else if (C == A \|\| C == B) {
1327	// min(min(a, b), min(b, d)) --> min(min(a, b), d)
1328	// min(min(a, b), min(c, b)) --> min(min(a, b), d)
1329	MinMaxOp = LHS;
1330	ThirdOp = D;
1331	}
1332	}
1333
1334	if (!MinMaxOp \|\| !ThirdOp)
1335	return nullptr;
1336
1337	Module *Mod = II->getModule();
1338	Function *MinMax = Intrinsic::getDeclaration(M: Mod, id: MinMaxID, Tys: II->getType());
1339	return CallInst::Create(Func: MinMax, Args: { MinMaxOp, ThirdOp });
1340	}
1341
1342	/// If all arguments of the intrinsic are unary shuffles with the same mask,
1343	/// try to shuffle after the intrinsic.
1344	static Instruction *
1345	foldShuffledIntrinsicOperands(IntrinsicInst *II,
1346	InstCombiner::BuilderTy &Builder) {
1347	// TODO: This should be extended to handle other intrinsics like fshl, ctpop,
1348	// etc. Use llvm::isTriviallyVectorizable() and related to determine
1349	// which intrinsics are safe to shuffle?
1350	switch (II->getIntrinsicID()) {
1351	case Intrinsic::smax:
1352	case Intrinsic::smin:
1353	case Intrinsic::umax:
1354	case Intrinsic::umin:
1355	case Intrinsic::fma:
1356	case Intrinsic::fshl:
1357	case Intrinsic::fshr:
1358	break;
1359	default:
1360	return nullptr;
1361	}
1362
1363	Value *X;
1364	ArrayRef<int> Mask;
1365	if (!match(V: II->getArgOperand(i: `0`),
1366	P: m_Shuffle(v1: m_Value(V&: X), v2: m_Undef(), mask: m_Mask (Mask))))
1367	return nullptr;
1368
1369	// At least 1 operand must have 1 use because we are creating 2 instructions.
1370	if (none_of(Range: II->args(), P: [](Value V) { return* V->hasOneUse(); }))
1371	return nullptr;
1372
1373	// See if all arguments are shuffled with the same mask.
1374	SmallVector<Value *, `4`> NewArgs(II->arg_size());
1375	NewArgs [`0`] = X;
1376	Type *SrcTy = X->getType();
1377	for (unsigned i = `1`, e = II->arg_size(); i != e; ++i) {
1378	if (!match(V: II->getArgOperand(i),
1379	P: m_Shuffle(v1: m_Value(V&: X), v2: m_Undef(), mask: m_SpecificMask (Mask))) \|\|
1380	X->getType() != SrcTy)
1381	return nullptr;
1382	NewArgs [i] = X;
1383	}
1384
1385	// intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
1386	Instruction FPI = isa<FPMathOperator>(Val: II) ? II : nullptr*;
1387	Value *NewIntrinsic =
1388	Builder.CreateIntrinsic(ID: II->getIntrinsicID(), Types: SrcTy, Args: NewArgs, FMFSource: FPI);
1389	return new ShuffleVectorInst (NewIntrinsic, Mask);
1390	}
1391
1392	/// Fold the following cases and accepts bswap and bitreverse intrinsics:
1393	/// bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y))
1394	/// bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse)
1395	template <Intrinsic::ID IntrID>
1396	static Instruction foldBitOrderCrossLogicOp(Value V,
1397	InstCombiner::BuilderTy &Builder) {
1398	static_assert(IntrID == Intrinsic::bswap \|\| IntrID == Intrinsic::bitreverse,
1399	"This helper only supports BSWAP and BITREVERSE intrinsics");
1400
1401	Value X, Y;
1402	// Find bitwise logic op. Check that it is a BinaryOperator explicitly so we
1403	// don't match ConstantExpr that aren't meaningful for this transform.
1404	if (match(V, P: m_OneUse(SubPattern: m_BitwiseLogic(L: m_Value(V&: X), R: m_Value(V&: Y)))) &&
1405	isa<BinaryOperator>(Val: V)) {
1406	Value OldReorderX, OldReorderY;
1407	BinaryOperator::BinaryOps Op = cast<BinaryOperator>(Val: V)->getOpcode();
1408
1409	// If both X and Y are bswap/bitreverse, the transform reduces the number
1410	// of instructions even if there's multiuse.
1411	// If only one operand is bswap/bitreverse, we need to ensure the operand
1412	// have only one use.
1413	if (match(X, m_Intrinsic<IntrID>(m_Value(V&: OldReorderX))) &&
1414	match(Y, m_Intrinsic<IntrID>(m_Value(V&: OldReorderY)))) {
1415	return BinaryOperator::Create(Op, S1: OldReorderX, S2: OldReorderY);
1416	}
1417
1418	if (match(X, m_OneUse(m_Intrinsic<IntrID>(m_Value(V&: OldReorderX))))) {
1419	Value *NewReorder = Builder.CreateUnaryIntrinsic(ID: IntrID, V: Y);
1420	return BinaryOperator::Create(Op, S1: OldReorderX, S2: NewReorder);
1421	}
1422
1423	if (match(Y, m_OneUse(m_Intrinsic<IntrID>(m_Value(V&: OldReorderY))))) {
1424	Value *NewReorder = Builder.CreateUnaryIntrinsic(ID: IntrID, V: X);
1425	return BinaryOperator::Create(Op, S1: NewReorder, S2: OldReorderY);
1426	}
1427	}
1428	return nullptr;
1429	}
1430
1431	/// CallInst simplification. This mostly only handles folding of intrinsic
1432	/// instructions. For normal calls, it allows visitCallBase to do the heavy
1433	/// lifting.
1434	Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
1435	// Don't try to simplify calls without uses. It will not do anything useful,
1436	// but will result in the following folds being skipped.
1437	if (!CI.use_empty()) {
1438	SmallVector<Value *, `4`> Args;
1439	Args.reserve(N: CI.arg_size());
1440	for (Value *Op : CI.args())
1441	Args.push_back(Elt: Op);
1442	if (Value *V = simplifyCall(Call: &CI, Callee: CI.getCalledOperand(), Args,
1443	Q: SQ.getWithInstruction(I: &CI)))
1444	return replaceInstUsesWith(I&: CI, V);
1445	}
1446
1447	if (Value *FreedOp = getFreedOperand(CB: &CI, TLI: &TLI))
1448	return visitFree(FI&: CI, FreedOp);
1449
1450	// If the caller function (i.e. us, the function that contains this CallInst)
1451	// is nounwind, mark the call as nounwind, even if the callee isn't.
1452	if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
1453	CI.setDoesNotThrow();
1454	return &CI;
1455	}
1456
1457	IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: &CI);
1458	if (!II) return visitCallBase(Call&: CI);
1459
1460	// For atomic unordered mem intrinsics if len is not a positive or
1461	// not a multiple of element size then behavior is undefined.
1462	if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(Val: II))
1463	if (ConstantInt *NumBytes = dyn_cast<ConstantInt>(Val: AMI->getLength()))
1464	if (NumBytes->isNegative() \|\|
1465	(NumBytes->getZExtValue() % AMI->getElementSizeInBytes() != `0`)) {
1466	CreateNonTerminatorUnreachable(InsertAt: AMI);
1467	assert(AMI->getType()->isVoidTy() &&
1468	"non void atomic unordered mem intrinsic");
1469	return eraseInstFromFunction(I&: *AMI);
1470	}
1471
1472	// Intrinsics cannot occur in an invoke or a callbr, so handle them here
1473	// instead of in visitCallBase.
1474	if (auto *MI = dyn_cast<AnyMemIntrinsic>(Val: II)) {
1475	bool Changed = false;
1476
1477	// memmove/cpy/set of zero bytes is a noop.
1478	if (Constant *NumBytes = dyn_cast<Constant>(Val: MI->getLength())) {
1479	if (NumBytes->isNullValue())
1480	return eraseInstFromFunction(I&: CI);
1481	}
1482
1483	// No other transformations apply to volatile transfers.
1484	if (auto *M = dyn_cast<MemIntrinsic>(Val: MI))
1485	if (M->isVolatile())
1486	return nullptr;
1487
1488	// If we have a memmove and the source operation is a constant global,
1489	// then the source and dest pointers can't alias, so we can change this
1490	// into a call to memcpy.
1491	if (auto *MMI = dyn_cast<AnyMemMoveInst>(Val: MI)) {
1492	if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(Val: MMI->getSource()))
1493	if (GVSrc->isConstant()) {
1494	Module *M = CI.getModule();
1495	Intrinsic::ID MemCpyID =
1496	isa<AtomicMemMoveInst>(MMI)
1497	? Intrinsic::memcpy_element_unordered_atomic
1498	: Intrinsic::memcpy;
1499	Type *Tys[`3`] = { CI.getArgOperand(i: `0`)->getType(),
1500	CI.getArgOperand(i: `1`)->getType(),
1501	CI.getArgOperand(i: `2`)->getType() };
1502	CI.setCalledFunction(Intrinsic::getDeclaration(M, id: MemCpyID, Tys));
1503	Changed = true;
1504	}
1505	}
1506
1507	if (AnyMemTransferInst *MTI = dyn_cast<AnyMemTransferInst>(Val: MI)) {
1508	// memmove(x,x,size) -> noop.
1509	if (MTI->getSource() == MTI->getDest())
1510	return eraseInstFromFunction(I&: CI);
1511	}
1512
1513	// If we can determine a pointer alignment that is bigger than currently
1514	// set, update the alignment.
1515	if (auto *MTI = dyn_cast<AnyMemTransferInst>(Val: MI)) {
1516	if (Instruction *I = SimplifyAnyMemTransfer(MI: MTI))
1517	return I;
1518	} else if (auto *MSI = dyn_cast<AnyMemSetInst>(Val: MI)) {
1519	if (Instruction *I = SimplifyAnyMemSet(MI: MSI))
1520	return I;
1521	}
1522
1523	if (Changed) return II;
1524	}
1525
1526	// For fixed width vector result intrinsics, use the generic demanded vector
1527	// support.
1528	if (auto *IIFVTy = dyn_cast<FixedVectorType>(Val: II->getType())) {
1529	auto VWidth = IIFVTy->getNumElements();
1530	APInt PoisonElts(VWidth, `0`);
1531	APInt AllOnesEltMask(APInt::getAllOnes(numBits: VWidth));
1532	if (Value *V = SimplifyDemandedVectorElts(V: II, DemandedElts: AllOnesEltMask, PoisonElts)) {
1533	if (V != II)
1534	return replaceInstUsesWith(I&: *II, V);
1535	return II;
1536	}
1537	}
1538
1539	if (II->isCommutative()) {
1540	if (auto Pair = matchSymmetricPair(LHS: II->getOperand(i_nocapture: `0`), RHS: II->getOperand(i_nocapture: `1`))) {
1541	replaceOperand(I&: *II, OpNum: `0`, V: Pair ->first);
1542	replaceOperand(I&: *II, OpNum: `1`, V: Pair ->second);
1543	return II;
1544	}
1545
1546	if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(Call&: CI))
1547	return NewCall;
1548	}
1549
1550	// Unused constrained FP intrinsic calls may have declared side effect, which
1551	// prevents it from being removed. In some cases however the side effect is
1552	// actually absent. To detect this case, call SimplifyConstrainedFPCall. If it
1553	// returns a replacement, the call may be removed.
1554	if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(Val: CI)) {
1555	if (simplifyConstrainedFPCall(Call: &CI, Q: SQ.getWithInstruction(I: &CI)))
1556	return eraseInstFromFunction(I&: CI);
1557	}
1558
1559	Intrinsic::ID IID = II->getIntrinsicID();
1560	switch (IID) {
1561	case Intrinsic::objectsize: {
1562	SmallVector<Instruction *> InsertedInstructions;
1563	if (Value V = lowerObjectSizeCall(ObjectSize: II, DL, TLI: &TLI, AA, /MustSucceed=/*false,
1564	InsertedInstructions: &InsertedInstructions)) {
1565	for (Instruction *Inserted : InsertedInstructions)
1566	Worklist.add(I: Inserted);
1567	return replaceInstUsesWith(I&: CI, V);
1568	}
1569	return nullptr;
1570	}
1571	case Intrinsic::abs: {
1572	Value *IIOperand = II->getArgOperand(i: `0`);
1573	bool IntMinIsPoison = cast<Constant>(Val: II->getArgOperand(i: `1`))->isOneValue();
1574
1575	// abs(-x) -> abs(x)
1576	// TODO: Copy nsw if it was present on the neg?
1577	Value *X;
1578	if (match(V: IIOperand, P: m_Neg(V: m_Value(V&: X))))
1579	return replaceOperand(I&: *II, OpNum: `0`, V: X);
1580	if (match(V: IIOperand, P: m_Select(C: m_Value(), L: m_Value(V&: X), R: m_Neg(V: m_Deferred(V: X)))))
1581	return replaceOperand(I&: *II, OpNum: `0`, V: X);
1582	if (match(V: IIOperand, P: m_Select(C: m_Value(), L: m_Neg(V: m_Value(V&: X)), R: m_Deferred(V: X))))
1583	return replaceOperand(I&: *II, OpNum: `0`, V: X);
1584
1585	Value *Y;
1586	// abs(a * abs(b)) -> abs(a * b)
1587	if (match(IIOperand,
1588	m_OneUse(m_c_Mul(m_Value(X),
1589	m_Intrinsic<Intrinsic::abs>(m_Value(Y)))))) {
1590	bool NSW =
1591	cast<Instruction>(Val: IIOperand)->hasNoSignedWrap() && IntMinIsPoison;
1592	auto *XY = NSW ? Builder.CreateNSWMul(LHS: X, RHS: Y) : Builder.CreateMul(LHS: X, RHS: Y);
1593	return replaceOperand(I&: *II, OpNum: `0`, V: XY);
1594	}
1595
1596	if (std::optional<bool> Known =
1597	getKnownSignOrZero(Op: IIOperand, CxtI: II, DL, AC: &AC, DT: &DT)) {
1598	// abs(x) -> x if x >= 0 (include abs(x-y) --> x - y where x >= y)
1599	// abs(x) -> x if x > 0 (include abs(x-y) --> x - y where x > y)
1600	if (!*Known)
1601	return replaceInstUsesWith(I&: *II, V: IIOperand);
1602
1603	// abs(x) -> -x if x < 0
1604	// abs(x) -> -x if x <= 0 (include abs(x-y) --> y - x where x <= y)
1605	if (IntMinIsPoison)
1606	return BinaryOperator::CreateNSWNeg(Op: IIOperand);
1607	return BinaryOperator::CreateNeg(Op: IIOperand);
1608	}
1609
1610	// abs (sext X) --> zext (abs X*)
1611	// Clear the IsIntMin (nsw) bit on the abs to allow narrowing.
1612	if (match(V: IIOperand, P: m_OneUse(SubPattern: m_SExt(Op: m_Value(V&: X))))) {
1613	Value *NarrowAbs =
1614	Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, Builder.getFalse());
1615	return CastInst::Create(Instruction::ZExt, S: NarrowAbs, Ty: II->getType());
1616	}
1617
1618	// Match a complicated way to check if a number is odd/even:
1619	// abs (srem X, 2) --> and X, 1
1620	const APInt *C;
1621	if (match(V: IIOperand, P: m_SRem(L: m_Value(V&: X), R: m_APInt(Res&: C))) && *C == `2`)
1622	return BinaryOperator::CreateAnd(V1: X, V2: ConstantInt::get(Ty: II->getType(), V: `1`));
1623
1624	break;
1625	}
1626	case Intrinsic::umin: {
1627	Value I0 = II->getArgOperand(i: `0`), I1 = II->getArgOperand(i: `1`);
1628	// umin(x, 1) == zext(x != 0)
1629	if (match(V: I1, P: m_One())) {
1630	assert(II->getType()->getScalarSizeInBits() != `1` &&
1631	"Expected simplify of umin with max constant");
1632	Value *Zero = Constant::getNullValue(Ty: I0->getType());
1633	Value *Cmp = Builder.CreateICmpNE(LHS: I0, RHS: Zero);
1634	return CastInst::Create(Instruction::ZExt, S: Cmp, Ty: II->getType());
1635	}
1636	[[fallthrough]];
1637	}
1638	case Intrinsic::umax: {
1639	Value I0 = II->getArgOperand(i: `0`), I1 = II->getArgOperand(i: `1`);
1640	Value X, Y;
1641	if (match(V: I0, P: m_ZExt(Op: m_Value(V&: X))) && match(V: I1, P: m_ZExt(Op: m_Value(V&: Y))) &&
1642	(I0->hasOneUse() \|\| I1->hasOneUse()) && X->getType() == Y->getType()) {
1643	Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y);
1644	return CastInst::Create(Instruction::ZExt, S: NarrowMaxMin, Ty: II->getType());
1645	}
1646	Constant *C;
1647	if (match(V: I0, P: m_ZExt(Op: m_Value(V&: X))) && match(V: I1, P: m_Constant(C)) &&
1648	I0->hasOneUse()) {
1649	if (Constant *NarrowC = getLosslessUnsignedTrunc(C, TruncTy: X->getType())) {
1650	Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: NarrowC);
1651	return CastInst::Create(Instruction::ZExt, S: NarrowMaxMin, Ty: II->getType());
1652	}
1653	}
1654	// If both operands of unsigned min/max are sign-extended, it is still ok
1655	// to narrow the operation.
1656	[[fallthrough]];
1657	}
1658	case Intrinsic::smax:
1659	case Intrinsic::smin: {
1660	Value I0 = II->getArgOperand(i: `0`), I1 = II->getArgOperand(i: `1`);
1661	Value X, Y;
1662	if (match(V: I0, P: m_SExt(Op: m_Value(V&: X))) && match(V: I1, P: m_SExt(Op: m_Value(V&: Y))) &&
1663	(I0->hasOneUse() \|\| I1->hasOneUse()) && X->getType() == Y->getType()) {
1664	Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y);
1665	return CastInst::Create(Instruction::SExt, S: NarrowMaxMin, Ty: II->getType());
1666	}
1667
1668	Constant *C;
1669	if (match(V: I0, P: m_SExt(Op: m_Value(V&: X))) && match(V: I1, P: m_Constant(C)) &&
1670	I0->hasOneUse()) {
1671	if (Constant *NarrowC = getLosslessSignedTrunc(C, TruncTy: X->getType())) {
1672	Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: NarrowC);
1673	return CastInst::Create(Instruction::SExt, S: NarrowMaxMin, Ty: II->getType());
1674	}
1675	}
1676
1677	// umin(i1 X, i1 Y) -> and i1 X, Y
1678	// smax(i1 X, i1 Y) -> and i1 X, Y
1679	if ((IID == Intrinsic::umin \|\| IID == Intrinsic::smax) &&
1680	II->getType()->isIntOrIntVectorTy(`1`)) {
1681	return BinaryOperator::CreateAnd(V1: I0, V2: I1);
1682	}
1683
1684	// umax(i1 X, i1 Y) -> or i1 X, Y
1685	// smin(i1 X, i1 Y) -> or i1 X, Y
1686	if ((IID == Intrinsic::umax \|\| IID == Intrinsic::smin) &&
1687	II->getType()->isIntOrIntVectorTy(`1`)) {
1688	return BinaryOperator::CreateOr(V1: I0, V2: I1);
1689	}
1690
1691	if (IID == Intrinsic::smax \|\| IID == Intrinsic::smin) {
1692	// smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
1693	// smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
1694	// TODO: Canonicalize neg after min/max if I1 is constant.
1695	if (match(V: I0, P: m_NSWNeg(V: m_Value(V&: X))) && match(V: I1, P: m_NSWNeg(V: m_Value(V&: Y))) &&
1696	(I0->hasOneUse() \|\| I1->hasOneUse())) {
1697	Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMaxID: IID);
1698	Value *InvMaxMin = Builder.CreateBinaryIntrinsic(ID: InvID, LHS: X, RHS: Y);
1699	return BinaryOperator::CreateNSWNeg(Op: InvMaxMin);
1700	}
1701	}
1702
1703	// (umax X, (xor X, Pow2))
1704	// -> (or X, Pow2)
1705	// (umin X, (xor X, Pow2))
1706	// -> (and X, ~Pow2)
1707	// (smax X, (xor X, Pos_Pow2))
1708	// -> (or X, Pos_Pow2)
1709	// (smin X, (xor X, Pos_Pow2))
1710	// -> (and X, ~Pos_Pow2)
1711	// (smax X, (xor X, Neg_Pow2))
1712	// -> (and X, ~Neg_Pow2)
1713	// (smin X, (xor X, Neg_Pow2))
1714	// -> (or X, Neg_Pow2)
1715	if ((match(V: I0, P: m_c_Xor(L: m_Specific(V: I1), R: m_Value(V&: X))) \|\|
1716	match(V: I1, P: m_c_Xor(L: m_Specific(V: I0), R: m_Value(V&: X)))) &&
1717	isKnownToBeAPowerOfTwo(V: X, / OrZero / true)) {
1718	bool UseOr = IID == Intrinsic::smax \|\| IID == Intrinsic::umax;
1719	bool UseAndN = IID == Intrinsic::smin \|\| IID == Intrinsic::umin;
1720
1721	if (IID == Intrinsic::smax \|\| IID == Intrinsic::smin) {
1722	auto KnownSign = getKnownSign(Op: X, CxtI: II, DL, AC: &AC, DT: &DT);
1723	if (KnownSign == std::nullopt) {
1724	UseOr = false;
1725	UseAndN = false;
1726	} else if (KnownSign /* true is Signed. /) {
1727	UseOr ^= true;
1728	UseAndN ^= true;
1729	Type *Ty = I0->getType();
1730	// Negative power of 2 must be IntMin. It's possible to be able to
1731	// prove negative / power of 2 without actually having known bits, so
1732	// just get the value by hand.
1733	X = Constant::getIntegerValue(
1734	Ty, V: APInt::getSignedMinValue(numBits: Ty->getScalarSizeInBits()));
1735	}
1736	}
1737	if (UseOr)
1738	return BinaryOperator::CreateOr(V1: I0, V2: X);
1739	else if (UseAndN)
1740	return BinaryOperator::CreateAnd(V1: I0, V2: Builder.CreateNot(V: X));
1741	}
1742
1743	// If we can eliminate ~A and Y is free to invert:
1744	// max ~A, Y --> ~(min A, ~Y)
1745	//
1746	// Examples:
1747	// max ~A, ~Y --> ~(min A, Y)
1748	// max ~A, C --> ~(min A, ~C)
1749	// max ~A, (max ~Y, ~Z) --> ~min( A, (min Y, Z))
1750	auto moveNotAfterMinMax = [&](Value X, Value Y) -> Instruction * {
1751	Value *A;
1752	if (match(V: X, P: m_OneUse(SubPattern: m_Not(V: m_Value(V&: A)))) &&
1753	!isFreeToInvert(V: A, WillInvertAllUses: A->hasOneUse())) {
1754	if (Value *NotY = getFreelyInverted(V: Y, WillInvertAllUses: Y->hasOneUse(), Builder: &Builder)) {
1755	Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMaxID: IID);
1756	Value *InvMaxMin = Builder.CreateBinaryIntrinsic(ID: InvID, LHS: A, RHS: NotY);
1757	return BinaryOperator::CreateNot(Op: InvMaxMin);
1758	}
1759	}
1760	return nullptr;
1761	};
1762
1763	if (Instruction *I = moveNotAfterMinMax (I0, I1))
1764	return I;
1765	if (Instruction *I = moveNotAfterMinMax (I1, I0))
1766	return I;
1767
1768	if (Instruction *I = moveAddAfterMinMax(II, Builder))
1769	return I;
1770
1771	// minmax (X & NegPow2C, Y & NegPow2C) --> minmax(X, Y) & NegPow2C
1772	const APInt *RHSC;
1773	if (match(V: I0, P: m_OneUse(SubPattern: m_And(L: m_Value(V&: X), R: m_NegatedPower2(V&: RHSC)))) &&
1774	match(V: I1, P: m_OneUse(SubPattern: m_And(L: m_Value(V&: Y), R: m_SpecificInt(V: *RHSC)))))
1775	return BinaryOperator::CreateAnd(V1: Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y),
1776	V2: ConstantInt::get(Ty: II->getType(), V: *RHSC));
1777
1778	// smax(X, -X) --> abs(X)
1779	// smin(X, -X) --> -abs(X)
1780	// umax(X, -X) --> -abs(X)
1781	// umin(X, -X) --> abs(X)
1782	if (isKnownNegation(X: I0, Y: I1)) {
1783	// We can choose either operand as the input to abs(), but if we can
1784	// eliminate the only use of a value, that's better for subsequent
1785	// transforms/analysis.
1786	if (I0->hasOneUse() && !I1->hasOneUse())
1787	std::swap(a&: I0, b&: I1);
1788
1789	// This is some variant of abs(). See if we can propagate 'nsw' to the abs
1790	// operation and potentially its negation.
1791	bool IntMinIsPoison = isKnownNegation(X: I0, Y: I1, / NeedNSW / true);
1792	Value *Abs = Builder.CreateBinaryIntrinsic(
1793	Intrinsic::abs, I0,
1794	ConstantInt::getBool(II->getContext(), IntMinIsPoison));
1795
1796	// We don't have a "nabs" intrinsic, so negate if needed based on the
1797	// max/min operation.
1798	if (IID == Intrinsic::smin \|\| IID == Intrinsic::umax)
1799	Abs = Builder.CreateNeg(V: Abs, Name: "nabs", HasNSW: IntMinIsPoison);
1800	return replaceInstUsesWith(I&: CI, V: Abs);
1801	}
1802
1803	if (Instruction *Sel = foldClampRangeOfTwo(II, Builder))
1804	return Sel;
1805
1806	if (Instruction SAdd = matchSAddSubSat(MinMax1&: II))
1807	return SAdd;
1808
1809	if (Value *NewMinMax = reassociateMinMaxWithConstants(II, Builder, SQ))
1810	return replaceInstUsesWith(I&: *II, V: NewMinMax);
1811
1812	if (Instruction *R = reassociateMinMaxWithConstantInOperand(II, Builder))
1813	return R;
1814
1815	if (Instruction *NewMinMax = factorizeMinMaxTree(II))
1816	return NewMinMax;
1817
1818	// Try to fold minmax with constant RHS based on range information
1819	if (match(V: I1, P: m_APIntAllowPoison(Res&: RHSC))) {
1820	ICmpInst::Predicate Pred =
1821	ICmpInst::getNonStrictPredicate(pred: MinMaxIntrinsic::getPredicate(ID: IID));
1822	bool IsSigned = MinMaxIntrinsic::isSigned(ID: IID);
1823	ConstantRange LHS_CR = computeConstantRangeIncludingKnownBits(
1824	V: I0, ForSigned: IsSigned, SQ: SQ.getWithInstruction(I: II));
1825	if (!LHS_CR.isFullSet()) {
1826	if (LHS_CR.icmp(Pred, Other: *RHSC))
1827	return replaceInstUsesWith(I&: *II, V: I0);
1828	if (LHS_CR.icmp(Pred: ICmpInst::getSwappedPredicate(pred: Pred), Other: *RHSC))
1829	return replaceInstUsesWith(I&: *II,
1830	V: ConstantInt::get(Ty: II->getType(), V: *RHSC));
1831	}
1832	}
1833
1834	break;
1835	}
1836	case Intrinsic::bitreverse: {
1837	Value *IIOperand = II->getArgOperand(i: `0`);
1838	// bitrev (zext i1 X to ?) --> X ? SignBitC : 0
1839	Value *X;
1840	if (match(V: IIOperand, P: m_ZExt(Op: m_Value(V&: X))) &&
1841	X->getType()->isIntOrIntVectorTy(BitWidth: `1`)) {
1842	Type *Ty = II->getType();
1843	APInt SignBit = APInt::getSignMask(BitWidth: Ty->getScalarSizeInBits());
1844	return SelectInst::Create(C: X, S1: ConstantInt::get(Ty, V: SignBit),
1845	S2: ConstantInt::getNullValue(Ty));
1846	}
1847
1848	if (Instruction *crossLogicOpFold =
1849	foldBitOrderCrossLogicOp<Intrinsic::bitreverse>(IIOperand, Builder))
1850	return crossLogicOpFold;
1851
1852	break;
1853	}
1854	case Intrinsic::bswap: {
1855	Value *IIOperand = II->getArgOperand(i: `0`);
1856
1857	// Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
1858	// inverse-shift-of-bswap:
1859	// bswap (shl X, Y) --> lshr (bswap X), Y
1860	// bswap (lshr X, Y) --> shl (bswap X), Y
1861	Value X, Y;
1862	if (match(V: IIOperand, P: m_OneUse(SubPattern: m_LogicalShift(L: m_Value(V&: X), R: m_Value(V&: Y))))) {
1863	unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits();
1864	if (MaskedValueIsZero(V: Y, Mask: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: `3`))) {
1865	Value *NewSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
1866	BinaryOperator::BinaryOps InverseShift =
1867	cast<BinaryOperator>(Val: IIOperand)->getOpcode() == Instruction::Shl
1868	? Instruction::LShr
1869	: Instruction::Shl;
1870	return BinaryOperator::Create(Op: InverseShift, S1: NewSwap, S2: Y);
1871	}
1872	}
1873
1874	KnownBits Known = computeKnownBits(V: IIOperand, Depth: `0`, CxtI: II);
1875	uint64_t LZ = alignDown(Value: Known.countMinLeadingZeros(), Align: `8`);
1876	uint64_t TZ = alignDown(Value: Known.countMinTrailingZeros(), Align: `8`);
1877	unsigned BW = Known.getBitWidth();
1878
1879	// bswap(x) -> shift(x) if x has exactly one "active byte"
1880	if (BW - LZ - TZ == `8`) {
1881	assert(LZ != TZ && "active byte cannot be in the middle");
1882	if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
1883	return BinaryOperator::CreateNUWShl(
1884	V1: IIOperand, V2: ConstantInt::get(Ty: IIOperand->getType(), V: LZ - TZ));
1885	// -> lshr(x) if the "active byte" is in the high part of x
1886	return BinaryOperator::CreateExactLShr(
1887	V1: IIOperand, V2: ConstantInt::get(Ty: IIOperand->getType(), V: TZ - LZ));
1888	}
1889
1890	// bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
1891	if (match(V: IIOperand, P: m_Trunc(Op: m_BSwap(Op0: m_Value(V&: X))))) {
1892	unsigned C = X->getType()->getScalarSizeInBits() - BW;
1893	Value *CV = ConstantInt::get(Ty: X->getType(), V: C);
1894	Value *V = Builder.CreateLShr(LHS: X, RHS: CV);
1895	return new TruncInst (V, IIOperand->getType());
1896	}
1897
1898	if (Instruction *crossLogicOpFold =
1899	foldBitOrderCrossLogicOp<Intrinsic::bswap>(IIOperand, Builder)) {
1900	return crossLogicOpFold;
1901	}
1902
1903	// Try to fold into bitreverse if bswap is the root of the expression tree.
1904	if (Instruction BitOp = matchBSwapOrBitReverse(I&: II, /MatchBSwaps/ false,
1905	/MatchBitReversals/ true))
1906	return BitOp;
1907	break;
1908	}
1909	case Intrinsic::masked_load:
1910	if (Value SimplifiedMaskedOp = simplifyMaskedLoad(II&: II))
1911	return replaceInstUsesWith(I&: CI, V: SimplifiedMaskedOp);
1912	break;
1913	case Intrinsic::masked_store:
1914	return simplifyMaskedStore(II&: *II);
1915	case Intrinsic::masked_gather:
1916	return simplifyMaskedGather(II&: *II);
1917	case Intrinsic::masked_scatter:
1918	return simplifyMaskedScatter(II&: *II);
1919	case Intrinsic::launder_invariant_group:
1920	case Intrinsic::strip_invariant_group:
1921	if (auto SkippedBarrier = simplifyInvariantGroupIntrinsic(II&: II, IC&: *this))
1922	return replaceInstUsesWith(I&: *II, V: SkippedBarrier);
1923	break;
1924	case Intrinsic::powi:
1925	if (ConstantInt *Power = dyn_cast<ConstantInt>(Val: II->getArgOperand(i: `1`))) {
1926	// 0 and 1 are handled in instsimplify
1927	// powi(x, -1) -> 1/x
1928	if (Power->isMinusOne())
1929	return BinaryOperator::CreateFDivFMF(V1: ConstantFP::get(Ty: CI.getType(), V: `1.0`),
1930	V2: II->getArgOperand(i: `0`), FMFSource: II);
1931	// powi(x, 2) -> x*x
1932	if (Power->equalsInt(V: `2`))
1933	return BinaryOperator::CreateFMulFMF(V1: II->getArgOperand(i: `0`),
1934	V2: II->getArgOperand(i: `0`), FMFSource: II);
1935
1936	if (!Power->getValue()[`0`]) {
1937	Value *X;
1938	// If power is even:
1939	// powi(-x, p) -> powi(x, p)
1940	// powi(fabs(x), p) -> powi(x, p)
1941	// powi(copysign(x, y), p) -> powi(x, p)
1942	if (match(II->getArgOperand(`0`), m_FNeg(m_Value(X))) \|\|
1943	match(II->getArgOperand(`0`), m_FAbs(m_Value(X))) \|\|
1944	match(II->getArgOperand(`0`),
1945	m_Intrinsic<Intrinsic::copysign>(m_Value(X), m_Value())))
1946	return replaceOperand(I&: *II, OpNum: `0`, V: X);
1947	}
1948	}
1949	break;
1950
1951	case Intrinsic::cttz:
1952	case Intrinsic::ctlz:
1953	if (auto I = foldCttzCtlz(II&: II, IC&: *this))
1954	return I;
1955	break;
1956
1957	case Intrinsic::ctpop:
1958	if (auto I = foldCtpop(II&: II, IC&: *this))
1959	return I;
1960	break;
1961
1962	case Intrinsic::fshl:
1963	case Intrinsic::fshr: {
1964	Value Op0 = II->getArgOperand(i: `0`), Op1 = II->getArgOperand(i: `1`);
1965	Type *Ty = II->getType();
1966	unsigned BitWidth = Ty->getScalarSizeInBits();
1967	Constant *ShAmtC;
1968	if (match(V: II->getArgOperand(i: `2`), P: m_ImmConstant(C&: ShAmtC))) {
1969	// Canonicalize a shift amount constant operand to modulo the bit-width.
1970	Constant *WidthC = ConstantInt::get(Ty, V: BitWidth);
1971	Constant *ModuloC =
1972	ConstantFoldBinaryOpOperands(Opcode: Instruction::URem, LHS: ShAmtC, RHS: WidthC, DL);
1973	if (!ModuloC)
1974	return nullptr;
1975	if (ModuloC != ShAmtC)
1976	return replaceOperand(I&: *II, OpNum: `2`, V: ModuloC);
1977
1978	assert(match(ConstantExpr::getICmp(ICmpInst::ICMP_UGT, WidthC, ShAmtC),
1979	m_One()) &&
1980	"Shift amount expected to be modulo bitwidth");
1981
1982	// Canonicalize funnel shift right by constant to funnel shift left. This
1983	// is not entirely arbitrary. For historical reasons, the backend may
1984	// recognize rotate left patterns but miss rotate right patterns.
1985	if (IID == Intrinsic::fshr) {
1986	// fshr X, Y, C --> fshl X, Y, (BitWidth - C) if C is not zero.
1987	if (!isKnownNonZero(V: ShAmtC, Q: SQ.getWithInstruction(I: II)))
1988	return nullptr;
1989
1990	Constant *LeftShiftC = ConstantExpr::getSub(C1: WidthC, C2: ShAmtC);
1991	Module *Mod = II->getModule();
1992	Function *Fshl = Intrinsic::getDeclaration(Mod, Intrinsic::fshl, Ty);
1993	return CallInst::Create(Func: Fshl, Args: { Op0, Op1, LeftShiftC });
1994	}
1995	assert(IID == Intrinsic::fshl &&
1996	"All funnel shifts by simple constants should go left");
1997
1998	// fshl(X, 0, C) --> shl X, C
1999	// fshl(X, undef, C) --> shl X, C
2000	if (match(V: Op1, P: m_ZeroInt()) \|\| match(V: Op1, P: m_Undef()))
2001	return BinaryOperator::CreateShl(V1: Op0, V2: ShAmtC);
2002
2003	// fshl(0, X, C) --> lshr X, (BW-C)
2004	// fshl(undef, X, C) --> lshr X, (BW-C)
2005	if (match(V: Op0, P: m_ZeroInt()) \|\| match(V: Op0, P: m_Undef()))
2006	return BinaryOperator::CreateLShr(V1: Op1,
2007	V2: ConstantExpr::getSub(C1: WidthC, C2: ShAmtC));
2008
2009	// fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
2010	if (Op0 == Op1 && BitWidth == `16` && match(V: ShAmtC, P: m_SpecificInt(V: `8`))) {
2011	Module *Mod = II->getModule();
2012	Function *Bswap = Intrinsic::getDeclaration(Mod, Intrinsic::bswap, Ty);
2013	return CallInst::Create(Func: Bswap, Args: { Op0 });
2014	}
2015	if (Instruction *BitOp =
2016	matchBSwapOrBitReverse(I&: II, /MatchBSwaps/* true,
2017	/MatchBitReversals/ true))
2018	return BitOp;
2019	}
2020
2021	// Left or right might be masked.
2022	if (SimplifyDemandedInstructionBits(Inst&: *II))
2023	return &CI;
2024
2025	// The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
2026	// so only the low bits of the shift amount are demanded if the bitwidth is
2027	// a power-of-2.
2028	if (!isPowerOf2_32(Value: BitWidth))
2029	break;
2030	APInt Op2Demanded = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: Log2_32_Ceil(Value: BitWidth));
2031	KnownBits Op2Known(BitWidth);
2032	if (SimplifyDemandedBits(I: II, Op: `2`, DemandedMask: Op2Demanded, Known&: Op2Known))
2033	return &CI;
2034	break;
2035	}
2036	case Intrinsic::ptrmask: {
2037	unsigned BitWidth = DL.getPointerTypeSizeInBits(II->getType());
2038	KnownBits Known(BitWidth);
2039	if (SimplifyDemandedInstructionBits(Inst&: *II, Known))
2040	return II;
2041
2042	Value InnerPtr, InnerMask;
2043	bool Changed = false;
2044	// Combine:
2045	// (ptrmask (ptrmask p, A), B)
2046	// -> (ptrmask p, (and A, B))
2047	if (match(II->getArgOperand(`0`),
2048	m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(m_Value(InnerPtr),
2049	m_Value(InnerMask))))) {
2050	assert(II->getArgOperand(`1`)->getType() == InnerMask->getType() &&
2051	"Mask types must match");
2052	// TODO: If InnerMask == Op1, we could copy attributes from inner
2053	// callsite -> outer callsite.
2054	Value *NewMask = Builder.CreateAnd(LHS: II->getArgOperand(i: `1`), RHS: InnerMask);
2055	replaceOperand(I&: CI, OpNum: `0`, V: InnerPtr);
2056	replaceOperand(I&: CI, OpNum: `1`, V: NewMask);
2057	Changed = true;
2058	}
2059
2060	// See if we can deduce non-null.
2061	if (!CI.hasRetAttr(Attribute::NonNull) &&
2062	(Known.isNonZero() \|\|
2063	isKnownNonZero(II, getSimplifyQuery().getWithInstruction(II)))) {
2064	CI.addRetAttr(Attribute::NonNull);
2065	Changed = true;
2066	}
2067
2068	unsigned NewAlignmentLog =
2069	std::min(a: Value::MaxAlignmentExponent,
2070	b: std::min(a: BitWidth - `1`, b: Known.countMinTrailingZeros()));
2071	// Known bits will capture if we had alignment information associated with
2072	// the pointer argument.
2073	if (NewAlignmentLog > Log2(A: CI.getRetAlign().valueOrOne())) {
2074	CI.addRetAttr(Attr: Attribute::getWithAlignment(
2075	Context&: CI.getContext(), Alignment: Align (uint64_t(`1`) << NewAlignmentLog)));
2076	Changed = true;
2077	}
2078	if (Changed)
2079	return &CI;
2080	break;
2081	}
2082	case Intrinsic::uadd_with_overflow:
2083	case Intrinsic::sadd_with_overflow: {
2084	if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2085	return I;
2086
2087	// Given 2 constant operands whose sum does not overflow:
2088	// uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
2089	// saddo (X +nsw C0), C1 -> saddo X, C0 + C1
2090	Value *X;
2091	const APInt C0, C1;
2092	Value *Arg0 = II->getArgOperand(i: `0`);
2093	Value *Arg1 = II->getArgOperand(i: `1`);
2094	bool IsSigned = IID == Intrinsic::sadd_with_overflow;
2095	bool HasNWAdd = IsSigned
2096	? match(V: Arg0, P: m_NSWAddLike(L: m_Value(V&: X), R: m_APInt(Res&: C0)))
2097	: match(V: Arg0, P: m_NUWAddLike(L: m_Value(V&: X), R: m_APInt(Res&: C0)));
2098	if (HasNWAdd && match(V: Arg1, P: m_APInt(Res&: C1))) {
2099	bool Overflow;
2100	APInt NewC =
2101	IsSigned ? C1->sadd_ov(RHS: C0, Overflow) : C1->uadd_ov(RHS: C0, Overflow);
2102	if (!Overflow)
2103	return replaceInstUsesWith(
2104	I&: *II, V: Builder.CreateBinaryIntrinsic(
2105	ID: IID, LHS: X, RHS: ConstantInt::get(Ty: Arg1->getType(), V: NewC)));
2106	}
2107	break;
2108	}
2109
2110	case Intrinsic::umul_with_overflow:
2111	case Intrinsic::smul_with_overflow:
2112	case Intrinsic::usub_with_overflow:
2113	if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2114	return I;
2115	break;
2116
2117	case Intrinsic::ssub_with_overflow: {
2118	if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2119	return I;
2120
2121	Constant *C;
2122	Value *Arg0 = II->getArgOperand(i: `0`);
2123	Value *Arg1 = II->getArgOperand(i: `1`);
2124	// Given a constant C that is not the minimum signed value
2125	// for an integer of a given bit width:
2126	//
2127	// ssubo X, C -> saddo X, -C
2128	if (match(V: Arg1, P: m_Constant(C)) && C->isNotMinSignedValue()) {
2129	Value *NegVal = ConstantExpr::getNeg(C);
2130	// Build a saddo call that is equivalent to the discovered
2131	// ssubo call.
2132	return replaceInstUsesWith(
2133	*II, Builder.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow,
2134	Arg0, NegVal));
2135	}
2136
2137	break;
2138	}
2139
2140	case Intrinsic::uadd_sat:
2141	case Intrinsic::sadd_sat:
2142	case Intrinsic::usub_sat:
2143	case Intrinsic::ssub_sat: {
2144	SaturatingInst *SI = cast<SaturatingInst>(Val: II);
2145	Type *Ty = SI->getType();
2146	Value *Arg0 = SI->getLHS();
2147	Value *Arg1 = SI->getRHS();
2148
2149	// Make use of known overflow information.
2150	OverflowResult OR = computeOverflow(BinaryOp: SI->getBinaryOp(), IsSigned: SI->isSigned(),
2151	LHS: Arg0, RHS: Arg1, CxtI: SI);
2152	switch (OR) {
2153	case OverflowResult::MayOverflow:
2154	break;
2155	case OverflowResult::NeverOverflows:
2156	if (SI->isSigned())
2157	return BinaryOperator::CreateNSW(Opc: SI->getBinaryOp(), V1: Arg0, V2: Arg1);
2158	else
2159	return BinaryOperator::CreateNUW(Opc: SI->getBinaryOp(), V1: Arg0, V2: Arg1);
2160	case OverflowResult::AlwaysOverflowsLow: {
2161	unsigned BitWidth = Ty->getScalarSizeInBits();
2162	APInt Min = APSInt::getMinValue(numBits: BitWidth, Unsigned: !SI->isSigned());
2163	return replaceInstUsesWith(I&: *SI, V: ConstantInt::get(Ty, V: Min));
2164	}
2165	case OverflowResult::AlwaysOverflowsHigh: {
2166	unsigned BitWidth = Ty->getScalarSizeInBits();
2167	APInt Max = APSInt::getMaxValue(numBits: BitWidth, Unsigned: !SI->isSigned());
2168	return replaceInstUsesWith(I&: *SI, V: ConstantInt::get(Ty, V: Max));
2169	}
2170	}
2171
2172	// usub_sat((sub nuw C, A), C1) -> usub_sat(usub_sat(C, C1), A)
2173	// which after that:
2174	// usub_sat((sub nuw C, A), C1) -> usub_sat(C - C1, A) if C1 u< C
2175	// usub_sat((sub nuw C, A), C1) -> 0 otherwise
2176	Constant C, C1;
2177	Value *A;
2178	if (IID == Intrinsic::usub_sat &&
2179	match(Arg0, m_NUWSub(m_ImmConstant(C), m_Value(A))) &&
2180	match(Arg1, m_ImmConstant(C1))) {
2181	auto *NewC = Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, C, C1);
2182	auto *NewSub =
2183	Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, NewC, A);
2184	return replaceInstUsesWith(I&: *SI, V: NewSub);
2185	}
2186
2187	// ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
2188	if (IID == Intrinsic::ssub_sat && match(Arg1, m_Constant(C)) &&
2189	C->isNotMinSignedValue()) {
2190	Value *NegVal = ConstantExpr::getNeg(C);
2191	return replaceInstUsesWith(
2192	*II, Builder.CreateBinaryIntrinsic(
2193	Intrinsic::sadd_sat, Arg0, NegVal));
2194	}
2195
2196	// sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
2197	// sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
2198	// if Val and Val2 have the same sign
2199	if (auto *Other = dyn_cast<IntrinsicInst>(Val: Arg0)) {
2200	Value *X;
2201	const APInt Val, Val2;
2202	APInt NewVal;
2203	bool IsUnsigned =
2204	IID == Intrinsic::uadd_sat \|\| IID == Intrinsic::usub_sat;
2205	if (Other->getIntrinsicID() == IID &&
2206	match(V: Arg1, P: m_APInt(Res&: Val)) &&
2207	match(V: Other->getArgOperand(i: `0`), P: m_Value(V&: X)) &&
2208	match(V: Other->getArgOperand(i: `1`), P: m_APInt(Res&: Val2))) {
2209	if (IsUnsigned)
2210	NewVal = Val->uadd_sat(RHS: *Val2);
2211	else if (Val->isNonNegative() == Val2->isNonNegative()) {
2212	bool Overflow;
2213	NewVal = Val->sadd_ov(RHS: *Val2, Overflow);
2214	if (Overflow) {
2215	// Both adds together may add more than SignedMaxValue
2216	// without saturating the final result.
2217	break;
2218	}
2219	} else {
2220	// Cannot fold saturated addition with different signs.
2221	break;
2222	}
2223
2224	return replaceInstUsesWith(
2225	I&: *II, V: Builder.CreateBinaryIntrinsic(
2226	ID: IID, LHS: X, RHS: ConstantInt::get(Ty: II->getType(), V: NewVal)));
2227	}
2228	}
2229	break;
2230	}
2231
2232	case Intrinsic::minnum:
2233	case Intrinsic::maxnum:
2234	case Intrinsic::minimum:
2235	case Intrinsic::maximum: {
2236	Value *Arg0 = II->getArgOperand(i: `0`);
2237	Value *Arg1 = II->getArgOperand(i: `1`);
2238	Value X, Y;
2239	if (match(V: Arg0, P: m_FNeg(X: m_Value(V&: X))) && match(V: Arg1, P: m_FNeg(X: m_Value(V&: Y))) &&
2240	(Arg0->hasOneUse() \|\| Arg1->hasOneUse())) {
2241	// If both operands are negated, invert the call and negate the result:
2242	// min(-X, -Y) --> -(max(X, Y))
2243	// max(-X, -Y) --> -(min(X, Y))
2244	Intrinsic::ID NewIID;
2245	switch (IID) {
2246	case Intrinsic::maxnum:
2247	NewIID = Intrinsic::minnum;
2248	break;
2249	case Intrinsic::minnum:
2250	NewIID = Intrinsic::maxnum;
2251	break;
2252	case Intrinsic::maximum:
2253	NewIID = Intrinsic::minimum;
2254	break;
2255	case Intrinsic::minimum:
2256	NewIID = Intrinsic::maximum;
2257	break;
2258	default:
2259	llvm_unreachable("unexpected intrinsic ID");
2260	}
2261	Value *NewCall = Builder.CreateBinaryIntrinsic(ID: NewIID, LHS: X, RHS: Y, FMFSource: II);
2262	Instruction *FNeg = UnaryOperator::CreateFNeg(V: NewCall);
2263	FNeg->copyIRFlags(V: II);
2264	return FNeg;
2265	}
2266
2267	// m(m(X, C2), C1) -> m(X, C)
2268	const APFloat C1, C2;
2269	if (auto *M = dyn_cast<IntrinsicInst>(Val: Arg0)) {
2270	if (M->getIntrinsicID() == IID && match(V: Arg1, P: m_APFloat(Res&: C1)) &&
2271	((match(V: M->getArgOperand(i: `0`), P: m_Value(V&: X)) &&
2272	match(V: M->getArgOperand(i: `1`), P: m_APFloat(Res&: C2))) \|\|
2273	(match(V: M->getArgOperand(i: `1`), P: m_Value(V&: X)) &&
2274	match(V: M->getArgOperand(i: `0`), P: m_APFloat(Res&: C2))))) {
2275	APFloat Res(`0.0`);
2276	switch (IID) {
2277	case Intrinsic::maxnum:
2278	Res = maxnum(A: C1, B: C2);
2279	break;
2280	case Intrinsic::minnum:
2281	Res = minnum(A: C1, B: C2);
2282	break;
2283	case Intrinsic::maximum:
2284	Res = maximum(A: C1, B: C2);
2285	break;
2286	case Intrinsic::minimum:
2287	Res = minimum(A: C1, B: C2);
2288	break;
2289	default:
2290	llvm_unreachable("unexpected intrinsic ID");
2291	}
2292	Value *V = Builder.CreateBinaryIntrinsic(
2293	ID: IID, LHS: X, RHS: ConstantFP::get(Ty: Arg0->getType(), V: Res), FMFSource: II);
2294	// TODO: Conservatively intersecting FMF. If Res == C2, the transform
2295	// was a simplification (so Arg0 and its original flags could
2296	// propagate?)
2297	if (auto *CI = dyn_cast<CallInst>(Val: V))
2298	CI->andIRFlags(V: M);
2299	return replaceInstUsesWith(I&: *II, V);
2300	}
2301	}
2302
2303	// m((fpext X), (fpext Y)) -> fpext (m(X, Y))
2304	if (match(V: Arg0, P: m_OneUse(SubPattern: m_FPExt(Op: m_Value(V&: X)))) &&
2305	match(V: Arg1, P: m_OneUse(SubPattern: m_FPExt(Op: m_Value(V&: Y)))) &&
2306	X->getType() == Y->getType()) {
2307	Value *NewCall =
2308	Builder.CreateBinaryIntrinsic(ID: IID, LHS: X, RHS: Y, FMFSource: II, Name: II->getName());
2309	return new FPExtInst (NewCall, II->getType());
2310	}
2311
2312	// max X, -X --> fabs X
2313	// min X, -X --> -(fabs X)
2314	// TODO: Remove one-use limitation? That is obviously better for max,
2315	// hence why we don't check for one-use for that. However,
2316	// it would be an extra instruction for min (fnabs), but
2317	// that is still likely better for analysis and codegen.
2318	auto IsMinMaxOrXNegX = [IID, &X](Value Op0, Value Op1) {
2319	if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_Specific(X)))
2320	return Op0->hasOneUse() \|\|
2321	(IID != Intrinsic::minimum && IID != Intrinsic::minnum);
2322	return false;
2323	};
2324
2325	if (IsMinMaxOrXNegX (Arg0, Arg1) \|\| IsMinMaxOrXNegX (Arg1, Arg0)) {
2326	Value *R = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, II);
2327	if (IID == Intrinsic::minimum \|\| IID == Intrinsic::minnum)
2328	R = Builder.CreateFNegFMF(V: R, FMFSource: II);
2329	return replaceInstUsesWith(I&: *II, V: R);
2330	}
2331
2332	break;
2333	}
2334	case Intrinsic::matrix_multiply: {
2335	// Optimize negation in matrix multiplication.
2336
2337	// -A * -B -> A * B
2338	Value A, B;
2339	if (match(V: II->getArgOperand(i: `0`), P: m_FNeg(X: m_Value(V&: A))) &&
2340	match(V: II->getArgOperand(i: `1`), P: m_FNeg(X: m_Value(V&: B)))) {
2341	replaceOperand(I&: *II, OpNum: `0`, V: A);
2342	replaceOperand(I&: *II, OpNum: `1`, V: B);
2343	return II;
2344	}
2345
2346	Value *Op0 = II->getOperand(i_nocapture: `0`);
2347	Value *Op1 = II->getOperand(i_nocapture: `1`);
2348	Value OpNotNeg, NegatedOp;
2349	unsigned NegatedOpArg, OtherOpArg;
2350	if (match(V: Op0, P: m_FNeg(X: m_Value(V&: OpNotNeg)))) {
2351	NegatedOp = Op0;
2352	NegatedOpArg = `0`;
2353	OtherOpArg = `1`;
2354	} else if (match(V: Op1, P: m_FNeg(X: m_Value(V&: OpNotNeg)))) {
2355	NegatedOp = Op1;
2356	NegatedOpArg = `1`;
2357	OtherOpArg = `0`;
2358	} else
2359	// Multiplication doesn't have a negated operand.
2360	break;
2361
2362	// Only optimize if the negated operand has only one use.
2363	if (!NegatedOp->hasOneUse())
2364	break;
2365
2366	Value *OtherOp = II->getOperand(i_nocapture: OtherOpArg);
2367	VectorType *RetTy = cast<VectorType>(Val: II->getType());
2368	VectorType *NegatedOpTy = cast<VectorType>(Val: NegatedOp->getType());
2369	VectorType *OtherOpTy = cast<VectorType>(Val: OtherOp->getType());
2370	ElementCount NegatedCount = NegatedOpTy->getElementCount();
2371	ElementCount OtherCount = OtherOpTy->getElementCount();
2372	ElementCount RetCount = RetTy->getElementCount();
2373	// (-A) * B -> A * (-B), if it is cheaper to negate B and vice versa.
2374	if (ElementCount::isKnownGT(LHS: NegatedCount, RHS: OtherCount) &&
2375	ElementCount::isKnownLT(LHS: OtherCount, RHS: RetCount)) {
2376	Value *InverseOtherOp = Builder.CreateFNeg(V: OtherOp);
2377	replaceOperand(I&: *II, OpNum: NegatedOpArg, V: OpNotNeg);
2378	replaceOperand(I&: *II, OpNum: OtherOpArg, V: InverseOtherOp);
2379	return II;
2380	}
2381	// (-A) * B -> -(A * B), if it is cheaper to negate the result.
2382	if (ElementCount::isKnownGT(LHS: NegatedCount, RHS: RetCount)) {
2383	SmallVector<Value *, `5`> NewArgs(II->args());
2384	NewArgs [NegatedOpArg] = OpNotNeg;
2385	Instruction *NewMul =
2386	Builder.CreateIntrinsic(RetTy: II->getType(), ID: IID, Args: NewArgs, FMFSource: II);
2387	return replaceInstUsesWith(I&: *II, V: Builder.CreateFNegFMF(V: NewMul, FMFSource: II));
2388	}
2389	break;
2390	}
2391	case Intrinsic::fmuladd: {
2392	// Canonicalize fast fmuladd to the separate fmul + fadd.
2393	if (II->isFast()) {
2394	BuilderTy::FastMathFlagGuard Guard(Builder);
2395	Builder.setFastMathFlags(II->getFastMathFlags());
2396	Value *Mul = Builder.CreateFMul(L: II->getArgOperand(i: `0`),
2397	R: II->getArgOperand(i: `1`));
2398	Value *Add = Builder.CreateFAdd(L: Mul, R: II->getArgOperand(i: `2`));
2399	Add->takeName(V: II);
2400	return replaceInstUsesWith(I&: *II, V: Add);
2401	}
2402
2403	// Try to simplify the underlying FMul.
2404	if (Value *V = simplifyFMulInst(LHS: II->getArgOperand(i: `0`), RHS: II->getArgOperand(i: `1`),
2405	FMF: II->getFastMathFlags(),
2406	Q: SQ.getWithInstruction(I: II))) {
2407	auto *FAdd = BinaryOperator::CreateFAdd(V1: V, V2: II->getArgOperand(i: `2`));
2408	FAdd->copyFastMathFlags(I: II);
2409	return FAdd;
2410	}
2411
2412	[[fallthrough]];
2413	}
2414	case Intrinsic::fma: {
2415	// fma fneg(x), fneg(y), z -> fma x, y, z
2416	Value *Src0 = II->getArgOperand(i: `0`);
2417	Value *Src1 = II->getArgOperand(i: `1`);
2418	Value X, Y;
2419	if (match(V: Src0, P: m_FNeg(X: m_Value(V&: X))) && match(V: Src1, P: m_FNeg(X: m_Value(V&: Y)))) {
2420	replaceOperand(I&: *II, OpNum: `0`, V: X);
2421	replaceOperand(I&: *II, OpNum: `1`, V: Y);
2422	return II;
2423	}
2424
2425	// fma fabs(x), fabs(x), z -> fma x, x, z
2426	if (match(V: Src0, P: m_FAbs(Op0: m_Value(V&: X))) &&
2427	match(V: Src1, P: m_FAbs(Op0: m_Specific(V: X)))) {
2428	replaceOperand(I&: *II, OpNum: `0`, V: X);
2429	replaceOperand(I&: *II, OpNum: `1`, V: X);
2430	return II;
2431	}
2432
2433	// Try to simplify the underlying FMul. We can only apply simplifications
2434	// that do not require rounding.
2435	if (Value *V = simplifyFMAFMul(LHS: II->getArgOperand(i: `0`), RHS: II->getArgOperand(i: `1`),
2436	FMF: II->getFastMathFlags(),
2437	Q: SQ.getWithInstruction(I: II))) {
2438	auto *FAdd = BinaryOperator::CreateFAdd(V1: V, V2: II->getArgOperand(i: `2`));
2439	FAdd->copyFastMathFlags(I: II);
2440	return FAdd;
2441	}
2442
2443	// fma x, y, 0 -> fmul x, y
2444	// This is always valid for -0.0, but requires nsz for +0.0 as
2445	// -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
2446	if (match(V: II->getArgOperand(i: `2`), P: m_NegZeroFP()) \|\|
2447	(match(V: II->getArgOperand(i: `2`), P: m_PosZeroFP()) &&
2448	II->getFastMathFlags().noSignedZeros()))
2449	return BinaryOperator::CreateFMulFMF(V1: Src0, V2: Src1, FMFSource: II);
2450
2451	break;
2452	}
2453	case Intrinsic::copysign: {
2454	Value Mag = II->getArgOperand(i: `0`), Sign = II->getArgOperand(i: `1`);
2455	if (std::optional<bool> KnownSignBit = computeKnownFPSignBit(
2456	V: Sign, /Depth=/`0`, SQ: getSimplifyQuery().getWithInstruction(I: II))) {
2457	if (*KnownSignBit) {
2458	// If we know that the sign argument is negative, reduce to FNABS:
2459	// copysign Mag, -Sign --> fneg (fabs Mag)
2460	Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
2461	return replaceInstUsesWith(I&: *II, V: Builder.CreateFNegFMF(V: Fabs, FMFSource: II));
2462	}
2463
2464	// If we know that the sign argument is positive, reduce to FABS:
2465	// copysign Mag, +Sign --> fabs Mag
2466	Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
2467	return replaceInstUsesWith(I&: *II, V: Fabs);
2468	}
2469
2470	// Propagate sign argument through nested calls:
2471	// copysign Mag, (copysign ?, X) --> copysign Mag, X
2472	Value *X;
2473	if (match(Sign, m_Intrinsic<Intrinsic::copysign>(m_Value(), m_Value(X))))
2474	return replaceOperand(I&: *II, OpNum: `1`, V: X);
2475
2476	// Clear sign-bit of constant magnitude:
2477	// copysign -MagC, X --> copysign MagC, X
2478	// TODO: Support constant folding for fabs
2479	const APFloat *MagC;
2480	if (match(V: Mag, P: m_APFloat(Res&: MagC)) && MagC->isNegative()) {
2481	APFloat PosMagC = *MagC;
2482	PosMagC.clearSign();
2483	return replaceOperand(I&: *II, OpNum: `0`, V: ConstantFP::get(Ty: Mag->getType(), V: PosMagC));
2484	}
2485
2486	// Peek through changes of magnitude's sign-bit. This call rewrites those:
2487	// copysign (fabs X), Sign --> copysign X, Sign
2488	// copysign (fneg X), Sign --> copysign X, Sign
2489	if (match(V: Mag, P: m_FAbs(Op0: m_Value(V&: X))) \|\| match(V: Mag, P: m_FNeg(X: m_Value(V&: X))))
2490	return replaceOperand(I&: *II, OpNum: `0`, V: X);
2491
2492	break;
2493	}
2494	case Intrinsic::fabs: {
2495	Value Cond, TVal, *FVal;
2496	if (match(V: II->getArgOperand(i: `0`),
2497	P: m_Select(C: m_Value(V&: Cond), L: m_Value(V&: TVal), R: m_Value(V&: FVal)))) {
2498	// fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF
2499	if (isa<Constant>(Val: TVal) \|\| isa<Constant>(Val: FVal)) {
2500	CallInst *AbsT = Builder.CreateCall(Callee: II->getCalledFunction(), Args: {TVal});
2501	CallInst *AbsF = Builder.CreateCall(Callee: II->getCalledFunction(), Args: {FVal});
2502	SelectInst *SI = SelectInst::Create(C: Cond, S1: AbsT, S2: AbsF);
2503	FastMathFlags FMF1 = II->getFastMathFlags();
2504	FastMathFlags FMF2 =
2505	cast<SelectInst>(Val: II->getArgOperand(i: `0`))->getFastMathFlags();
2506	FMF2.setNoSignedZeros(false);
2507	SI->setFastMathFlags(FMF1 \| FMF2);
2508	return SI;
2509	}
2510	// fabs (select Cond, -FVal, FVal) --> fabs FVal
2511	if (match(V: TVal, P: m_FNeg(X: m_Specific(V: FVal))))
2512	return replaceOperand(I&: *II, OpNum: `0`, V: FVal);
2513	// fabs (select Cond, TVal, -TVal) --> fabs TVal
2514	if (match(V: FVal, P: m_FNeg(X: m_Specific(V: TVal))))
2515	return replaceOperand(I&: *II, OpNum: `0`, V: TVal);
2516	}
2517
2518	Value Magnitude, Sign;
2519	if (match(V: II->getArgOperand(i: `0`),
2520	P: m_CopySign(Op0: m_Value(V&: Magnitude), Op1: m_Value(V&: Sign)))) {
2521	// fabs (copysign x, y) -> (fabs x)
2522	CallInst *AbsSign =
2523	Builder.CreateCall(Callee: II->getCalledFunction(), Args: {Magnitude});
2524	AbsSign->copyFastMathFlags(I: II);
2525	return replaceInstUsesWith(I&: *II, V: AbsSign);
2526	}
2527
2528	[[fallthrough]];
2529	}
2530	case Intrinsic::ceil:
2531	case Intrinsic::floor:
2532	case Intrinsic::round:
2533	case Intrinsic::roundeven:
2534	case Intrinsic::nearbyint:
2535	case Intrinsic::rint:
2536	case Intrinsic::trunc: {
2537	Value *ExtSrc;
2538	if (match(V: II->getArgOperand(i: `0`), P: m_OneUse(SubPattern: m_FPExt(Op: m_Value(V&: ExtSrc))))) {
2539	// Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
2540	Value *NarrowII = Builder.CreateUnaryIntrinsic(ID: IID, V: ExtSrc, FMFSource: II);
2541	return new FPExtInst (NarrowII, II->getType());
2542	}
2543	break;
2544	}
2545	case Intrinsic::cos:
2546	case Intrinsic::amdgcn_cos: {
2547	Value X, Sign;
2548	Value *Src = II->getArgOperand(i: `0`);
2549	if (match(V: Src, P: m_FNeg(X: m_Value(V&: X))) \|\| match(V: Src, P: m_FAbs(Op0: m_Value(V&: X))) \|\|
2550	match(V: Src, P: m_CopySign(Op0: m_Value(V&: X), Op1: m_Value(V&: Sign)))) {
2551	// cos(-x) --> cos(x)
2552	// cos(fabs(x)) --> cos(x)
2553	// cos(copysign(x, y)) --> cos(x)
2554	return replaceOperand(I&: *II, OpNum: `0`, V: X);
2555	}
2556	break;
2557	}
2558	case Intrinsic::sin: {
2559	Value *X;
2560	if (match(V: II->getArgOperand(i: `0`), P: m_OneUse(SubPattern: m_FNeg(X: m_Value(V&: X))))) {
2561	// sin(-x) --> -sin(x)
2562	Value *NewSin = Builder.CreateUnaryIntrinsic(Intrinsic::sin, X, II);
2563	Instruction *FNeg = UnaryOperator::CreateFNeg(V: NewSin);
2564	FNeg->copyFastMathFlags(I: II);
2565	return FNeg;
2566	}
2567	break;
2568	}
2569	case Intrinsic::ldexp: {
2570	// ldexp(ldexp(x, a), b) -> ldexp(x, a + b)
2571	//
2572	// The danger is if the first ldexp would overflow to infinity or underflow
2573	// to zero, but the combined exponent avoids it. We ignore this with
2574	// reassoc.
2575	//
2576	// It's also safe to fold if we know both exponents are >= 0 or <= 0 since
2577	// it would just double down on the overflow/underflow which would occur
2578	// anyway.
2579	//
2580	// TODO: Could do better if we had range tracking for the input value
2581	// exponent. Also could broaden sign check to cover == 0 case.
2582	Value *Src = II->getArgOperand(i: `0`);
2583	Value *Exp = II->getArgOperand(i: `1`);
2584	Value *InnerSrc;
2585	Value *InnerExp;
2586	if (match(Src, m_OneUse(m_Intrinsic<Intrinsic::ldexp>(
2587	m_Value(InnerSrc), m_Value(InnerExp)))) &&
2588	Exp->getType() == InnerExp->getType()) {
2589	FastMathFlags FMF = II->getFastMathFlags();
2590	FastMathFlags InnerFlags = cast<FPMathOperator>(Val: Src)->getFastMathFlags();
2591
2592	if ((FMF.allowReassoc() && InnerFlags.allowReassoc()) \|\|
2593	signBitMustBeTheSame(Op0: Exp, Op1: InnerExp, CxtI: II, DL, AC: &AC, DT: &DT)) {
2594	// TODO: Add nsw/nuw probably safe if integer type exceeds exponent
2595	// width.
2596	Value *NewExp = Builder.CreateAdd(LHS: InnerExp, RHS: Exp);
2597	II->setArgOperand(i: `1`, v: NewExp);
2598	II->setFastMathFlags(InnerFlags); // Or the inner flags.
2599	return replaceOperand(I&: *II, OpNum: `0`, V: InnerSrc);
2600	}
2601	}
2602
2603	break;
2604	}
2605	case Intrinsic::ptrauth_auth:
2606	case Intrinsic::ptrauth_resign: {
2607	// (sign\|resign) + (auth\|resign) can be folded by omitting the middle
2608	// sign+auth component if the key and discriminator match.
2609	bool NeedSign = II->getIntrinsicID() == Intrinsic::ptrauth_resign;
2610	Value *Key = II->getArgOperand(i: `1`);
2611	Value *Disc = II->getArgOperand(i: `2`);
2612
2613	// AuthKey will be the key we need to end up authenticating against in
2614	// whatever we replace this sequence with.
2615	Value AuthKey = nullptr, AuthDisc = nullptr, *BasePtr;
2616	if (auto CI = dyn_cast<CallBase>(Val: II->getArgOperand(i: `0`))) {
2617	BasePtr = CI->getArgOperand(i: `0`);
2618	if (CI->getIntrinsicID() == Intrinsic::ptrauth_sign) {
2619	if (CI->getArgOperand(i: `1`) != Key \|\| CI->getArgOperand(i: `2`) != Disc)
2620	break;
2621	} else if (CI->getIntrinsicID() == Intrinsic::ptrauth_resign) {
2622	if (CI->getArgOperand(i: `3`) != Key \|\| CI->getArgOperand(i: `4`) != Disc)
2623	break;
2624	AuthKey = CI->getArgOperand(i: `1`);
2625	AuthDisc = CI->getArgOperand(i: `2`);
2626	} else
2627	break;
2628	} else
2629	break;
2630
2631	unsigned NewIntrin;
2632	if (AuthKey && NeedSign) {
2633	// resign(0,1) + resign(1,2) = resign(0, 2)
2634	NewIntrin = Intrinsic::ptrauth_resign;
2635	} else if (AuthKey) {
2636	// resign(0,1) + auth(1) = auth(0)
2637	NewIntrin = Intrinsic::ptrauth_auth;
2638	} else if (NeedSign) {
2639	// sign(0) + resign(0, 1) = sign(1)
2640	NewIntrin = Intrinsic::ptrauth_sign;
2641	} else {
2642	// sign(0) + auth(0) = nop
2643	replaceInstUsesWith(I&: *II, V: BasePtr);
2644	eraseInstFromFunction(I&: *II);
2645	return nullptr;
2646	}
2647
2648	SmallVector<Value *, `4`> CallArgs;
2649	CallArgs.push_back(Elt: BasePtr);
2650	if (AuthKey) {
2651	CallArgs.push_back(Elt: AuthKey);
2652	CallArgs.push_back(Elt: AuthDisc);
2653	}
2654
2655	if (NeedSign) {
2656	CallArgs.push_back(Elt: II->getArgOperand(i: `3`));
2657	CallArgs.push_back(Elt: II->getArgOperand(i: `4`));
2658	}
2659
2660	Function *NewFn = Intrinsic::getDeclaration(M: II->getModule(), id: NewIntrin);
2661	return CallInst::Create(Func: NewFn, Args: CallArgs);
2662	}
2663	case Intrinsic::arm_neon_vtbl1:
2664	case Intrinsic::aarch64_neon_tbl1:
2665	if (Value V = simplifyNeonTbl1(II: II, Builder))
2666	return replaceInstUsesWith(I&: *II, V);
2667	break;
2668
2669	case Intrinsic::arm_neon_vmulls:
2670	case Intrinsic::arm_neon_vmullu:
2671	case Intrinsic::aarch64_neon_smull:
2672	case Intrinsic::aarch64_neon_umull: {
2673	Value *Arg0 = II->getArgOperand(i: `0`);
2674	Value *Arg1 = II->getArgOperand(i: `1`);
2675
2676	// Handle mul by zero first:
2677	if (isa<ConstantAggregateZero>(Val: Arg0) \|\| isa<ConstantAggregateZero>(Val: Arg1)) {
2678	return replaceInstUsesWith(I&: CI, V: ConstantAggregateZero::get(Ty: II->getType()));
2679	}
2680
2681	// Check for constant LHS & RHS - in this case we just simplify.
2682	bool Zext = (IID == Intrinsic::arm_neon_vmullu \|\|
2683	IID == Intrinsic::aarch64_neon_umull);
2684	VectorType *NewVT = cast<VectorType>(Val: II->getType());
2685	if (Constant *CV0 = dyn_cast<Constant>(Val: Arg0)) {
2686	if (Constant *CV1 = dyn_cast<Constant>(Val: Arg1)) {
2687	Value V0 = Builder.CreateIntCast(V: CV0, DestTy: NewVT, /isSigned=/*!Zext);
2688	Value V1 = Builder.CreateIntCast(V: CV1, DestTy: NewVT, /isSigned=/*!Zext);
2689	return replaceInstUsesWith(I&: CI, V: Builder.CreateMul(LHS: V0, RHS: V1));
2690	}
2691
2692	// Couldn't simplify - canonicalize constant to the RHS.
2693	std::swap(a&: Arg0, b&: Arg1);
2694	}
2695
2696	// Handle mul by one:
2697	if (Constant *CV1 = dyn_cast<Constant>(Val: Arg1))
2698	if (ConstantInt *Splat =
2699	dyn_cast_or_null<ConstantInt>(Val: CV1->getSplatValue()))
2700	if (Splat->isOne())
2701	return CastInst::CreateIntegerCast(S: Arg0, Ty: II->getType(),
2702	/isSigned=/!Zext);
2703
2704	break;
2705	}
2706	case Intrinsic::arm_neon_aesd:
2707	case Intrinsic::arm_neon_aese:
2708	case Intrinsic::aarch64_crypto_aesd:
2709	case Intrinsic::aarch64_crypto_aese: {
2710	Value *DataArg = II->getArgOperand(i: `0`);
2711	Value *KeyArg = II->getArgOperand(i: `1`);
2712
2713	// Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
2714	Value Data, Key;
2715	if (match(V: KeyArg, P: m_ZeroInt()) &&
2716	match(V: DataArg, P: m_Xor(L: m_Value(V&: Data), R: m_Value(V&: Key)))) {
2717	replaceOperand(I&: *II, OpNum: `0`, V: Data);
2718	replaceOperand(I&: *II, OpNum: `1`, V: Key);
2719	return II;
2720	}
2721	break;
2722	}
2723	case Intrinsic::hexagon_V6_vandvrt:
2724	case Intrinsic::hexagon_V6_vandvrt_128B: {
2725	// Simplify Q -> V -> Q conversion.
2726	if (auto Op0 = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: `0`))) {
2727	Intrinsic::ID ID0 = Op0->getIntrinsicID();
2728	if (ID0 != Intrinsic::hexagon_V6_vandqrt &&
2729	ID0 != Intrinsic::hexagon_V6_vandqrt_128B)
2730	break;
2731	Value Bytes = Op0->getArgOperand(i: `1`), Mask = II->getArgOperand(i: `1`);
2732	uint64_t Bytes1 = computeKnownBits(V: Bytes, Depth: `0`, CxtI: Op0).One.getZExtValue();
2733	uint64_t Mask1 = computeKnownBits(V: Mask, Depth: `0`, CxtI: II).One.getZExtValue();
2734	// Check if every byte has common bits in Bytes and Mask.
2735	uint64_t C = Bytes1 & Mask1;
2736	if ((C & `0xFF`) && (C & `0xFF00`) && (C & `0xFF0000`) && (C & `0xFF000000`))
2737	return replaceInstUsesWith(I&: *II, V: Op0->getArgOperand(i: `0`));
2738	}
2739	break;
2740	}
2741	case Intrinsic::stackrestore: {
2742	enum class ClassifyResult {
2743	None,
2744	Alloca,
2745	StackRestore,
2746	CallWithSideEffects,
2747	};
2748	auto Classify = [](const Instruction *I) {
2749	if (isa<AllocaInst>(Val: I))
2750	return ClassifyResult::Alloca;
2751
2752	if (auto *CI = dyn_cast<CallInst>(Val: I)) {
2753	if (auto *II = dyn_cast<IntrinsicInst>(Val: CI)) {
2754	if (II->getIntrinsicID() == Intrinsic::stackrestore)
2755	return ClassifyResult::StackRestore;
2756
2757	if (II->mayHaveSideEffects())
2758	return ClassifyResult::CallWithSideEffects;
2759	} else {
2760	// Consider all non-intrinsic calls to be side effects
2761	return ClassifyResult::CallWithSideEffects;
2762	}
2763	}
2764
2765	return ClassifyResult::None;
2766	};
2767
2768	// If the stacksave and the stackrestore are in the same BB, and there is
2769	// no intervening call, alloca, or stackrestore of a different stacksave,
2770	// remove the restore. This can happen when variable allocas are DCE'd.
2771	if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(Val: II->getArgOperand(i: `0`))) {
2772	if (SS->getIntrinsicID() == Intrinsic::stacksave &&
2773	SS->getParent() == II->getParent()) {
2774	BasicBlock::iterator BI(SS);
2775	bool CannotRemove = false;
2776	for (++BI; &*BI != II; ++BI) {
2777	switch (Classify (&*BI)) {
2778	case ClassifyResult::None:
2779	// So far so good, look at next instructions.
2780	break;
2781
2782	case ClassifyResult::StackRestore:
2783	// If we found an intervening stackrestore for a different
2784	// stacksave, we can't remove the stackrestore. Otherwise, continue.
2785	if (cast<IntrinsicInst>(Val&: *BI).getArgOperand(i: `0`) != SS)
2786	CannotRemove = true;
2787	break;
2788
2789	case ClassifyResult::Alloca:
2790	case ClassifyResult::CallWithSideEffects:
2791	// If we found an alloca, a non-intrinsic call, or an intrinsic
2792	// call with side effects, we can't remove the stackrestore.
2793	CannotRemove = true;
2794	break;
2795	}
2796	if (CannotRemove)
2797	break;
2798	}
2799
2800	if (!CannotRemove)
2801	return eraseInstFromFunction(I&: CI);
2802	}
2803	}
2804
2805	// Scan down this block to see if there is another stack restore in the
2806	// same block without an intervening call/alloca.
2807	BasicBlock::iterator BI(II);
2808	Instruction *TI = II->getParent()->getTerminator();
2809	bool CannotRemove = false;
2810	for (++BI; &*BI != TI; ++BI) {
2811	switch (Classify (&*BI)) {
2812	case ClassifyResult::None:
2813	// So far so good, look at next instructions.
2814	break;
2815
2816	case ClassifyResult::StackRestore:
2817	// If there is a stackrestore below this one, remove this one.
2818	return eraseInstFromFunction(I&: CI);
2819
2820	case ClassifyResult::Alloca:
2821	case ClassifyResult::CallWithSideEffects:
2822	// If we found an alloca, a non-intrinsic call, or an intrinsic call
2823	// with side effects (such as llvm.stacksave and llvm.read_register),
2824	// we can't remove the stack restore.
2825	CannotRemove = true;
2826	break;
2827	}
2828	if (CannotRemove)
2829	break;
2830	}
2831
2832	// If the stack restore is in a return, resume, or unwind block and if there
2833	// are no allocas or calls between the restore and the return, nuke the
2834	// restore.
2835	if (!CannotRemove && (isa<ReturnInst>(Val: TI) \|\| isa<ResumeInst>(Val: TI)))
2836	return eraseInstFromFunction(I&: CI);
2837	break;
2838	}
2839	case Intrinsic::lifetime_end:
2840	// Asan needs to poison memory to detect invalid access which is possible
2841	// even for empty lifetime range.
2842	if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) \|\|
2843	II->getFunction()->hasFnAttribute(Attribute::SanitizeMemory) \|\|
2844	II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
2845	break;
2846
2847	if (removeTriviallyEmptyRange(II, this, [](const IntrinsicInst &I) {
2848	return I.getIntrinsicID() == Intrinsic::lifetime_start;
2849	}))
2850	return nullptr;
2851	break;
2852	case Intrinsic::assume: {
2853	Value *IIOperand = II->getArgOperand(i: `0`);
2854	SmallVector<OperandBundleDef, `4`> OpBundles;
2855	II->getOperandBundlesAsDefs(Defs&: OpBundles);
2856
2857	/// This will remove the boolean Condition from the assume given as
2858	/// argument and remove the assume if it becomes useless.
2859	/// Always returns nullptr, for use as a return value.
2860	auto RemoveConditionFromAssume = [&](Instruction Assume) -> Instruction {
2861	assert(isa<AssumeInst>(Assume));
2862	if (isAssumeWithEmptyBundle(Assume: *cast<AssumeInst>(Val: II)))
2863	return eraseInstFromFunction(I&: CI);
2864	replaceUse(U&: II->getOperandUse(i: `0`), NewValue: ConstantInt::getTrue(Context&: II->getContext()));
2865	return nullptr;
2866	};
2867	// Remove an assume if it is followed by an identical assume.
2868	// TODO: Do we need this? Unless there are conflicting assumptions, the
2869	// computeKnownBits(IIOperand) below here eliminates redundant assumes.
2870	Instruction *Next = II->getNextNonDebugInstruction();
2871	if (match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))))
2872	return RemoveConditionFromAssume (Next);
2873
2874	// Canonicalize assume(a && b) -> assume(a); assume(b);
2875	// Note: New assumption intrinsics created here are registered by
2876	// the InstCombineIRInserter object.
2877	FunctionType *AssumeIntrinsicTy = II->getFunctionType();
2878	Value *AssumeIntrinsic = II->getCalledOperand();
2879	Value A, B;
2880	if (match(V: IIOperand, P: m_LogicalAnd(L: m_Value(V&: A), R: m_Value(V&: B)))) {
2881	Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic, Args: A, OpBundles,
2882	Name: II->getName());
2883	Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic, Args: B, Name: II->getName());
2884	return eraseInstFromFunction(I&: *II);
2885	}
2886	// assume(!(a \|\| b)) -> assume(!a); assume(!b);
2887	if (match(V: IIOperand, P: m_Not(V: m_LogicalOr(L: m_Value(V&: A), R: m_Value(V&: B))))) {
2888	Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic,
2889	Args: Builder.CreateNot(V: A), OpBundles, Name: II->getName());
2890	Builder.CreateCall(FTy: AssumeIntrinsicTy, Callee: AssumeIntrinsic,
2891	Args: Builder.CreateNot(V: B), Name: II->getName());
2892	return eraseInstFromFunction(I&: *II);
2893	}
2894
2895	// assume( (load addr) != null ) -> add 'nonnull' metadata to load
2896	// (if assume is valid at the load)
2897	CmpInst::Predicate Pred;
2898	Instruction *LHS;
2899	if (match(V: IIOperand, P: m_ICmp(Pred, L: m_Instruction(I&: LHS), R: m_Zero())) &&
2900	Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load &&
2901	LHS->getType()->isPointerTy() &&
2902	isValidAssumeForContext(I: II, CxtI: LHS, DT: &DT)) {
2903	MDNode *MD = MDNode::get(Context&: II->getContext(), MDs: std::nullopt);
2904	LHS->setMetadata(KindID: LLVMContext::MD_nonnull, Node: MD);
2905	LHS->setMetadata(KindID: LLVMContext::MD_noundef, Node: MD);
2906	return RemoveConditionFromAssume (II);
2907
2908	// TODO: apply nonnull return attributes to calls and invokes
2909	// TODO: apply range metadata for range check patterns?
2910	}
2911
2912	// Separate storage assumptions apply to the underlying allocations, not any
2913	// particular pointer within them. When evaluating the hints for AA purposes
2914	// we getUnderlyingObject them; by precomputing the answers here we can
2915	// avoid having to do so repeatedly there.
2916	for (unsigned Idx = `0`; Idx < II->getNumOperandBundles(); Idx++) {
2917	OperandBundleUse OBU = II->getOperandBundleAt(Index: Idx);
2918	if (OBU.getTagName() == "separate_storage") {
2919	assert(OBU.Inputs.size() == `2`);
2920	auto MaybeSimplifyHint = [&](const Use &U) {
2921	Value *Hint = U.get();
2922	// Not having a limit is safe because InstCombine removes unreachable
2923	// code.
2924	Value UnderlyingObject = getUnderlyingObject(V: Hint, /MaxLookup/* `0`);
2925	if (Hint != UnderlyingObject)
2926	replaceUse(U&: const_cast<Use &>(U), NewValue: UnderlyingObject);
2927	};
2928	MaybeSimplifyHint (OBU.Inputs [`0`]);
2929	MaybeSimplifyHint (OBU.Inputs [`1`]);
2930	}
2931	}
2932
2933	// Convert nonnull assume like:
2934	// %A = icmp ne i32* %PTR, null
2935	// call void @llvm.assume(i1 %A)
2936	// into
2937	// call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
2938	if (EnableKnowledgeRetention &&
2939	match(V: IIOperand, P: m_Cmp(Pred, L: m_Value(V&: A), R: m_Zero())) &&
2940	Pred == CmpInst::ICMP_NE && A->getType()->isPointerTy()) {
2941	if (auto *Replacement = buildAssumeFromKnowledge(
2942	{RetainedKnowledge{Attribute::NonNull, `0`, A}}, Next, &AC, &DT)) {
2943
2944	Replacement->insertBefore(Next);
2945	AC.registerAssumption(CI: Replacement);
2946	return RemoveConditionFromAssume (II);
2947	}
2948	}
2949
2950	// Convert alignment assume like:
2951	// %B = ptrtoint i32* %A to i64
2952	// %C = and i64 %B, Constant
2953	// %D = icmp eq i64 %C, 0
2954	// call void @llvm.assume(i1 %D)
2955	// into
2956	// call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)]
2957	uint64_t AlignMask;
2958	if (EnableKnowledgeRetention &&
2959	match(V: IIOperand,
2960	P: m_Cmp(Pred, L: m_And(L: m_Value(V&: A), R: m_ConstantInt(V&: AlignMask)),
2961	R: m_Zero())) &&
2962	Pred == CmpInst::ICMP_EQ) {
2963	if (isPowerOf2_64(Value: AlignMask + `1`)) {
2964	uint64_t Offset = `0`;
2965	match(V: A, P: m_Add(L: m_Value(V&: A), R: m_ConstantInt(V&: Offset)));
2966	if (match(V: A, P: m_PtrToInt(Op: m_Value(V&: A)))) {
2967	/// Note: this doesn't preserve the offset information but merges
2968	/// offset and alignment.
2969	/// TODO: we can generate a GEP instead of merging the alignment with
2970	/// the offset.
2971	RetainedKnowledge RK{Attribute::Alignment,
2972	(unsigned)MinAlign(Offset, AlignMask + `1`), A};
2973	if (auto *Replacement =
2974	buildAssumeFromKnowledge(Knowledge: RK, CtxI: Next, AC: &AC, DT: &DT)) {
2975
2976	Replacement->insertAfter(InsertPos: II);
2977	AC.registerAssumption(CI: Replacement);
2978	}
2979	return RemoveConditionFromAssume (II);
2980	}
2981	}
2982	}
2983
2984	/// Canonicalize Knowledge in operand bundles.
2985	if (EnableKnowledgeRetention && II->hasOperandBundles()) {
2986	for (unsigned Idx = `0`; Idx < II->getNumOperandBundles(); Idx++) {
2987	auto &BOI = II->bundle_op_info_begin()[Idx];
2988	RetainedKnowledge RK =
2989	llvm::getKnowledgeFromBundle(Assume&: cast<AssumeInst>(Val&: *II), BOI);
2990	if (BOI.End - BOI.Begin > `2`)
2991	continue; // Prevent reducing knowledge in an align with offset since
2992	// extracting a RetainedKnowledge from them looses offset
2993	// information
2994	RetainedKnowledge CanonRK =
2995	llvm::simplifyRetainedKnowledge(Assume: cast<AssumeInst>(Val: II), RK,
2996	AC: &getAssumptionCache(),
2997	DT: &getDominatorTree());
2998	if (CanonRK == RK)
2999	continue;
3000	if (!CanonRK) {
3001	if (BOI.End - BOI.Begin > `0`) {
3002	Worklist.pushValue(V: II->op_begin()[BOI.Begin]);
3003	Value::dropDroppableUse(U&: II->op_begin()[BOI.Begin]);
3004	}
3005	continue;
3006	}
3007	assert(RK.AttrKind == CanonRK.AttrKind);
3008	if (BOI.End - BOI.Begin > `0`)
3009	II->op_begin()[BOI.Begin].set(CanonRK.WasOn);
3010	if (BOI.End - BOI.Begin > `1`)
3011	II->op_begin()[BOI.Begin + `1`].set(ConstantInt::get(
3012	Ty: Type::getInt64Ty(C&: II->getContext()), V: CanonRK.ArgValue));
3013	if (RK.WasOn)
3014	Worklist.pushValue(V: RK.WasOn);
3015	return II;
3016	}
3017	}
3018
3019	// If there is a dominating assume with the same condition as this one,
3020	// then this one is redundant, and should be removed.
3021	KnownBits Known(`1`);
3022	computeKnownBits(V: IIOperand, Known, Depth: `0`, CxtI: II);
3023	if (Known.isAllOnes() && isAssumeWithEmptyBundle(Assume: cast<AssumeInst>(Val&: *II)))
3024	return eraseInstFromFunction(I&: *II);
3025
3026	// assume(false) is unreachable.
3027	if (match(V: IIOperand, P: m_CombineOr(L: m_Zero(), R: m_Undef()))) {
3028	CreateNonTerminatorUnreachable(InsertAt: II);
3029	return eraseInstFromFunction(I&: *II);
3030	}
3031
3032	// Update the cache of affected values for this assumption (we might be
3033	// here because we just simplified the condition).
3034	AC.updateAffectedValues(CI: cast<AssumeInst>(Val: II));
3035	break;
3036	}
3037	case Intrinsic::experimental_guard: {
3038	// Is this guard followed by another guard? We scan forward over a small
3039	// fixed window of instructions to handle common cases with conditions
3040	// computed between guards.
3041	Instruction *NextInst = II->getNextNonDebugInstruction();
3042	for (unsigned i = `0`; i < GuardWideningWindow; i++) {
3043	// Note: Using context-free form to avoid compile time blow up
3044	if (!isSafeToSpeculativelyExecute(I: NextInst))
3045	break;
3046	NextInst = NextInst->getNextNonDebugInstruction();
3047	}
3048	Value NextCond = nullptr*;
3049	if (match(NextInst,
3050	m_Intrinsic<Intrinsic::experimental_guard>(m_Value(NextCond)))) {
3051	Value *CurrCond = II->getArgOperand(i: `0`);
3052
3053	// Remove a guard that it is immediately preceded by an identical guard.
3054	// Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
3055	if (CurrCond != NextCond) {
3056	Instruction *MoveI = II->getNextNonDebugInstruction();
3057	while (MoveI != NextInst) {
3058	auto *Temp = MoveI;
3059	MoveI = MoveI->getNextNonDebugInstruction();
3060	Temp->moveBefore(MovePos: II);
3061	}
3062	replaceOperand(I&: *II, OpNum: `0`, V: Builder.CreateAnd(LHS: CurrCond, RHS: NextCond));
3063	}
3064	eraseInstFromFunction(I&: *NextInst);
3065	return II;
3066	}
3067	break;
3068	}
3069	case Intrinsic::vector_insert: {
3070	Value *Vec = II->getArgOperand(i: `0`);
3071	Value *SubVec = II->getArgOperand(i: `1`);
3072	Value *Idx = II->getArgOperand(i: `2`);
3073	auto *DstTy = dyn_cast<FixedVectorType>(Val: II->getType());
3074	auto *VecTy = dyn_cast<FixedVectorType>(Val: Vec->getType());
3075	auto *SubVecTy = dyn_cast<FixedVectorType>(Val: SubVec->getType());
3076
3077	// Only canonicalize if the destination vector, Vec, and SubVec are all
3078	// fixed vectors.
3079	if (DstTy && VecTy && SubVecTy) {
3080	unsigned DstNumElts = DstTy->getNumElements();
3081	unsigned VecNumElts = VecTy->getNumElements();
3082	unsigned SubVecNumElts = SubVecTy->getNumElements();
3083	unsigned IdxN = cast<ConstantInt>(Val: Idx)->getZExtValue();
3084
3085	// An insert that entirely overwrites Vec with SubVec is a nop.
3086	if (VecNumElts == SubVecNumElts)
3087	return replaceInstUsesWith(I&: CI, V: SubVec);
3088
3089	// Widen SubVec into a vector of the same width as Vec, since
3090	// shufflevector requires the two input vectors to be the same width.
3091	// Elements beyond the bounds of SubVec within the widened vector are
3092	// undefined.
3093	SmallVector<int, `8`> WidenMask;
3094	unsigned i;
3095	for (i = `0`; i != SubVecNumElts; ++i)
3096	WidenMask.push_back(Elt: i);
3097	for (; i != VecNumElts; ++i)
3098	WidenMask.push_back(Elt: PoisonMaskElem);
3099
3100	Value *WidenShuffle = Builder.CreateShuffleVector(V: SubVec, Mask: WidenMask);
3101
3102	SmallVector<int, `8`> Mask;
3103	for (unsigned i = `0`; i != IdxN; ++i)
3104	Mask.push_back(Elt: i);
3105	for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
3106	Mask.push_back(Elt: i);
3107	for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
3108	Mask.push_back(Elt: i);
3109
3110	Value *Shuffle = Builder.CreateShuffleVector(V1: Vec, V2: WidenShuffle, Mask);
3111	return replaceInstUsesWith(I&: CI, V: Shuffle);
3112	}
3113	break;
3114	}
3115	case Intrinsic::vector_extract: {
3116	Value *Vec = II->getArgOperand(i: `0`);
3117	Value *Idx = II->getArgOperand(i: `1`);
3118
3119	Type *ReturnType = II->getType();
3120	// (extract_vector (insert_vector InsertTuple, InsertValue, InsertIdx),
3121	// ExtractIdx)
3122	unsigned ExtractIdx = cast<ConstantInt>(Val: Idx)->getZExtValue();
3123	Value InsertTuple, InsertIdx, *InsertValue;
3124	if (match(Vec, m_Intrinsic<Intrinsic::vector_insert>(m_Value(InsertTuple),
3125	m_Value(InsertValue),
3126	m_Value(InsertIdx))) &&
3127	InsertValue->getType() == ReturnType) {
3128	unsigned Index = cast<ConstantInt>(Val: InsertIdx)->getZExtValue();
3129	// Case where we get the same index right after setting it.
3130	// extract.vector(insert.vector(InsertTuple, InsertValue, Idx), Idx) -->
3131	// InsertValue
3132	if (ExtractIdx == Index)
3133	return replaceInstUsesWith(I&: CI, V: InsertValue);
3134	// If we are getting a different index than what was set in the
3135	// insert.vector intrinsic. We can just set the input tuple to the one up
3136	// in the chain. extract.vector(insert.vector(InsertTuple, InsertValue,
3137	// InsertIndex), ExtractIndex)
3138	// --> extract.vector(InsertTuple, ExtractIndex)
3139	else
3140	return replaceOperand(I&: CI, OpNum: `0`, V: InsertTuple);
3141	}
3142
3143	auto *DstTy = dyn_cast<VectorType>(Val: ReturnType);
3144	auto *VecTy = dyn_cast<VectorType>(Val: Vec->getType());
3145
3146	if (DstTy && VecTy) {
3147	auto DstEltCnt = DstTy->getElementCount();
3148	auto VecEltCnt = VecTy->getElementCount();
3149	unsigned IdxN = cast<ConstantInt>(Val: Idx)->getZExtValue();
3150
3151	// Extracting the entirety of Vec is a nop.
3152	if (DstEltCnt == VecTy->getElementCount()) {
3153	replaceInstUsesWith(I&: CI, V: Vec);
3154	return eraseInstFromFunction(I&: CI);
3155	}
3156
3157	// Only canonicalize to shufflevector if the destination vector and
3158	// Vec are fixed vectors.
3159	if (VecEltCnt.isScalable() \|\| DstEltCnt.isScalable())
3160	break;
3161
3162	SmallVector<int, `8`> Mask;
3163	for (unsigned i = `0`; i != DstEltCnt.getKnownMinValue(); ++i)
3164	Mask.push_back(Elt: IdxN + i);
3165
3166	Value *Shuffle = Builder.CreateShuffleVector(V: Vec, Mask);
3167	return replaceInstUsesWith(I&: CI, V: Shuffle);
3168	}
3169	break;
3170	}
3171	case Intrinsic::experimental_vector_reverse: {
3172	Value BO0, BO1, X, Y;
3173	Value *Vec = II->getArgOperand(i: `0`);
3174	if (match(V: Vec, P: m_OneUse(SubPattern: m_BinOp(L: m_Value(V&: BO0), R: m_Value(V&: BO1))))) {
3175	auto *OldBinOp = cast<BinaryOperator>(Val: Vec);
3176	if (match(V: BO0, P: m_VecReverse(Op0: m_Value(V&: X)))) {
3177	// rev(binop rev(X), rev(Y)) --> binop X, Y
3178	if (match(V: BO1, P: m_VecReverse(Op0: m_Value(V&: Y))))
3179	return replaceInstUsesWith(I&: CI, V: BinaryOperator::CreateWithCopiedFlags(
3180	Opc: OldBinOp->getOpcode(), V1: X, V2: Y,
3181	CopyO: OldBinOp, Name: OldBinOp->getName(),
3182	InsertBefore: II->getIterator()));
3183	// rev(binop rev(X), BO1Splat) --> binop X, BO1Splat
3184	if (isSplatValue(V: BO1))
3185	return replaceInstUsesWith(I&: CI, V: BinaryOperator::CreateWithCopiedFlags(
3186	Opc: OldBinOp->getOpcode(), V1: X, V2: BO1,
3187	CopyO: OldBinOp, Name: OldBinOp->getName(),
3188	InsertBefore: II->getIterator()));
3189	}
3190	// rev(binop BO0Splat, rev(Y)) --> binop BO0Splat, Y
3191	if (match(V: BO1, P: m_VecReverse(Op0: m_Value(V&: Y))) && isSplatValue(V: BO0))
3192	return replaceInstUsesWith(I&: CI,
3193	V: BinaryOperator::CreateWithCopiedFlags(
3194	Opc: OldBinOp->getOpcode(), V1: BO0, V2: Y, CopyO: OldBinOp,
3195	Name: OldBinOp->getName(), InsertBefore: II->getIterator()));
3196	}
3197	// rev(unop rev(X)) --> unop X
3198	if (match(V: Vec, P: m_OneUse(SubPattern: m_UnOp(X: m_VecReverse(Op0: m_Value(V&: X)))))) {
3199	auto *OldUnOp = cast<UnaryOperator>(Val: Vec);
3200	auto *NewUnOp = UnaryOperator::CreateWithCopiedFlags(
3201	Opc: OldUnOp->getOpcode(), V: X, CopyO: OldUnOp, Name: OldUnOp->getName(),
3202	InsertBefore: II->getIterator());
3203	return replaceInstUsesWith(I&: CI, V: NewUnOp);
3204	}
3205	break;
3206	}
3207	case Intrinsic::vector_reduce_or:
3208	case Intrinsic::vector_reduce_and: {
3209	// Canonicalize logical or/and reductions:
3210	// Or reduction for i1 is represented as:
3211	// %val = bitcast <ReduxWidth x i1> to iReduxWidth
3212	// %res = cmp ne iReduxWidth %val, 0
3213	// And reduction for i1 is represented as:
3214	// %val = bitcast <ReduxWidth x i1> to iReduxWidth
3215	// %res = cmp eq iReduxWidth %val, 11111
3216	Value *Arg = II->getArgOperand(i: `0`);
3217	Value *Vect;
3218	if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
3219	if (auto *FTy = dyn_cast<FixedVectorType>(Val: Vect->getType()))
3220	if (FTy->getElementType() == Builder.getInt1Ty()) {
3221	Value *Res = Builder.CreateBitCast(
3222	V: Vect, DestTy: Builder.getIntNTy(N: FTy->getNumElements()));
3223	if (IID == Intrinsic::vector_reduce_and) {
3224	Res = Builder.CreateICmpEQ(
3225	LHS: Res, RHS: ConstantInt::getAllOnesValue(Ty: Res->getType()));
3226	} else {
3227	assert(IID == Intrinsic::vector_reduce_or &&
3228	"Expected or reduction.");
3229	Res = Builder.CreateIsNotNull(Arg: Res);
3230	}
3231	if (Arg != Vect)
3232	Res = Builder.CreateCast(Op: cast<CastInst>(Val: Arg)->getOpcode(), V: Res,
3233	DestTy: II->getType());
3234	return replaceInstUsesWith(I&: CI, V: Res);
3235	}
3236	}
3237	[[fallthrough]];
3238	}
3239	case Intrinsic::vector_reduce_add: {
3240	if (IID == Intrinsic::vector_reduce_add) {
3241	// Convert vector_reduce_add(ZExt(<n x i1>)) to
3242	// ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
3243	// Convert vector_reduce_add(SExt(<n x i1>)) to
3244	// -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
3245	// Convert vector_reduce_add(<n x i1>) to
3246	// Trunc(ctpop(bitcast <n x i1> to in)).
3247	Value *Arg = II->getArgOperand(i: `0`);
3248	Value *Vect;
3249	if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
3250	if (auto *FTy = dyn_cast<FixedVectorType>(Val: Vect->getType()))
3251	if (FTy->getElementType() == Builder.getInt1Ty()) {
3252	Value *V = Builder.CreateBitCast(
3253	V: Vect, DestTy: Builder.getIntNTy(N: FTy->getNumElements()));
3254	Value *Res = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, V);
3255	if (Res->getType() != II->getType())
3256	Res = Builder.CreateZExtOrTrunc(V: Res, DestTy: II->getType());
3257	if (Arg != Vect &&
3258	cast<Instruction>(Val: Arg)->getOpcode() == Instruction::SExt)
3259	Res = Builder.CreateNeg(V: Res);
3260	return replaceInstUsesWith(I&: CI, V: Res);
3261	}
3262	}
3263	}
3264	[[fallthrough]];
3265	}
3266	case Intrinsic::vector_reduce_xor: {
3267	if (IID == Intrinsic::vector_reduce_xor) {
3268	// Exclusive disjunction reduction over the vector with
3269	// (potentially-extended) i1 element type is actually a
3270	// (potentially-extended) arithmetic `add` reduction over the original
3271	// non-extended value:
3272	// vector_reduce_xor(?ext(<n x i1>))
3273	// -->
3274	// ?ext(vector_reduce_add(<n x i1>))
3275	Value *Arg = II->getArgOperand(i: `0`);
3276	Value *Vect;
3277	if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
3278	if (auto *FTy = dyn_cast<FixedVectorType>(Val: Vect->getType()))
3279	if (FTy->getElementType() == Builder.getInt1Ty()) {
3280	Value *Res = Builder.CreateAddReduce(Src: Vect);
3281	if (Arg != Vect)
3282	Res = Builder.CreateCast(Op: cast<CastInst>(Val: Arg)->getOpcode(), V: Res,
3283	DestTy: II->getType());
3284	return replaceInstUsesWith(I&: CI, V: Res);
3285	}
3286	}
3287	}
3288	[[fallthrough]];
3289	}
3290	case Intrinsic::vector_reduce_mul: {
3291	if (IID == Intrinsic::vector_reduce_mul) {
3292	// Multiplicative reduction over the vector with (potentially-extended)
3293	// i1 element type is actually a (potentially zero-extended)
3294	// logical `and` reduction over the original non-extended value:
3295	// vector_reduce_mul(?ext(<n x i1>))
3296	// -->
3297	// zext(vector_reduce_and(<n x i1>))
3298	Value *Arg = II->getArgOperand(i: `0`);
3299	Value *Vect;
3300	if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
3301	if (auto *FTy = dyn_cast<FixedVectorType>(Val: Vect->getType()))
3302	if (FTy->getElementType() == Builder.getInt1Ty()) {
3303	Value *Res = Builder.CreateAndReduce(Src: Vect);
3304	if (Res->getType() != II->getType())
3305	Res = Builder.CreateZExt(V: Res, DestTy: II->getType());
3306	return replaceInstUsesWith(I&: CI, V: Res);
3307	}
3308	}
3309	}
3310	[[fallthrough]];
3311	}
3312	case Intrinsic::vector_reduce_umin:
3313	case Intrinsic::vector_reduce_umax: {
3314	if (IID == Intrinsic::vector_reduce_umin \|\|
3315	IID == Intrinsic::vector_reduce_umax) {
3316	// UMin/UMax reduction over the vector with (potentially-extended)
3317	// i1 element type is actually a (potentially-extended)
3318	// logical `and`/`or` reduction over the original non-extended value:
3319	// vector_reduce_u{min,max}(?ext(<n x i1>))
3320	// -->
3321	// ?ext(vector_reduce_{and,or}(<n x i1>))
3322	Value *Arg = II->getArgOperand(i: `0`);
3323	Value *Vect;
3324	if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
3325	if (auto *FTy = dyn_cast<FixedVectorType>(Val: Vect->getType()))
3326	if (FTy->getElementType() == Builder.getInt1Ty()) {
3327	Value *Res = IID == Intrinsic::vector_reduce_umin
3328	? Builder.CreateAndReduce(Vect)
3329	: Builder.CreateOrReduce(Vect);
3330	if (Arg != Vect)
3331	Res = Builder.CreateCast(Op: cast<CastInst>(Val: Arg)->getOpcode(), V: Res,
3332	DestTy: II->getType());
3333	return replaceInstUsesWith(I&: CI, V: Res);
3334	}
3335	}
3336	}
3337	[[fallthrough]];
3338	}
3339	case Intrinsic::vector_reduce_smin:
3340	case Intrinsic::vector_reduce_smax: {
3341	if (IID == Intrinsic::vector_reduce_smin \|\|
3342	IID == Intrinsic::vector_reduce_smax) {
3343	// SMin/SMax reduction over the vector with (potentially-extended)
3344	// i1 element type is actually a (potentially-extended)
3345	// logical `and`/`or` reduction over the original non-extended value:
3346	// vector_reduce_s{min,max}(<n x i1>)
3347	// -->
3348	// vector_reduce_{or,and}(<n x i1>)
3349	// and
3350	// vector_reduce_s{min,max}(sext(<n x i1>))
3351	// -->
3352	// sext(vector_reduce_{or,and}(<n x i1>))
3353	// and
3354	// vector_reduce_s{min,max}(zext(<n x i1>))
3355	// -->
3356	// zext(vector_reduce_{and,or}(<n x i1>))
3357	Value *Arg = II->getArgOperand(i: `0`);
3358	Value *Vect;
3359	if (match(V: Arg, P: m_ZExtOrSExtOrSelf(Op: m_Value(V&: Vect)))) {
3360	if (auto *FTy = dyn_cast<FixedVectorType>(Val: Vect->getType()))
3361	if (FTy->getElementType() == Builder.getInt1Ty()) {
3362	Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd;
3363	if (Arg != Vect)
3364	ExtOpc = cast<CastInst>(Val: Arg)->getOpcode();
3365	Value *Res = ((IID == Intrinsic::vector_reduce_smin) ==
3366	(ExtOpc == Instruction::CastOps::ZExt))
3367	? Builder.CreateAndReduce(Vect)
3368	: Builder.CreateOrReduce(Vect);
3369	if (Arg != Vect)
3370	Res = Builder.CreateCast(Op: ExtOpc, V: Res, DestTy: II->getType());
3371	return replaceInstUsesWith(I&: CI, V: Res);
3372	}
3373	}
3374	}
3375	[[fallthrough]];
3376	}
3377	case Intrinsic::vector_reduce_fmax:
3378	case Intrinsic::vector_reduce_fmin:
3379	case Intrinsic::vector_reduce_fadd:
3380	case Intrinsic::vector_reduce_fmul: {
3381	bool CanBeReassociated = (IID != Intrinsic::vector_reduce_fadd &&
3382	IID != Intrinsic::vector_reduce_fmul) \|\|
3383	II->hasAllowReassoc();
3384	const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd \|\|
3385	IID == Intrinsic::vector_reduce_fmul)
3386	? `1`
3387	: `0`;
3388	Value *Arg = II->getArgOperand(i: ArgIdx);
3389	Value *V;
3390	ArrayRef<int> Mask;
3391	if (!isa<FixedVectorType>(Val: Arg->getType()) \|\| !CanBeReassociated \|\|
3392	!match(V: Arg, P: m_Shuffle(v1: m_Value(V), v2: m_Undef(), mask: m_Mask (Mask))) \|\|
3393	!cast<ShuffleVectorInst>(Val: Arg)->isSingleSource())
3394	break;
3395	int Sz = Mask.size();
3396	SmallBitVector UsedIndices(Sz);
3397	for (int Idx : Mask) {
3398	if (Idx == PoisonMaskElem \|\| UsedIndices.test(Idx))
3399	break;
3400	UsedIndices.set(Idx);
3401	}
3402	// Can remove shuffle iff just shuffled elements, no repeats, undefs, or
3403	// other changes.
3404	if (UsedIndices.all()) {
3405	replaceUse(U&: II->getOperandUse(i: ArgIdx), NewValue: V);
3406	return nullptr;
3407	}
3408	break;
3409	}
3410	case Intrinsic::is_fpclass: {
3411	if (Instruction I = foldIntrinsicIsFPClass(II&: II))
3412	return I;
3413	break;
3414	}
3415	case Intrinsic::threadlocal_address: {
3416	Align MinAlign = getKnownAlignment(V: II->getArgOperand(i: `0`), DL, CxtI: II, AC: &AC, DT: &DT);
3417	MaybeAlign Align = II->getRetAlign();
3418	if (MinAlign > Align.valueOrOne()) {
3419	II->addRetAttr(Attr: Attribute::getWithAlignment(Context&: II->getContext(), Alignment: MinAlign));
3420	return II;
3421	}
3422	break;
3423	}
3424	default: {
3425	// Handle target specific intrinsics
3426	std::optional<Instruction > V = targetInstCombineIntrinsic(II&: II);
3427	if (V)
3428	return *V;
3429	break;
3430	}
3431	}
3432
3433	// Try to fold intrinsic into select operands. This is legal if:
3434	// The intrinsic is speculatable.*
3435	// The select condition is not a vector, or the intrinsic does not*
3436	// perform cross-lane operations.
3437	switch (IID) {
3438	case Intrinsic::ctlz:
3439	case Intrinsic::cttz:
3440	case Intrinsic::ctpop:
3441	case Intrinsic::umin:
3442	case Intrinsic::umax:
3443	case Intrinsic::smin:
3444	case Intrinsic::smax:
3445	case Intrinsic::usub_sat:
3446	case Intrinsic::uadd_sat:
3447	case Intrinsic::ssub_sat:
3448	case Intrinsic::sadd_sat:
3449	for (Value *Op : II->args())
3450	if (auto *Sel = dyn_cast<SelectInst>(Val: Op))
3451	if (Instruction R = FoldOpIntoSelect(Op&: II, SI: Sel))
3452	return R;
3453	[[fallthrough]];
3454	default:
3455	break;
3456	}
3457
3458	if (Instruction *Shuf = foldShuffledIntrinsicOperands(II, Builder))
3459	return Shuf;
3460
3461	// Some intrinsics (like experimental_gc_statepoint) can be used in invoke
3462	// context, so it is handled in visitCallBase and we should trigger it.
3463	return visitCallBase(Call&: *II);
3464	}
3465
3466	// Fence instruction simplification
3467	Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) {
3468	auto *NFI = dyn_cast<FenceInst>(Val: FI.getNextNonDebugInstruction());
3469	// This check is solely here to handle arbitrary target-dependent syncscopes.
3470	// TODO: Can remove if does not matter in practice.
3471	if (NFI && FI.isIdenticalTo(I: NFI))
3472	return eraseInstFromFunction(I&: FI);
3473
3474	// Returns true if FI1 is identical or stronger fence than FI2.
3475	auto isIdenticalOrStrongerFence = [](FenceInst FI1, FenceInst FI2) {
3476	auto FI1SyncScope = FI1->getSyncScopeID();
3477	// Consider same scope, where scope is global or single-thread.
3478	if (FI1SyncScope != FI2->getSyncScopeID() \|\|
3479	(FI1SyncScope != SyncScope::System &&
3480	FI1SyncScope != SyncScope::SingleThread))
3481	return false;
3482
3483	return isAtLeastOrStrongerThan(AO: FI1->getOrdering(), Other: FI2->getOrdering());
3484	};
3485	if (NFI && isIdenticalOrStrongerFence (NFI, &FI))
3486	return eraseInstFromFunction(I&: FI);
3487
3488	if (auto *PFI = dyn_cast_or_null<FenceInst>(Val: FI.getPrevNonDebugInstruction()))
3489	if (isIdenticalOrStrongerFence (PFI, &FI))
3490	return eraseInstFromFunction(I&: FI);
3491	return nullptr;
3492	}
3493
3494	// InvokeInst simplification
3495	Instruction *InstCombinerImpl::visitInvokeInst(InvokeInst &II) {
3496	return visitCallBase(Call&: II);
3497	}
3498
3499	// CallBrInst simplification
3500	Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) {
3501	return visitCallBase(Call&: CBI);
3502	}
3503
3504	Instruction InstCombinerImpl::tryOptimizeCall(CallInst CI) {
3505	if (!CI->getCalledFunction()) return nullptr;
3506
3507	// Skip optimizing notail and musttail calls so
3508	// LibCallSimplifier::optimizeCall doesn't have to preserve those invariants.
3509	// LibCallSimplifier::optimizeCall should try to preseve tail calls though.
3510	if (CI->isMustTailCall() \|\| CI->isNoTailCall())
3511	return nullptr;
3512
3513	auto InstCombineRAUW = [this](Instruction From, Value With) {
3514	replaceInstUsesWith(I&: *From, V: With);
3515	};
3516	auto InstCombineErase = [this](Instruction *I) {
3517	eraseInstFromFunction(I&: *I);
3518	};
3519	LibCallSimplifier Simplifier(DL, &TLI, &AC, ORE, BFI, PSI, InstCombineRAUW,
3520	InstCombineErase);
3521	if (Value *With = Simplifier.optimizeCall(CI, B&: Builder)) {
3522	++NumSimplified;
3523	return CI->use_empty() ? CI : replaceInstUsesWith(I&: *CI, V: With);
3524	}
3525
3526	return nullptr;
3527	}
3528
3529	static IntrinsicInst findInitTrampolineFromAlloca(Value TrampMem) {
3530	// Strip off at most one level of pointer casts, looking for an alloca. This
3531	// is good enough in practice and simpler than handling any number of casts.
3532	Value *Underlying = TrampMem->stripPointerCasts();
3533	if (Underlying != TrampMem &&
3534	(!Underlying->hasOneUse() \|\| Underlying->user_back() != TrampMem))
3535	return nullptr;
3536	if (!isa<AllocaInst>(Val: Underlying))
3537	return nullptr;
3538
3539	IntrinsicInst InitTrampoline = nullptr*;
3540	for (User *U : TrampMem->users()) {
3541	IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: U);
3542	if (!II)
3543	return nullptr;
3544	if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
3545	if (InitTrampoline)
3546	// More than one init_trampoline writes to this value. Give up.
3547	return nullptr;
3548	InitTrampoline = II;
3549	continue;
3550	}
3551	if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
3552	// Allow any number of calls to adjust.trampoline.
3553	continue;
3554	return nullptr;
3555	}
3556
3557	// No call to init.trampoline found.
3558	if (!InitTrampoline)
3559	return nullptr;
3560
3561	// Check that the alloca is being used in the expected way.
3562	if (InitTrampoline->getOperand(i_nocapture: `0`) != TrampMem)
3563	return nullptr;
3564
3565	return InitTrampoline;
3566	}
3567
3568	static IntrinsicInst findInitTrampolineFromBB(IntrinsicInst AdjustTramp,
3569	Value *TrampMem) {
3570	// Visit all the previous instructions in the basic block, and try to find a
3571	// init.trampoline which has a direct path to the adjust.trampoline.
3572	for (BasicBlock::iterator I = AdjustTramp->getIterator(),
3573	E = AdjustTramp->getParent()->begin();
3574	I != E;) {
3575	Instruction Inst = &--I;
3576	if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val&: I))
3577	if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
3578	II->getOperand(`0`) == TrampMem)
3579	return II;
3580	if (Inst->mayWriteToMemory())
3581	return nullptr;
3582	}
3583	return nullptr;
3584	}
3585
3586	// Given a call to llvm.adjust.trampoline, find and return the corresponding
3587	// call to llvm.init.trampoline if the call to the trampoline can be optimized
3588	// to a direct call to a function. Otherwise return NULL.
3589	static IntrinsicInst findInitTrampoline(Value Callee) {
3590	Callee = Callee->stripPointerCasts();
3591	IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Val: Callee);
3592	if (!AdjustTramp \|\|
3593	AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
3594	return nullptr;
3595
3596	Value *TrampMem = AdjustTramp->getOperand(i_nocapture: `0`);
3597
3598	if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem))
3599	return IT;
3600	if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
3601	return IT;
3602	return nullptr;
3603	}
3604
3605	bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
3606	const TargetLibraryInfo *TLI) {
3607	// Note: We only handle cases which can't be driven from generic attributes
3608	// here. So, for example, nonnull and noalias (which are common properties
3609	// of some allocation functions) are expected to be handled via annotation
3610	// of the respective allocator declaration with generic attributes.
3611	bool Changed = false;
3612
3613	if (!Call.getType()->isPointerTy())
3614	return Changed;
3615
3616	std::optional<APInt> Size = getAllocSize(CB: &Call, TLI);
3617	if (Size && *Size != `0`) {
3618	// TODO: We really should just emit deref_or_null here and then
3619	// let the generic inference code combine that with nonnull.
3620	if (Call.hasRetAttr(Attribute::NonNull)) {
3621	Changed = !Call.hasRetAttr(Attribute::Dereferenceable);
3622	Call.addRetAttr(Attr: Attribute::getWithDereferenceableBytes(
3623	Context&: Call.getContext(), Bytes: Size ->getLimitedValue()));
3624	} else {
3625	Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull);
3626	Call.addRetAttr(Attr: Attribute::getWithDereferenceableOrNullBytes(
3627	Context&: Call.getContext(), Bytes: Size ->getLimitedValue()));
3628	}
3629	}
3630
3631	// Add alignment attribute if alignment is a power of two constant.
3632	Value *Alignment = getAllocAlignment(V: &Call, TLI);
3633	if (!Alignment)
3634	return Changed;
3635
3636	ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Val: Alignment);
3637	if (AlignOpC && AlignOpC->getValue().ult(RHS: llvm::Value::MaximumAlignment)) {
3638	uint64_t AlignmentVal = AlignOpC->getZExtValue();
3639	if (llvm::isPowerOf2_64(Value: AlignmentVal)) {
3640	Align ExistingAlign = Call.getRetAlign().valueOrOne();
3641	Align NewAlign = Align (AlignmentVal);
3642	if (NewAlign > ExistingAlign) {
3643	Call.addRetAttr(
3644	Attr: Attribute::getWithAlignment(Context&: Call.getContext(), Alignment: NewAlign));
3645	Changed = true;
3646	}
3647	}
3648	}
3649	return Changed;
3650	}
3651
3652	/// Improvements for call, callbr and invoke instructions.
3653	Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
3654	bool Changed = annotateAnyAllocSite(Call, TLI: &TLI);
3655
3656	// Mark any parameters that are known to be non-null with the nonnull
3657	// attribute. This is helpful for inlining calls to functions with null
3658	// checks on their arguments.
3659	SmallVector<unsigned, `4`> ArgNos;
3660	unsigned ArgNo = `0`;
3661
3662	for (Value *V : Call.args()) {
3663	if (V->getType()->isPointerTy() &&
3664	!Call.paramHasAttr(ArgNo, Attribute::NonNull) &&
3665	isKnownNonZero(V, getSimplifyQuery().getWithInstruction(&Call)))
3666	ArgNos.push_back(Elt: ArgNo);
3667	ArgNo++;
3668	}
3669
3670	assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly.");
3671
3672	if (!ArgNos.empty()) {
3673	AttributeList AS = Call.getAttributes();
3674	LLVMContext &Ctx = Call.getContext();
3675	AS = AS.addParamAttribute(Ctx, ArgNos,
3676	Attribute::get(Ctx, Attribute::NonNull));
3677	Call.setAttributes(AS);
3678	Changed = true;
3679	}
3680
3681	// If the callee is a pointer to a function, attempt to move any casts to the
3682	// arguments of the call/callbr/invoke.
3683	Value *Callee = Call.getCalledOperand();
3684	Function *CalleeF = dyn_cast<Function>(Val: Callee);
3685	if ((!CalleeF \|\| CalleeF->getFunctionType() != Call.getFunctionType()) &&
3686	transformConstExprCastCall(Call))
3687	return nullptr;
3688
3689	if (CalleeF) {
3690	// Remove the convergent attr on calls when the callee is not convergent.
3691	if (Call.isConvergent() && !CalleeF->isConvergent() &&
3692	!CalleeF->isIntrinsic()) {
3693	LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
3694	<< "\n");
3695	Call.setNotConvergent();
3696	return &Call;
3697	}
3698
3699	// If the call and callee calling conventions don't match, and neither one
3700	// of the calling conventions is compatible with C calling convention
3701	// this call must be unreachable, as the call is undefined.
3702	if ((CalleeF->getCallingConv() != Call.getCallingConv() &&
3703	!(CalleeF->getCallingConv() == llvm::CallingConv::C &&
3704	TargetLibraryInfoImpl::isCallingConvCCompatible(CI: &Call)) &&
3705	!(Call.getCallingConv() == llvm::CallingConv::C &&
3706	TargetLibraryInfoImpl::isCallingConvCCompatible(Callee: CalleeF))) &&
3707	// Only do this for calls to a function with a body. A prototype may
3708	// not actually end up matching the implementation's calling conv for a
3709	// variety of reasons (e.g. it may be written in assembly).
3710	!CalleeF->isDeclaration()) {
3711	Instruction *OldCall = &Call;
3712	CreateNonTerminatorUnreachable(InsertAt: OldCall);
3713	// If OldCall does not return void then replaceInstUsesWith poison.
3714	// This allows ValueHandlers and custom metadata to adjust itself.
3715	if (!OldCall->getType()->isVoidTy())
3716	replaceInstUsesWith(I&: *OldCall, V: PoisonValue::get(T: OldCall->getType()));
3717	if (isa<CallInst>(Val: OldCall))
3718	return eraseInstFromFunction(I&: *OldCall);
3719
3720	// We cannot remove an invoke or a callbr, because it would change thexi
3721	// CFG, just change the callee to a null pointer.
3722	cast<CallBase>(Val: OldCall)->setCalledFunction(
3723	FTy: CalleeF->getFunctionType(),
3724	Fn: Constant::getNullValue(Ty: CalleeF->getType()));
3725	return nullptr;
3726	}
3727	}
3728
3729	// Calling a null function pointer is undefined if a null address isn't
3730	// dereferenceable.
3731	if ((isa<ConstantPointerNull>(Val: Callee) &&
3732	!NullPointerIsDefined(F: Call.getFunction())) \|\|
3733	isa<UndefValue>(Val: Callee)) {
3734	// If Call does not return void then replaceInstUsesWith poison.
3735	// This allows ValueHandlers and custom metadata to adjust itself.
3736	if (!Call.getType()->isVoidTy())
3737	replaceInstUsesWith(I&: Call, V: PoisonValue::get(T: Call.getType()));
3738
3739	if (Call.isTerminator()) {
3740	// Can't remove an invoke or callbr because we cannot change the CFG.
3741	return nullptr;
3742	}
3743
3744	// This instruction is not reachable, just remove it.
3745	CreateNonTerminatorUnreachable(InsertAt: &Call);
3746	return eraseInstFromFunction(I&: Call);
3747	}
3748
3749	if (IntrinsicInst *II = findInitTrampoline(Callee))
3750	return transformCallThroughTrampoline(Call, Tramp&: *II);
3751
3752	if (isa<InlineAsm>(Val: Callee) && !Call.doesNotThrow()) {
3753	InlineAsm *IA = cast<InlineAsm>(Val: Callee);
3754	if (!IA->canThrow()) {
3755	// Normal inline asm calls cannot throw - mark them
3756	// 'nounwind'.
3757	Call.setDoesNotThrow();
3758	Changed = true;
3759	}
3760	}
3761
3762	// Try to optimize the call if possible, we require DataLayout for most of
3763	// this. None of these calls are seen as possibly dead so go ahead and
3764	// delete the instruction now.
3765	if (CallInst *CI = dyn_cast<CallInst>(Val: &Call)) {
3766	Instruction *I = tryOptimizeCall(CI);
3767	// If we changed something return the result, etc. Otherwise let
3768	// the fallthrough check.
3769	if (I) return eraseInstFromFunction(I&: *I);
3770	}
3771
3772	if (!Call.use_empty() && !Call.isMustTailCall())
3773	if (Value *ReturnedArg = Call.getReturnedArgOperand()) {
3774	Type *CallTy = Call.getType();
3775	Type *RetArgTy = ReturnedArg->getType();
3776	if (RetArgTy->canLosslesslyBitCastTo(Ty: CallTy))
3777	return replaceInstUsesWith(
3778	I&: Call, V: Builder.CreateBitOrPointerCast(V: ReturnedArg, DestTy: CallTy));
3779	}
3780
3781	// Drop unnecessary kcfi operand bundles from calls that were converted
3782	// into direct calls.
3783	auto Bundle = Call.getOperandBundle(ID: LLVMContext::OB_kcfi);
3784	if (Bundle && !Call.isIndirectCall()) {
3785	DEBUG_WITH_TYPE(DEBUG_TYPE "-kcfi", {
3786	if (CalleeF) {
3787	ConstantInt FunctionType = nullptr*;
3788	ConstantInt *ExpectedType = cast<ConstantInt>(Bundle ->Inputs[`0`]);
3789
3790	if (MDNode *MD = CalleeF->getMetadata(LLVMContext::MD_kcfi_type))
3791	FunctionType = mdconst::extract<ConstantInt>(MD->getOperand(`0`));
3792
3793	if (FunctionType &&
3794	FunctionType->getZExtValue() != ExpectedType->getZExtValue())
3795	dbgs() << Call.getModule()->getName()
3796	<< ": warning: kcfi: " << Call.getCaller()->getName()
3797	<< ": call to " << CalleeF->getName()
3798	<< " using a mismatching function pointer type\n";
3799	}
3800	});
3801
3802	return CallBase::removeOperandBundle(CB: &Call, ID: LLVMContext::OB_kcfi);
3803	}
3804
3805	if (isRemovableAlloc(V: &Call, TLI: &TLI))
3806	return visitAllocSite(FI&: Call);
3807
3808	// Handle intrinsics which can be used in both call and invoke context.
3809	switch (Call.getIntrinsicID()) {
3810	case Intrinsic::experimental_gc_statepoint: {
3811	GCStatepointInst &GCSP = *cast<GCStatepointInst>(Val: &Call);
3812	SmallPtrSet<Value *, `32`> LiveGcValues;
3813	for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
3814	GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
3815
3816	// Remove the relocation if unused.
3817	if (GCR.use_empty()) {
3818	eraseInstFromFunction(I&: GCR);
3819	continue;
3820	}
3821
3822	Value *DerivedPtr = GCR.getDerivedPtr();
3823	Value *BasePtr = GCR.getBasePtr();
3824
3825	// Undef is undef, even after relocation.
3826	if (isa<UndefValue>(Val: DerivedPtr) \|\| isa<UndefValue>(Val: BasePtr)) {
3827	replaceInstUsesWith(I&: GCR, V: UndefValue::get(T: GCR.getType()));
3828	eraseInstFromFunction(I&: GCR);
3829	continue;
3830	}
3831
3832	if (auto *PT = dyn_cast<PointerType>(Val: GCR.getType())) {
3833	// The relocation of null will be null for most any collector.
3834	// TODO: provide a hook for this in GCStrategy. There might be some
3835	// weird collector this property does not hold for.
3836	if (isa<ConstantPointerNull>(Val: DerivedPtr)) {
3837	// Use null-pointer of gc_relocate's type to replace it.
3838	replaceInstUsesWith(I&: GCR, V: ConstantPointerNull::get(T: PT));
3839	eraseInstFromFunction(I&: GCR);
3840	continue;
3841	}
3842
3843	// isKnownNonNull -> nonnull attribute
3844	if (!GCR.hasRetAttr(Attribute::NonNull) &&
3845	isKnownNonZero(DerivedPtr,
3846	getSimplifyQuery().getWithInstruction(&Call))) {
3847	GCR.addRetAttr(Attribute::NonNull);
3848	// We discovered new fact, re-check users.
3849	Worklist.pushUsersToWorkList(I&: GCR);
3850	}
3851	}
3852
3853	// If we have two copies of the same pointer in the statepoint argument
3854	// list, canonicalize to one. This may let us common gc.relocates.
3855	if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
3856	GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
3857	auto *OpIntTy = GCR.getOperand(i_nocapture: `2`)->getType();
3858	GCR.setOperand(i_nocapture: `2`, Val_nocapture: ConstantInt::get(Ty: OpIntTy, V: GCR.getBasePtrIndex()));
3859	}
3860
3861	// TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
3862	// Canonicalize on the type from the uses to the defs
3863
3864	// TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
3865	LiveGcValues.insert(Ptr: BasePtr);
3866	LiveGcValues.insert(Ptr: DerivedPtr);
3867	}
3868	std::optional<OperandBundleUse> Bundle =
3869	GCSP.getOperandBundle(ID: LLVMContext::OB_gc_live);
3870	unsigned NumOfGCLives = LiveGcValues.size();
3871	if (!Bundle \|\| NumOfGCLives == Bundle ->Inputs.size())
3872	break;
3873	// We can reduce the size of gc live bundle.
3874	DenseMap<Value , unsigned*> Val2Idx;
3875	std::vector<Value *> NewLiveGc;
3876	for (Value *V : Bundle ->Inputs) {
3877	if (Val2Idx.count(Val: V))
3878	continue;
3879	if (LiveGcValues.count(Ptr: V)) {
3880	Val2Idx [V] = NewLiveGc.size();
3881	NewLiveGc.push_back(x: V);
3882	} else
3883	Val2Idx [V] = NumOfGCLives;
3884	}
3885	// Update all gc.relocates
3886	for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
3887	GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
3888	Value *BasePtr = GCR.getBasePtr();
3889	assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives &&
3890	"Missed live gc for base pointer");
3891	auto *OpIntTy1 = GCR.getOperand(i_nocapture: `1`)->getType();
3892	GCR.setOperand(i_nocapture: `1`, Val_nocapture: ConstantInt::get(Ty: OpIntTy1, V: Val2Idx [BasePtr]));
3893	Value *DerivedPtr = GCR.getDerivedPtr();
3894	assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives &&
3895	"Missed live gc for derived pointer");
3896	auto *OpIntTy2 = GCR.getOperand(i_nocapture: `2`)->getType();
3897	GCR.setOperand(i_nocapture: `2`, Val_nocapture: ConstantInt::get(Ty: OpIntTy2, V: Val2Idx [DerivedPtr]));
3898	}
3899	// Create new statepoint instruction.
3900	OperandBundleDef NewBundle("gc-live", NewLiveGc);
3901	return CallBase::Create(CB: &Call, Bundle: NewBundle);
3902	}
3903	default: { break; }
3904	}
3905
3906	return Changed ? &Call : nullptr;
3907	}
3908
3909	/// If the callee is a constexpr cast of a function, attempt to move the cast to
3910	/// the arguments of the call/invoke.
3911	/// CallBrInst is not supported.
3912	bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
3913	auto *Callee =
3914	dyn_cast<Function>(Val: Call.getCalledOperand()->stripPointerCasts());
3915	if (!Callee)
3916	return false;
3917
3918	assert(!isa<CallBrInst>(Call) &&
3919	"CallBr's don't have a single point after a def to insert at");
3920
3921	// If this is a call to a thunk function, don't remove the cast. Thunks are
3922	// used to transparently forward all incoming parameters and outgoing return
3923	// values, so it's important to leave the cast in place.
3924	if (Callee->hasFnAttribute(Kind: "thunk"))
3925	return false;
3926
3927	// If this is a call to a naked function, the assembly might be
3928	// using an argument, or otherwise rely on the frame layout,
3929	// the function prototype will mismatch.
3930	if (Callee->hasFnAttribute(Attribute::Naked))
3931	return false;
3932
3933	// If this is a musttail call, the callee's prototype must match the caller's
3934	// prototype with the exception of pointee types. The code below doesn't
3935	// implement that, so we can't do this transform.
3936	// TODO: Do the transform if it only requires adding pointer casts.
3937	if (Call.isMustTailCall())
3938	return false;
3939
3940	Instruction *Caller = &Call;
3941	const AttributeList &CallerPAL = Call.getAttributes();
3942
3943	// Okay, this is a cast from a function to a different type. Unless doing so
3944	// would cause a type conversion of one of our arguments, change this call to
3945	// be a direct call with arguments casted to the appropriate types.
3946	FunctionType *FT = Callee->getFunctionType();
3947	Type *OldRetTy = Caller->getType();
3948	Type *NewRetTy = FT->getReturnType();
3949
3950	// Check to see if we are changing the return type...
3951	if (OldRetTy != NewRetTy) {
3952
3953	if (NewRetTy->isStructTy())
3954	return false; // TODO: Handle multiple return values.
3955
3956	if (!CastInst::isBitOrNoopPointerCastable(SrcTy: NewRetTy, DestTy: OldRetTy, DL)) {
3957	if (Callee->isDeclaration())
3958	return false; // Cannot transform this return value.
3959
3960	if (!Caller->use_empty() &&
3961	// void -> non-void is handled specially
3962	!NewRetTy->isVoidTy())
3963	return false; // Cannot transform this return value.
3964	}
3965
3966	if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
3967	AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
3968	if (RAttrs.overlaps(AM: AttributeFuncs::typeIncompatible(Ty: NewRetTy)))
3969	return false; // Attribute not compatible with transformed value.
3970	}
3971
3972	// If the callbase is an invoke instruction, and the return value is
3973	// used by a PHI node in a successor, we cannot change the return type of
3974	// the call because there is no place to put the cast instruction (without
3975	// breaking the critical edge). Bail out in this case.
3976	if (!Caller->use_empty()) {
3977	BasicBlock PhisNotSupportedBlock = nullptr*;
3978	if (auto *II = dyn_cast<InvokeInst>(Val: Caller))
3979	PhisNotSupportedBlock = II->getNormalDest();
3980	if (PhisNotSupportedBlock)
3981	for (User *U : Caller->users())
3982	if (PHINode *PN = dyn_cast<PHINode>(Val: U))
3983	if (PN->getParent() == PhisNotSupportedBlock)
3984	return false;
3985	}
3986	}
3987
3988	unsigned NumActualArgs = Call.arg_size();
3989	unsigned NumCommonArgs = std::min(a: FT->getNumParams(), b: NumActualArgs);
3990
3991	// Prevent us turning:
3992	// declare void @takes_i32_inalloca(i32 inalloca)*
3993	// call void bitcast (void (i32)* @takes_i32_inalloca to void (i32))(i32 0)
3994	//
3995	// into:
3996	// call void @takes_i32_inalloca(i32 null)*
3997	//
3998	// Similarly, avoid folding away bitcasts of byval calls.
3999	if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) \|\|
4000	Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated))
4001	return false;
4002
4003	auto AI = Call.arg_begin();
4004	for (unsigned i = `0`, e = NumCommonArgs; i != e; ++i, ++AI) {
4005	Type *ParamTy = FT->getParamType(i);
4006	Type ActTy = (AI)->getType();
4007
4008	if (!CastInst::isBitOrNoopPointerCastable(SrcTy: ActTy, DestTy: ParamTy, DL))
4009	return false; // Cannot transform this parameter value.
4010
4011	// Check if there are any incompatible attributes we cannot drop safely.
4012	if (AttrBuilder (FT->getContext(), CallerPAL.getParamAttrs(ArgNo: i))
4013	.overlaps(AM: AttributeFuncs::typeIncompatible(
4014	Ty: ParamTy, ASK: AttributeFuncs::ASK_UNSAFE_TO_DROP)))
4015	return false; // Attribute not compatible with transformed value.
4016
4017	if (Call.isInAllocaArgument(i) \|\|
4018	CallerPAL.hasParamAttr(i, Attribute::Preallocated))
4019	return false; // Cannot transform to and from inalloca/preallocated.
4020
4021	if (CallerPAL.hasParamAttr(i, Attribute::SwiftError))
4022	return false;
4023
4024	if (CallerPAL.hasParamAttr(i, Attribute::ByVal) !=
4025	Callee->getAttributes().hasParamAttr(i, Attribute::ByVal))
4026	return false; // Cannot transform to or from byval.
4027	}
4028
4029	if (Callee->isDeclaration()) {
4030	// Do not delete arguments unless we have a function body.
4031	if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
4032	return false;
4033
4034	// If the callee is just a declaration, don't change the varargsness of the
4035	// call. We don't want to introduce a varargs call where one doesn't
4036	// already exist.
4037	if (FT->isVarArg() != Call.getFunctionType()->isVarArg())
4038	return false;
4039
4040	// If both the callee and the cast type are varargs, we still have to make
4041	// sure the number of fixed parameters are the same or we have the same
4042	// ABI issues as if we introduce a varargs call.
4043	if (FT->isVarArg() && Call.getFunctionType()->isVarArg() &&
4044	FT->getNumParams() != Call.getFunctionType()->getNumParams())
4045	return false;
4046	}
4047
4048	if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
4049	!CallerPAL.isEmpty()) {
4050	// In this case we have more arguments than the new function type, but we
4051	// won't be dropping them. Check that these extra arguments have attributes
4052	// that are compatible with being a vararg call argument.
4053	unsigned SRetIdx;
4054	if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
4055	SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams())
4056	return false;
4057	}
4058
4059	// Okay, we decided that this is a safe thing to do: go ahead and start
4060	// inserting cast instructions as necessary.
4061	SmallVector<Value *, `8`> Args;
4062	SmallVector<AttributeSet, `8`> ArgAttrs;
4063	Args.reserve(N: NumActualArgs);
4064	ArgAttrs.reserve(N: NumActualArgs);
4065
4066	// Get any return attributes.
4067	AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
4068
4069	// If the return value is not being used, the type may not be compatible
4070	// with the existing attributes. Wipe out any problematic attributes.
4071	RAttrs.remove(AM: AttributeFuncs::typeIncompatible(Ty: NewRetTy));
4072
4073	LLVMContext &Ctx = Call.getContext();
4074	AI = Call.arg_begin();
4075	for (unsigned i = `0`; i != NumCommonArgs; ++i, ++AI) {
4076	Type *ParamTy = FT->getParamType(i);
4077
4078	Value NewArg = AI;
4079	if ((*AI)->getType() != ParamTy)
4080	NewArg = Builder.CreateBitOrPointerCast(V: *AI, DestTy: ParamTy);
4081	Args.push_back(Elt: NewArg);
4082
4083	// Add any parameter attributes except the ones incompatible with the new
4084	// type. Note that we made sure all incompatible ones are safe to drop.
4085	AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
4086	Ty: ParamTy, ASK: AttributeFuncs::ASK_SAFE_TO_DROP);
4087	ArgAttrs.push_back(
4088	Elt: CallerPAL.getParamAttrs(ArgNo: i).removeAttributes(C&: Ctx, AttrsToRemove: IncompatibleAttrs));
4089	}
4090
4091	// If the function takes more arguments than the call was taking, add them
4092	// now.
4093	for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
4094	Args.push_back(Elt: Constant::getNullValue(Ty: FT->getParamType(i)));
4095	ArgAttrs.push_back(Elt: AttributeSet ());
4096	}
4097
4098	// If we are removing arguments to the function, emit an obnoxious warning.
4099	if (FT->getNumParams() < NumActualArgs) {
4100	// TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
4101	if (FT->isVarArg()) {
4102	// Add all of the arguments in their promoted form to the arg list.
4103	for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
4104	Type PTy = getPromotedType(Ty: (AI)->getType());
4105	Value NewArg = AI;
4106	if (PTy != (*AI)->getType()) {
4107	// Must promote to pass through va_arg area!
4108	Instruction::CastOps opcode =
4109	CastInst::getCastOpcode(Val: AI, SrcIsSigned: false, Ty: PTy, DstIsSigned: false*);
4110	NewArg = Builder.CreateCast(Op: opcode, V: *AI, DestTy: PTy);
4111	}
4112	Args.push_back(Elt: NewArg);
4113
4114	// Add any parameter attributes.
4115	ArgAttrs.push_back(Elt: CallerPAL.getParamAttrs(ArgNo: i));
4116	}
4117	}
4118	}
4119
4120	AttributeSet FnAttrs = CallerPAL.getFnAttrs();
4121
4122	if (NewRetTy->isVoidTy())
4123	Caller->setName(""); // Void type should not have a name.
4124
4125	assert((ArgAttrs.size() == FT->getNumParams() \|\| FT->isVarArg()) &&
4126	"missing argument attributes");
4127	AttributeList NewCallerPAL = AttributeList::get(
4128	C&: Ctx, FnAttrs, RetAttrs: AttributeSet::get(C&: Ctx, B: RAttrs), ArgAttrs);
4129
4130	SmallVector<OperandBundleDef, `1`> OpBundles;
4131	Call.getOperandBundlesAsDefs(Defs&: OpBundles);
4132
4133	CallBase *NewCall;
4134	if (InvokeInst *II = dyn_cast<InvokeInst>(Val: Caller)) {
4135	NewCall = Builder.CreateInvoke(Callee, NormalDest: II->getNormalDest(),
4136	UnwindDest: II->getUnwindDest(), Args, OpBundles);
4137	} else {
4138	NewCall = Builder.CreateCall(Callee, Args, OpBundles);
4139	cast<CallInst>(Val: NewCall)->setTailCallKind(
4140	cast<CallInst>(Val: Caller)->getTailCallKind());
4141	}
4142	NewCall->takeName(V: Caller);
4143	NewCall->setCallingConv(Call.getCallingConv());
4144	NewCall->setAttributes(NewCallerPAL);
4145
4146	// Preserve prof metadata if any.
4147	NewCall->copyMetadata(SrcInst: *Caller, WL: {LLVMContext::MD_prof});
4148
4149	// Insert a cast of the return type as necessary.
4150	Instruction *NC = NewCall;
4151	Value *NV = NC;
4152	if (OldRetTy != NV->getType() && !Caller->use_empty()) {
4153	if (!NV->getType()->isVoidTy()) {
4154	NV = NC = CastInst::CreateBitOrPointerCast(S: NC, Ty: OldRetTy);
4155	NC->setDebugLoc(Caller->getDebugLoc());
4156
4157	auto OptInsertPt = NewCall->getInsertionPointAfterDef();
4158	assert(OptInsertPt && "No place to insert cast");
4159	InsertNewInstBefore(New: NC, Old: *OptInsertPt);
4160	Worklist.pushUsersToWorkList(I&: *Caller);
4161	} else {
4162	NV = PoisonValue::get(T: Caller->getType());
4163	}
4164	}
4165
4166	if (!Caller->use_empty())
4167	replaceInstUsesWith(I&: *Caller, V: NV);
4168	else if (Caller->hasValueHandle()) {
4169	if (OldRetTy == NV->getType())
4170	ValueHandleBase::ValueIsRAUWd(Old: Caller, New: NV);
4171	else
4172	// We cannot call ValueIsRAUWd with a different type, and the
4173	// actual tracked value will disappear.
4174	ValueHandleBase::ValueIsDeleted(V: Caller);
4175	}
4176
4177	eraseInstFromFunction(I&: *Caller);
4178	return true;
4179	}
4180
4181	/// Turn a call to a function created by init_trampoline / adjust_trampoline
4182	/// intrinsic pair into a direct call to the underlying function.
4183	Instruction *
4184	InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
4185	IntrinsicInst &Tramp) {
4186	FunctionType *FTy = Call.getFunctionType();
4187	AttributeList Attrs = Call.getAttributes();
4188
4189	// If the call already has the 'nest' attribute somewhere then give up -
4190	// otherwise 'nest' would occur twice after splicing in the chain.
4191	if (Attrs.hasAttrSomewhere(Attribute::Nest))
4192	return nullptr;
4193
4194	Function *NestF = cast<Function>(Val: Tramp.getArgOperand(i: `1`)->stripPointerCasts());
4195	FunctionType *NestFTy = NestF->getFunctionType();
4196
4197	AttributeList NestAttrs = NestF->getAttributes();
4198	if (!NestAttrs.isEmpty()) {
4199	unsigned NestArgNo = `0`;
4200	Type NestTy = nullptr*;
4201	AttributeSet NestAttr;
4202
4203	// Look for a parameter marked with the 'nest' attribute.
4204	for (FunctionType::param_iterator I = NestFTy->param_begin(),
4205	E = NestFTy->param_end();
4206	I != E; ++NestArgNo, ++I) {
4207	AttributeSet AS = NestAttrs.getParamAttrs(ArgNo: NestArgNo);
4208	if (AS.hasAttribute(Attribute::Nest)) {
4209	// Record the parameter type and any other attributes.
4210	NestTy = *I;
4211	NestAttr = AS;
4212	break;
4213	}
4214	}
4215
4216	if (NestTy) {
4217	std::vector<Value*> NewArgs;
4218	std::vector<AttributeSet> NewArgAttrs;
4219	NewArgs.reserve(n: Call.arg_size() + `1`);
4220	NewArgAttrs.reserve(n: Call.arg_size());
4221
4222	// Insert the nest argument into the call argument list, which may
4223	// mean appending it. Likewise for attributes.
4224
4225	{
4226	unsigned ArgNo = `0`;
4227	auto I = Call.arg_begin(), E = Call.arg_end();
4228	do {
4229	if (ArgNo == NestArgNo) {
4230	// Add the chain argument and attributes.
4231	Value *NestVal = Tramp.getArgOperand(i: `2`);
4232	if (NestVal->getType() != NestTy)
4233	NestVal = Builder.CreateBitCast(V: NestVal, DestTy: NestTy, Name: "nest");
4234	NewArgs.push_back(x: NestVal);
4235	NewArgAttrs.push_back(x: NestAttr);
4236	}
4237
4238	if (I == E)
4239	break;
4240
4241	// Add the original argument and attributes.
4242	NewArgs.push_back(x: *I);
4243	NewArgAttrs.push_back(x: Attrs.getParamAttrs(ArgNo));
4244
4245	++ArgNo;
4246	++I;
4247	} while (true);
4248	}
4249
4250	// The trampoline may have been bitcast to a bogus type (FTy).
4251	// Handle this by synthesizing a new function type, equal to FTy
4252	// with the chain parameter inserted.
4253
4254	std::vector<Type*> NewTypes;
4255	NewTypes.reserve(n: FTy->getNumParams()+`1`);
4256
4257	// Insert the chain's type into the list of parameter types, which may
4258	// mean appending it.
4259	{
4260	unsigned ArgNo = `0`;
4261	FunctionType::param_iterator I = FTy->param_begin(),
4262	E = FTy->param_end();
4263
4264	do {
4265	if (ArgNo == NestArgNo)
4266	// Add the chain's type.
4267	NewTypes.push_back(x: NestTy);
4268
4269	if (I == E)
4270	break;
4271
4272	// Add the original type.
4273	NewTypes.push_back(x: *I);
4274
4275	++ArgNo;
4276	++I;
4277	} while (true);
4278	}
4279
4280	// Replace the trampoline call with a direct call. Let the generic
4281	// code sort out any function type mismatches.
4282	FunctionType *NewFTy =
4283	FunctionType::get(Result: FTy->getReturnType(), Params: NewTypes, isVarArg: FTy->isVarArg());
4284	AttributeList NewPAL =
4285	AttributeList::get(C&: FTy->getContext(), FnAttrs: Attrs.getFnAttrs(),
4286	RetAttrs: Attrs.getRetAttrs(), ArgAttrs: NewArgAttrs);
4287
4288	SmallVector<OperandBundleDef, `1`> OpBundles;
4289	Call.getOperandBundlesAsDefs(Defs&: OpBundles);
4290
4291	Instruction *NewCaller;
4292	if (InvokeInst *II = dyn_cast<InvokeInst>(Val: &Call)) {
4293	NewCaller = InvokeInst::Create(Ty: NewFTy, Func: NestF, IfNormal: II->getNormalDest(),
4294	IfException: II->getUnwindDest(), Args: NewArgs, Bundles: OpBundles);
4295	cast<InvokeInst>(Val: NewCaller)->setCallingConv(II->getCallingConv());
4296	cast<InvokeInst>(Val: NewCaller)->setAttributes(NewPAL);
4297	} else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Val: &Call)) {
4298	NewCaller =
4299	CallBrInst::Create(Ty: NewFTy, Func: NestF, DefaultDest: CBI->getDefaultDest(),
4300	IndirectDests: CBI->getIndirectDests(), Args: NewArgs, Bundles: OpBundles);
4301	cast<CallBrInst>(Val: NewCaller)->setCallingConv(CBI->getCallingConv());
4302	cast<CallBrInst>(Val: NewCaller)->setAttributes(NewPAL);
4303	} else {
4304	NewCaller = CallInst::Create(Ty: NewFTy, Func: NestF, Args: NewArgs, Bundles: OpBundles);
4305	cast<CallInst>(Val: NewCaller)->setTailCallKind(
4306	cast<CallInst>(Val&: Call).getTailCallKind());
4307	cast<CallInst>(Val: NewCaller)->setCallingConv(
4308	cast<CallInst>(Val&: Call).getCallingConv());
4309	cast<CallInst>(Val: NewCaller)->setAttributes(NewPAL);
4310	}
4311	NewCaller->setDebugLoc(Call.getDebugLoc());
4312
4313	return NewCaller;
4314	}
4315	}
4316
4317	// Replace the trampoline call with a direct call. Since there is no 'nest'
4318	// parameter, there is no need to adjust the argument list. Let the generic
4319	// code sort out any function type mismatches.
4320	Call.setCalledFunction(FTy, Fn: NestF);
4321	return &Call;
4322	}
4323

source code of llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp