//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/DivisionByConstantInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cctype>
using namespace llvm;

/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}

const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}

bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}

/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                                          SDValue &Chain) const {
  const Function &F = DAG.getMachineFunction().getFunction();

  // First, check if tail calls have been disabled in this function.
  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
    return false;

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore the following attributes because they don't affect the
  // call sequence.
  AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
  for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
                           Attribute::DereferenceableOrNull, Attribute::NoAlias,
                           Attribute::NonNull, Attribute::NoUndef})
    CallerAttrs.removeAttribute(Attr);

  if (CallerAttrs.hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.contains(Attribute::ZExt) ||
      CallerAttrs.contains(Attribute::SExt))
    return false;

  // Check if the only use is a function return node.
  return isUsedByReturnOnly(Node, Chain);
}

bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
    const uint32_t *CallerPreservedMask,
    const SmallVectorImpl<CCValAssign> &ArgLocs,
    const SmallVectorImpl<SDValue> &OutVals) const {
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    const CCValAssign &ArgLoc = ArgLocs[I];
    if (!ArgLoc.isRegLoc())
      continue;
    MCRegister Reg = ArgLoc.getLocReg();
    // Only look at callee-saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
      continue;
    // Check that we pass the value used for the caller.
    // (We look for a CopyFromReg reading a virtual register that is used
    //  for the function live-in value of register Reg)
    SDValue Value = OutVals[I];
    if (Value->getOpcode() == ISD::AssertZext)
      Value = Value.getOperand(0);
    if (Value->getOpcode() != ISD::CopyFromReg)
      return false;
    Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
      return false;
  }
  return true;
}

/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgIdx);
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgIdx);
}

/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
                            ArrayRef<SDValue> Ops,
                            MakeLibCallOptions CallOptions,
                            const SDLoc &dl,
                            SDValue InChain) const {
  if (!InChain)
    InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  Args.reserve(Ops.size());

  TargetLowering::ArgListEntry Entry;
  for (unsigned i = 0; i < Ops.size(); ++i) {
    SDValue NewOp = Ops[i];
    Entry.Node = NewOp;
    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
    Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
                                                 CallOptions.IsSExt);
    Entry.IsZExt = !Entry.IsSExt;

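    // For softened FP libcalls, consult the type from before softening; if
    // the target does not want that type extended, clear both extension
    // flags.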
    if (CallOptions.IsSoften &&
        !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
      Entry.IsSExt = Entry.IsZExt = false;
    }
    Args.push_back(Entry);
  }

  if (LC == RTLIB::UNKNOWN_LIBCALL)
    report_fatal_error("Unsupported library call operation!");
  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
                                         getPointerTy(DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  TargetLowering::CallLoweringInfo CLI(DAG);
  bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
  bool zeroExtend = !signExtend;

  if (CallOptions.IsSoften &&
      !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
    signExtend = zeroExtend = false;
  }

  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
      .setNoReturn(CallOptions.DoesNotReturn)
      .setDiscardResult(!CallOptions.IsReturnValueUsed)
      .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
      .setSExtResult(signExtend)
      .setZExtResult(zeroExtend);
  return LowerCallTo(CLI);
}

bool TargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  EVT VT = getOptimalMemOpType(Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::i64;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::i64;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

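  // Greedily cover the remaining size with the current (widest) type,
  // narrowing the type whenever the remainder is smaller than it.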
  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector loads / stores for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      unsigned Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}

/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS) const {
  SDValue Chain;
  return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
                             OldRHS, Chain);
}

void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to
  // libgcc not supporting them. We can update this code when libgcc provides
  // such functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 ||
          VT == MVT::ppcf128) &&
         "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
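  // LC2 remains UNKNOWN_LIBCALL unless the predicate must be decomposed into
  // two libcalls (e.g. SETUEQ is tested as unordered (UO) or equal (OEQ)).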
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  CCCode = getCmpLibcallCC(LC1);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
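    // Two libcalls are needed: evaluate the first predicate now, then combine
    // it below with the second result (AND if the conditions were inverted,
    // otherwise OR).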
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getCmpLibcallCC(LC2);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    NewRHS = SDValue();
  }
}

/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
  // In non-PIC modes, just use the address of a block.
  if (!isPositionIndependent())
    return MachineJumpTableInfo::EK_BlockAddress;

  // In PIC mode, if the target supports a GPRel32 directive, use it.
  if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
    return MachineJumpTableInfo::EK_GPRel32BlockAddress;

  // Otherwise, use a label difference.
  return MachineJumpTableInfo::EK_LabelDifference32;
}

SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  // If our PIC model is GP relative, use the global offset table as the base.
  unsigned JTEncoding = getJumpTableEncoding();

  if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
      (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));

  return Table;
}

/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI,
                                             MCContext &Ctx) const {
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}

SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
                                               SDValue Addr, int JTI,
                                               SelectionDAG &DAG) const {
  SDValue Chain = Value;
  // Jump table debug info is only needed if CodeView is enabled.
  if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
    Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
  }
  return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
}

bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  const TargetMachine &TM = getTargetMachine();
  const GlobalValue *GV = GA->getGlobal();

  // If the address is not even local to this DSO we will have to load it from
  // a GOT and then add the offset.
  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
    return false;

  // If the code is position independent we will have to add a base register.
  if (isPositionIndependent())
    return false;

  // Otherwise we can do it.
  return true;
}

//===----------------------------------------------------------------------===//
//  Optimization Methods
//===----------------------------------------------------------------------===//

/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Early-out if we've ended up calling an undemanded node; leave this to
  // constant folding.
  if (DemandedBits.isZero() || DemandedElts.isZero())
    return false;

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
      return false;

    if (!C.isSubsetOf(DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}

bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            TargetLoweringOpt &TLO) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}

/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
/// but it could be generalized for targets with other types of implicit
/// widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &DemandedBits,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  EVT VT = Op.getValueType();
  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (VT.isVector())
    return false;

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  unsigned DemandedSize = DemandedBits.getActiveBits();
  for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
       SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (TLI.isTruncateFree(VT, SmallVT) && TLI.isZExtFree(SmallVT, VT)) {
      // We found a type with free casts.
      SDValue X = DAG.getNode(
          Op.getOpcode(), dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          const APInt &DemandedElts,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified =
      SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          KnownBits &Known,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();

  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
                              AssumeSingleUse);
}

// TODO: Under what circumstances can we create nodes? Constant folding?
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  EVT VT = Op.getValueType();

  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Ignore UNDEFs.
  if (Op.isUndef())
    return SDValue();

  // Not demanding any bits/elts from Op.
  if (DemandedBits == 0 || DemandedElts == 0)
    return DAG.getUNDEF(VT);

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = DemandedElts.getBitWidth();
  unsigned BitWidth = DemandedBits.getBitWidth();
  KnownBits LHSKnown, RHSKnown;
  switch (Op.getOpcode()) {
  case ISD::BITCAST: {
    if (VT.isScalableVector())
      return SDValue();

    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (SrcVT == DstVT)
      return Src;

    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
    if (NumSrcEltBits == NumDstEltBits)
      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedBits, DemandedElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);

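    // Bitcast from narrower src elements to wider dst elements: each dst
    // element is built from Scale consecutive src elements, so map the
    // demanded dst bits/elts back onto the equivalent src bits/elts.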
    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
      unsigned Scale = NumDstEltBits / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
        unsigned BitOffset = EltOffset * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
        if (!Sub.isZero()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    // TODO - bigendian once we have test coverage.
    if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
      unsigned Scale = NumSrcEltBits / NumDstEltBits;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * NumDstEltBits;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    break;
  }
  case ISD::AND: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return Op.getOperand(1);
    break;
  }
  case ISD::OR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::XOR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::SHL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (const APInt *MaxSA =
            DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = MaxSA->getZExtValue();
      unsigned NumSignBits =
          DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
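      // If every demanded bit position (from the lowest demanded bit up) is
      // still covered by sign bits after shifting left by ShAmt, the shifted
      // and unshifted values agree on all demanded bits.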
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return Op0;
    }
    break;
  }
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return Op0;
    }
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    // If none of the extended bits are demanded, eliminate the sextinreg.
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExBits = ExVT.getScalarSizeInBits();
    if (DemandedBits.getActiveBits() <= ExBits &&
        shouldRemoveRedundantExtend(Op))
      return Op0;
    // If the input is already sign extended, just drop the extension.
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
    if (NumSignBits >= (BitWidth - ExBits + 1))
      return Op0;
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    if (VT.isScalableVector())
      return SDValue();

    // If we only want the lowest element and none of the extended bits, then
    // we can return the bitcasted source vector.
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (IsLE && DemandedElts == 1 &&
        DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
        DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
      return DAG.getBitcast(DstVT, Src);
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (VT.isScalableVector())
      return SDValue();

    // If we don't demand the inserted element, return the base vector.
    SDValue Vec = Op.getOperand(0);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    if (VT.isScalableVector())
      return SDValue();

    SDValue Vec = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    // If we don't demand the inserted subvector, return the base vector.
    if (DemandedSubElts == 0)
      return Vec;
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    assert(!VT.isScalableVector());
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // If all the demanded elts are from one operand and are inline,
    // then we can use the operand directly.
    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      AllUndef = false;
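      // A shuffle mask indexes the concatenation of both operands, so entries
      // taken from the RHS operand appear as NumElts + i.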
      IdentityLHS &= (M == (int)i);
      IdentityRHS &= ((M - NumElts) == i);
    }

    if (AllUndef)
      return DAG.getUNDEF(Op.getValueType());
    if (IdentityLHS)
      return Op.getOperand(0);
    if (IdentityRHS)
      return Op.getOperand(1);
    break;
  }
  default:
    // TODO: Probably okay to remove after audit; here to reduce change size
    // in initial enablement patch for scalable vectors
    if (VT.isScalableVector())
      return SDValue();

    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
      if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
              Op, DemandedBits, DemandedElts, DAG, Depth))
        return V;
    break;
  }
  return SDValue();
}

SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
    unsigned Depth) const {
  EVT VT = Op.getValueType();
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
    SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
    unsigned Depth) const {
  APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1),
// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
                                 const TargetLowering &TLI,
                                 const APInt &DemandedBits,
                                 const APInt &DemandedElts,
                                 unsigned Depth) {
  assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  // Is the right shift using an immediate value of 1?
  ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
  if (!N1C || !N1C->isOne())
    return SDValue();

  // We are looking for an avgfloor
  // add(ext, ext)
  // or one of these as an avgceil
  // add(add(ext, ext), 1)
  // add(add(ext, 1), ext)
  // add(ext, add(ext, 1))
  SDValue Add = Op.getOperand(0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  SDValue ExtOpA = Add.getOperand(0);
  SDValue ExtOpB = Add.getOperand(1);
  SDValue Add2;
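  // Helper to match one operand of the outer add as add(x, 1): on success the
  // two extended inputs land in ExtOpA/ExtOpB and Add2 records the inner add.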
  auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
    ConstantSDNode *ConstOp;
    if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op3;
      Add2 = A;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op2;
      Add2 = A;
      return true;
    }
    return false;
  };
  bool IsCeil =
      (ExtOpA.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB,
                     ExtOpA)) ||
      (ExtOpB.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA,
                     ExtOpB));

  // If the shift is signed (sra):
  //  - Needs >= 2 sign bits for both operands.
  //  - Needs >= 2 zero bits.
  // If the shift is unsigned (srl):
  //  - Needs >= 1 zero bit for both operands.
  //  - Needs 1 demanded bit zero and >= 2 sign bits.
  unsigned ShiftOpc = Op.getOpcode();
  bool IsSigned = false;
  unsigned KnownBits;
  unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
  unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
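  // The sum of two values that each have at least N sign bits has at least
  // N - 1 sign bits, so conservatively combine the two operand bounds.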
  unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
  unsigned NumZeroA =
      DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZeroB =
      DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZero = std::min(NumZeroA, NumZeroB);

  switch (ShiftOpc) {
  default:
    llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
  case ISD::SRA: {
    if (NumZero >= 2 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  case ISD::SRL: {
    if (NumZero >= 1 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  }

  unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
                           : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);

  // Find the smallest power-of-2 type that is legal for this vector size and
  // operation, given the original type size and the number of known sign/zero
  // bits.
  EVT VT = Op.getValueType();
  unsigned MinWidth =
      std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
  if (VT.isVector())
    NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
  if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT)) {
    // If we could not transform, and (both) adds are nuw/nsw, we can use the
    // larger type size to do the transform.
    if (!TLI.isOperationLegalOrCustom(AVGOpc, VT))
      return SDValue();
    if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
                               Add.getOperand(1)) &&
        (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
                                         Add2.getOperand(1))))
      NVT = VT;
    else
      return SDValue();
  }

  SDLoc DL(Op);
  SDValue ResultAVG =
      DAG.getNode(AVGOpc, DL, NVT,
                  DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
                  DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
  return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
}

/// Look at Op. At this point, we know that only the OriginalDemandedBits of
/// the result of Op are ever used downstream. If we can use this information
/// to simplify Op, create a new simplified DAG node and return true, returning
/// the original and new nodes in Old and New. Otherwise, analyze the
/// expression and return a mask of Known bits for the expression (used to
/// simplify the caller). The Known bits may only be accurate for those bits
/// in the OriginalDemandedBits and OriginalDemandedElts.
bool TargetLowering::SimplifyDemandedBits(
    SDValue Op, const APInt &OriginalDemandedBits,
    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
    unsigned Depth, bool AssumeSingleUse) const {
  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
  assert(Op.getScalarValueSizeInBits() == BitWidth &&
         "Mask size mismatches value type size!");

  // Don't know anything.
  Known = KnownBits(BitWidth);

  EVT VT = Op.getValueType();
  bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = OriginalDemandedElts.getBitWidth();
  assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
         "Unexpected vector size");

  APInt DemandedBits = OriginalDemandedBits;
  APInt DemandedElts = OriginalDemandedElts;
  SDLoc dl(Op);

  // Undef operand.
  if (Op.isUndef())
    return false;

  // We can't simplify target constants.
  if (Op.getOpcode() == ISD::TargetConstant)
    return false;

  if (Op.getOpcode() == ISD::Constant) {
    // We know all of the bits for a constant!
    Known = KnownBits::makeConstant(Op->getAsAPIntVal());
    return false;
  }

  if (Op.getOpcode() == ISD::ConstantFP) {
    // We know all of the bits for a floating point constant!
    Known = KnownBits::makeConstant(
        cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
    return false;
  }

  // Other users may use these bits.
  bool HasMultiUse = false;
  if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
    if (Depth >= SelectionDAG::MaxRecursionDepth) {
      // Limit search depth.
      return false;
    }
    // Allow multiple uses, just set the DemandedBits/Elts to all bits.
    DemandedBits = APInt::getAllOnes(BitWidth);
    DemandedElts = APInt::getAllOnes(NumElts);
    HasMultiUse = true;
  } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
    // Not demanding any bits/elts from Op.
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
    // Limit search depth.
    return false;
  }

  KnownBits Known2;
  switch (Op.getOpcode()) {
  case ISD::SCALAR_TO_VECTOR: {
    if (VT.isScalableVector())
      return false;
    if (!DemandedElts[0])
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));

    KnownBits SrcKnown;
    SDValue Src = Op.getOperand(0);
    unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
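    // The scalar source may be wider than the vector element type, so widen
    // the demanded-bits mask to the scalar's width before recursing.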
    APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
    if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
      return true;

    // Upper elements are undef, so only get the knownbits if we just demand
    // the bottom element.
    if (DemandedElts == 1)
      Known = SrcKnown.anyextOrTrunc(BitWidth);
    break;
  }
  case ISD::BUILD_VECTOR:
    // Collect the known bits that are shared by every demanded element.
    // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    return false; // Don't fall through, will infinitely loop.
  case ISD::SPLAT_VECTOR: {
    SDValue Scl = Op.getOperand(0);
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
    KnownBits KnownScl;
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    // Implicitly truncate the bits to match the official semantics of
    // SPLAT_VECTOR.
    Known = KnownScl.trunc(BitWidth);
    break;
  }
  case ISD::LOAD: {
    auto *LD = cast<LoadSDNode>(Op);
    if (getTargetConstantFromLoad(LD)) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false; // Don't fall through, will infinitely loop.
    }
    if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
      // If this is a ZEXTLoad and we are looking at the loaded value.
      EVT MemVT = LD->getMemoryVT();
      unsigned MemBits = MemVT.getScalarSizeInBits();
      Known.Zero.setBitsFrom(MemBits);
      return false; // Don't fall through, will infinitely loop.
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (VT.isScalableVector())
      return false;
    SDValue Vec = Op.getOperand(0);
    SDValue Scl = Op.getOperand(1);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();

    // If index isn't constant, assume we need all vector elements AND the
    // inserted element.
    APInt DemandedVecElts(DemandedElts);
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
      unsigned Idx = CIdx->getZExtValue();
      DemandedVecElts.clearBit(Idx);

      // Inserted element is not required.
      if (!DemandedElts[Idx])
        return TLO.CombineTo(Op, Vec);
    }

    KnownBits KnownScl;
    unsigned NumSclBits = Scl.getScalarValueSizeInBits();
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    Known = KnownScl.anyextOrTrunc(BitWidth);

    KnownBits KnownVec;
    if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
                             Depth + 1))
      return true;

    if (!!DemandedVecElts)
      Known = Known.intersectWith(KnownVec);

    return false;
  }
  case ISD::INSERT_SUBVECTOR: {
    if (VT.isScalableVector())
      return false;
    // Demand any elements from the subvector and the remainder from the src
    // it's inserted into.
    SDValue Src = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    APInt DemandedSrcElts = DemandedElts;
    DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);

    KnownBits KnownSub, KnownSrc;
    if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
                             Depth + 1))
      return true;

    Known.Zero.setAllBits();
    Known.One.setAllBits();
    if (!!DemandedSubElts)
      Known = Known.intersectWith(KnownSub);
    if (!!DemandedSrcElts)
      Known = Known.intersectWith(KnownSrc);

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
        !DemandedSrcElts.isAllOnes()) {
      SDValue NewSub = SimplifyMultipleUseDemandedBits(
          Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
      SDValue NewSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (NewSub || NewSrc) {
        NewSub = NewSub ? NewSub : Sub;
        NewSrc = NewSrc ? NewSrc : Src;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
                                        Op.getOperand(2));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    if (VT.isScalableVector())
      return false;
    // Offset the demanded elts by the subvector index.
    SDValue Src = Op.getOperand(0);
    if (Src.getValueType().isScalableVector())
      break;
    uint64_t Idx = Op.getConstantOperandVal(1);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);

    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
                             Depth + 1))
      return true;

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
      SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (DemandedSrc) {
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
                                        Op.getOperand(1));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    if (VT.isScalableVector())
      return false;
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      APInt DemandedSubElts =
          DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
                               Known2, TLO, Depth + 1))
        return true;
      // Known bits are shared by every demanded subvector element.
      if (!!DemandedSubElts)
        Known = Known.intersectWith(Known2);
    }
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    assert(!VT.isScalableVector());
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // Collect demanded elements from shuffle operands.
    APInt DemandedLHS, DemandedRHS;
    if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
                                DemandedRHS))
      break;

    if (!!DemandedLHS || !!DemandedRHS) {
      SDValue Op0 = Op.getOperand(0);
      SDValue Op1 = Op.getOperand(1);

      Known.Zero.setAllBits();
      Known.One.setAllBits();
      if (!!DemandedLHS) {
        if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = Known.intersectWith(Known2);
      }
      if (!!DemandedRHS) {
        if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = Known.intersectWith(Known2);
      }

      // Attempt to avoid multi-use ops if we don't need anything from them.
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::AND: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // If the RHS is a constant, check to see if the LHS would be zero without
    // using the bits from the RHS. Below, we use knowledge about the RHS to
    // simplify the LHS; here we're using information from the LHS to simplify
    // the RHS.
    if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
      // Do not increment Depth here; that can cause an infinite loop.
      KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
      // If the LHS already has zeros where RHSC does, this 'and' is dead.
      if ((LHSKnown.Zero & DemandedBits) ==
          (~RHSC->getAPIntValue() & DemandedBits))
        return TLO.CombineTo(Op, Op0);

      // If any of the set bits in the RHS are known zero on the LHS, shrink
      // the constant.
      if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
                                 DemandedElts, TLO))
        return true;

      // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
      // constant, but if this 'and' is only clearing bits that were just set by
      // the xor, then this 'and' can be eliminated by shrinking the mask of
      // the xor. For example, for a 32-bit X:
      // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
      if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
          LHSKnown.One == ~RHSC->getAPIntValue()) {
        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
        return TLO.CombineTo(Op, Xor);
      }
    }

    // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
    // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
    if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
        (Op0.getOperand(0).isUndef() ||
         ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
        Op0->hasOneUse()) {
      unsigned NumSubElts =
          Op0.getOperand(1).getValueType().getVectorNumElements();
      unsigned SubIdx = Op0.getConstantOperandVal(2);
      APInt DemandedSub =
          APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
      KnownBits KnownSubMask =
          TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
      if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
        SDValue NewAnd =
            TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
        SDValue NewInsert =
            TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
                            Op0.getOperand(1), Op0.getOperand(2));
        return TLO.CombineTo(Op, NewInsert);
      }
    }

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If all of the demanded bits are known one on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
      return TLO.CombineTo(Op, Op1);
    // If all of the demanded bits in the inputs are known zeros, return zero.
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
                               TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

1463 Known &= Known2;
1464 break;
1465 }
1466 case ISD::OR: {
1467 SDValue Op0 = Op.getOperand(i: 0);
1468 SDValue Op1 = Op.getOperand(i: 1);
1469 SDNodeFlags Flags = Op.getNode()->getFlags();
1470 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1471 Depth: Depth + 1)) {
1472 if (Flags.hasDisjoint()) {
1473 Flags.setDisjoint(false);
1474 Op->setFlags(Flags);
1475 }
1476 return true;
1477 }
1478 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1479 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: ~Known.One & DemandedBits, OriginalDemandedElts: DemandedElts,
1480 Known&: Known2, TLO, Depth: Depth + 1)) {
1481 if (Flags.hasDisjoint()) {
1482 Flags.setDisjoint(false);
1483 Op->setFlags(Flags);
1484 }
1485 return true;
1486 }
1487 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1488
1489 // If all of the demanded bits are known zero on one side, return the other.
1490 // These bits cannot contribute to the result of the 'or'.
1491 if (DemandedBits.isSubsetOf(RHS: Known2.One | Known.Zero))
1492 return TLO.CombineTo(O: Op, N: Op0);
1493 if (DemandedBits.isSubsetOf(RHS: Known.One | Known2.Zero))
1494 return TLO.CombineTo(O: Op, N: Op1);
1495 // If the RHS is a constant, see if we can simplify it.
1496 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1497 return true;
1498 // If the operation can be done in a smaller type, do so.
1499 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1500 return true;
1501
1502 // Attempt to avoid multi-use ops if we don't need anything from them.
1503 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1504 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1505 Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1506 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1507 Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1508 if (DemandedOp0 || DemandedOp1) {
1509 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1510 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1511 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1512 return TLO.CombineTo(O: Op, N: NewOp);
1513 }
1514 }
1515
1516 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1517 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1518 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1519 Op0->hasOneUse() && Op1->hasOneUse()) {
1520 // Attempt to match all commutations - m_c_Or would've been useful!
1521 for (int I = 0; I != 2; ++I) {
1522 SDValue X = Op.getOperand(i: I).getOperand(i: 0);
1523 SDValue C1 = Op.getOperand(i: I).getOperand(i: 1);
1524 SDValue Alt = Op.getOperand(i: 1 - I).getOperand(i: 0);
1525 SDValue C2 = Op.getOperand(i: 1 - I).getOperand(i: 1);
1526 if (Alt.getOpcode() == ISD::OR) {
1527 for (int J = 0; J != 2; ++J) {
1528 if (X == Alt.getOperand(i: J)) {
1529 SDValue Y = Alt.getOperand(i: 1 - J);
1530 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(Opcode: ISD::OR, DL: dl, VT,
1531 Ops: {C1, C2})) {
1532 SDValue MaskX = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: X, N2: C12);
1533 SDValue MaskY = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Y, N2: C2);
1534 return TLO.CombineTo(
1535 O: Op, N: TLO.DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: MaskX, N2: MaskY));
1536 }
1537 }
1538 }
1539 }
1540 }
1541 }
1542
1543 Known |= Known2;
1544 break;
1545 }
1546 case ISD::XOR: {
1547 SDValue Op0 = Op.getOperand(i: 0);
1548 SDValue Op1 = Op.getOperand(i: 1);
1549
1550 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
1551 Depth: Depth + 1))
1552 return true;
1553 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1554 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
1555 Depth: Depth + 1))
1556 return true;
1557 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1558
1559 // If all of the demanded bits are known zero on one side, return the other.
1560 // These bits cannot contribute to the result of the 'xor'.
1561 if (DemandedBits.isSubsetOf(RHS: Known.Zero))
1562 return TLO.CombineTo(O: Op, N: Op0);
1563 if (DemandedBits.isSubsetOf(RHS: Known2.Zero))
1564 return TLO.CombineTo(O: Op, N: Op1);
1565 // If the operation can be done in a smaller type, do so.
1566 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1567 return true;
1568
    // If all of the unknown bits are known to be zero on one side or the
    // other, turn this into an *inclusive* or.
1571 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1572 if (DemandedBits.isSubsetOf(RHS: Known.Zero | Known2.Zero))
1573 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Op0, N2: Op1));
1574
1575 ConstantSDNode *C = isConstOrConstSplat(N: Op1, DemandedElts);
1576 if (C) {
1577 // If one side is a constant, and all of the set bits in the constant are
1578 // also known set on the other side, turn this into an AND, as we know
1579 // the bits will be cleared.
1580 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1581 // NB: it is okay if more bits are known than are requested
1582 if (C->getAPIntValue() == Known2.One) {
1583 SDValue ANDC =
1584 TLO.DAG.getConstant(Val: ~C->getAPIntValue() & DemandedBits, DL: dl, VT);
1585 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op0, N2: ANDC));
1586 }
1587
1588 // If the RHS is a constant, see if we can change it. Don't alter a -1
1589 // constant because that's a 'not' op, and that is better for combining
1590 // and codegen.
1591 if (!C->isAllOnes() && DemandedBits.isSubsetOf(RHS: C->getAPIntValue())) {
1592 // We're flipping all demanded bits. Flip the undemanded bits too.
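        // e.g. if only bits 3:0 are demanded: xor X, 0xF --> not X, since
        // the extra flipped bits are never observed.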
1593 SDValue New = TLO.DAG.getNOT(DL: dl, Val: Op0, VT);
1594 return TLO.CombineTo(O: Op, N: New);
1595 }
1596
1597 unsigned Op0Opcode = Op0.getOpcode();
1598 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1599 if (ConstantSDNode *ShiftC =
1600 isConstOrConstSplat(N: Op0.getOperand(i: 1), DemandedElts)) {
          // Don't crash on an oversized shift. We cannot guarantee that a
          // bogus shift has been simplified to undef.
1603 if (ShiftC->getAPIntValue().ult(RHS: BitWidth)) {
1604 uint64_t ShiftAmt = ShiftC->getZExtValue();
1605 APInt Ones = APInt::getAllOnes(numBits: BitWidth);
1606 Ones = Op0Opcode == ISD::SHL ? Ones.shl(shiftAmt: ShiftAmt)
1607 : Ones.lshr(shiftAmt: ShiftAmt);
1608 const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
1609 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1610 TLI.isDesirableToCommuteXorWithShift(N: Op.getNode())) {
1611 // If the xor constant is a demanded mask, do a 'not' before the
1612 // shift:
1613 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1614 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1615 SDValue Not = TLO.DAG.getNOT(DL: dl, Val: Op0.getOperand(i: 0), VT);
1616 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op0Opcode, DL: dl, VT, N1: Not,
1617 N2: Op0.getOperand(i: 1)));
1618 }
1619 }
1620 }
1621 }
1622 }
1623
1624 // If we can't turn this into a 'not', try to shrink the constant.
1625 if (!C || !C->isAllOnes())
1626 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1627 return true;
1628
1629 // Attempt to avoid multi-use ops if we don't need anything from them.
1630 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1631 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1632 Op: Op0, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1633 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1634 Op: Op1, DemandedBits, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1635 if (DemandedOp0 || DemandedOp1) {
1636 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1637 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1638 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1);
1639 return TLO.CombineTo(O: Op, N: NewOp);
1640 }
1641 }
1642
1643 Known ^= Known2;
1644 break;
1645 }
1646 case ISD::SELECT:
1647 if (SimplifyDemandedBits(Op: Op.getOperand(i: 2), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1648 Known, TLO, Depth: Depth + 1))
1649 return true;
1650 if (SimplifyDemandedBits(Op: Op.getOperand(i: 1), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1651 Known&: Known2, TLO, Depth: Depth + 1))
1652 return true;
1653 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1654 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1655
1656 // If the operands are constants, see if we can simplify them.
1657 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1658 return true;
1659
1660 // Only known if known in both the LHS and RHS.
1661 Known = Known.intersectWith(RHS: Known2);
1662 break;
1663 case ISD::VSELECT:
1664 if (SimplifyDemandedBits(Op: Op.getOperand(i: 2), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1665 Known, TLO, Depth: Depth + 1))
1666 return true;
1667 if (SimplifyDemandedBits(Op: Op.getOperand(i: 1), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1668 Known&: Known2, TLO, Depth: Depth + 1))
1669 return true;
1670 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1671 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1672
1673 // Only known if known in both the LHS and RHS.
1674 Known = Known.intersectWith(RHS: Known2);
1675 break;
1676 case ISD::SELECT_CC:
1677 if (SimplifyDemandedBits(Op: Op.getOperand(i: 3), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1678 Known, TLO, Depth: Depth + 1))
1679 return true;
1680 if (SimplifyDemandedBits(Op: Op.getOperand(i: 2), OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
1681 Known&: Known2, TLO, Depth: Depth + 1))
1682 return true;
1683 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1684 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1685
1686 // If the operands are constants, see if we can simplify them.
1687 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1688 return true;
1689
1690 // Only known if known in both the LHS and RHS.
1691 Known = Known.intersectWith(RHS: Known2);
1692 break;
1693 case ISD::SETCC: {
1694 SDValue Op0 = Op.getOperand(i: 0);
1695 SDValue Op1 = Op.getOperand(i: 1);
1696 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get();
1697 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1698 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1699 // -1, we may be able to bypass the setcc.
1700 if (DemandedBits.isSignMask() &&
1701 Op0.getScalarValueSizeInBits() == BitWidth &&
1702 getBooleanContents(Type: Op0.getValueType()) ==
1703 BooleanContent::ZeroOrNegativeOneBooleanContent) {
1704 // If we're testing X < 0, then this compare isn't needed - just use X!
1705 // FIXME: We're limiting to integer types here, but this should also work
1706 // if we don't care about FP signed-zero. The use of SETLT with FP means
1707 // that we don't care about NaNs.
1708 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1709 (isNullConstant(V: Op1) || ISD::isBuildVectorAllZeros(N: Op1.getNode())))
1710 return TLO.CombineTo(O: Op, N: Op0);
1711
1712 // TODO: Should we check for other forms of sign-bit comparisons?
1713 // Examples: X <= -1, X >= 0
1714 }
1715 if (getBooleanContents(Type: Op0.getValueType()) ==
1716 TargetLowering::ZeroOrOneBooleanContent &&
1717 BitWidth > 1)
1718 Known.Zero.setBitsFrom(1);
1719 break;
1720 }
1721 case ISD::SHL: {
1722 SDValue Op0 = Op.getOperand(i: 0);
1723 SDValue Op1 = Op.getOperand(i: 1);
1724 EVT ShiftVT = Op1.getValueType();
1725
1726 if (const APInt *SA =
1727 TLO.DAG.getValidShiftAmountConstant(V: Op, DemandedElts)) {
1728 unsigned ShAmt = SA->getZExtValue();
1729 if (ShAmt == 0)
1730 return TLO.CombineTo(O: Op, N: Op0);
1731
1732 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1733 // single shift. We can do this if the bottom bits (which are shifted
1734 // out) are never demanded.
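      // e.g. (shl (srl X, 1), 2) --> (shl X, 1) when the low 2 bits of the
      // result are not demanded.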
1735 // TODO - support non-uniform vector amounts.
1736 if (Op0.getOpcode() == ISD::SRL) {
1737 if (!DemandedBits.intersects(RHS: APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ShAmt))) {
1738 if (const APInt *SA2 =
1739 TLO.DAG.getValidShiftAmountConstant(V: Op0, DemandedElts)) {
1740 unsigned C1 = SA2->getZExtValue();
1741 unsigned Opc = ISD::SHL;
1742 int Diff = ShAmt - C1;
1743 if (Diff < 0) {
1744 Diff = -Diff;
1745 Opc = ISD::SRL;
1746 }
1747 SDValue NewSA = TLO.DAG.getConstant(Val: Diff, DL: dl, VT: ShiftVT);
1748 return TLO.CombineTo(
1749 O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, N1: Op0.getOperand(i: 0), N2: NewSA));
1750 }
1751 }
1752 }
1753
1754 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1755 // are not demanded. This will likely allow the anyext to be folded away.
1756 // TODO - support non-uniform vector amounts.
1757 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1758 SDValue InnerOp = Op0.getOperand(i: 0);
1759 EVT InnerVT = InnerOp.getValueType();
1760 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1761 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1762 isTypeDesirableForOp(ISD::SHL, VT: InnerVT)) {
1763 SDValue NarrowShl = TLO.DAG.getNode(
1764 Opcode: ISD::SHL, DL: dl, VT: InnerVT, N1: InnerOp,
1765 N2: TLO.DAG.getShiftAmountConstant(Val: ShAmt, VT: InnerVT, DL: dl));
1766 return TLO.CombineTo(
1767 O: Op, N: TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: NarrowShl));
1768 }
1769
1770 // Repeat the SHL optimization above in cases where an extension
1771 // intervenes: (shl (anyext (shr x, c1)), c2) to
1772 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1773 // aren't demanded (as above) and that the shifted upper c1 bits of
1774 // x aren't demanded.
1775 // TODO - support non-uniform vector amounts.
1776 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1777 InnerOp.hasOneUse()) {
1778 if (const APInt *SA2 =
1779 TLO.DAG.getValidShiftAmountConstant(V: InnerOp, DemandedElts)) {
1780 unsigned InnerShAmt = SA2->getZExtValue();
1781 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1782 DemandedBits.getActiveBits() <=
1783 (InnerBits - InnerShAmt + ShAmt) &&
1784 DemandedBits.countr_zero() >= ShAmt) {
1785 SDValue NewSA =
1786 TLO.DAG.getConstant(Val: ShAmt - InnerShAmt, DL: dl, VT: ShiftVT);
1787 SDValue NewExt = TLO.DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT,
1788 Operand: InnerOp.getOperand(i: 0));
1789 return TLO.CombineTo(
1790 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: NewExt, N2: NewSA));
1791 }
1792 }
1793 }
1794 }
1795
1796 APInt InDemandedMask = DemandedBits.lshr(shiftAmt: ShAmt);
1797 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
1798 Depth: Depth + 1)) {
1799 SDNodeFlags Flags = Op.getNode()->getFlags();
1800 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1801 // Disable the nsw and nuw flags. We can no longer guarantee that we
1802 // won't wrap after simplification.
1803 Flags.setNoSignedWrap(false);
1804 Flags.setNoUnsignedWrap(false);
1805 Op->setFlags(Flags);
1806 }
1807 return true;
1808 }
1809 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1810 Known.Zero <<= ShAmt;
1811 Known.One <<= ShAmt;
1812 // low bits known zero.
1813 Known.Zero.setLowBits(ShAmt);
1814
1815 // Attempt to avoid multi-use ops if we don't need anything from them.
1816 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1817 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1818 Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1819 if (DemandedOp0) {
1820 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: DemandedOp0, N2: Op1);
1821 return TLO.CombineTo(O: Op, N: NewOp);
1822 }
1823 }
1824
1825 // Try shrinking the operation as long as the shift amount will still be
1826 // in range.
1827 if ((ShAmt < DemandedBits.getActiveBits()) &&
1828 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1829 return true;
1830
1831 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1832 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1833 // Only do this if we demand the upper half so the knownbits are correct.
1834 unsigned HalfWidth = BitWidth / 2;
1835 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1836 DemandedBits.countLeadingOnes() >= HalfWidth) {
1837 EVT HalfVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: HalfWidth);
1838 if (isNarrowingProfitable(SrcVT: VT, DestVT: HalfVT) &&
1839 isTypeDesirableForOp(ISD::SHL, VT: HalfVT) &&
1840 isTruncateFree(FromVT: VT, ToVT: HalfVT) && isZExtFree(FromTy: HalfVT, ToTy: VT) &&
1841 (!TLO.LegalOperations() || isOperationLegal(Op: ISD::SHL, VT: HalfVT))) {
1842 // If we're demanding the upper bits at all, we must ensure
1843 // that the upper bits of the shift result are known to be zero,
1844 // which is equivalent to the narrow shift being NUW.
1845 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1846 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1847 SDNodeFlags Flags;
1848 Flags.setNoSignedWrap(IsNSW);
1849 Flags.setNoUnsignedWrap(IsNUW);
1850 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HalfVT, Operand: Op0);
1851 SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
1852 Val: ShAmt, VT: HalfVT, DL: dl, LegalTypes: TLO.LegalTypes());
1853 SDValue NewShift = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HalfVT, N1: NewOp,
1854 N2: NewShiftAmt, Flags);
1855 SDValue NewExt =
1856 TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: NewShift);
1857 return TLO.CombineTo(O: Op, N: NewExt);
1858 }
1859 }
1860 }
1861 } else {
1862 // This is a variable shift, so we can't shift the demand mask by a known
1863 // amount. But if we are not demanding high bits, then we are not
1864 // demanding those bits from the pre-shifted operand either.
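      // e.g. if only the low 8 bits of (shl X, Y) are demanded, then only
      // the low 8 bits of X can ever contribute, whatever Y is.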
1865 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1866 APInt DemandedFromOp(APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - CTLZ));
1867 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: DemandedFromOp, OriginalDemandedElts: DemandedElts, Known, TLO,
1868 Depth: Depth + 1)) {
1869 SDNodeFlags Flags = Op.getNode()->getFlags();
1870 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1871 // Disable the nsw and nuw flags. We can no longer guarantee that we
1872 // won't wrap after simplification.
1873 Flags.setNoSignedWrap(false);
1874 Flags.setNoUnsignedWrap(false);
1875 Op->setFlags(Flags);
1876 }
1877 return true;
1878 }
1879 Known.resetAll();
1880 }
1881 }
1882
1883 // If we are only demanding sign bits then we can use the shift source
1884 // directly.
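    // e.g. if X has 4 sign bits, (shl X, 1) and X agree on the top 3 bits,
    // so if only those bits are demanded we can use X directly.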
1885 if (const APInt *MaxSA =
1886 TLO.DAG.getValidMaximumShiftAmountConstant(V: Op, DemandedElts)) {
1887 unsigned ShAmt = MaxSA->getZExtValue();
1888 unsigned NumSignBits =
1889 TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1);
1890 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1891 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1892 return TLO.CombineTo(O: Op, N: Op0);
1893 }
1894 break;
1895 }
1896 case ISD::SRL: {
1897 SDValue Op0 = Op.getOperand(i: 0);
1898 SDValue Op1 = Op.getOperand(i: 1);
1899 EVT ShiftVT = Op1.getValueType();
1900
1901 // Try to match AVG patterns.
1902 if (SDValue AVG = combineShiftToAVG(Op, DAG&: TLO.DAG, TLI: *this, DemandedBits,
1903 DemandedElts, Depth: Depth + 1))
1904 return TLO.CombineTo(O: Op, N: AVG);
1905
1906 if (const APInt *SA =
1907 TLO.DAG.getValidShiftAmountConstant(V: Op, DemandedElts)) {
1908 unsigned ShAmt = SA->getZExtValue();
1909 if (ShAmt == 0)
1910 return TLO.CombineTo(O: Op, N: Op0);
1911
1912 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1913 // single shift. We can do this if the top bits (which are shifted out)
1914 // are never demanded.
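      // e.g. (srl (shl X, 2), 1) --> (shl X, 1) when the top bit of the
      // result is not demanded.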
1915 // TODO - support non-uniform vector amounts.
1916 if (Op0.getOpcode() == ISD::SHL) {
1917 if (!DemandedBits.intersects(RHS: APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: ShAmt))) {
1918 if (const APInt *SA2 =
1919 TLO.DAG.getValidShiftAmountConstant(V: Op0, DemandedElts)) {
1920 unsigned C1 = SA2->getZExtValue();
1921 unsigned Opc = ISD::SRL;
1922 int Diff = ShAmt - C1;
1923 if (Diff < 0) {
1924 Diff = -Diff;
1925 Opc = ISD::SHL;
1926 }
1927 SDValue NewSA = TLO.DAG.getConstant(Val: Diff, DL: dl, VT: ShiftVT);
1928 return TLO.CombineTo(
1929 O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, N1: Op0.getOperand(i: 0), N2: NewSA));
1930 }
1931 }
1932 }
1933
1934 APInt InDemandedMask = (DemandedBits << ShAmt);
1935
1936 // If the shift is exact, then it does demand the low bits (and knows that
1937 // they are zero).
1938 if (Op->getFlags().hasExact())
1939 InDemandedMask.setLowBits(ShAmt);
1940
1941 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1942 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
1943 if ((BitWidth % 2) == 0 && !VT.isVector()) {
1944 APInt HiBits = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth / 2);
1945 EVT HalfVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: BitWidth / 2);
1946 if (isNarrowingProfitable(SrcVT: VT, DestVT: HalfVT) &&
1947 isTypeDesirableForOp(ISD::SRL, VT: HalfVT) &&
1948 isTruncateFree(FromVT: VT, ToVT: HalfVT) && isZExtFree(FromTy: HalfVT, ToTy: VT) &&
1949 (!TLO.LegalOperations() || isOperationLegal(Op: ISD::SRL, VT: HalfVT)) &&
1950 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
1951 TLO.DAG.MaskedValueIsZero(Op: Op0, Mask: HiBits))) {
1952 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HalfVT, Operand: Op0);
1953 SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
1954 Val: ShAmt, VT: HalfVT, DL: dl, LegalTypes: TLO.LegalTypes());
1955 SDValue NewShift =
1956 TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HalfVT, N1: NewOp, N2: NewShiftAmt);
1957 return TLO.CombineTo(
1958 O: Op, N: TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: NewShift));
1959 }
1960 }
1961
1962 // Compute the new bits that are at the top now.
1963 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
1964 Depth: Depth + 1))
1965 return true;
1966 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1967 Known.Zero.lshrInPlace(ShiftAmt: ShAmt);
1968 Known.One.lshrInPlace(ShiftAmt: ShAmt);
1969 // High bits known zero.
1970 Known.Zero.setHighBits(ShAmt);
1971
1972 // Attempt to avoid multi-use ops if we don't need anything from them.
1973 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1974 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1975 Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
1976 if (DemandedOp0) {
1977 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: DemandedOp0, N2: Op1);
1978 return TLO.CombineTo(O: Op, N: NewOp);
1979 }
1980 }
1981 } else {
1982 // Use generic knownbits computation as it has support for non-uniform
1983 // shift amounts.
1984 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1985 }
1986 break;
1987 }
1988 case ISD::SRA: {
1989 SDValue Op0 = Op.getOperand(i: 0);
1990 SDValue Op1 = Op.getOperand(i: 1);
1991 EVT ShiftVT = Op1.getValueType();
1992
1993 // If we only want bits that already match the signbit then we don't need
1994 // to shift.
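    // e.g. if X has 4 sign bits, (sra X, 2) and X agree on the top 4 bits,
    // so if only those bits are demanded we can use X directly.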
1995 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
1996 if (TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1) >=
1997 NumHiDemandedBits)
1998 return TLO.CombineTo(O: Op, N: Op0);
1999
2000 // If this is an arithmetic shift right and only the low-bit is set, we can
2001 // always convert this into a logical shr, even if the shift amount is
2002 // variable. The low bit of the shift cannot be an input sign bit unless
2003 // the shift amount is >= the size of the datatype, which is undefined.
2004 if (DemandedBits.isOne())
2005 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1));
2006
2007 // Try to match AVG patterns.
2008 if (SDValue AVG = combineShiftToAVG(Op, DAG&: TLO.DAG, TLI: *this, DemandedBits,
2009 DemandedElts, Depth: Depth + 1))
2010 return TLO.CombineTo(O: Op, N: AVG);
2011
2012 if (const APInt *SA =
2013 TLO.DAG.getValidShiftAmountConstant(V: Op, DemandedElts)) {
2014 unsigned ShAmt = SA->getZExtValue();
2015 if (ShAmt == 0)
2016 return TLO.CombineTo(O: Op, N: Op0);
2017
2018 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2019 // supports sext_inreg.
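      // e.g. for i32: (sra (shl X, 24), 24) -> sext_inreg X, i8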
2020 if (Op0.getOpcode() == ISD::SHL) {
2021 if (const APInt *InnerSA =
2022 TLO.DAG.getValidShiftAmountConstant(V: Op0, DemandedElts)) {
2023 unsigned LowBits = BitWidth - ShAmt;
2024 EVT ExtVT = EVT::getIntegerVT(Context&: *TLO.DAG.getContext(), BitWidth: LowBits);
2025 if (VT.isVector())
2026 ExtVT = EVT::getVectorVT(Context&: *TLO.DAG.getContext(), VT: ExtVT,
2027 EC: VT.getVectorElementCount());
2028
2029 if (*InnerSA == ShAmt) {
2030 if (!TLO.LegalOperations() ||
2031 getOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: ExtVT) == Legal)
2032 return TLO.CombineTo(
2033 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT,
2034 N1: Op0.getOperand(i: 0),
2035 N2: TLO.DAG.getValueType(ExtVT)));
2036
2037 // Even if we can't convert to sext_inreg, we might be able to
2038 // remove this shift pair if the input is already sign extended.
2039 unsigned NumSignBits =
2040 TLO.DAG.ComputeNumSignBits(Op: Op0.getOperand(i: 0), DemandedElts);
2041 if (NumSignBits > ShAmt)
2042 return TLO.CombineTo(O: Op, N: Op0.getOperand(i: 0));
2043 }
2044 }
2045 }
2046
2047 APInt InDemandedMask = (DemandedBits << ShAmt);
2048
2049 // If the shift is exact, then it does demand the low bits (and knows that
2050 // they are zero).
2051 if (Op->getFlags().hasExact())
2052 InDemandedMask.setLowBits(ShAmt);
2053
2054 // If any of the demanded bits are produced by the sign extension, we also
2055 // demand the input sign bit.
2056 if (DemandedBits.countl_zero() < ShAmt)
2057 InDemandedMask.setSignBit();
2058
2059 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InDemandedMask, OriginalDemandedElts: DemandedElts, Known, TLO,
2060 Depth: Depth + 1))
2061 return true;
2062 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2063 Known.Zero.lshrInPlace(ShiftAmt: ShAmt);
2064 Known.One.lshrInPlace(ShiftAmt: ShAmt);
2065
2066 // If the input sign bit is known to be zero, or if none of the top bits
2067 // are demanded, turn this into an unsigned shift right.
2068 if (Known.Zero[BitWidth - ShAmt - 1] ||
2069 DemandedBits.countl_zero() >= ShAmt) {
2070 SDNodeFlags Flags;
2071 Flags.setExact(Op->getFlags().hasExact());
2072 return TLO.CombineTo(
2073 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1, Flags));
2074 }
2075
2076 int Log2 = DemandedBits.exactLogBase2();
2077 if (Log2 >= 0) {
2078 // The bit must come from the sign.
2079 SDValue NewSA = TLO.DAG.getConstant(Val: BitWidth - 1 - Log2, DL: dl, VT: ShiftVT);
2080 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: NewSA));
2081 }
2082
2083 if (Known.One[BitWidth - ShAmt - 1])
2084 // New bits are known one.
2085 Known.One.setHighBits(ShAmt);
2086
2087 // Attempt to avoid multi-use ops if we don't need anything from them.
2088 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2089 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2090 Op: Op0, DemandedBits: InDemandedMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2091 if (DemandedOp0) {
2092 SDValue NewOp = TLO.DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: DemandedOp0, N2: Op1);
2093 return TLO.CombineTo(O: Op, N: NewOp);
2094 }
2095 }
2096 }
2097 break;
2098 }
2099 case ISD::FSHL:
2100 case ISD::FSHR: {
2101 SDValue Op0 = Op.getOperand(i: 0);
2102 SDValue Op1 = Op.getOperand(i: 1);
2103 SDValue Op2 = Op.getOperand(i: 2);
2104 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2105
2106 if (ConstantSDNode *SA = isConstOrConstSplat(N: Op2, DemandedElts)) {
2107 unsigned Amt = SA->getAPIntValue().urem(RHS: BitWidth);
2108
2109 // For fshl, 0-shift returns the 1st arg.
2110 // For fshr, 0-shift returns the 2nd arg.
2111 if (Amt == 0) {
2112 if (SimplifyDemandedBits(Op: IsFSHL ? Op0 : Op1, OriginalDemandedBits: DemandedBits, OriginalDemandedElts: DemandedElts,
2113 Known, TLO, Depth: Depth + 1))
2114 return true;
2115 break;
2116 }
2117
2118 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2119 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2120 APInt Demanded0 = DemandedBits.lshr(shiftAmt: IsFSHL ? Amt : (BitWidth - Amt));
2121 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2122 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: Demanded0, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2123 Depth: Depth + 1))
2124 return true;
2125 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: Demanded1, OriginalDemandedElts: DemandedElts, Known, TLO,
2126 Depth: Depth + 1))
2127 return true;
2128
2129 Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2130 Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2131 Known.One.lshrInPlace(ShiftAmt: IsFSHL ? (BitWidth - Amt) : Amt);
2132 Known.Zero.lshrInPlace(ShiftAmt: IsFSHL ? (BitWidth - Amt) : Amt);
2133 Known = Known.unionWith(RHS: Known2);
2134
2135 // Attempt to avoid multi-use ops if we don't need anything from them.
2136 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2137 !DemandedElts.isAllOnes()) {
2138 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2139 Op: Op0, DemandedBits: Demanded0, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2140 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2141 Op: Op1, DemandedBits: Demanded1, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2142 if (DemandedOp0 || DemandedOp1) {
2143 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2144 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2145 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedOp0,
2146 N2: DemandedOp1, N3: Op2);
2147 return TLO.CombineTo(O: Op, N: NewOp);
2148 }
2149 }
2150 }
2151
    // For pow-2 bitwidths the shift amount is taken modulo the bit width,
    // so we only demand its low log2(BitWidth) bits.
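    // e.g. for i32, BitWidth - 1 == 31 == 0b11111, so only the low 5 bits
    // of the shift amount are demanded.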
2153 if (isPowerOf2_32(Value: BitWidth)) {
2154 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2155 if (SimplifyDemandedBits(Op: Op2, OriginalDemandedBits: DemandedAmtBits, OriginalDemandedElts: DemandedElts,
2156 Known&: Known2, TLO, Depth: Depth + 1))
2157 return true;
2158 }
2159 break;
2160 }
2161 case ISD::ROTL:
2162 case ISD::ROTR: {
2163 SDValue Op0 = Op.getOperand(i: 0);
2164 SDValue Op1 = Op.getOperand(i: 1);
2165 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2166
    // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
2168 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1))
2169 return TLO.CombineTo(O: Op, N: Op0);
2170
2171 if (ConstantSDNode *SA = isConstOrConstSplat(N: Op1, DemandedElts)) {
2172 unsigned Amt = SA->getAPIntValue().urem(RHS: BitWidth);
2173 unsigned RevAmt = BitWidth - Amt;
2174
2175 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2176 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2177 APInt Demanded0 = DemandedBits.rotr(rotateAmt: IsROTL ? Amt : RevAmt);
2178 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: Demanded0, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2179 Depth: Depth + 1))
2180 return true;
2181
2182 // rot*(x, 0) --> x
2183 if (Amt == 0)
2184 return TLO.CombineTo(O: Op, N: Op0);
2185
      // If the bits rotated in from one side are never demanded, the rotate
      // can be replaced by a plain shift.
2187 if ((!TLO.LegalOperations() || isOperationLegal(Op: ISD::SHL, VT)) &&
2188 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2189 Op1 = TLO.DAG.getConstant(Val: IsROTL ? Amt : RevAmt, DL: dl, VT: Op1.getValueType());
2190 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op0, N2: Op1));
2191 }
2192 if ((!TLO.LegalOperations() || isOperationLegal(Op: ISD::SRL, VT)) &&
2193 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2194 Op1 = TLO.DAG.getConstant(Val: IsROTL ? RevAmt : Amt, DL: dl, VT: Op1.getValueType());
2195 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op0, N2: Op1));
2196 }
2197 }
2198
    // For pow-2 bitwidths the rotate amount is taken modulo the bit width,
    // so we only demand its low log2(BitWidth) bits.
2200 if (isPowerOf2_32(Value: BitWidth)) {
2201 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2202 if (SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: DemandedAmtBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2203 Depth: Depth + 1))
2204 return true;
2205 }
2206 break;
2207 }
2208 case ISD::SMIN:
2209 case ISD::SMAX:
2210 case ISD::UMIN:
2211 case ISD::UMAX: {
2212 unsigned Opc = Op.getOpcode();
2213 SDValue Op0 = Op.getOperand(i: 0);
2214 SDValue Op1 = Op.getOperand(i: 1);
2215
    // If we're only demanding sign bits, then we can simplify to an OR/AND
    // node.
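    // e.g. with 0/-1 operands: smin/umax behave as 'or', smax/umin as 'and'.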
2217 unsigned BitOp =
2218 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2219 unsigned NumSignBits =
2220 std::min(a: TLO.DAG.ComputeNumSignBits(Op: Op0, DemandedElts, Depth: Depth + 1),
2221 b: TLO.DAG.ComputeNumSignBits(Op: Op1, DemandedElts, Depth: Depth + 1));
2222 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2223 if (NumSignBits >= NumDemandedUpperBits)
2224 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: BitOp, DL: SDLoc(Op), VT, N1: Op0, N2: Op1));
2225
2226 // Check if one arg is always less/greater than (or equal) to the other arg.
2227 KnownBits Known0 = TLO.DAG.computeKnownBits(Op: Op0, DemandedElts, Depth: Depth + 1);
2228 KnownBits Known1 = TLO.DAG.computeKnownBits(Op: Op1, DemandedElts, Depth: Depth + 1);
2229 switch (Opc) {
2230 case ISD::SMIN:
2231 if (std::optional<bool> IsSLE = KnownBits::sle(LHS: Known0, RHS: Known1))
2232 return TLO.CombineTo(O: Op, N: *IsSLE ? Op0 : Op1);
2233 if (std::optional<bool> IsSLT = KnownBits::slt(LHS: Known0, RHS: Known1))
2234 return TLO.CombineTo(O: Op, N: *IsSLT ? Op0 : Op1);
2235 Known = KnownBits::smin(LHS: Known0, RHS: Known1);
2236 break;
2237 case ISD::SMAX:
2238 if (std::optional<bool> IsSGE = KnownBits::sge(LHS: Known0, RHS: Known1))
2239 return TLO.CombineTo(O: Op, N: *IsSGE ? Op0 : Op1);
2240 if (std::optional<bool> IsSGT = KnownBits::sgt(LHS: Known0, RHS: Known1))
2241 return TLO.CombineTo(O: Op, N: *IsSGT ? Op0 : Op1);
2242 Known = KnownBits::smax(LHS: Known0, RHS: Known1);
2243 break;
2244 case ISD::UMIN:
2245 if (std::optional<bool> IsULE = KnownBits::ule(LHS: Known0, RHS: Known1))
2246 return TLO.CombineTo(O: Op, N: *IsULE ? Op0 : Op1);
2247 if (std::optional<bool> IsULT = KnownBits::ult(LHS: Known0, RHS: Known1))
2248 return TLO.CombineTo(O: Op, N: *IsULT ? Op0 : Op1);
2249 Known = KnownBits::umin(LHS: Known0, RHS: Known1);
2250 break;
2251 case ISD::UMAX:
2252 if (std::optional<bool> IsUGE = KnownBits::uge(LHS: Known0, RHS: Known1))
2253 return TLO.CombineTo(O: Op, N: *IsUGE ? Op0 : Op1);
2254 if (std::optional<bool> IsUGT = KnownBits::ugt(LHS: Known0, RHS: Known1))
2255 return TLO.CombineTo(O: Op, N: *IsUGT ? Op0 : Op1);
2256 Known = KnownBits::umax(LHS: Known0, RHS: Known1);
2257 break;
2258 }
2259 break;
2260 }
2261 case ISD::BITREVERSE: {
2262 SDValue Src = Op.getOperand(i: 0);
2263 APInt DemandedSrcBits = DemandedBits.reverseBits();
2264 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2265 Depth: Depth + 1))
2266 return true;
2267 Known.One = Known2.One.reverseBits();
2268 Known.Zero = Known2.Zero.reverseBits();
2269 break;
2270 }
2271 case ISD::BSWAP: {
2272 SDValue Src = Op.getOperand(i: 0);
2273
2274 // If the only bits demanded come from one byte of the bswap result,
2275 // just shift the input byte into position to eliminate the bswap.
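    // e.g. for i32: if only bits 7:0 are demanded, bswap X --> srl X, 24,
    // which moves the original top byte into the low byte.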
2276 unsigned NLZ = DemandedBits.countl_zero();
2277 unsigned NTZ = DemandedBits.countr_zero();
2278
    // Round NTZ down to the nearest byte boundary: if we have 11 trailing
    // zeros, we need all the bits down to bit 8. Likewise, round NLZ down:
    // if we have 14 leading zeros, round to 8.
2282 NLZ = alignDown(Value: NLZ, Align: 8);
2283 NTZ = alignDown(Value: NTZ, Align: 8);
2284 // If we need exactly one byte, we can do this transformation.
2285 if (BitWidth - NLZ - NTZ == 8) {
2286 // Replace this with either a left or right shift to get the byte into
2287 // the right place.
2288 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2289 if (!TLO.LegalOperations() || isOperationLegal(Op: ShiftOpcode, VT)) {
2290 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2291 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(Val: ShiftAmount, VT, DL: dl);
2292 SDValue NewOp = TLO.DAG.getNode(Opcode: ShiftOpcode, DL: dl, VT, N1: Src, N2: ShAmt);
2293 return TLO.CombineTo(O: Op, N: NewOp);
2294 }
2295 }
2296
2297 APInt DemandedSrcBits = DemandedBits.byteSwap();
2298 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedElts, Known&: Known2, TLO,
2299 Depth: Depth + 1))
2300 return true;
2301 Known.One = Known2.One.byteSwap();
2302 Known.Zero = Known2.Zero.byteSwap();
2303 break;
2304 }
2305 case ISD::CTPOP: {
2306 // If only 1 bit is demanded, replace with PARITY as long as we're before
2307 // op legalization.
2308 // FIXME: Limit to scalars for now.
2309 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2310 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::PARITY, DL: dl, VT,
2311 Operand: Op.getOperand(i: 0)));
2312
2313 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2314 break;
2315 }
2316 case ISD::SIGN_EXTEND_INREG: {
2317 SDValue Op0 = Op.getOperand(i: 0);
2318 EVT ExVT = cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT();
2319 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2320
2321 // If we only care about the highest bit, don't bother shifting right.
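    // e.g. for i32: if only bit 31 of (sext_inreg X, i8) is demanded, it
    // can be computed as (shl X, 24).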
2322 if (DemandedBits.isSignMask()) {
2323 unsigned MinSignedBits =
2324 TLO.DAG.ComputeMaxSignificantBits(Op: Op0, DemandedElts, Depth: Depth + 1);
2325 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
      // However, if the input is already sign extended, we expect the sign
      // extension to be dropped altogether later and do not simplify.
2328 if (!AlreadySignExtended) {
2329 // Compute the correct shift amount type, which must be getShiftAmountTy
2330 // for scalar types after legalization.
2331 SDValue ShiftAmt =
2332 TLO.DAG.getShiftAmountConstant(Val: BitWidth - ExVTBits, VT, DL: dl);
2333 return TLO.CombineTo(O: Op,
2334 N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op0, N2: ShiftAmt));
2335 }
2336 }
2337
2338 // If none of the extended bits are demanded, eliminate the sextinreg.
2339 if (DemandedBits.getActiveBits() <= ExVTBits)
2340 return TLO.CombineTo(O: Op, N: Op0);
2341
2342 APInt InputDemandedBits = DemandedBits.getLoBits(numBits: ExVTBits);
2343
2344 // Since the sign extended bits are demanded, we know that the sign
2345 // bit is demanded.
2346 InputDemandedBits.setBit(ExVTBits - 1);
2347
2348 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: InputDemandedBits, OriginalDemandedElts: DemandedElts, Known, TLO,
2349 Depth: Depth + 1))
2350 return true;
2351 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2352
2353 // If the sign bit of the input is known set or clear, then we know the
2354 // top bits of the result.
2355
2356 // If the input sign bit is known zero, convert this into a zero extension.
2357 if (Known.Zero[ExVTBits - 1])
2358 return TLO.CombineTo(O: Op, N: TLO.DAG.getZeroExtendInReg(Op: Op0, DL: dl, VT: ExVT));
2359
2360 APInt Mask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ExVTBits);
2361 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2362 Known.One.setBitsFrom(ExVTBits);
2363 Known.Zero &= Mask;
2364 } else { // Input sign bit unknown
2365 Known.Zero &= Mask;
2366 Known.One &= Mask;
2367 }
2368 break;
2369 }
2370 case ISD::BUILD_PAIR: {
2371 EVT HalfVT = Op.getOperand(i: 0).getValueType();
2372 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2373
2374 APInt MaskLo = DemandedBits.getLoBits(numBits: HalfBitWidth).trunc(width: HalfBitWidth);
2375 APInt MaskHi = DemandedBits.getHiBits(numBits: HalfBitWidth).trunc(width: HalfBitWidth);
2376
2377 KnownBits KnownLo, KnownHi;
2378
2379 if (SimplifyDemandedBits(Op: Op.getOperand(i: 0), DemandedBits: MaskLo, Known&: KnownLo, TLO, Depth: Depth + 1))
2380 return true;
2381
2382 if (SimplifyDemandedBits(Op: Op.getOperand(i: 1), DemandedBits: MaskHi, Known&: KnownHi, TLO, Depth: Depth + 1))
2383 return true;
2384
2385 Known = KnownHi.concat(Lo: KnownLo);
2386 break;
2387 }
2388 case ISD::ZERO_EXTEND_VECTOR_INREG:
2389 if (VT.isScalableVector())
2390 return false;
2391 [[fallthrough]];
2392 case ISD::ZERO_EXTEND: {
2393 SDValue Src = Op.getOperand(i: 0);
2394 EVT SrcVT = Src.getValueType();
2395 unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts =
        SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2397 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2398
2399 // If none of the top bits are demanded, convert this into an any_extend.
2400 if (DemandedBits.getActiveBits() <= InBits) {
2401 // If we only need the non-extended bits of the bottom element
2402 // then we can just bitcast to the result.
2403 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2404 VT.getSizeInBits() == SrcVT.getSizeInBits())
2405 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2406
2407 unsigned Opc =
2408 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2409 if (!TLO.LegalOperations() || isOperationLegal(Op: Opc, VT))
2410 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src));
2411 }
2412
2413 SDNodeFlags Flags = Op->getFlags();
2414 APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2415 APInt InDemandedElts = DemandedElts.zext(width: InElts);
2416 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2417 Depth: Depth + 1)) {
2418 if (Flags.hasNonNeg()) {
2419 Flags.setNonNeg(false);
2420 Op->setFlags(Flags);
2421 }
2422 return true;
2423 }
2424 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2425 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2426 Known = Known.zext(BitWidth);
2427
2428 // Attempt to avoid multi-use ops if we don't need anything from them.
2429 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2430 Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
2431 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2432 break;
2433 }
2434 case ISD::SIGN_EXTEND_VECTOR_INREG:
2435 if (VT.isScalableVector())
2436 return false;
2437 [[fallthrough]];
2438 case ISD::SIGN_EXTEND: {
2439 SDValue Src = Op.getOperand(i: 0);
2440 EVT SrcVT = Src.getValueType();
2441 unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts =
        SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2443 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2444
2445 APInt InDemandedElts = DemandedElts.zext(width: InElts);
2446 APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2447
2448 // Since some of the sign extended bits are demanded, we know that the sign
2449 // bit is demanded.
2450 InDemandedBits.setBit(InBits - 1);
2451
2452 // If none of the top bits are demanded, convert this into an any_extend.
2453 if (DemandedBits.getActiveBits() <= InBits) {
2454 // If we only need the non-extended bits of the bottom element
2455 // then we can just bitcast to the result.
2456 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2457 VT.getSizeInBits() == SrcVT.getSizeInBits())
2458 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2459
2460 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2461 if (getBooleanContents(Type: VT) != ZeroOrNegativeOneBooleanContent ||
2462 TLO.DAG.ComputeNumSignBits(Op: Src, DemandedElts: InDemandedElts, Depth: Depth + 1) !=
2463 InBits) {
2464 unsigned Opc =
2465 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2466 if (!TLO.LegalOperations() || isOperationLegal(Op: Opc, VT))
2467 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src));
2468 }
2469 }
2470
2471 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2472 Depth: Depth + 1))
2473 return true;
2474 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2475 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2476
2477 // If the sign bit is known one, the top bits match.
2478 Known = Known.sext(BitWidth);
2479
2480 // If the sign bit is known zero, convert this to a zero extend.
2481 if (Known.isNonNegative()) {
2482 unsigned Opc =
2483 IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2484 if (!TLO.LegalOperations() || isOperationLegal(Op: Opc, VT)) {
2485 SDNodeFlags Flags;
2486 if (!IsVecInReg)
2487 Flags.setNonNeg(true);
2488 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Opc, DL: dl, VT, Operand: Src, Flags));
2489 }
2490 }
2491
2492 // Attempt to avoid multi-use ops if we don't need anything from them.
2493 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2494 Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
2495 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2496 break;
2497 }
2498 case ISD::ANY_EXTEND_VECTOR_INREG:
2499 if (VT.isScalableVector())
2500 return false;
2501 [[fallthrough]];
2502 case ISD::ANY_EXTEND: {
2503 SDValue Src = Op.getOperand(i: 0);
2504 EVT SrcVT = Src.getValueType();
2505 unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts =
        SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2507 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2508
2509 // If we only need the bottom element then we can just bitcast.
2510 // TODO: Handle ANY_EXTEND?
2511 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2512 VT.getSizeInBits() == SrcVT.getSizeInBits())
2513 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
2514
2515 APInt InDemandedBits = DemandedBits.trunc(width: InBits);
2516 APInt InDemandedElts = DemandedElts.zext(width: InElts);
2517 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: InDemandedBits, OriginalDemandedElts: InDemandedElts, Known, TLO,
2518 Depth: Depth + 1))
2519 return true;
2520 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2521 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2522 Known = Known.anyext(BitWidth);
2523
2524 // Attempt to avoid multi-use ops if we don't need anything from them.
2525 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2526 Op: Src, DemandedBits: InDemandedBits, DemandedElts: InDemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
2527 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, Operand: NewSrc));
2528 break;
2529 }
2530 case ISD::TRUNCATE: {
2531 SDValue Src = Op.getOperand(i: 0);
2532
2533 // Simplify the input, using demanded bit information, and compute the known
2534 // zero/one bits live out.
2535 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2536 APInt TruncMask = DemandedBits.zext(width: OperandBitWidth);
2537 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: TruncMask, OriginalDemandedElts: DemandedElts, Known, TLO,
2538 Depth: Depth + 1))
2539 return true;
2540 Known = Known.trunc(BitWidth);
2541
2542 // Attempt to avoid multi-use ops if we don't need anything from them.
2543 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2544 Op: Src, DemandedBits: TruncMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1))
2545 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: NewSrc));
2546
2547 // If the input is only used by this truncate, see if we can shrink it based
2548 // on the known demanded bits.
2549 switch (Src.getOpcode()) {
2550 default:
2551 break;
2552 case ISD::SRL:
2553 // Shrink SRL by a constant if none of the high bits shifted in are
2554 // demanded.
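      // e.g. (i32 (truncate (srl i64:X, 16))) --> (srl (truncate X), 16)
      // when only bits 15:0 of the result are demanded.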
2555 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2556 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2557 // undesirable.
2558 break;
2559
2560 if (Src.getNode()->hasOneUse()) {
2561 const APInt *ShAmtC =
2562 TLO.DAG.getValidShiftAmountConstant(V: Src, DemandedElts);
2563 if (!ShAmtC || ShAmtC->uge(RHS: BitWidth))
2564 break;
2565 uint64_t ShVal = ShAmtC->getZExtValue();
2566
2567 APInt HighBits =
2568 APInt::getHighBitsSet(numBits: OperandBitWidth, hiBitsSet: OperandBitWidth - BitWidth);
2569 HighBits.lshrInPlace(ShiftAmt: ShVal);
2570 HighBits = HighBits.trunc(width: BitWidth);
2571
2572 if (!(HighBits & DemandedBits)) {
2573 // None of the shifted in bits are needed. Add a truncate of the
2574 // shift input, then shift it.
2575 SDValue NewShAmt =
2576 TLO.DAG.getShiftAmountConstant(Val: ShVal, VT, DL: dl, LegalTypes: TLO.LegalTypes());
2577 SDValue NewTrunc =
2578 TLO.DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Src.getOperand(i: 0));
2579 return TLO.CombineTo(
2580 O: Op, N: TLO.DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: NewTrunc, N2: NewShAmt));
2581 }
2582 }
2583 break;
2584 }
2585
2586 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2587 break;
2588 }
2589 case ISD::AssertZext: {
2590 // AssertZext demands all of the high bits, plus any of the low bits
2591 // demanded by its users.
2592 EVT ZVT = cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT();
2593 APInt InMask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: ZVT.getSizeInBits());
2594 if (SimplifyDemandedBits(Op: Op.getOperand(i: 0), DemandedBits: ~InMask | DemandedBits, Known,
2595 TLO, Depth: Depth + 1))
2596 return true;
2597 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2598
2599 Known.Zero |= ~InMask;
2600 Known.One &= (~Known.Zero);
2601 break;
2602 }
2603 case ISD::EXTRACT_VECTOR_ELT: {
2604 SDValue Src = Op.getOperand(i: 0);
2605 SDValue Idx = Op.getOperand(i: 1);
2606 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2607 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2608
2609 if (SrcEltCnt.isScalable())
2610 return false;
2611
2612 // Demand the bits from every vector element without a constant index.
2613 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2614 APInt DemandedSrcElts = APInt::getAllOnes(numBits: NumSrcElts);
2615 if (auto *CIdx = dyn_cast<ConstantSDNode>(Val&: Idx))
2616 if (CIdx->getAPIntValue().ult(RHS: NumSrcElts))
2617 DemandedSrcElts = APInt::getOneBitSet(numBits: NumSrcElts, BitNo: CIdx->getZExtValue());
2618
    // If BitWidth > EltBitWidth the value is any-extended, so we do not know
    // anything about the extended bits.
2621 APInt DemandedSrcBits = DemandedBits;
2622 if (BitWidth > EltBitWidth)
2623 DemandedSrcBits = DemandedSrcBits.trunc(width: EltBitWidth);
2624
2625 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts, Known&: Known2, TLO,
2626 Depth: Depth + 1))
2627 return true;
2628
2629 // Attempt to avoid multi-use ops if we don't need anything from them.
2630 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2631 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2632 Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1)) {
2633 SDValue NewOp =
2634 TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: DemandedSrc, N2: Idx);
2635 return TLO.CombineTo(O: Op, N: NewOp);
2636 }
2637 }
2638
2639 Known = Known2;
2640 if (BitWidth > EltBitWidth)
2641 Known = Known.anyext(BitWidth);
2642 break;
2643 }
2644 case ISD::BITCAST: {
2645 if (VT.isScalableVector())
2646 return false;
2647 SDValue Src = Op.getOperand(i: 0);
2648 EVT SrcVT = Src.getValueType();
2649 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2650
2651 // If this is an FP->Int bitcast and if the sign bit is the only
2652 // thing demanded, turn this into a FGETSIGN.
2653 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2654 DemandedBits == APInt::getSignMask(BitWidth: Op.getValueSizeInBits()) &&
2655 SrcVT.isFloatingPoint()) {
2656 bool OpVTLegal = isOperationLegalOrCustom(Op: ISD::FGETSIGN, VT);
2657 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2658 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2659 SrcVT != MVT::f128) {
2660 // Cannot eliminate/lower SHL for f128 yet.
2661 EVT Ty = OpVTLegal ? VT : MVT::i32;
2662 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2663 // place. We expect the SHL to be eliminated by other optimizations.
2664 SDValue Sign = TLO.DAG.getNode(Opcode: ISD::FGETSIGN, DL: dl, VT: Ty, Operand: Src);
2665 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2666 if (!OpVTLegal && OpVTSizeInBits > 32)
2667 Sign = TLO.DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Sign);
2668 unsigned ShVal = Op.getValueSizeInBits() - 1;
2669 SDValue ShAmt = TLO.DAG.getConstant(Val: ShVal, DL: dl, VT);
2670 return TLO.CombineTo(O: Op,
2671 N: TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Sign, N2: ShAmt));
2672 }
2673 }
2674
2675 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2676 // Demand the elt/bit if any of the original elts/bits are demanded.
2677 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2678 unsigned Scale = BitWidth / NumSrcEltBits;
2679 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2680 APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
2681 APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
2682 for (unsigned i = 0; i != Scale; ++i) {
2683 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2684 unsigned BitOffset = EltOffset * NumSrcEltBits;
2685 APInt Sub = DemandedBits.extractBits(numBits: NumSrcEltBits, bitPosition: BitOffset);
2686 if (!Sub.isZero()) {
2687 DemandedSrcBits |= Sub;
2688 for (unsigned j = 0; j != NumElts; ++j)
2689 if (DemandedElts[j])
2690 DemandedSrcElts.setBit((j * Scale) + i);
2691 }
2692 }
2693
2694 APInt KnownSrcUndef, KnownSrcZero;
2695 if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedSrcElts, KnownUndef&: KnownSrcUndef,
2696 KnownZero&: KnownSrcZero, TLO, Depth: Depth + 1))
2697 return true;
2698
2699 KnownBits KnownSrcBits;
2700 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts,
2701 Known&: KnownSrcBits, TLO, Depth: Depth + 1))
2702 return true;
2703 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2704 // TODO - bigendian once we have test coverage.
2705 unsigned Scale = NumSrcEltBits / BitWidth;
2706 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2707 APInt DemandedSrcBits = APInt::getZero(numBits: NumSrcEltBits);
2708 APInt DemandedSrcElts = APInt::getZero(numBits: NumSrcElts);
2709 for (unsigned i = 0; i != NumElts; ++i)
2710 if (DemandedElts[i]) {
2711 unsigned Offset = (i % Scale) * BitWidth;
2712 DemandedSrcBits.insertBits(SubBits: DemandedBits, bitPosition: Offset);
2713 DemandedSrcElts.setBit(i / Scale);
2714 }
2715
2716 if (SrcVT.isVector()) {
2717 APInt KnownSrcUndef, KnownSrcZero;
2718 if (SimplifyDemandedVectorElts(Op: Src, DemandedEltMask: DemandedSrcElts, KnownUndef&: KnownSrcUndef,
2719 KnownZero&: KnownSrcZero, TLO, Depth: Depth + 1))
2720 return true;
2721 }
2722
2723 KnownBits KnownSrcBits;
2724 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: DemandedSrcBits, OriginalDemandedElts: DemandedSrcElts,
2725 Known&: KnownSrcBits, TLO, Depth: Depth + 1))
2726 return true;
2727
2728 // Attempt to avoid multi-use ops if we don't need anything from them.
2729 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2730 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2731 Op: Src, DemandedBits: DemandedSrcBits, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1)) {
2732 SDValue NewOp = TLO.DAG.getBitcast(VT, V: DemandedSrc);
2733 return TLO.CombineTo(O: Op, N: NewOp);
2734 }
2735 }
2736 }
2737
2738 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2739 // recursive call where Known may be useful to the caller.
2740 if (Depth > 0) {
2741 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2742 return false;
2743 }
2744 break;
2745 }
2746 case ISD::MUL:
2747 if (DemandedBits.isPowerOf2()) {
2748 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2749 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2750 // odd (has LSB set), then the left-shifted low bit of X is the answer.
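// e.g. (illustrative): if only bit 3 of (X * 24) is demanded, then since
// 24 == 3 << 3 with 3 odd, bit 3 of the product is bit 0 of X, and the
// multiply can be rewritten as (X << 3).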
2751 unsigned CTZ = DemandedBits.countr_zero();
2752 ConstantSDNode *C = isConstOrConstSplat(N: Op.getOperand(i: 1), DemandedElts);
2753 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2754 SDValue AmtC = TLO.DAG.getShiftAmountConstant(Val: CTZ, VT, DL: dl);
2755 SDValue Shl = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op.getOperand(i: 0), N2: AmtC);
2756 return TLO.CombineTo(O: Op, N: Shl);
2757 }
2758 }
2759 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2760 // bit[0]: X * X is odd iff X is odd.
2761 // bit[1]: writing X = 2a + X[0], X * X = 4a^2 + 4a*X[0] + X[0], so bit 1 is always zero.
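// e.g. (illustrative): if only bits [1:0] of (X * X) are demanded, the
// result simplifies to (X & 1).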
2762 if (Op.getOperand(i: 0) == Op.getOperand(i: 1) && DemandedBits.ult(RHS: 4)) {
2763 SDValue One = TLO.DAG.getConstant(Val: 1, DL: dl, VT);
2764 SDValue And1 = TLO.DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op.getOperand(i: 0), N2: One);
2765 return TLO.CombineTo(O: Op, N: And1);
2766 }
2767 [[fallthrough]];
2768 case ISD::ADD:
2769 case ISD::SUB: {
2770 // Add, Sub, and Mul don't demand any bits in positions beyond that
2771 // of the highest bit demanded of them.
2772 SDValue Op0 = Op.getOperand(i: 0), Op1 = Op.getOperand(i: 1);
2773 SDNodeFlags Flags = Op.getNode()->getFlags();
2774 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2775 APInt LoMask = APInt::getLowBitsSet(numBits: BitWidth, loBitsSet: BitWidth - DemandedBitsLZ);
2776 KnownBits KnownOp0, KnownOp1;
2777 if (SimplifyDemandedBits(Op: Op0, OriginalDemandedBits: LoMask, OriginalDemandedElts: DemandedElts, Known&: KnownOp0, TLO,
2778 Depth: Depth + 1) ||
2779 SimplifyDemandedBits(Op: Op1, OriginalDemandedBits: LoMask, OriginalDemandedElts: DemandedElts, Known&: KnownOp1, TLO,
2780 Depth: Depth + 1) ||
2781 // See if the operation should be performed at a smaller bit width.
2782 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2783 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
2784 // Disable the nsw and nuw flags. We can no longer guarantee that we
2785 // won't wrap after simplification.
2786 Flags.setNoSignedWrap(false);
2787 Flags.setNoUnsignedWrap(false);
2788 Op->setFlags(Flags);
2789 }
2790 return true;
2791 }
2792
2793 // neg x with only low bit demanded is simply x.
2794 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2795 isNullConstant(V: Op0))
2796 return TLO.CombineTo(O: Op, N: Op1);
2797
2798 // Attempt to avoid multi-use ops if we don't need anything from them.
2799 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2800 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2801 Op: Op0, DemandedBits: LoMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2802 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2803 Op: Op1, DemandedBits: LoMask, DemandedElts, DAG&: TLO.DAG, Depth: Depth + 1);
2804 if (DemandedOp0 || DemandedOp1) {
2805 Flags.setNoSignedWrap(false);
2806 Flags.setNoUnsignedWrap(false);
2807 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2808 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2809 SDValue NewOp =
2810 TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Op1, Flags);
2811 return TLO.CombineTo(O: Op, N: NewOp);
2812 }
2813 }
2814
2815 // If we have a constant operand, we may be able to turn it into -1 if we
2816 // do not demand the high bits. This can make the constant smaller to
2817 // encode, allow more general folding, or match specialized instruction
2818 // patterns (e.g., 'blsr' on x86). Don't bother changing 1 to -1 because that
2819 // is probably not useful (and could be detrimental).
2820 ConstantSDNode *C = isConstOrConstSplat(N: Op1);
2821 APInt HighMask = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: DemandedBitsLZ);
2822 if (C && !C->isAllOnes() && !C->isOne() &&
2823 (C->getAPIntValue() | HighMask).isAllOnes()) {
2824 SDValue Neg1 = TLO.DAG.getAllOnesConstant(DL: dl, VT);
2825 // Disable the nsw and nuw flags. We can no longer guarantee that we
2826 // won't wrap after simplification.
2827 Flags.setNoSignedWrap(false);
2828 Flags.setNoUnsignedWrap(false);
2829 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT, N1: Op0, N2: Neg1, Flags);
2830 return TLO.CombineTo(O: Op, N: NewOp);
2831 }
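// e.g. (illustrative): (add X, 255) with only the low 8 bits demanded can
// be rewritten as (add X, -1), which is typically cheaper to encode.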
2832
2833 // Match a multiply with a disguised negated-power-of-2 and convert to
2834 // an equivalent shift-left amount.
2835 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2836 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2837 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2838 return 0;
2839
2840 // Don't touch opaque constants. Also, ignore zero and power-of-2
2841 // multiplies. Those will get folded later.
2842 ConstantSDNode *MulC = isConstOrConstSplat(N: Mul.getOperand(i: 1));
2843 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2844 !MulC->getAPIntValue().isPowerOf2()) {
2845 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2846 if (UnmaskedC.isNegatedPowerOf2())
2847 return (-UnmaskedC).logBase2();
2848 }
2849 return 0;
2850 };
2851
2852 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2853 unsigned ShlAmt) {
2854 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(Val: ShlAmt, VT, DL: dl);
2855 SDValue Shl = TLO.DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: X, N2: ShlAmtC);
2856 SDValue Res = TLO.DAG.getNode(Opcode: NT, DL: dl, VT, N1: Y, N2: Shl);
2857 return TLO.CombineTo(O: Op, N: Res);
2858 };
2859
2860 if (isOperationLegalOrCustom(Op: ISD::SHL, VT)) {
2861 if (Op.getOpcode() == ISD::ADD) {
2862 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2863 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2864 return foldMul(ISD::SUB, Op0.getOperand(i: 0), Op1, ShAmt);
2865 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2866 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2867 return foldMul(ISD::SUB, Op1.getOperand(i: 0), Op0, ShAmt);
2868 }
2869 if (Op.getOpcode() == ISD::SUB) {
2870 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2871 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2872 return foldMul(ISD::ADD, Op1.getOperand(i: 0), Op0, ShAmt);
2873 }
2874 }
2875
2876 if (Op.getOpcode() == ISD::MUL) {
2877 Known = KnownBits::mul(LHS: KnownOp0, RHS: KnownOp1);
2878 } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2879 Known = KnownBits::computeForAddSub(Add: Op.getOpcode() == ISD::ADD,
2880 NSW: Flags.hasNoSignedWrap(), LHS: KnownOp0,
2881 RHS: KnownOp1);
2882 }
2883 break;
2884 }
2885 default:
2886 // We also ask the target about intrinsics (which could be specific to it).
2887 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2888 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2889 // TODO: Probably okay to remove after audit; here to reduce change size
2890 // in initial enablement patch for scalable vectors
2891 if (Op.getValueType().isScalableVector())
2892 break;
2893 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2894 Known, TLO, Depth))
2895 return true;
2896 break;
2897 }
2898
2899 // Just use computeKnownBits to compute output bits.
2900 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2901 break;
2902 }
2903
2904 // If we know the value of all of the demanded bits, return this as a
2905 // constant.
2906 if (!isTargetCanonicalConstantNode(Op) &&
2907 DemandedBits.isSubsetOf(RHS: Known.Zero | Known.One)) {
2908 // Avoid folding to a constant if any OpaqueConstant is involved.
2909 const SDNode *N = Op.getNode();
2910 for (SDNode *Op :
2911 llvm::make_range(x: SDNodeIterator::begin(N), y: SDNodeIterator::end(N))) {
2912 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: Op))
2913 if (C->isOpaque())
2914 return false;
2915 }
2916 if (VT.isInteger())
2917 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: Known.One, DL: dl, VT));
2918 if (VT.isFloatingPoint())
2919 return TLO.CombineTo(
2920 O: Op,
2921 N: TLO.DAG.getConstantFP(
2922 Val: APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), DL: dl, VT));
2923 }
2924
2925 // A multi-use 'all demanded elts' simplify failed to find any known bits.
2926 // Try again just for the original demanded elts.
2927 // Ensure we do this AFTER constant folding above.
2928 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
2929 Known = TLO.DAG.computeKnownBits(Op, DemandedElts: OriginalDemandedElts, Depth);
2930
2931 return false;
2932}
2933
2934bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2935 const APInt &DemandedElts,
2936 DAGCombinerInfo &DCI) const {
2937 SelectionDAG &DAG = DCI.DAG;
2938 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2939 !DCI.isBeforeLegalizeOps());
2940
2941 APInt KnownUndef, KnownZero;
2942 bool Simplified =
2943 SimplifyDemandedVectorElts(Op, DemandedEltMask: DemandedElts, KnownUndef, KnownZero, TLO);
2944 if (Simplified) {
2945 DCI.AddToWorklist(N: Op.getNode());
2946 DCI.CommitTargetLoweringOpt(TLO);
2947 }
2948
2949 return Simplified;
2950}
2951
2952/// Given a vector binary operation and known undefined elements for each input
2953/// operand, compute whether each element of the output is undefined.
2954static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
2955 const APInt &UndefOp0,
2956 const APInt &UndefOp1) {
2957 EVT VT = BO.getValueType();
2958 assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
2959 "Vector binop only");
2960
2961 EVT EltVT = VT.getVectorElementType();
2962 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
2963 assert(UndefOp0.getBitWidth() == NumElts &&
2964 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2965
2966 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2967 const APInt &UndefVals) {
2968 if (UndefVals[Index])
2969 return DAG.getUNDEF(VT: EltVT);
2970
2971 if (auto *BV = dyn_cast<BuildVectorSDNode>(Val&: V)) {
2972 // Try hard to make sure that the getNode() call is not creating temporary
2973 // nodes. Ignore opaque integers because they do not constant fold.
2974 SDValue Elt = BV->getOperand(Num: Index);
2975 auto *C = dyn_cast<ConstantSDNode>(Val&: Elt);
2976 if (isa<ConstantFPSDNode>(Val: Elt) || Elt.isUndef() || (C && !C->isOpaque()))
2977 return Elt;
2978 }
2979
2980 return SDValue();
2981 };
2982
2983 APInt KnownUndef = APInt::getZero(numBits: NumElts);
2984 for (unsigned i = 0; i != NumElts; ++i) {
2985 // If both inputs for this element are either constant or undef and match
2986 // the element type, compute the constant/undef result for this element of
2987 // the vector.
2988 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2989 // not handle FP constants. The code within getNode() should be refactored
2990 // to avoid the danger of creating a bogus temporary node here.
2991 SDValue C0 = getUndefOrConstantElt(BO.getOperand(i: 0), i, UndefOp0);
2992 SDValue C1 = getUndefOrConstantElt(BO.getOperand(i: 1), i, UndefOp1);
2993 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
2994 if (DAG.getNode(Opcode: BO.getOpcode(), DL: SDLoc(BO), VT: EltVT, N1: C0, N2: C1).isUndef())
2995 KnownUndef.setBit(i);
2996 }
2997 return KnownUndef;
2998}
2999
3000bool TargetLowering::SimplifyDemandedVectorElts(
3001 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3002 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3003 bool AssumeSingleUse) const {
3004 EVT VT = Op.getValueType();
3005 unsigned Opcode = Op.getOpcode();
3006 APInt DemandedElts = OriginalDemandedElts;
3007 unsigned NumElts = DemandedElts.getBitWidth();
3008 assert(VT.isVector() && "Expected vector op");
3009
3010 KnownUndef = KnownZero = APInt::getZero(numBits: NumElts);
3011
3012 const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
3013 if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO))
3014 return false;
3015
3016 // TODO: For now we assume we know nothing about scalable vectors.
3017 if (VT.isScalableVector())
3018 return false;
3019
3020 assert(VT.getVectorNumElements() == NumElts &&
3021 "Mask size mismatches value type element count!");
3022
3023 // Undef operand.
3024 if (Op.isUndef()) {
3025 KnownUndef.setAllBits();
3026 return false;
3027 }
3028
3029 // If Op has other users, assume that all elements are needed.
3030 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3031 DemandedElts.setAllBits();
3032
3033 // Not demanding any elements from Op.
3034 if (DemandedElts == 0) {
3035 KnownUndef.setAllBits();
3036 return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3037 }
3038
3039 // Limit search depth.
3040 if (Depth >= SelectionDAG::MaxRecursionDepth)
3041 return false;
3042
3043 SDLoc DL(Op);
3044 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3045 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3046
3047 // Helper for demanding the specified elements and all the bits of both binary
3048 // operands.
3049 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3050 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op: Op0, DemandedElts,
3051 DAG&: TLO.DAG, Depth: Depth + 1);
3052 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op: Op1, DemandedElts,
3053 DAG&: TLO.DAG, Depth: Depth + 1);
3054 if (NewOp0 || NewOp1) {
3055 SDValue NewOp =
3056 TLO.DAG.getNode(Opcode, DL: SDLoc(Op), VT, N1: NewOp0 ? NewOp0 : Op0,
3057 N2: NewOp1 ? NewOp1 : Op1, Flags: Op->getFlags());
3058 return TLO.CombineTo(O: Op, N: NewOp);
3059 }
3060 return false;
3061 };
3062
3063 switch (Opcode) {
3064 case ISD::SCALAR_TO_VECTOR: {
3065 if (!DemandedElts[0]) {
3066 KnownUndef.setAllBits();
3067 return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3068 }
3069 SDValue ScalarSrc = Op.getOperand(i: 0);
3070 if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3071 SDValue Src = ScalarSrc.getOperand(i: 0);
3072 SDValue Idx = ScalarSrc.getOperand(i: 1);
3073 EVT SrcVT = Src.getValueType();
3074
3075 ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
3076
3077 if (SrcEltCnt.isScalable())
3078 return false;
3079
3080 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
3081 if (isNullConstant(V: Idx)) {
3082 APInt SrcDemandedElts = APInt::getOneBitSet(numBits: NumSrcElts, BitNo: 0);
3083 APInt SrcUndef = KnownUndef.zextOrTrunc(width: NumSrcElts);
3084 APInt SrcZero = KnownZero.zextOrTrunc(width: NumSrcElts);
3085 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
3086 TLO, Depth: Depth + 1))
3087 return true;
3088 }
3089 }
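// Only element 0 is defined by SCALAR_TO_VECTOR; all higher elements are
// implicitly undef.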
3090 KnownUndef.setHighBits(NumElts - 1);
3091 break;
3092 }
3093 case ISD::BITCAST: {
3094 SDValue Src = Op.getOperand(i: 0);
3095 EVT SrcVT = Src.getValueType();
3096
3097 // We only handle vectors here.
3098 // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
3099 if (!SrcVT.isVector())
3100 break;
3101
3102 // Fast handling of 'identity' bitcasts.
3103 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3104 if (NumSrcElts == NumElts)
3105 return SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedElts, KnownUndef,
3106 KnownZero, TLO, Depth: Depth + 1);
3107
3108 APInt SrcDemandedElts, SrcZero, SrcUndef;
3109
3110 // Bitcast from a 'large element' src vector to a 'small element' vector: we
3111 // must demand a source element if any DemandedElt maps to it.
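// e.g. (illustrative): for a v2i64 -> v4i32 bitcast, demanding i32 element 1
// requires demanding i64 source element 0.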
3112 if ((NumElts % NumSrcElts) == 0) {
3113 unsigned Scale = NumElts / NumSrcElts;
3114 SrcDemandedElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
3115 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
3116 TLO, Depth: Depth + 1))
3117 return true;
3118
3119 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3120 // of the large element.
3121 // TODO - bigendian once we have test coverage.
3122 if (IsLE) {
3123 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3124 APInt SrcDemandedBits = APInt::getZero(numBits: SrcEltSizeInBits);
3125 for (unsigned i = 0; i != NumElts; ++i)
3126 if (DemandedElts[i]) {
3127 unsigned Ofs = (i % Scale) * EltSizeInBits;
3128 SrcDemandedBits.setBits(loBit: Ofs, hiBit: Ofs + EltSizeInBits);
3129 }
3130
3131 KnownBits Known;
3132 if (SimplifyDemandedBits(Op: Src, OriginalDemandedBits: SrcDemandedBits, OriginalDemandedElts: SrcDemandedElts, Known,
3133 TLO, Depth: Depth + 1))
3134 return true;
3135
3136 // The bitcast has split each wide element into a number of
3137 // narrow subelements. We have just computed the Known bits
3138 // for wide elements. See if element splitting results in
3139 // some subelements being zero. Only for demanded elements!
3140 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3141 if (!Known.Zero.extractBits(numBits: EltSizeInBits, bitPosition: SubElt * EltSizeInBits)
3142 .isAllOnes())
3143 continue;
3144 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3145 unsigned Elt = Scale * SrcElt + SubElt;
3146 if (DemandedElts[Elt])
3147 KnownZero.setBit(Elt);
3148 }
3149 }
3150 }
3151
3152 // If the src element is zero/undef then all the output elements covering it
3153 // will be as well - only demanded elements are guaranteed to be correct.
3154 for (unsigned i = 0; i != NumSrcElts; ++i) {
3155 if (SrcDemandedElts[i]) {
3156 if (SrcZero[i])
3157 KnownZero.setBits(loBit: i * Scale, hiBit: (i + 1) * Scale);
3158 if (SrcUndef[i])
3159 KnownUndef.setBits(loBit: i * Scale, hiBit: (i + 1) * Scale);
3160 }
3161 }
3162 }
3163
3164 // Bitcast from a 'small element' src vector to a 'large element' vector: we
3165 // demand all the smaller source elements covered by the larger demanded
3166 // element of this vector.
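// e.g. (illustrative): for a v4i32 -> v2i64 bitcast, demanding i64 element 0
// requires demanding i32 source elements 0 and 1.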
3167 if ((NumSrcElts % NumElts) == 0) {
3168 unsigned Scale = NumSrcElts / NumElts;
3169 SrcDemandedElts = APIntOps::ScaleBitMask(A: DemandedElts, NewBitWidth: NumSrcElts);
3170 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: SrcDemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero,
3171 TLO, Depth: Depth + 1))
3172 return true;
3173
3174 // If all the src elements covering an output element are zero/undef, then
3175 // the output element will be as well, assuming it was demanded.
3176 for (unsigned i = 0; i != NumElts; ++i) {
3177 if (DemandedElts[i]) {
3178 if (SrcZero.extractBits(numBits: Scale, bitPosition: i * Scale).isAllOnes())
3179 KnownZero.setBit(i);
3180 if (SrcUndef.extractBits(numBits: Scale, bitPosition: i * Scale).isAllOnes())
3181 KnownUndef.setBit(i);
3182 }
3183 }
3184 }
3185 break;
3186 }
3187 case ISD::BUILD_VECTOR: {
3188 // Check all elements and simplify any unused elements with UNDEF.
3189 if (!DemandedElts.isAllOnes()) {
3190 // Don't simplify BROADCASTS.
3191 if (llvm::any_of(Range: Op->op_values(),
3192 P: [&](SDValue Elt) { return Op.getOperand(i: 0) != Elt; })) {
3193 SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
3194 bool Updated = false;
3195 for (unsigned i = 0; i != NumElts; ++i) {
3196 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3197 Ops[i] = TLO.DAG.getUNDEF(VT: Ops[0].getValueType());
3198 KnownUndef.setBit(i);
3199 Updated = true;
3200 }
3201 }
3202 if (Updated)
3203 return TLO.CombineTo(O: Op, N: TLO.DAG.getBuildVector(VT, DL, Ops));
3204 }
3205 }
3206 for (unsigned i = 0; i != NumElts; ++i) {
3207 SDValue SrcOp = Op.getOperand(i);
3208 if (SrcOp.isUndef()) {
3209 KnownUndef.setBit(i);
3210 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3211 (isNullConstant(V: SrcOp) || isNullFPConstant(V: SrcOp))) {
3212 KnownZero.setBit(i);
3213 }
3214 }
3215 break;
3216 }
3217 case ISD::CONCAT_VECTORS: {
3218 EVT SubVT = Op.getOperand(i: 0).getValueType();
3219 unsigned NumSubVecs = Op.getNumOperands();
3220 unsigned NumSubElts = SubVT.getVectorNumElements();
3221 for (unsigned i = 0; i != NumSubVecs; ++i) {
3222 SDValue SubOp = Op.getOperand(i);
3223 APInt SubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
3224 APInt SubUndef, SubZero;
3225 if (SimplifyDemandedVectorElts(Op: SubOp, OriginalDemandedElts: SubElts, KnownUndef&: SubUndef, KnownZero&: SubZero, TLO,
3226 Depth: Depth + 1))
3227 return true;
3228 KnownUndef.insertBits(SubBits: SubUndef, bitPosition: i * NumSubElts);
3229 KnownZero.insertBits(SubBits: SubZero, bitPosition: i * NumSubElts);
3230 }
3231
3232 // Attempt to avoid multi-use ops if we don't need anything from them.
3233 if (!DemandedElts.isAllOnes()) {
3234 bool FoundNewSub = false;
3235 SmallVector<SDValue, 2> DemandedSubOps;
3236 for (unsigned i = 0; i != NumSubVecs; ++i) {
3237 SDValue SubOp = Op.getOperand(i);
3238 APInt SubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: i * NumSubElts);
3239 SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3240 Op: SubOp, DemandedElts: SubElts, DAG&: TLO.DAG, Depth: Depth + 1);
3241 DemandedSubOps.push_back(Elt: NewSubOp ? NewSubOp : SubOp);
3242 FoundNewSub = NewSubOp ? true : FoundNewSub;
3243 }
3244 if (FoundNewSub) {
3245 SDValue NewOp =
3246 TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc(Op), VT, Ops: DemandedSubOps);
3247 return TLO.CombineTo(O: Op, N: NewOp);
3248 }
3249 }
3250 break;
3251 }
3252 case ISD::INSERT_SUBVECTOR: {
3253 // Demand any elements from the subvector and the remainder from the src it
3254 // is inserted into.
3255 SDValue Src = Op.getOperand(i: 0);
3256 SDValue Sub = Op.getOperand(i: 1);
3257 uint64_t Idx = Op.getConstantOperandVal(i: 2);
3258 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3259 APInt DemandedSubElts = DemandedElts.extractBits(numBits: NumSubElts, bitPosition: Idx);
3260 APInt DemandedSrcElts = DemandedElts;
3261 DemandedSrcElts.insertBits(SubBits: APInt::getZero(numBits: NumSubElts), bitPosition: Idx);
3262
3263 APInt SubUndef, SubZero;
3264 if (SimplifyDemandedVectorElts(Op: Sub, OriginalDemandedElts: DemandedSubElts, KnownUndef&: SubUndef, KnownZero&: SubZero, TLO,
3265 Depth: Depth + 1))
3266 return true;
3267
3268 // If none of the src operand elements are demanded, replace it with undef.
3269 if (!DemandedSrcElts && !Src.isUndef())
3270 return TLO.CombineTo(O: Op, N: TLO.DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT,
3271 N1: TLO.DAG.getUNDEF(VT), N2: Sub,
3272 N3: Op.getOperand(i: 2)));
3273
3274 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef, KnownZero,
3275 TLO, Depth: Depth + 1))
3276 return true;
3277 KnownUndef.insertBits(SubBits: SubUndef, bitPosition: Idx);
3278 KnownZero.insertBits(SubBits: SubZero, bitPosition: Idx);
3279
3280 // Attempt to avoid multi-use ops if we don't need anything from them.
3281 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3282 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3283 Op: Src, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1);
3284 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3285 Op: Sub, DemandedElts: DemandedSubElts, DAG&: TLO.DAG, Depth: Depth + 1);
3286 if (NewSrc || NewSub) {
3287 NewSrc = NewSrc ? NewSrc : Src;
3288 NewSub = NewSub ? NewSub : Sub;
3289 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc(Op), VT, N1: NewSrc,
3290 N2: NewSub, N3: Op.getOperand(i: 2));
3291 return TLO.CombineTo(O: Op, N: NewOp);
3292 }
3293 }
3294 break;
3295 }
3296 case ISD::EXTRACT_SUBVECTOR: {
3297 // Offset the demanded elts by the subvector index.
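// e.g. (illustrative): extracting a v2i32 subvector from v8i32 at index 4
// with element 1 demanded requires demanding source element 5.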
3298 SDValue Src = Op.getOperand(i: 0);
3299 if (Src.getValueType().isScalableVector())
3300 break;
3301 uint64_t Idx = Op.getConstantOperandVal(i: 1);
3302 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3303 APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts).shl(shiftAmt: Idx);
3304
3305 APInt SrcUndef, SrcZero;
3306 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3307 Depth: Depth + 1))
3308 return true;
3309 KnownUndef = SrcUndef.extractBits(numBits: NumElts, bitPosition: Idx);
3310 KnownZero = SrcZero.extractBits(numBits: NumElts, bitPosition: Idx);
3311
3312 // Attempt to avoid multi-use ops if we don't need anything from them.
3313 if (!DemandedElts.isAllOnes()) {
3314 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3315 Op: Src, DemandedElts: DemandedSrcElts, DAG&: TLO.DAG, Depth: Depth + 1);
3316 if (NewSrc) {
3317 SDValue NewOp = TLO.DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc(Op), VT, N1: NewSrc,
3318 N2: Op.getOperand(i: 1));
3319 return TLO.CombineTo(O: Op, N: NewOp);
3320 }
3321 }
3322 break;
3323 }
3324 case ISD::INSERT_VECTOR_ELT: {
3325 SDValue Vec = Op.getOperand(i: 0);
3326 SDValue Scl = Op.getOperand(i: 1);
3327 auto *CIdx = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 2));
3328
3329 // For a legal, constant insertion index, if we don't need this insertion
3330 // then strip it, else remove it from the demanded elts.
3331 if (CIdx && CIdx->getAPIntValue().ult(RHS: NumElts)) {
3332 unsigned Idx = CIdx->getZExtValue();
3333 if (!DemandedElts[Idx])
3334 return TLO.CombineTo(O: Op, N: Vec);
3335
3336 APInt DemandedVecElts(DemandedElts);
3337 DemandedVecElts.clearBit(BitPosition: Idx);
3338 if (SimplifyDemandedVectorElts(Op: Vec, OriginalDemandedElts: DemandedVecElts, KnownUndef,
3339 KnownZero, TLO, Depth: Depth + 1))
3340 return true;
3341
3342 KnownUndef.setBitVal(BitPosition: Idx, BitValue: Scl.isUndef());
3343
3344 KnownZero.setBitVal(BitPosition: Idx, BitValue: isNullConstant(V: Scl) || isNullFPConstant(V: Scl));
3345 break;
3346 }
3347
3348 APInt VecUndef, VecZero;
3349 if (SimplifyDemandedVectorElts(Op: Vec, OriginalDemandedElts: DemandedElts, KnownUndef&: VecUndef, KnownZero&: VecZero, TLO,
3350 Depth: Depth + 1))
3351 return true;
3352 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3353 break;
3354 }
3355 case ISD::VSELECT: {
3356 SDValue Sel = Op.getOperand(i: 0);
3357 SDValue LHS = Op.getOperand(i: 1);
3358 SDValue RHS = Op.getOperand(i: 2);
3359
3360 // Try to transform the select condition based on the current demanded
3361 // elements.
3362 APInt UndefSel, ZeroSel;
3363 if (SimplifyDemandedVectorElts(Op: Sel, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefSel, KnownZero&: ZeroSel, TLO,
3364 Depth: Depth + 1))
3365 return true;
3366
3367 // See if we can simplify either vselect operand.
3368 APInt DemandedLHS(DemandedElts);
3369 APInt DemandedRHS(DemandedElts);
3370 APInt UndefLHS, ZeroLHS;
3371 APInt UndefRHS, ZeroRHS;
3372 if (SimplifyDemandedVectorElts(Op: LHS, OriginalDemandedElts: DemandedLHS, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3373 Depth: Depth + 1))
3374 return true;
3375 if (SimplifyDemandedVectorElts(Op: RHS, OriginalDemandedElts: DemandedRHS, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3376 Depth: Depth + 1))
3377 return true;
3378
3379 KnownUndef = UndefLHS & UndefRHS;
3380 KnownZero = ZeroLHS & ZeroRHS;
3381
3382 // If we know that the selected element is always zero, we don't need the
3383 // select value element.
3384 APInt DemandedSel = DemandedElts & ~KnownZero;
3385 if (DemandedSel != DemandedElts)
3386 if (SimplifyDemandedVectorElts(Op: Sel, OriginalDemandedElts: DemandedSel, KnownUndef&: UndefSel, KnownZero&: ZeroSel, TLO,
3387 Depth: Depth + 1))
3388 return true;
3389
3390 break;
3391 }
3392 case ISD::VECTOR_SHUFFLE: {
3393 SDValue LHS = Op.getOperand(i: 0);
3394 SDValue RHS = Op.getOperand(i: 1);
3395 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Val&: Op)->getMask();
3396
3397 // Collect demanded elements from the shuffle operands.
3398 APInt DemandedLHS(NumElts, 0);
3399 APInt DemandedRHS(NumElts, 0);
3400 for (unsigned i = 0; i != NumElts; ++i) {
3401 int M = ShuffleMask[i];
3402 if (M < 0 || !DemandedElts[i])
3403 continue;
3404 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3405 if (M < (int)NumElts)
3406 DemandedLHS.setBit(M);
3407 else
3408 DemandedRHS.setBit(M - NumElts);
3409 }
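// e.g. (illustrative): for a v4i32 shuffle mask <0, 5, 2, 7> with all result
// elements demanded, LHS elements {0, 2} and RHS elements {1, 3} are demanded.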
3410
3411 // See if we can simplify either shuffle operand.
3412 APInt UndefLHS, ZeroLHS;
3413 APInt UndefRHS, ZeroRHS;
3414 if (SimplifyDemandedVectorElts(Op: LHS, OriginalDemandedElts: DemandedLHS, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3415 Depth: Depth + 1))
3416 return true;
3417 if (SimplifyDemandedVectorElts(Op: RHS, OriginalDemandedElts: DemandedRHS, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3418 Depth: Depth + 1))
3419 return true;
3420
3421 // Simplify mask using undef elements from LHS/RHS.
3422 bool Updated = false;
3423 bool IdentityLHS = true, IdentityRHS = true;
3424 SmallVector<int, 32> NewMask(ShuffleMask);
3425 for (unsigned i = 0; i != NumElts; ++i) {
3426 int &M = NewMask[i];
3427 if (M < 0)
3428 continue;
3429 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3430 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3431 Updated = true;
3432 M = -1;
3433 }
3434 IdentityLHS &= (M < 0) || (M == (int)i);
3435 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3436 }
3437
3438 // Update legal shuffle masks based on demanded elements if doing so won't
3439 // reduce the mask to an identity, which could cause premature removal of the shuffle.
3440 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3441 SDValue LegalShuffle =
3442 buildLegalVectorShuffle(VT, DL, N0: LHS, N1: RHS, Mask: NewMask, DAG&: TLO.DAG);
3443 if (LegalShuffle)
3444 return TLO.CombineTo(O: Op, N: LegalShuffle);
3445 }
3446
3447 // Propagate undef/zero elements from LHS/RHS.
3448 for (unsigned i = 0; i != NumElts; ++i) {
3449 int M = ShuffleMask[i];
3450 if (M < 0) {
3451 KnownUndef.setBit(i);
3452 } else if (M < (int)NumElts) {
3453 if (UndefLHS[M])
3454 KnownUndef.setBit(i);
3455 if (ZeroLHS[M])
3456 KnownZero.setBit(i);
3457 } else {
3458 if (UndefRHS[M - NumElts])
3459 KnownUndef.setBit(i);
3460 if (ZeroRHS[M - NumElts])
3461 KnownZero.setBit(i);
3462 }
3463 }
3464 break;
3465 }
3466 case ISD::ANY_EXTEND_VECTOR_INREG:
3467 case ISD::SIGN_EXTEND_VECTOR_INREG:
3468 case ISD::ZERO_EXTEND_VECTOR_INREG: {
3469 APInt SrcUndef, SrcZero;
3470 SDValue Src = Op.getOperand(i: 0);
3471 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3472 APInt DemandedSrcElts = DemandedElts.zext(width: NumSrcElts);
3473 if (SimplifyDemandedVectorElts(Op: Src, OriginalDemandedElts: DemandedSrcElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3474 Depth: Depth + 1))
3475 return true;
3476 KnownZero = SrcZero.zextOrTrunc(width: NumElts);
3477 KnownUndef = SrcUndef.zextOrTrunc(width: NumElts);
3478
3479 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3480 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3481 DemandedSrcElts == 1) {
3482 // aext - if we just need the bottom element then we can bitcast.
3483 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Src));
3484 }
3485
3486 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3487 // zext(undef) upper bits are guaranteed to be zero.
3488 if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3489 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: SDLoc(Op), VT));
3490 KnownUndef.clearAllBits();
3491
3492 // zext - if we just need the bottom element then we can mask:
3493 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3494 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3495 Op->isOnlyUserOf(N: Src.getNode()) &&
3496 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3497 SDLoc DL(Op);
3498 EVT SrcVT = Src.getValueType();
3499 EVT SrcSVT = SrcVT.getScalarType();
3500 SmallVector<SDValue> MaskElts;
3501 MaskElts.push_back(Elt: TLO.DAG.getAllOnesConstant(DL, VT: SrcSVT));
3502 MaskElts.append(NumInputs: NumSrcElts - 1, Elt: TLO.DAG.getConstant(Val: 0, DL, VT: SrcSVT));
3503 SDValue Mask = TLO.DAG.getBuildVector(VT: SrcVT, DL, Ops: MaskElts);
3504 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3505 Opcode: ISD::AND, DL, VT: SrcVT, Ops: {Src.getOperand(i: 1), Mask})) {
3506 Fold = TLO.DAG.getNode(Opcode: ISD::AND, DL, VT: SrcVT, N1: Src.getOperand(i: 0), N2: Fold);
3507 return TLO.CombineTo(O: Op, N: TLO.DAG.getBitcast(VT, V: Fold));
3508 }
3509 }
3510 }
3511 break;
3512 }
3513
3514 // TODO: There are more binop opcodes that could be handled here - MIN,
3515 // MAX, saturated math, etc.
3516 case ISD::ADD: {
3517 SDValue Op0 = Op.getOperand(i: 0);
3518 SDValue Op1 = Op.getOperand(i: 1);
3519 if (Op0 == Op1 && Op->isOnlyUserOf(N: Op0.getNode())) {
3520 APInt UndefLHS, ZeroLHS;
3521 if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3522 Depth: Depth + 1, /*AssumeSingleUse*/ true))
3523 return true;
3524 }
3525 [[fallthrough]];
3526 }
3527 case ISD::OR:
3528 case ISD::XOR:
3529 case ISD::SUB:
3530 case ISD::FADD:
3531 case ISD::FSUB:
3532 case ISD::FMUL:
3533 case ISD::FDIV:
3534 case ISD::FREM: {
3535 SDValue Op0 = Op.getOperand(i: 0);
3536 SDValue Op1 = Op.getOperand(i: 1);
3537
3538 APInt UndefRHS, ZeroRHS;
3539 if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3540 Depth: Depth + 1))
3541 return true;
3542 APInt UndefLHS, ZeroLHS;
3543 if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3544 Depth: Depth + 1))
3545 return true;
3546
3547 KnownZero = ZeroLHS & ZeroRHS;
3548 KnownUndef = getKnownUndefForVectorBinop(BO: Op, DAG&: TLO.DAG, UndefOp0: UndefLHS, UndefOp1: UndefRHS);
3549
3550 // Attempt to avoid multi-use ops if we don't need anything from them.
3551 // TODO - use KnownUndef to relax the demandedelts?
3552 if (!DemandedElts.isAllOnes())
3553 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3554 return true;
3555 break;
3556 }
3557 case ISD::SHL:
3558 case ISD::SRL:
3559 case ISD::SRA:
3560 case ISD::ROTL:
3561 case ISD::ROTR: {
3562 SDValue Op0 = Op.getOperand(i: 0);
3563 SDValue Op1 = Op.getOperand(i: 1);
3564
3565 APInt UndefRHS, ZeroRHS;
3566 if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefRHS, KnownZero&: ZeroRHS, TLO,
3567 Depth: Depth + 1))
3568 return true;
3569 APInt UndefLHS, ZeroLHS;
3570 if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts, KnownUndef&: UndefLHS, KnownZero&: ZeroLHS, TLO,
3571 Depth: Depth + 1))
3572 return true;
3573
3574 KnownZero = ZeroLHS;
3575 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3576
3577 // Attempt to avoid multi-use ops if we don't need anything from them.
3578 // TODO - use KnownUndef to relax the demandedelts?
3579 if (!DemandedElts.isAllOnes())
3580 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3581 return true;
3582 break;
3583 }
3584 case ISD::MUL:
3585 case ISD::MULHU:
3586 case ISD::MULHS:
3587 case ISD::AND: {
3588 SDValue Op0 = Op.getOperand(i: 0);
3589 SDValue Op1 = Op.getOperand(i: 1);
3590
3591 APInt SrcUndef, SrcZero;
3592 if (SimplifyDemandedVectorElts(Op: Op1, OriginalDemandedElts: DemandedElts, KnownUndef&: SrcUndef, KnownZero&: SrcZero, TLO,
3593 Depth: Depth + 1))
3594 return true;
3595 // If we know that a demanded element was zero in Op1 we don't need to
3596 // demand it in Op0 - it's guaranteed to be zero.
3597 APInt DemandedElts0 = DemandedElts & ~SrcZero;
3598 if (SimplifyDemandedVectorElts(Op: Op0, OriginalDemandedElts: DemandedElts0, KnownUndef, KnownZero,
3599 TLO, Depth: Depth + 1))
3600 return true;
3601
3602 KnownUndef &= DemandedElts0;
3603 KnownZero &= DemandedElts0;
3604
3605 // If every element pair has a zero/undef then just fold to zero.
3606 // fold (and x, undef) -> 0 / (and x, 0) -> 0
3607 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3608 if (DemandedElts.isSubsetOf(RHS: SrcZero | KnownZero | SrcUndef | KnownUndef))
3609 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: SDLoc(Op), VT));
3610
3611 // If either side has a zero element, then the result element is zero, even
3612 // if the other is an UNDEF.
3613 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3614 // and then handle 'and' nodes with the rest of the binop opcodes.
3615 KnownZero |= SrcZero;
3616 KnownUndef &= SrcUndef;
3617 KnownUndef &= ~KnownZero;
3618
3619 // Attempt to avoid multi-use ops if we don't need anything from them.
3620 if (!DemandedElts.isAllOnes())
3621 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3622 return true;
3623 break;
3624 }
3625 case ISD::TRUNCATE:
3626 case ISD::SIGN_EXTEND:
3627 case ISD::ZERO_EXTEND:
3628 if (SimplifyDemandedVectorElts(Op: Op.getOperand(i: 0), OriginalDemandedElts: DemandedElts, KnownUndef,
3629 KnownZero, TLO, Depth: Depth + 1))
3630 return true;
3631
3632 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3633 // zext(undef) upper bits are guaranteed to be zero.
3634 if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3635 return TLO.CombineTo(O: Op, N: TLO.DAG.getConstant(Val: 0, DL: SDLoc(Op), VT));
3636 KnownUndef.clearAllBits();
3637 }
3638 break;
3639 default: {
3640 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3641 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3642 KnownZero, TLO, Depth))
3643 return true;
3644 } else {
3645 KnownBits Known;
3646 APInt DemandedBits = APInt::getAllOnes(numBits: EltSizeInBits);
3647 if (SimplifyDemandedBits(Op, OriginalDemandedBits: DemandedBits, OriginalDemandedElts, Known,
3648 TLO, Depth, AssumeSingleUse))
3649 return true;
3650 }
3651 break;
3652 }
3653 }
3654 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3655
3656 // Constant fold all undef cases.
3657 // TODO: Handle zero cases as well.
3658 if (DemandedElts.isSubsetOf(RHS: KnownUndef))
3659 return TLO.CombineTo(O: Op, N: TLO.DAG.getUNDEF(VT));
3660
3661 return false;
3662}
3663
3664 /// Determine which of the bits specified in Mask are known to be either zero
3665 /// or one and return them in Known.
3666void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3667 KnownBits &Known,
3668 const APInt &DemandedElts,
3669 const SelectionDAG &DAG,
3670 unsigned Depth) const {
3671 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3672 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3673 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3674 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3675 "Should use MaskedValueIsZero if you don't know whether Op"
3676 " is a target node!");
3677 Known.resetAll();
3678}
3679
3680void TargetLowering::computeKnownBitsForTargetInstr(
3681 GISelKnownBits &Analysis, Register R, KnownBits &Known,
3682 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3683 unsigned Depth) const {
3684 Known.resetAll();
3685}
3686
3687void TargetLowering::computeKnownBitsForFrameIndex(
3688 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3689 // The low bits are known zero if the pointer is aligned.
3690 Known.Zero.setLowBits(Log2(A: MF.getFrameInfo().getObjectAlign(ObjectIdx: FrameIdx)));
3691}
3692
3693Align TargetLowering::computeKnownAlignForTargetInstr(
3694 GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
3695 unsigned Depth) const {
3696 return Align(1);
3697}
3698
3699/// This method can be implemented by targets that want to expose additional
3700/// information about sign bits to the DAG Combiner.
3701unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3702 const APInt &,
3703 const SelectionDAG &,
3704 unsigned Depth) const {
3705 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3706 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3707 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3708 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3709 "Should use ComputeNumSignBits if you don't know whether Op"
3710 " is a target node!");
3711 return 1;
3712}
3713
3714unsigned TargetLowering::computeNumSignBitsForTargetInstr(
3715 GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
3716 const MachineRegisterInfo &MRI, unsigned Depth) const {
3717 return 1;
3718}
3719
3720bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3721 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3722 TargetLoweringOpt &TLO, unsigned Depth) const {
3723 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3724 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3725 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3726 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3727 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3728 " is a target node!");
3729 return false;
3730}
3731
3732bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3733 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3734 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3735 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3736 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3737 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3738 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3739 "Should use SimplifyDemandedBits if you don't know whether Op"
3740 " is a target node!");
3741 computeKnownBitsForTargetNode(Op, Known, DemandedElts, DAG: TLO.DAG, Depth);
3742 return false;
3743}
3744
3745SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3746 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3747 SelectionDAG &DAG, unsigned Depth) const {
3748 assert(
3749 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3750 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3751 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3752 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3753 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3754 " is a target node!");
3755 return SDValue();
3756}
3757
3758SDValue
3759TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3760 SDValue N1, MutableArrayRef<int> Mask,
3761 SelectionDAG &DAG) const {
3762 bool LegalMask = isShuffleMaskLegal(Mask, VT);
3763 if (!LegalMask) {
3764 std::swap(a&: N0, b&: N1);
3765 ShuffleVectorSDNode::commuteMask(Mask);
3766 LegalMask = isShuffleMaskLegal(Mask, VT);
3767 }
3768
3769 if (!LegalMask)
3770 return SDValue();
3771
3772 return DAG.getVectorShuffle(VT, dl: DL, N1: N0, N2: N1, Mask);
3773}
3774
3775const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
3776 return nullptr;
3777}
3778
3779bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
3780 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3781 bool PoisonOnly, unsigned Depth) const {
3782 assert(
3783 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3784 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3785 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3786 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3787 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3788 " is a target node!");
3789 return false;
3790}
3791
3792bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
3793 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3794 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
3795 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3796 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3797 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3798 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3799 "Should use canCreateUndefOrPoison if you don't know whether Op"
3800 " is a target node!");
3801 // Be conservative and return true.
3802 return true;
3803}
3804
3805bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
3806 const SelectionDAG &DAG,
3807 bool SNaN,
3808 unsigned Depth) const {
3809 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3810 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3811 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3812 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3813 "Should use isKnownNeverNaN if you don't know whether Op"
3814 " is a target node!");
3815 return false;
3816}
3817
3818bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
3819 const APInt &DemandedElts,
3820 APInt &UndefElts,
3821 const SelectionDAG &DAG,
3822 unsigned Depth) const {
3823 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3824 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3825 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3826 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3827 "Should use isSplatValue if you don't know whether Op"
3828 " is a target node!");
3829 return false;
3830}
3831
3832// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3833// work with truncating build vectors and vectors with elements of less than
3834// 8 bits.
3835bool TargetLowering::isConstTrueVal(SDValue N) const {
3836 if (!N)
3837 return false;
3838
3839 unsigned EltWidth;
3840 APInt CVal;
3841 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3842 /*AllowTruncation=*/true)) {
3843 CVal = CN->getAPIntValue();
3844 EltWidth = N.getValueType().getScalarSizeInBits();
3845 } else
3846 return false;
3847
3848 // If this is a truncating splat, truncate the splat value.
3849 // Otherwise, we may fail to match the expected values below.
3850 if (EltWidth < CVal.getBitWidth())
3851 CVal = CVal.trunc(width: EltWidth);
3852
3853 switch (getBooleanContents(Type: N.getValueType())) {
3854 case UndefinedBooleanContent:
3855 return CVal[0];
3856 case ZeroOrOneBooleanContent:
3857 return CVal.isOne();
3858 case ZeroOrNegativeOneBooleanContent:
3859 return CVal.isAllOnes();
3860 }
3861
3862 llvm_unreachable("Invalid boolean contents");
3863}
3864
3865bool TargetLowering::isConstFalseVal(SDValue N) const {
3866 if (!N)
3867 return false;
3868
3869 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val&: N);
3870 if (!CN) {
3871 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Val&: N);
3872 if (!BV)
3873 return false;
3874
3875 // Only interested in constant splats. We don't care about undef
3876 // elements when identifying boolean constants, and getConstantSplatNode
3877 // returns null if all ops are undef.
3878 CN = BV->getConstantSplatNode();
3879 if (!CN)
3880 return false;
3881 }
3882
3883 if (getBooleanContents(Type: N->getValueType(ResNo: 0)) == UndefinedBooleanContent)
3884 return !CN->getAPIntValue()[0];
3885
3886 return CN->isZero();
3887}
3888
3889bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
3890 bool SExt) const {
3891 if (VT == MVT::i1)
3892 return N->isOne();
3893
3894 TargetLowering::BooleanContent Cnt = getBooleanContents(Type: VT);
3895 switch (Cnt) {
3896 case TargetLowering::ZeroOrOneBooleanContent:
3897 // An extended value of 1 is always true, unless its original type is i1,
3898 // in which case it will be sign extended to -1.
3899 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
3900 case TargetLowering::UndefinedBooleanContent:
3901 case TargetLowering::ZeroOrNegativeOneBooleanContent:
3902 return N->isAllOnes() && SExt;
3903 }
3904 llvm_unreachable("Unexpected enumeration.");
3905}
3906
3907/// This helper function of SimplifySetCC tries to optimize the comparison when
3908/// either operand of the SetCC node is a bitwise-and instruction.
3909SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
3910 ISD::CondCode Cond, const SDLoc &DL,
3911 DAGCombinerInfo &DCI) const {
3912 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
3913 std::swap(a&: N0, b&: N1);
3914
3915 SelectionDAG &DAG = DCI.DAG;
3916 EVT OpVT = N0.getValueType();
3917 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
3918 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
3919 return SDValue();
3920
3921 // (X & Y) != 0 --> zextOrTrunc(X & Y)
3922 // iff everything but LSB is known zero:
3923 if (Cond == ISD::SETNE && isNullConstant(V: N1) &&
3924 (getBooleanContents(Type: OpVT) == TargetLowering::UndefinedBooleanContent ||
3925 getBooleanContents(Type: OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
3926 unsigned NumEltBits = OpVT.getScalarSizeInBits();
3927 APInt UpperBits = APInt::getHighBitsSet(numBits: NumEltBits, hiBitsSet: NumEltBits - 1);
3928 if (DAG.MaskedValueIsZero(Op: N0, Mask: UpperBits))
3929 return DAG.getBoolExtOrTrunc(Op: N0, SL: DL, VT, OpVT);
3930 }
3931
3932 // Try to eliminate a power-of-2 mask constant by converting to a signbit
3933 // test in a narrow type that we can truncate to with no cost. Examples:
3934 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
3935 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
3936 // TODO: This conservatively checks for type legality on the source and
3937 // destination types. That may inhibit optimizations, but it also
3938 // allows setcc->shift transforms that may be more beneficial.
3939 auto *AndC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1));
3940 if (AndC && isNullConstant(V: N1) && AndC->getAPIntValue().isPowerOf2() &&
3941 isTypeLegal(VT: OpVT) && N0.hasOneUse()) {
3942 EVT NarrowVT = EVT::getIntegerVT(Context&: *DAG.getContext(),
3943 BitWidth: AndC->getAPIntValue().getActiveBits());
3944 if (isTruncateFree(FromVT: OpVT, ToVT: NarrowVT) && isTypeLegal(VT: NarrowVT)) {
3945 SDValue Trunc = DAG.getZExtOrTrunc(Op: N0.getOperand(i: 0), DL, VT: NarrowVT);
3946 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: NarrowVT);
3947 return DAG.getSetCC(DL, VT, LHS: Trunc, RHS: Zero,
3948 Cond: Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
3949 }
3950 }
3951
3952 // Match these patterns in any of their permutations:
3953 // (X & Y) == Y
3954 // (X & Y) != Y
3955 SDValue X, Y;
3956 if (N0.getOperand(i: 0) == N1) {
3957 X = N0.getOperand(i: 1);
3958 Y = N0.getOperand(i: 0);
3959 } else if (N0.getOperand(i: 1) == N1) {
3960 X = N0.getOperand(i: 0);
3961 Y = N0.getOperand(i: 1);
3962 } else {
3963 return SDValue();
3964 }
3965
3966 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
3967 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
3968 // it's liable to create an infinite loop.
3969 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: OpVT);
3970 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
3971 DAG.isKnownToBeAPowerOfTwo(Val: Y)) {
3972 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
3973 // Note that where Y is variable and is known to have at most one bit set
3974 // (for example, if it is Z & 1) we cannot do this; the expressions are not
3975 // equivalent when Y == 0.
3976 assert(OpVT.isInteger());
3977 Cond = ISD::getSetCCInverse(Operation: Cond, Type: OpVT);
3978 if (DCI.isBeforeLegalizeOps() ||
3979 isCondCodeLegal(CC: Cond, VT: N0.getSimpleValueType()))
3980 return DAG.getSetCC(DL, VT, LHS: N0, RHS: Zero, Cond);
3981 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
3982 // If the target supports an 'and-not' or 'and-complement' logic operation,
3983 // try to use that to make a comparison operation more efficient.
3984 // But don't do this transform if the mask is a single bit because there are
3985 // more efficient ways to deal with that case (for example, 'bt' on x86 or
3986 // 'rlwinm' on PPC).
3987
3988 // Bail out if the compare operand that we want to turn into a zero is
3989 // already a zero (otherwise, infinite loop).
3990 if (isNullConstant(V: Y))
3991 return SDValue();
3992
3993 // Transform this into: ~X & Y == 0.
3994 SDValue NotX = DAG.getNOT(DL: SDLoc(X), Val: X, VT: OpVT);
3995 SDValue NewAnd = DAG.getNode(Opcode: ISD::AND, DL: SDLoc(N0), VT: OpVT, N1: NotX, N2: Y);
3996 return DAG.getSetCC(DL, VT, LHS: NewAnd, RHS: Zero, Cond);
3997 }
3998
3999 return SDValue();
4000}
4001
4002/// There are multiple IR patterns that could be checking whether certain
4003 /// truncation of a signed number would be lossy or not. The pattern that is
4004 /// best at the IR level may not lower optimally. Thus, we want to unfold it.
4005/// We are looking for the following pattern: (KeptBits is a constant)
4006/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4007 /// KeptBits won't be bitwidth(x); that would be constant-folded to true/false.
4008 /// KeptBits also can't be 1; that would have been folded to %x dstcond 0.
4009/// We will unfold it into the natural trunc+sext pattern:
4010/// ((%x << C) a>> C) dstcond %x
4011/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
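/// e.g. (illustrative, i16 %x with KeptBits == 8):
///   (add %x, 128) u< 256  -->  ((%x << 8) a>> 8) == %x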
4012SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4013 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4014 const SDLoc &DL) const {
4015 // We must be comparing with a constant.
4016 ConstantSDNode *C1;
4017 if (!(C1 = dyn_cast<ConstantSDNode>(Val&: N1)))
4018 return SDValue();
4019
4020 // N0 should be: add %x, (1 << (KeptBits-1))
4021 if (N0->getOpcode() != ISD::ADD)
4022 return SDValue();
4023
4024 // And we must be 'add'ing a constant.
4025 ConstantSDNode *C01;
4026 if (!(C01 = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1))))
4027 return SDValue();
4028
4029 SDValue X = N0->getOperand(Num: 0);
4030 EVT XVT = X.getValueType();
4031
4032 // Validate constants ...
4033
4034 APInt I1 = C1->getAPIntValue();
4035
4036 ISD::CondCode NewCond;
4037 if (Cond == ISD::CondCode::SETULT) {
4038 NewCond = ISD::CondCode::SETEQ;
4039 } else if (Cond == ISD::CondCode::SETULE) {
4040 NewCond = ISD::CondCode::SETEQ;
4041 // But need to 'canonicalize' the constant.
4042 I1 += 1;
4043 } else if (Cond == ISD::CondCode::SETUGT) {
4044 NewCond = ISD::CondCode::SETNE;
4045 // But need to 'canonicalize' the constant.
4046 I1 += 1;
4047 } else if (Cond == ISD::CondCode::SETUGE) {
4048 NewCond = ISD::CondCode::SETNE;
4049 } else
4050 return SDValue();
4051
4052 APInt I01 = C01->getAPIntValue();
4053
4054 auto checkConstants = [&I1, &I01]() -> bool {
4055 // Both of them must be powers of two, and the constant from the setcc must be bigger.
4056 return I1.ugt(RHS: I01) && I1.isPowerOf2() && I01.isPowerOf2();
4057 };
4058
4059 if (checkConstants()) {
4060 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4061 } else {
4062 // What if we invert constants? (and the target predicate)
4063 I1.negate();
4064 I01.negate();
4065 assert(XVT.isInteger());
4066 NewCond = getSetCCInverse(Operation: NewCond, Type: XVT);
4067 if (!checkConstants())
4068 return SDValue();
4069 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4070 }
4071
4072 // They are powers of two, so which bit is set?
4073 const unsigned KeptBits = I1.logBase2();
4074 const unsigned KeptBitsMinusOne = I01.logBase2();
4075
4076 // Magic!
4077 if (KeptBits != (KeptBitsMinusOne + 1))
4078 return SDValue();
4079 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4080
4081 // We don't want to do this in every single case.
4082 SelectionDAG &DAG = DCI.DAG;
4083 if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
4084 XVT, KeptBits))
4085 return SDValue();
4086
4087 const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
4088 assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
4089
4090 // Unfold into: ((%x << C) a>> C) cond %x
4091 // Where 'cond' will be either 'eq' or 'ne'.
4092 SDValue ShiftAmt = DAG.getConstant(Val: MaskedBits, DL, VT: XVT);
4093 SDValue T0 = DAG.getNode(Opcode: ISD::SHL, DL, VT: XVT, N1: X, N2: ShiftAmt);
4094 SDValue T1 = DAG.getNode(Opcode: ISD::SRA, DL, VT: XVT, N1: T0, N2: ShiftAmt);
4095 SDValue T2 = DAG.getSetCC(DL, VT: SCCVT, LHS: T1, RHS: X, Cond: NewCond);
4096
4097 return T2;
4098}
4099
4100// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
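// e.g. (illustrative): (X & (0x80 l>> Y)) != 0 --> ((X << Y) & 0x80) != 0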
4101SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4102 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4103 DAGCombinerInfo &DCI, const SDLoc &DL) const {
4104 assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() &&
4105 "Should be a comparison with 0.");
4106 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4107 "Valid only for [in]equality comparisons.");
4108
4109 unsigned NewShiftOpcode;
4110 SDValue X, C, Y;
4111
4112 SelectionDAG &DAG = DCI.DAG;
4113 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4114
4115 // Look for '(C l>>/<< Y)'.
4116 auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
4117 // The shift should be one-use.
4118 if (!V.hasOneUse())
4119 return false;
4120 unsigned OldShiftOpcode = V.getOpcode();
4121 switch (OldShiftOpcode) {
4122 case ISD::SHL:
4123 NewShiftOpcode = ISD::SRL;
4124 break;
4125 case ISD::SRL:
4126 NewShiftOpcode = ISD::SHL;
4127 break;
4128 default:
4129 return false; // must be a logical shift.
4130 }
4131 // We should be shifting a constant.
4132 // FIXME: best to use isConstantOrConstantVector().
4133 C = V.getOperand(i: 0);
4134 ConstantSDNode *CC =
4135 isConstOrConstSplat(N: C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4136 if (!CC)
4137 return false;
4138 Y = V.getOperand(i: 1);
4139
4140 ConstantSDNode *XC =
4141 isConstOrConstSplat(N: X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4142 return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
4143 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4144 };
4145
4146 // LHS of comparison should be a one-use 'and'.
4147 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4148 return SDValue();
4149
4150 X = N0.getOperand(i: 0);
4151 SDValue Mask = N0.getOperand(i: 1);
4152
4153 // 'and' is commutative!
4154 if (!Match(Mask)) {
4155 std::swap(a&: X, b&: Mask);
4156 if (!Match(Mask))
4157 return SDValue();
4158 }
4159
4160 EVT VT = X.getValueType();
4161
4162 // Produce:
4163 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
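// e.g. (X & (1 << Y)) != 0 --> ((X >> Y) & 1) != 0. Hoisting the constant
// out of the shift lets targets test a single known bit (often with a
// test-bit style instruction) instead of materializing the variable mask.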
4164 SDValue T0 = DAG.getNode(Opcode: NewShiftOpcode, DL, VT, N1: X, N2: Y);
4165 SDValue T1 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: T0, N2: C);
4166 SDValue T2 = DAG.getSetCC(DL, VT: SCCVT, LHS: T1, RHS: N1C, Cond);
4167 return T2;
4168}
4169
4170/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4171/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4172/// handle the commuted versions of these patterns.
4173SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4174 ISD::CondCode Cond, const SDLoc &DL,
4175 DAGCombinerInfo &DCI) const {
4176 unsigned BOpcode = N0.getOpcode();
4177 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4178 "Unexpected binop");
4179 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4180
4181 // (X + Y) == X --> Y == 0
4182 // (X - Y) == X --> Y == 0
4183 // (X ^ Y) == X --> Y == 0
4184 SelectionDAG &DAG = DCI.DAG;
4185 EVT OpVT = N0.getValueType();
4186 SDValue X = N0.getOperand(i: 0);
4187 SDValue Y = N0.getOperand(i: 1);
4188 if (X == N1)
4189 return DAG.getSetCC(DL, VT, LHS: Y, RHS: DAG.getConstant(Val: 0, DL, VT: OpVT), Cond);
4190
4191 if (Y != N1)
4192 return SDValue();
4193
4194 // (X + Y) == Y --> X == 0
4195 // (X ^ Y) == Y --> X == 0
4196 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4197 return DAG.getSetCC(DL, VT, LHS: X, RHS: DAG.getConstant(Val: 0, DL, VT: OpVT), Cond);
4198
4199 // The shift would not be valid if the operands are boolean (i1).
4200 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4201 return SDValue();
4202
4203 // (X - Y) == Y --> X == Y << 1
4204 SDValue One =
4205 DAG.getShiftAmountConstant(Val: 1, VT: OpVT, DL, LegalTypes: !DCI.isBeforeLegalize());
4206 SDValue YShl1 = DAG.getNode(Opcode: ISD::SHL, DL, VT: N1.getValueType(), N1: Y, N2: One);
4207 if (!DCI.isCalledByLegalizer())
4208 DCI.AddToWorklist(N: YShl1.getNode());
4209 return DAG.getSetCC(DL, VT, LHS: X, RHS: YShl1, Cond);
4210}
4211
4212static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
4213 SDValue N0, const APInt &C1,
4214 ISD::CondCode Cond, const SDLoc &dl,
4215 SelectionDAG &DAG) {
4216 // Look through truncs that don't change the value of a ctpop.
4217 // FIXME: Add vector support? Need to be careful with setcc result type below.
4218 SDValue CTPOP = N0;
4219 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4220 N0.getScalarValueSizeInBits() > Log2_32(Value: N0.getOperand(i: 0).getScalarValueSizeInBits()))
4221 CTPOP = N0.getOperand(i: 0);
4222
4223 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4224 return SDValue();
4225
4226 EVT CTVT = CTPOP.getValueType();
4227 SDValue CTOp = CTPOP.getOperand(i: 0);
4228
4229 // Expand a power-of-2-or-zero comparison based on ctpop:
4230 // (ctpop x) u< 2 -> (x & x-1) == 0
4231 // (ctpop x) u> 1 -> (x & x-1) != 0
4232 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4233 // Keep the CTPOP if it is a cheap vector op.
4234 if (CTVT.isVector() && TLI.isCtpopFast(VT: CTVT))
4235 return SDValue();
4236
4237 unsigned CostLimit = TLI.getCustomCtpopCost(VT: CTVT, Cond);
4238 if (C1.ugt(RHS: CostLimit + (Cond == ISD::SETULT)))
4239 return SDValue();
4240 if (C1 == 0 && (Cond == ISD::SETULT))
4241 return SDValue(); // This is handled elsewhere.
4242
4243 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4244
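// Each 'Result & (Result - 1)' pass clears the lowest set bit that remains,
// so after 'Passes' iterations the result is zero iff the population count
// of CTOp was at most 'Passes'.
// e.g. (ctpop x) u< 3 --> (x & (x - 1) & ((x & (x - 1)) - 1)) == 0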
4245 SDValue NegOne = DAG.getAllOnesConstant(DL: dl, VT: CTVT);
4246 SDValue Result = CTOp;
4247 for (unsigned i = 0; i < Passes; i++) {
4248 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: CTVT, N1: Result, N2: NegOne);
4249 Result = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: CTVT, N1: Result, N2: Add);
4250 }
4251 ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
4252 return DAG.getSetCC(DL: dl, VT, LHS: Result, RHS: DAG.getConstant(Val: 0, DL: dl, VT: CTVT), Cond: CC);
4253 }
4254
4255 // Expand a power-of-2 comparison based on ctpop
4256 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4257 // Keep the CTPOP if it is cheap.
4258 if (TLI.isCtpopFast(VT: CTVT))
4259 return SDValue();
4260
4261 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: CTVT);
4262 SDValue NegOne = DAG.getAllOnesConstant(DL: dl, VT: CTVT);
4263 assert(CTVT.isInteger());
4264 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: CTVT, N1: CTOp, N2: NegOne);
4265
4266 // It's not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4267 // check before emitting a potentially unnecessary op.
4268 if (DAG.isKnownNeverZero(Op: CTOp)) {
4269 // (ctpop x) == 1 --> (x & x-1) == 0
4270 // (ctpop x) != 1 --> (x & x-1) != 0
4271 SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: CTVT, N1: CTOp, N2: Add);
4272 SDValue RHS = DAG.getSetCC(DL: dl, VT, LHS: And, RHS: Zero, Cond);
4273 return RHS;
4274 }
4275
4276 // (ctpop x) == 1 --> (x ^ x-1) > x-1
4277 // (ctpop x) != 1 --> (x ^ x-1) <= x-1
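// Why this works: if x == 2^k, then x-1 flips bit k and everything below it,
// so x ^ (x-1) sets bits [0, k] and strictly exceeds x-1. If x == 0, both
// sides are all-ones and the strict u> fails. If x has two or more bits set,
// the xor only covers bits up to and including the lowest set bit, while
// x-1 still keeps a higher bit set, so the u> fails as well.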
4278 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: CTVT, N1: CTOp, N2: Add);
4279 ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
4280 return DAG.getSetCC(DL: dl, VT, LHS: Xor, RHS: Add, Cond: CmpCond);
4281 }
4282
4283 return SDValue();
4284}
4285
4286static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4287 ISD::CondCode Cond, const SDLoc &dl,
4288 SelectionDAG &DAG) {
4289 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4290 return SDValue();
4291
4292 auto *C1 = isConstOrConstSplat(N: N1, /* AllowUndefs */ true);
4293 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4294 return SDValue();
4295
4296 auto getRotateSource = [](SDValue X) {
4297 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4298 return X.getOperand(i: 0);
4299 return SDValue();
4300 };
4301
4302 // Peek through a rotated value compared against 0 or -1:
4303 // (rot X, Y) == 0/-1 --> X == 0/-1
4304 // (rot X, Y) != 0/-1 --> X != 0/-1
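// A rotate only permutes the bits of its input, so the all-zeros and
// all-ones patterns are invariant under it.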
4305 if (SDValue R = getRotateSource(N0))
4306 return DAG.getSetCC(DL: dl, VT, LHS: R, RHS: N1, Cond);
4307
4308 // Peek through an 'or' of a rotated value compared against 0:
4309 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4310 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4311 //
4312 // TODO: Add the 'and' with -1 sibling.
4313 // TODO: Recurse through a series of 'or' ops to find the rotate.
4314 EVT OpVT = N0.getValueType();
4315 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4316 if (SDValue R = getRotateSource(N0.getOperand(i: 0))) {
4317 SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: R, N2: N0.getOperand(i: 1));
4318 return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4319 }
4320 if (SDValue R = getRotateSource(N0.getOperand(i: 1))) {
4321 SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: R, N2: N0.getOperand(i: 0));
4322 return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4323 }
4324 }
4325
4326 return SDValue();
4327}
4328
4329static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
4330 ISD::CondCode Cond, const SDLoc &dl,
4331 SelectionDAG &DAG) {
4332 // If we are testing for all-bits-clear, we might be able to do that with
4333 // less shifting since bit-order does not matter.
4334 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4335 return SDValue();
4336
4337 auto *C1 = isConstOrConstSplat(N: N1, /* AllowUndefs */ true);
4338 if (!C1 || !C1->isZero())
4339 return SDValue();
4340
4341 if (!N0.hasOneUse() ||
4342 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4343 return SDValue();
4344
4345 unsigned BitWidth = N0.getScalarValueSizeInBits();
4346 auto *ShAmtC = isConstOrConstSplat(N: N0.getOperand(i: 2));
4347 if (!ShAmtC || ShAmtC->getAPIntValue().uge(RHS: BitWidth))
4348 return SDValue();
4349
4350 // Canonicalize fshr as fshl to reduce pattern-matching.
4351 unsigned ShAmt = ShAmtC->getZExtValue();
4352 if (N0.getOpcode() == ISD::FSHR)
4353 ShAmt = BitWidth - ShAmt;
4354
4355 // Match an 'or' with a specific operand 'Other' in either commuted variant.
4356 SDValue X, Y;
4357 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4358 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4359 return false;
4360 if (Or.getOperand(i: 0) == Other) {
4361 X = Or.getOperand(i: 0);
4362 Y = Or.getOperand(i: 1);
4363 return true;
4364 }
4365 if (Or.getOperand(i: 1) == Other) {
4366 X = Or.getOperand(i: 1);
4367 Y = Or.getOperand(i: 0);
4368 return true;
4369 }
4370 return false;
4371 };
4372
4373 EVT OpVT = N0.getValueType();
4374 EVT ShAmtVT = N0.getOperand(i: 2).getValueType();
4375 SDValue F0 = N0.getOperand(i: 0);
4376 SDValue F1 = N0.getOperand(i: 1);
4377 if (matchOr(F0, F1)) {
4378 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
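// fshl(X|Y, X, C) is ((X|Y) << C) | (X >> (BW-C)); that is zero iff the low
// BW-C bits of both X and Y and the high C bits of X are all zero, i.e. iff
// X == 0 and (Y << C) == 0, which is exactly ((Y << C) | X) == 0.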
4379 SDValue NewShAmt = DAG.getConstant(Val: ShAmt, DL: dl, VT: ShAmtVT);
4380 SDValue Shift = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: OpVT, N1: Y, N2: NewShAmt);
4381 SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: Shift, N2: X);
4382 return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4383 }
4384 if (matchOr(F1, F0)) {
4385 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4386 SDValue NewShAmt = DAG.getConstant(Val: BitWidth - ShAmt, DL: dl, VT: ShAmtVT);
4387 SDValue Shift = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: OpVT, N1: Y, N2: NewShAmt);
4388 SDValue NewOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: Shift, N2: X);
4389 return DAG.getSetCC(DL: dl, VT, LHS: NewOr, RHS: N1, Cond);
4390 }
4391
4392 return SDValue();
4393}
4394
4395/// Try to simplify a setcc built with the specified operands and cc. If it is
4396/// unable to simplify it, return a null SDValue.
4397SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4398 ISD::CondCode Cond, bool foldBooleans,
4399 DAGCombinerInfo &DCI,
4400 const SDLoc &dl) const {
4401 SelectionDAG &DAG = DCI.DAG;
4402 const DataLayout &Layout = DAG.getDataLayout();
4403 EVT OpVT = N0.getValueType();
4404 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4405
4406 // Constant fold or commute setcc.
4407 if (SDValue Fold = DAG.FoldSetCC(VT, N1: N0, N2: N1, Cond, dl))
4408 return Fold;
4409
4410 bool N0ConstOrSplat =
4411 isConstOrConstSplat(N: N0, /*AllowUndefs*/ false, /*AllowTruncation*/ true);
4412 bool N1ConstOrSplat =
4413 isConstOrConstSplat(N: N1, /*AllowUndefs*/ false, /*AllowTruncation*/ true);
4414
4415 // Canonicalize toward having the constant on the RHS.
4416 // TODO: Handle non-splat vector constants. All undef causes trouble.
4417 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4418 // infinite loop here when we encounter one.
4419 ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Operation: Cond);
4420 if (N0ConstOrSplat && !N1ConstOrSplat &&
4421 (DCI.isBeforeLegalizeOps() ||
4422 isCondCodeLegal(CC: SwappedCC, VT: N0.getSimpleValueType())))
4423 return DAG.getSetCC(DL: dl, VT, LHS: N1, RHS: N0, Cond: SwappedCC);
4424
4425 // If we have a subtract with the same 2 non-constant operands as this setcc
4426 // -- but in reverse order -- then try to commute the operands of this setcc
4427 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4428 // instruction on some targets.
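// e.g. on targets where the subtract also sets condition flags, an existing
// (sub %y, %x) can double as the compare for (setcc %y, %x), so prefer the
// operand order that the subtract already uses.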
4429 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4430 (DCI.isBeforeLegalizeOps() ||
4431 isCondCodeLegal(CC: SwappedCC, VT: N0.getSimpleValueType())) &&
4432 DAG.doesNodeExist(Opcode: ISD::SUB, VTList: DAG.getVTList(VT: OpVT), Ops: {N1, N0}) &&
4433 !DAG.doesNodeExist(Opcode: ISD::SUB, VTList: DAG.getVTList(VT: OpVT), Ops: {N0, N1}))
4434 return DAG.getSetCC(DL: dl, VT, LHS: N1, RHS: N0, Cond: SwappedCC);
4435
4436 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4437 return V;
4438
4439 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4440 return V;
4441
4442 if (auto *N1C = isConstOrConstSplat(N: N1)) {
4443 const APInt &C1 = N1C->getAPIntValue();
4444
4445 // Optimize some CTPOP cases.
4446 if (SDValue V = simplifySetCCWithCTPOP(TLI: *this, VT, N0, C1, Cond, dl, DAG))
4447 return V;
4448
4449 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4450 // X * Y == 0 --> (X == 0) || (Y == 0)
4451 // X * Y != 0 --> (X != 0) && (Y != 0)
4452 // TODO: This bails out if minsize is set, but if the target doesn't have a
4453 // single instruction multiply for this type, it would likely be
4454 // smaller to decompose.
4455 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4456 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4457 (N0->getFlags().hasNoUnsignedWrap() ||
4458 N0->getFlags().hasNoSignedWrap()) &&
4459 !Attr.hasFnAttr(Attribute::MinSize)) {
4460 SDValue IsXZero = DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1, Cond);
4461 SDValue IsYZero = DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 1), RHS: N1, Cond);
4462 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4463 return DAG.getNode(Opcode: LogicOp, DL: dl, VT, N1: IsXZero, N2: IsYZero);
4464 }
4465
4466 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4467 // equality comparison, then we're just comparing whether X itself is
4468 // zero.
4469 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4470 N0.getOperand(i: 0).getOpcode() == ISD::CTLZ &&
4471 llvm::has_single_bit<uint32_t>(Value: N0.getScalarValueSizeInBits())) {
4472 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N: N0.getOperand(i: 1))) {
4473 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4474 ShAmt->getAPIntValue() == Log2_32(Value: N0.getScalarValueSizeInBits())) {
4475 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4476 // (srl (ctlz x), 5) == 0 -> X != 0
4477 // (srl (ctlz x), 5) != 1 -> X != 0
4478 Cond = ISD::SETNE;
4479 } else {
4480 // (srl (ctlz x), 5) != 0 -> X == 0
4481 // (srl (ctlz x), 5) == 1 -> X == 0
4482 Cond = ISD::SETEQ;
4483 }
4484 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: N0.getValueType());
4485 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0).getOperand(i: 0), RHS: Zero,
4486 Cond);
4487 }
4488 }
4489 }
4490 }
4491
4492 // FIXME: Support vectors.
4493 if (auto *N1C = dyn_cast<ConstantSDNode>(Val: N1.getNode())) {
4494 const APInt &C1 = N1C->getAPIntValue();
4495
4496 // (zext x) == C --> x == (trunc C)
4497 // (sext x) == C --> x == (trunc C)
4498 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4499 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4500 unsigned MinBits = N0.getValueSizeInBits();
4501 SDValue PreExt;
4502 bool Signed = false;
4503 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4504 // ZExt
4505 MinBits = N0->getOperand(Num: 0).getValueSizeInBits();
4506 PreExt = N0->getOperand(Num: 0);
4507 } else if (N0->getOpcode() == ISD::AND) {
4508 // DAGCombine turns costly ZExts into ANDs
4509 if (auto *C = dyn_cast<ConstantSDNode>(Val: N0->getOperand(Num: 1)))
4510 if ((C->getAPIntValue()+1).isPowerOf2()) {
4511 MinBits = C->getAPIntValue().countr_one();
4512 PreExt = N0->getOperand(Num: 0);
4513 }
4514 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4515 // SExt
4516 MinBits = N0->getOperand(Num: 0).getValueSizeInBits();
4517 PreExt = N0->getOperand(Num: 0);
4518 Signed = true;
4519 } else if (auto *LN0 = dyn_cast<LoadSDNode>(Val&: N0)) {
4520 // ZEXTLOAD / SEXTLOAD
4521 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4522 MinBits = LN0->getMemoryVT().getSizeInBits();
4523 PreExt = N0;
4524 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4525 Signed = true;
4526 MinBits = LN0->getMemoryVT().getSizeInBits();
4527 PreExt = N0;
4528 }
4529 }
4530
4531 // Figure out how many bits we need to preserve this constant.
4532 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4533
4534 // Make sure we're not losing bits from the constant.
4535 if (MinBits > 0 &&
4536 MinBits < C1.getBitWidth() &&
4537 MinBits >= ReqdBits) {
4538 EVT MinVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: MinBits);
4539 if (isTypeDesirableForOp(ISD::SETCC, VT: MinVT)) {
4540 // Will get folded away.
4541 SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MinVT, Operand: PreExt);
4542 if (MinBits == 1 && C1 == 1)
4543 // Invert the condition.
4544 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4545 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4546 SDValue C = DAG.getConstant(Val: C1.trunc(width: MinBits), DL: dl, VT: MinVT);
4547 return DAG.getSetCC(DL: dl, VT, LHS: Trunc, RHS: C, Cond);
4548 }
4549
4550 // If truncating the setcc operands is not desirable, we can still
4551 // simplify the expression in some cases:
4552 // setcc ([sz]ext (setcc x, y, cc)), 0, setne -> setcc (x, y, cc)
4553 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq -> setcc (x, y, inv(cc))
4554 // setcc (zext (setcc x, y, cc)), 1, setne -> setcc (x, y, inv(cc))
4555 // setcc (zext (setcc x, y, cc)), 1, seteq -> setcc (x, y, cc)
4556 // setcc (sext (setcc x, y, cc)), -1, setne -> setcc (x, y, inv(cc))
4557 // setcc (sext (setcc x, y, cc)), -1, seteq -> setcc (x, y, cc)
4558 SDValue TopSetCC = N0->getOperand(Num: 0);
4559 unsigned N0Opc = N0->getOpcode();
4560 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4561 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4562 TopSetCC.getOpcode() == ISD::SETCC &&
4563 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4564 (isConstFalseVal(N1) ||
4565 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4566
4567 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4568 (!N1C->isZero() && Cond == ISD::SETNE);
4569
4570 if (!Inverse)
4571 return TopSetCC;
4572
4573 ISD::CondCode InvCond = ISD::getSetCCInverse(
4574 Operation: cast<CondCodeSDNode>(Val: TopSetCC.getOperand(i: 2))->get(),
4575 Type: TopSetCC.getOperand(i: 0).getValueType());
4576 return DAG.getSetCC(DL: dl, VT, LHS: TopSetCC.getOperand(i: 0),
4577 RHS: TopSetCC.getOperand(i: 1),
4578 Cond: InvCond);
4579 }
4580 }
4581 }
4582
4583 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4584 // equality or unsigned, and all 1 bits of the const are in the same
4585 // partial word, see if we can shorten the load.
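// e.g. on a little-endian target, '(and (load i32 %p), 0xff00) == 0' can
// become '(and (load i8 %p+1), 0xff) == 0', shrinking the word load to the
// one byte that the mask actually inspects.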
4586 if (DCI.isBeforeLegalize() &&
4587 !ISD::isSignedIntSetCC(Code: Cond) &&
4588 N0.getOpcode() == ISD::AND && C1 == 0 &&
4589 N0.getNode()->hasOneUse() &&
4590 isa<LoadSDNode>(Val: N0.getOperand(i: 0)) &&
4591 N0.getOperand(i: 0).getNode()->hasOneUse() &&
4592 isa<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
4593 LoadSDNode *Lod = cast<LoadSDNode>(Val: N0.getOperand(i: 0));
4594 APInt bestMask;
4595 unsigned bestWidth = 0, bestOffset = 0;
4596 if (Lod->isSimple() && Lod->isUnindexed()) {
4597 unsigned origWidth = N0.getValueSizeInBits();
4598 unsigned maskWidth = origWidth;
4599 // We can narrow (e.g.) 16-bit extending loads on a 32-bit target to
4600 // 8 bits, but have to be careful...
4601 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4602 origWidth = Lod->getMemoryVT().getSizeInBits();
4603 const APInt &Mask = N0.getConstantOperandAPInt(i: 1);
4604 for (unsigned width = origWidth / 2; width >= 8; width /= 2) {
4605 APInt newMask = APInt::getLowBitsSet(numBits: maskWidth, loBitsSet: width);
4606 for (unsigned offset = 0; offset < origWidth / width; offset++) {
4607 if (Mask.isSubsetOf(RHS: newMask)) {
4608 if (Layout.isLittleEndian())
4609 bestOffset = (uint64_t)offset * (width/8);
4610 else
4611 bestOffset = (origWidth/width - offset - 1) * (width/8);
4612 bestMask = Mask.lshr(shiftAmt: offset * (width/8) * 8);
4613 bestWidth = width;
4614 break;
4615 }
4616 newMask <<= width;
4617 }
4618 }
4619 }
4620 if (bestWidth) {
4621 EVT newVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: bestWidth);
4622 if (newVT.isRound() &&
4623 shouldReduceLoadWidth(Load: Lod, ExtTy: ISD::NON_EXTLOAD, NewVT: newVT)) {
4624 SDValue Ptr = Lod->getBasePtr();
4625 if (bestOffset != 0)
4626 Ptr = DAG.getMemBasePlusOffset(Base: Ptr, Offset: TypeSize::getFixed(ExactSize: bestOffset),
4627 DL: dl);
4628 SDValue NewLoad =
4629 DAG.getLoad(VT: newVT, dl, Chain: Lod->getChain(), Ptr,
4630 PtrInfo: Lod->getPointerInfo().getWithOffset(O: bestOffset),
4631 Alignment: Lod->getOriginalAlign());
4632 return DAG.getSetCC(DL: dl, VT,
4633 LHS: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: newVT, N1: NewLoad,
4634 N2: DAG.getConstant(Val: bestMask.trunc(width: bestWidth),
4635 DL: dl, VT: newVT)),
4636 RHS: DAG.getConstant(Val: 0LL, DL: dl, VT: newVT), Cond);
4637 }
4638 }
4639 }
4640
4641 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4642 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4643 unsigned InSize = N0.getOperand(i: 0).getValueSizeInBits();
4644
4645 // If the comparison constant has bits in the upper part, the
4646 // zero-extended value could never match.
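// e.g. (zext i8 %x to i32) == 0x100 is always false, since bits 8 and above
// of the zero-extended value are known to be zero.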
4647 if (C1.intersects(RHS: APInt::getHighBitsSet(numBits: C1.getBitWidth(),
4648 hiBitsSet: C1.getBitWidth() - InSize))) {
4649 switch (Cond) {
4650 case ISD::SETUGT:
4651 case ISD::SETUGE:
4652 case ISD::SETEQ:
4653 return DAG.getConstant(Val: 0, DL: dl, VT);
4654 case ISD::SETULT:
4655 case ISD::SETULE:
4656 case ISD::SETNE:
4657 return DAG.getConstant(Val: 1, DL: dl, VT);
4658 case ISD::SETGT:
4659 case ISD::SETGE:
4660 // True if the sign bit of C1 is set.
4661 return DAG.getConstant(Val: C1.isNegative(), DL: dl, VT);
4662 case ISD::SETLT:
4663 case ISD::SETLE:
4664 // True if the sign bit of C1 isn't set.
4665 return DAG.getConstant(Val: C1.isNonNegative(), DL: dl, VT);
4666 default:
4667 break;
4668 }
4669 }
4670
4671 // Otherwise, we can perform the comparison with the low bits.
4672 switch (Cond) {
4673 case ISD::SETEQ:
4674 case ISD::SETNE:
4675 case ISD::SETUGT:
4676 case ISD::SETUGE:
4677 case ISD::SETULT:
4678 case ISD::SETULE: {
4679 EVT newVT = N0.getOperand(i: 0).getValueType();
4680 if (DCI.isBeforeLegalizeOps() ||
4681 (isOperationLegal(Op: ISD::SETCC, VT: newVT) &&
4682 isCondCodeLegal(CC: Cond, VT: newVT.getSimpleVT()))) {
4683 EVT NewSetCCVT = getSetCCResultType(DL: Layout, Context&: *DAG.getContext(), VT: newVT);
4684 SDValue NewConst = DAG.getConstant(Val: C1.trunc(width: InSize), DL: dl, VT: newVT);
4685
4686 SDValue NewSetCC = DAG.getSetCC(DL: dl, VT: NewSetCCVT, LHS: N0.getOperand(i: 0),
4687 RHS: NewConst, Cond);
4688 return DAG.getBoolExtOrTrunc(Op: NewSetCC, SL: dl, VT, OpVT: N0.getValueType());
4689 }
4690 break;
4691 }
4692 default:
4693 break; // TODO: Be more careful with signed comparisons.
4694 }
4695 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4696 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4697 !isSExtCheaperThanZExt(FromTy: cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT(),
4698 ToTy: OpVT)) {
4699 EVT ExtSrcTy = cast<VTSDNode>(Val: N0.getOperand(i: 1))->getVT();
4700 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4701 EVT ExtDstTy = N0.getValueType();
4702 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4703
4704 // If the constant doesn't fit into the number of bits for the source of
4705 // the sign extension, it is impossible for both sides to be equal.
4706 if (C1.getSignificantBits() > ExtSrcTyBits)
4707 return DAG.getBoolConstant(V: Cond == ISD::SETNE, DL: dl, VT, OpVT);
4708
4709 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4710 ExtDstTy != ExtSrcTy && "Unexpected types!");
4711 APInt Imm = APInt::getLowBitsSet(numBits: ExtDstTyBits, loBitsSet: ExtSrcTyBits);
4712 SDValue ZextOp = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ExtDstTy, N1: N0.getOperand(i: 0),
4713 N2: DAG.getConstant(Val: Imm, DL: dl, VT: ExtDstTy));
4714 if (!DCI.isCalledByLegalizer())
4715 DCI.AddToWorklist(N: ZextOp.getNode());
4716 // Otherwise, make this a use of a zext.
4717 return DAG.getSetCC(DL: dl, VT, LHS: ZextOp,
4718 RHS: DAG.getConstant(Val: C1 & Imm, DL: dl, VT: ExtDstTy), Cond);
4719 } else if ((N1C->isZero() || N1C->isOne()) &&
4720 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4721 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
4722 // excluded as they are handled below whilst checking for foldBooleans.
4723 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
4724 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
4725 (N0.getValueType() == MVT::i1 ||
4726 getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
4727 DAG.MaskedValueIsZero(
4728 N0, APInt::getBitsSetFrom(N0.getValueSizeInBits(), 1))) {
4729 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
4730 if (TrueWhenTrue)
4731 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: N0);
4732 // Invert the condition.
4733 if (N0.getOpcode() == ISD::SETCC) {
4734 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N0.getOperand(i: 2))->get();
4735 CC = ISD::getSetCCInverse(Operation: CC, Type: N0.getOperand(i: 0).getValueType());
4736 if (DCI.isBeforeLegalizeOps() ||
4737 isCondCodeLegal(CC, VT: N0.getOperand(i: 0).getSimpleValueType()))
4738 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N0.getOperand(i: 1), Cond: CC);
4739 }
4740 }
4741
4742 if ((N0.getOpcode() == ISD::XOR ||
4743 (N0.getOpcode() == ISD::AND &&
4744 N0.getOperand(i: 0).getOpcode() == ISD::XOR &&
4745 N0.getOperand(i: 1) == N0.getOperand(i: 0).getOperand(i: 1))) &&
4746 isOneConstant(V: N0.getOperand(i: 1))) {
4747 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
4748 // can only do this if the top bits are known zero.
4749 unsigned BitWidth = N0.getValueSizeInBits();
4750 if (DAG.MaskedValueIsZero(Op: N0,
4751 Mask: APInt::getHighBitsSet(numBits: BitWidth,
4752 hiBitsSet: BitWidth-1))) {
4753 // Okay, get the un-inverted input value.
4754 SDValue Val;
4755 if (N0.getOpcode() == ISD::XOR) {
4756 Val = N0.getOperand(i: 0);
4757 } else {
4758 assert(N0.getOpcode() == ISD::AND &&
4759 N0.getOperand(0).getOpcode() == ISD::XOR);
4760 // ((X^1)&1)^1 -> X & 1
4761 Val = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: N0.getValueType(),
4762 N1: N0.getOperand(i: 0).getOperand(i: 0),
4763 N2: N0.getOperand(i: 1));
4764 }
4765
4766 return DAG.getSetCC(DL: dl, VT, LHS: Val, RHS: N1,
4767 Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4768 }
4769 } else if (N1C->isOne()) {
4770 SDValue Op0 = N0;
4771 if (Op0.getOpcode() == ISD::TRUNCATE)
4772 Op0 = Op0.getOperand(i: 0);
4773
4774 if ((Op0.getOpcode() == ISD::XOR) &&
4775 Op0.getOperand(i: 0).getOpcode() == ISD::SETCC &&
4776 Op0.getOperand(i: 1).getOpcode() == ISD::SETCC) {
4777 SDValue XorLHS = Op0.getOperand(i: 0);
4778 SDValue XorRHS = Op0.getOperand(i: 1);
4779 // Ensure that the input setccs return an i1 type or 0/1 value.
4780 if (Op0.getValueType() == MVT::i1 ||
4781 (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
4782 ZeroOrOneBooleanContent &&
4783 getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
4784 ZeroOrOneBooleanContent)) {
4785 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
4786 Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
4787 return DAG.getSetCC(DL: dl, VT, LHS: XorLHS, RHS: XorRHS, Cond);
4788 }
4789 }
4790 if (Op0.getOpcode() == ISD::AND && isOneConstant(V: Op0.getOperand(i: 1))) {
4791 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
4792 if (Op0.getValueType().bitsGT(VT))
4793 Op0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
4794 N1: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Op0.getOperand(i: 0)),
4795 N2: DAG.getConstant(Val: 1, DL: dl, VT));
4796 else if (Op0.getValueType().bitsLT(VT))
4797 Op0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
4798 N1: DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT, Operand: Op0.getOperand(i: 0)),
4799 N2: DAG.getConstant(Val: 1, DL: dl, VT));
4800
4801 return DAG.getSetCC(DL: dl, VT, LHS: Op0,
4802 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Op0.getValueType()),
4803 Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4804 }
4805 if (Op0.getOpcode() == ISD::AssertZext &&
4806 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
4807 return DAG.getSetCC(DL: dl, VT, LHS: Op0,
4808 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Op0.getValueType()),
4809 Cond: Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4810 }
4811 }
4812
4813 // Given:
4814 // icmp eq/ne (urem %x, %y), 0
4815 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
4816 // icmp eq/ne %x, 0
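// A non-zero %x with at most one bit set is a power of two, whose divisors
// are all powers of two; a %y with two or more bits set is not a power of
// two and can never divide it, so the remainder is zero iff %x is zero.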
4817 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
4818 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4819 KnownBits XKnown = DAG.computeKnownBits(Op: N0.getOperand(i: 0));
4820 KnownBits YKnown = DAG.computeKnownBits(Op: N0.getOperand(i: 1));
4821 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
4822 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1, Cond);
4823 }
4824
4825 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
4826 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
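// (ashr X, BW-1) replicates the sign bit across the whole value, so it is
// all-ones exactly when X is negative.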
4827 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4828 N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(Val: N0.getOperand(i: 1)) &&
4829 N0.getConstantOperandAPInt(i: 1) == OpVT.getScalarSizeInBits() - 1 &&
4830 N1C && N1C->isAllOnes()) {
4831 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0),
4832 RHS: DAG.getConstant(Val: 0, DL: dl, VT: OpVT),
4833 Cond: Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
4834 }
4835
4836 if (SDValue V =
4837 optimizeSetCCOfSignedTruncationCheck(SCCVT: VT, N0, N1, Cond, DCI, DL: dl))
4838 return V;
4839 }
4840
4841 // These simplifications apply to splat vectors as well.
4842 // TODO: Handle more splat vector cases.
4843 if (auto *N1C = isConstOrConstSplat(N: N1)) {
4844 const APInt &C1 = N1C->getAPIntValue();
4845
4846 APInt MinVal, MaxVal;
4847 unsigned OperandBitSize = N1C->getValueType(ResNo: 0).getScalarSizeInBits();
4848 if (ISD::isSignedIntSetCC(Code: Cond)) {
4849 MinVal = APInt::getSignedMinValue(numBits: OperandBitSize);
4850 MaxVal = APInt::getSignedMaxValue(numBits: OperandBitSize);
4851 } else {
4852 MinVal = APInt::getMinValue(numBits: OperandBitSize);
4853 MaxVal = APInt::getMaxValue(numBits: OperandBitSize);
4854 }
4855
4856 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
4857 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
4858 // X >= MIN --> true
4859 if (C1 == MinVal)
4860 return DAG.getBoolConstant(V: true, DL: dl, VT, OpVT);
4861
4862 if (!VT.isVector()) { // TODO: Support this for vectors.
4863 // X >= C0 --> X > (C0 - 1)
4864 APInt C = C1 - 1;
4865 ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
4866 if ((DCI.isBeforeLegalizeOps() ||
4867 isCondCodeLegal(CC: NewCC, VT: VT.getSimpleVT())) &&
4868 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4869 isLegalICmpImmediate(C.getSExtValue())))) {
4870 return DAG.getSetCC(DL: dl, VT, LHS: N0,
4871 RHS: DAG.getConstant(Val: C, DL: dl, VT: N1.getValueType()),
4872 Cond: NewCC);
4873 }
4874 }
4875 }
4876
4877 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
4878 // X <= MAX --> true
4879 if (C1 == MaxVal)
4880 return DAG.getBoolConstant(V: true, DL: dl, VT, OpVT);
4881
4882 // X <= C0 --> X < (C0 + 1)
4883 if (!VT.isVector()) { // TODO: Support this for vectors.
4884 APInt C = C1 + 1;
4885 ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
4886 if ((DCI.isBeforeLegalizeOps() ||
4887 isCondCodeLegal(CC: NewCC, VT: VT.getSimpleVT())) &&
4888 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4889 isLegalICmpImmediate(C.getSExtValue())))) {
4890 return DAG.getSetCC(DL: dl, VT, LHS: N0,
4891 RHS: DAG.getConstant(Val: C, DL: dl, VT: N1.getValueType()),
4892 Cond: NewCC);
4893 }
4894 }
4895 }
4896
4897 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
4898 if (C1 == MinVal)
4899 return DAG.getBoolConstant(V: false, DL: dl, VT, OpVT); // X < MIN --> false
4900
4901 // TODO: Support this for vectors after legalize ops.
4902 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4903 // Canonicalize setlt X, Max --> setne X, Max
4904 if (C1 == MaxVal)
4905 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: ISD::SETNE);
4906
4907 // If we have setult X, 1, turn it into seteq X, 0
4908 if (C1 == MinVal+1)
4909 return DAG.getSetCC(DL: dl, VT, LHS: N0,
4910 RHS: DAG.getConstant(Val: MinVal, DL: dl, VT: N0.getValueType()),
4911 Cond: ISD::SETEQ);
4912 }
4913 }
4914
4915 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
4916 if (C1 == MaxVal)
4917 return DAG.getBoolConstant(V: false, DL: dl, VT, OpVT); // X > MAX --> false
4918
4919 // TODO: Support this for vectors after legalize ops.
4920 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4921 // Canonicalize setgt X, Min --> setne X, Min
4922 if (C1 == MinVal)
4923 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: ISD::SETNE);
4924
4925 // If we have setugt X, Max-1, turn it into seteq X, Max
4926 if (C1 == MaxVal-1)
4927 return DAG.getSetCC(DL: dl, VT, LHS: N0,
4928 RHS: DAG.getConstant(Val: MaxVal, DL: dl, VT: N0.getValueType()),
4929 Cond: ISD::SETEQ);
4930 }
4931 }
4932
4933 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
4934 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
4935 if (C1.isZero())
4936 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
4937 SCCVT: VT, N0, N1C: N1, Cond, DCI, DL: dl))
4938 return CC;
4939
4940 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
4941 // For example, when high 32-bits of i64 X are known clear:
4942 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
4943 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
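// With the high half of X known zero, (X | (Y<<32)) is the concatenation
// Y:X, so the test can be performed on the unshifted operands directly,
// saving the shift.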
4944 bool CmpZero = N1C->isZero();
4945 bool CmpNegOne = N1C->isAllOnes();
4946 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
4947 // Match or(lo,shl(hi,bw/2)) pattern.
4948 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
4949 unsigned EltBits = V.getScalarValueSizeInBits();
4950 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
4951 return false;
4952 SDValue LHS = V.getOperand(i: 0);
4953 SDValue RHS = V.getOperand(i: 1);
4954 APInt HiBits = APInt::getHighBitsSet(numBits: EltBits, hiBitsSet: EltBits / 2);
4955 // Unshifted element must have zero upper bits.
4956 if (RHS.getOpcode() == ISD::SHL &&
4957 isa<ConstantSDNode>(Val: RHS.getOperand(i: 1)) &&
4958 RHS.getConstantOperandAPInt(i: 1) == (EltBits / 2) &&
4959 DAG.MaskedValueIsZero(Op: LHS, Mask: HiBits)) {
4960 Lo = LHS;
4961 Hi = RHS.getOperand(i: 0);
4962 return true;
4963 }
4964 if (LHS.getOpcode() == ISD::SHL &&
4965 isa<ConstantSDNode>(Val: LHS.getOperand(i: 1)) &&
4966 LHS.getConstantOperandAPInt(i: 1) == (EltBits / 2) &&
4967 DAG.MaskedValueIsZero(Op: RHS, Mask: HiBits)) {
4968 Lo = RHS;
4969 Hi = LHS.getOperand(i: 0);
4970 return true;
4971 }
4972 return false;
4973 };
4974
4975 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
4976 unsigned EltBits = N0.getScalarValueSizeInBits();
4977 unsigned HalfBits = EltBits / 2;
4978 APInt HiBits = APInt::getHighBitsSet(numBits: EltBits, hiBitsSet: HalfBits);
4979 SDValue LoBits = DAG.getConstant(Val: ~HiBits, DL: dl, VT: OpVT);
4980 SDValue HiMask = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1: Hi, N2: LoBits);
4981 SDValue NewN0 =
4982 DAG.getNode(Opcode: CmpZero ? ISD::OR : ISD::AND, DL: dl, VT: OpVT, N1: Lo, N2: HiMask);
4983 SDValue NewN1 = CmpZero ? DAG.getConstant(Val: 0, DL: dl, VT: OpVT) : LoBits;
4984 return DAG.getSetCC(DL: dl, VT, LHS: NewN0, RHS: NewN1, Cond);
4985 };
4986
4987 SDValue Lo, Hi;
4988 if (IsConcat(N0, Lo, Hi))
4989 return MergeConcat(Lo, Hi);
4990
4991 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
4992 SDValue Lo0, Lo1, Hi0, Hi1;
4993 if (IsConcat(N0.getOperand(i: 0), Lo0, Hi0) &&
4994 IsConcat(N0.getOperand(i: 1), Lo1, Hi1)) {
4995 return MergeConcat(DAG.getNode(Opcode: N0.getOpcode(), DL: dl, VT: OpVT, N1: Lo0, N2: Lo1),
4996 DAG.getNode(Opcode: N0.getOpcode(), DL: dl, VT: OpVT, N1: Hi0, N2: Hi1));
4997 }
4998 }
4999 }
5000 }
5001
5002 // If we have "setcc X, C0", check to see if we can shrink the immediate
5003 // by changing cc.
5004 // TODO: Support this for vectors after legalize ops.
5005 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5006 // SETUGT X, SINTMAX -> SETLT X, 0
5007 // SETUGE X, SINTMIN -> SETLT X, 0
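// An unsigned value exceeds SINTMAX (equivalently, reaches SINTMIN) exactly
// when its sign bit is set, i.e. when it is negative as a signed value.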
5008 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5009 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5010 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5011 RHS: DAG.getConstant(Val: 0, DL: dl, VT: N1.getValueType()),
5012 Cond: ISD::SETLT);
5013
5014 // SETULT X, SINTMIN -> SETGT X, -1
5015 // SETULE X, SINTMAX -> SETGT X, -1
5016 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5017 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5018 return DAG.getSetCC(DL: dl, VT, LHS: N0,
5019 RHS: DAG.getAllOnesConstant(DL: dl, VT: N1.getValueType()),
5020 Cond: ISD::SETGT);
5021 }
5022 }
5023
5024 // Back to non-vector simplifications.
5025 // TODO: Can we do these for vector splats?
5026 if (auto *N1C = dyn_cast<ConstantSDNode>(Val: N1.getNode())) {
5027 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5028 const APInt &C1 = N1C->getAPIntValue();
5029 EVT ShValTy = N0.getValueType();
5030
5031 // Fold bit comparisons when we can. This will result in an
5032 // incorrect value when boolean false is negative one, unless
5033 // the bitsize is 1 in which case the false value is the same
5034 // in practice regardless of the representation.
5035 if ((VT.getSizeInBits() == 1 ||
5036 getBooleanContents(Type: N0.getValueType()) == ZeroOrOneBooleanContent) &&
5037 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5038 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(VT: ShValTy))) &&
5039 N0.getOpcode() == ISD::AND) {
5040 if (auto *AndRHS = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
5041 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5042 // Perform the xform if the AND RHS is a single bit.
5043 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5044 if (AndRHS->getAPIntValue().isPowerOf2() &&
5045 !TLI.shouldAvoidTransformToShift(VT: ShValTy, Amount: ShCt)) {
5046 return DAG.getNode(
5047 Opcode: ISD::TRUNCATE, DL: dl, VT,
5048 Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5049 N2: DAG.getShiftAmountConstant(
5050 Val: ShCt, VT: ShValTy, DL: dl, LegalTypes: !DCI.isBeforeLegalize())));
5051 }
5052 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5053 // (X & 8) == 8 --> (X & 8) >> 3
5054 // Perform the xform if C1 is a single bit.
5055 unsigned ShCt = C1.logBase2();
5056 if (C1.isPowerOf2() &&
5057 !TLI.shouldAvoidTransformToShift(VT: ShValTy, Amount: ShCt)) {
5058 return DAG.getNode(
5059 Opcode: ISD::TRUNCATE, DL: dl, VT,
5060 Operand: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5061 N2: DAG.getShiftAmountConstant(
5062 Val: ShCt, VT: ShValTy, DL: dl, LegalTypes: !DCI.isBeforeLegalize())));
5063 }
5064 }
5065 }
5066 }
5067
5068 if (C1.getSignificantBits() <= 64 &&
5069 !isLegalICmpImmediate(C1.getSExtValue())) {
5070 // (X & -256) == 256 -> (X >> 8) == 1
5071 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5072 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5073 if (auto *AndRHS = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
5074 const APInt &AndRHSC = AndRHS->getAPIntValue();
5075 if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) {
5076 unsigned ShiftBits = AndRHSC.countr_zero();
5077 if (!TLI.shouldAvoidTransformToShift(VT: ShValTy, Amount: ShiftBits)) {
5078 SDValue Shift = DAG.getNode(
5079 Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0.getOperand(i: 0),
5080 N2: DAG.getShiftAmountConstant(Val: ShiftBits, VT: ShValTy, DL: dl,
5081 LegalTypes: !DCI.isBeforeLegalize()));
5082 SDValue CmpRHS = DAG.getConstant(Val: C1.lshr(shiftAmt: ShiftBits), DL: dl, VT: ShValTy);
5083 return DAG.getSetCC(DL: dl, VT, LHS: Shift, RHS: CmpRHS, Cond);
5084 }
5085 }
5086 }
5087 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5088 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5089 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5090 // X < 0x100000000 -> (X >> 32) < 1
5091 // X >= 0x100000000 -> (X >> 32) >= 1
5092 // X <= 0x0ffffffff -> (X >> 32) < 1
5093 // X > 0x0ffffffff -> (X >> 32) >= 1
5094 unsigned ShiftBits;
5095 APInt NewC = C1;
5096 ISD::CondCode NewCond = Cond;
5097 if (AdjOne) {
5098 ShiftBits = C1.countr_one();
5099 NewC = NewC + 1;
5100 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5101 } else {
5102 ShiftBits = C1.countr_zero();
5103 }
5104 NewC.lshrInPlace(ShiftAmt: ShiftBits);
5105 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5106 isLegalICmpImmediate(NewC.getSExtValue()) &&
5107 !TLI.shouldAvoidTransformToShift(VT: ShValTy, Amount: ShiftBits)) {
5108 SDValue Shift =
5109 DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ShValTy, N1: N0,
5110 N2: DAG.getShiftAmountConstant(Val: ShiftBits, VT: ShValTy, DL: dl,
5111 LegalTypes: !DCI.isBeforeLegalize()));
5112 SDValue CmpRHS = DAG.getConstant(Val: NewC, DL: dl, VT: ShValTy);
5113 return DAG.getSetCC(DL: dl, VT, LHS: Shift, RHS: CmpRHS, Cond: NewCond);
5114 }
5115 }
5116 }
5117 }
5118
5119 if (!isa<ConstantFPSDNode>(Val: N0) && isa<ConstantFPSDNode>(Val: N1)) {
5120 auto *CFP = cast<ConstantFPSDNode>(Val&: N1);
5121 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5122
5123 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5124 // constant if knowing that the operand is non-NaN is enough. We prefer to
5125 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5126 // materialize 0.0.
5127 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5128 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N0, Cond);
5129
5130 // setcc (fneg x), C -> setcc swap(pred) x, -C
5131 if (N0.getOpcode() == ISD::FNEG) {
5132 ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Operation: Cond);
5133 if (DCI.isBeforeLegalizeOps() ||
5134 isCondCodeLegal(CC: SwapCond, VT: N0.getSimpleValueType())) {
5135 SDValue NegN1 = DAG.getNode(Opcode: ISD::FNEG, DL: dl, VT: N0.getValueType(), Operand: N1);
5136 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: NegN1, Cond: SwapCond);
5137 }
5138 }
5139
5140 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5141 if (isOperationLegalOrCustom(Op: ISD::IS_FPCLASS, VT: N0.getValueType()) &&
5142 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(ResNo: 0))) {
5143 bool IsFabs = N0.getOpcode() == ISD::FABS;
5144 SDValue Op = IsFabs ? N0.getOperand(i: 0) : N0;
5145 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5146 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5147 : (IsFabs ? fcInf : fcPosInf);
5148 if (Cond == ISD::SETUEQ)
5149 Flag |= fcNan;
5150 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5151 DAG.getTargetConstant(Flag, dl, MVT::i32));
5152 }
5153 }
5154
5155 // If the condition is not legal, see if we can find an equivalent one
5156 // which is legal.
5157 if (!isCondCodeLegal(CC: Cond, VT: N0.getSimpleValueType())) {
5158 // If the comparison was an awkward floating-point == or != and one of
5159 // the comparison operands is infinity or negative infinity, convert the
5160 // condition to a less-awkward <= or >=.
5161 if (CFP->getValueAPF().isInfinity()) {
5162 bool IsNegInf = CFP->getValueAPF().isNegative();
5163 ISD::CondCode NewCond = ISD::SETCC_INVALID;
5164 switch (Cond) {
5165 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5166 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5167 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5168 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5169 default: break;
5170 }
5171 if (NewCond != ISD::SETCC_INVALID &&
5172 isCondCodeLegal(CC: NewCond, VT: N0.getSimpleValueType()))
5173 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: NewCond);
5174 }
5175 }
5176 }
5177
5178 if (N0 == N1) {
5179 // The sext(setcc()) => setcc() optimization relies on the appropriate
5180 // constant being emitted.
5181 assert(!N0.getValueType().isInteger() &&
5182 "Integer types should be handled by FoldSetCC");
5183
5184 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5185 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5186 if (UOF == 2) // FP operators that are undefined on NaNs.
5187 return DAG.getBoolConstant(V: EqTrue, DL: dl, VT, OpVT);
5188 if (UOF == unsigned(EqTrue))
5189 return DAG.getBoolConstant(V: EqTrue, DL: dl, VT, OpVT);
5190 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5191 // if it is not already.
5192 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5193 if (NewCond != Cond &&
5194 (DCI.isBeforeLegalizeOps() ||
5195 isCondCodeLegal(CC: NewCond, VT: N0.getSimpleValueType())))
5196 return DAG.getSetCC(DL: dl, VT, LHS: N0, RHS: N1, Cond: NewCond);
5197 }
5198
5199 // ~X > ~Y --> Y > X
5200 // ~X < ~Y --> Y < X
5201 // ~X < C --> X > ~C
5202 // ~X > C --> X < ~C
5203 if ((isSignedIntSetCC(Code: Cond) || isUnsignedIntSetCC(Code: Cond)) &&
5204 N0.getValueType().isInteger()) {
5205 if (isBitwiseNot(V: N0)) {
5206 if (isBitwiseNot(V: N1))
5207 return DAG.getSetCC(DL: dl, VT, LHS: N1.getOperand(i: 0), RHS: N0.getOperand(i: 0), Cond);
5208
5209 if (DAG.isConstantIntBuildVectorOrConstantInt(N: N1) &&
5210 !DAG.isConstantIntBuildVectorOrConstantInt(N: N0.getOperand(i: 0))) {
5211 SDValue Not = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5212 return DAG.getSetCC(DL: dl, VT, LHS: Not, RHS: N0.getOperand(i: 0), Cond);
5213 }
5214 }
5215 }
5216
5217 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5218 N0.getValueType().isInteger()) {
5219 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5220 N0.getOpcode() == ISD::XOR) {
5221 // Simplify (X+Y) == (X+Z) --> Y == Z
5222 if (N0.getOpcode() == N1.getOpcode()) {
5223 if (N0.getOperand(i: 0) == N1.getOperand(i: 0))
5224 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 1), RHS: N1.getOperand(i: 1), Cond);
5225 if (N0.getOperand(i: 1) == N1.getOperand(i: 1))
5226 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1.getOperand(i: 0), Cond);
5227 if (isCommutativeBinOp(Opcode: N0.getOpcode())) {
5228 // If X op Y == Y op X, try other combinations.
5229 if (N0.getOperand(i: 0) == N1.getOperand(i: 1))
5230 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 1), RHS: N1.getOperand(i: 0),
5231 Cond);
5232 if (N0.getOperand(i: 1) == N1.getOperand(i: 0))
5233 return DAG.getSetCC(DL: dl, VT, LHS: N0.getOperand(i: 0), RHS: N1.getOperand(i: 1),
5234 Cond);
5235 }
5236 }
5237
5238 // If RHS is a legal immediate value for a compare instruction, we need
5239 // to be careful about increasing register pressure needlessly.
5240 bool LegalRHSImm = false;
5241
5242 if (auto *RHSC = dyn_cast<ConstantSDNode>(Val&: N1)) {
5243 if (auto *LHSR = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1))) {
5244 // Turn (X+C1) == C2 --> X == C2-C1
5245 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5246 return DAG.getSetCC(
5247 DL: dl, VT, LHS: N0.getOperand(i: 0),
5248 RHS: DAG.getConstant(Val: RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5249 DL: dl, VT: N0.getValueType()),
5250 Cond);
5251
5252 // Turn (X^C1) == C2 --> X == C1^C2
5253 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5254 return DAG.getSetCC(
5255 DL: dl, VT, LHS: N0.getOperand(i: 0),
5256 RHS: DAG.getConstant(Val: LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5257 DL: dl, VT: N0.getValueType()),
5258 Cond);
5259 }
5260
5261 // Turn (C1-X) == C2 --> X == C1-C2
5262 if (auto *SUBC = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 0)))
5263 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5264 return DAG.getSetCC(
5265 DL: dl, VT, LHS: N0.getOperand(i: 1),
5266 RHS: DAG.getConstant(Val: SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5267 DL: dl, VT: N0.getValueType()),
5268 Cond);
5269
5270 // Could RHSC fold directly into a compare?
5271 if (RHSC->getValueType(ResNo: 0).getSizeInBits() <= 64)
5272 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5273 }
5274
5275 // (X+Y) == X --> Y == 0 and similar folds.
5276 // Don't do this if X is an immediate that can fold into a cmp
5277 // instruction and X+Y has other uses. It could be an induction variable
5278 // chain, and the transform would increase register pressure.
5279 if (!LegalRHSImm || N0.hasOneUse())
5280 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, DL: dl, DCI))
5281 return V;
5282 }
5283
5284 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5285 N1.getOpcode() == ISD::XOR)
5286 if (SDValue V = foldSetCCWithBinOp(VT, N0: N1, N1: N0, Cond, DL: dl, DCI))
5287 return V;
5288
5289 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, DL: dl, DCI))
5290 return V;
5291 }
5292
5293 // Fold remainder of division by a constant.
5294 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5295 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5296 // When division is cheap or optimizing for minimum size,
5297 // fall through to DIVREM creation by skipping this fold.
5298 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5299 if (N0.getOpcode() == ISD::UREM) {
5300 if (SDValue Folded = buildUREMEqFold(SETCCVT: VT, REMNode: N0, CompTargetNode: N1, Cond, DCI, DL: dl))
5301 return Folded;
5302 } else if (N0.getOpcode() == ISD::SREM) {
5303 if (SDValue Folded = buildSREMEqFold(SETCCVT: VT, REMNode: N0, CompTargetNode: N1, Cond, DCI, DL: dl))
5304 return Folded;
5305 }
5306 }
5307 }
5308
5309 // Fold away ALL boolean setcc's.
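// In i1 the only values are 0 and 1 (-1 when interpreted as signed), so
// every comparison can be rewritten as bitwise logic on the operands.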
5310 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5311 SDValue Temp;
5312 switch (Cond) {
5313 default: llvm_unreachable("Unknown integer setcc!");
5314 case ISD::SETEQ: // X == Y -> ~(X^Y)
5315 Temp = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OpVT, N1: N0, N2: N1);
5316 N0 = DAG.getNOT(DL: dl, Val: Temp, VT: OpVT);
5317 if (!DCI.isCalledByLegalizer())
5318 DCI.AddToWorklist(N: Temp.getNode());
5319 break;
5320 case ISD::SETNE: // X != Y --> (X^Y)
5321 N0 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: OpVT, N1: N0, N2: N1);
5322 break;
5323 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5324 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5325 Temp = DAG.getNOT(DL: dl, Val: N0, VT: OpVT);
5326 N0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1, N2: Temp);
5327 if (!DCI.isCalledByLegalizer())
5328 DCI.AddToWorklist(N: Temp.getNode());
5329 break;
5330 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5331 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5332 Temp = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5333 N0 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: OpVT, N1: N0, N2: Temp);
5334 if (!DCI.isCalledByLegalizer())
5335 DCI.AddToWorklist(N: Temp.getNode());
5336 break;
5337 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5338 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5339 Temp = DAG.getNOT(DL: dl, Val: N0, VT: OpVT);
5340 N0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1, N2: Temp);
5341 if (!DCI.isCalledByLegalizer())
5342 DCI.AddToWorklist(N: Temp.getNode());
5343 break;
5344 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5345 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5346 Temp = DAG.getNOT(DL: dl, Val: N1, VT: OpVT);
5347 N0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: OpVT, N1: N0, N2: Temp);
5348 break;
5349 }
5350 if (VT.getScalarType() != MVT::i1) {
5351 if (!DCI.isCalledByLegalizer())
5352 DCI.AddToWorklist(N: N0.getNode());
5353 // FIXME: If running after legalize, we probably can't do this.
5354 ISD::NodeType ExtendCode = getExtendForContent(Content: getBooleanContents(Type: OpVT));
5355 N0 = DAG.getNode(Opcode: ExtendCode, DL: dl, VT, Operand: N0);
5356 }
5357 return N0;
5358 }
5359
5360 // Could not fold it.
5361 return SDValue();
5362}
5363
5364/// Returns true (and the GlobalValue and the offset) if the node is a
5365/// GlobalAddress + offset.
5366bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
5367 int64_t &Offset) const {
5368
5369 SDNode *N = unwrapAddress(N: SDValue(WN, 0)).getNode();
5370
5371 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(Val: N)) {
5372 GA = GASD->getGlobal();
5373 Offset += GASD->getOffset();
5374 return true;
5375 }
5376
5377 if (N->getOpcode() == ISD::ADD) {
5378 SDValue N1 = N->getOperand(Num: 0);
5379 SDValue N2 = N->getOperand(Num: 1);
5380 if (isGAPlusOffset(WN: N1.getNode(), GA, Offset)) {
5381 if (auto *V = dyn_cast<ConstantSDNode>(Val&: N2)) {
5382 Offset += V->getSExtValue();
5383 return true;
5384 }
5385 } else if (isGAPlusOffset(WN: N2.getNode(), GA, Offset)) {
5386 if (auto *V = dyn_cast<ConstantSDNode>(Val&: N1)) {
5387 Offset += V->getSExtValue();
5388 return true;
5389 }
5390 }
5391 }
5392
5393 return false;
5394}
5395
5396SDValue TargetLowering::PerformDAGCombine(SDNode *N,
5397 DAGCombinerInfo &DCI) const {
5398 // Default implementation: no optimization.
5399 return SDValue();
5400}
5401
5402//===----------------------------------------------------------------------===//
5403// Inline Assembler Implementation Methods
5404//===----------------------------------------------------------------------===//
5405
5406TargetLowering::ConstraintType
5407TargetLowering::getConstraintType(StringRef Constraint) const {
5408 unsigned S = Constraint.size();
5409
5410 if (S == 1) {
5411 switch (Constraint[0]) {
5412 default: break;
5413 case 'r':
5414 return C_RegisterClass;
5415 case 'm': // memory
5416 case 'o': // offsetable
5417 case 'V': // not offsetable
5418 return C_Memory;
5419 case 'p': // Address.
5420 return C_Address;
5421 case 'n': // Simple Integer
5422 case 'E': // Floating Point Constant
5423 case 'F': // Floating Point Constant
5424 return C_Immediate;
5425 case 'i': // Simple Integer or Relocatable Constant
5426 case 's': // Relocatable Constant
5427 case 'X': // Allow ANY value.
5428 case 'I': // Target registers.
5429 case 'J':
5430 case 'K':
5431 case 'L':
5432 case 'M':
5433 case 'N':
5434 case 'O':
5435 case 'P':
5436 case '<':
5437 case '>':
5438 return C_Other;
5439 }
5440 }
5441
5442 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5443 if (S == 8 && Constraint.substr(Start: 1, N: 6) == "memory") // "{memory}"
5444 return C_Memory;
5445 return C_Register;
5446 }
5447 return C_Unknown;
5448}
5449
5450/// Try to replace an X constraint, which matches anything, with another that
5451/// has more specific requirements based on the type of the corresponding
5452/// operand.
5453const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5454 if (ConstraintVT.isInteger())
5455 return "r";
5456 if (ConstraintVT.isFloatingPoint())
5457 return "f"; // works for many targets
5458 return nullptr;
5459}
5460
5461SDValue TargetLowering::LowerAsmOutputForConstraint(
5462 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5463 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5464 return SDValue();
5465}
5466
5467/// Lower the specified operand into the Ops vector.
5468/// If it is invalid, don't add anything to Ops.
5469void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
5470 StringRef Constraint,
5471 std::vector<SDValue> &Ops,
5472 SelectionDAG &DAG) const {
5473
5474 if (Constraint.size() > 1)
5475 return;
5476
5477 char ConstraintLetter = Constraint[0];
5478 switch (ConstraintLetter) {
5479 default: break;
5480 case 'X': // Allows any operand
5481 case 'i': // Simple Integer or Relocatable Constant
5482 case 'n': // Simple Integer
5483 case 's': { // Relocatable Constant
5484
5485 ConstantSDNode *C;
5486 uint64_t Offset = 0;
5487
5488 // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5489 // etc., since getelementptr is variadic. We can't use
5490 // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5491 // while in this case the GA may be furthest from the root node which is
5492 // likely an ISD::ADD.
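    // For illustration: a constant GEP such as `getelementptr i8, ptr @g,
    // i64 4`, itself offset by another constant, typically reaches us as
    // (add (add GA:@g, 4), 8); the loop below peels off one constant per
    // iteration, accumulating Offset = 12 before emitting the final target
    // global address.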
5493 while (true) {
5494 if ((C = dyn_cast<ConstantSDNode>(Val&: Op)) && ConstraintLetter != 's') {
        // GCC prints these as sign-extended. Sign-extend the value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
5498 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5499 BooleanContent BCont = getBooleanContents(MVT::i64);
5500 ISD::NodeType ExtOpc =
5501 IsBool ? getExtendForContent(Content: BCont) : ISD::SIGN_EXTEND;
5502 int64_t ExtVal =
5503 ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5504 Ops.push_back(
5505 DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
5506 return;
5507 }
5508 if (ConstraintLetter != 'n') {
5509 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Val&: Op)) {
5510 Ops.push_back(x: DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL: SDLoc(Op),
5511 VT: GA->getValueType(ResNo: 0),
5512 offset: Offset + GA->getOffset()));
5513 return;
5514 }
5515 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Val&: Op)) {
5516 Ops.push_back(x: DAG.getTargetBlockAddress(
5517 BA: BA->getBlockAddress(), VT: BA->getValueType(ResNo: 0),
5518 Offset: Offset + BA->getOffset(), TargetFlags: BA->getTargetFlags()));
5519 return;
5520 }
5521 if (isa<BasicBlockSDNode>(Val: Op)) {
5522 Ops.push_back(x: Op);
5523 return;
5524 }
5525 }
5526 const unsigned OpCode = Op.getOpcode();
5527 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5528 if ((C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 0))))
5529 Op = Op.getOperand(i: 1);
5530 // Subtraction is not commutative.
5531 else if (OpCode == ISD::ADD &&
5532 (C = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1))))
5533 Op = Op.getOperand(i: 0);
5534 else
5535 return;
5536 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5537 continue;
5538 }
5539 return;
5540 }
5541 break;
5542 }
5543 }
5544}
5545
5546void TargetLowering::CollectTargetIntrinsicOperands(
5547 const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
5548}
5549
5550std::pair<unsigned, const TargetRegisterClass *>
5551TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
5552 StringRef Constraint,
5553 MVT VT) const {
5554 if (!Constraint.starts_with(Prefix: "{"))
5555 return std::make_pair(x: 0u, y: static_cast<TargetRegisterClass *>(nullptr));
5556 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5557
5558 // Remove the braces from around the name.
5559 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
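  // E.g. for the constraint "{eax}" on X86, RegName is now "eax" and the
  // search below finds X86::EAX in a general-purpose register class
  // (illustrative; the class chosen depends on the requested VT).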
5560
5561 std::pair<unsigned, const TargetRegisterClass *> R =
5562 std::make_pair(x: 0u, y: static_cast<const TargetRegisterClass *>(nullptr));
5563
5564 // Figure out which register class contains this reg.
5565 for (const TargetRegisterClass *RC : RI->regclasses()) {
5566 // If none of the value types for this register class are valid, we
5567 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5568 if (!isLegalRC(TRI: *RI, RC: *RC))
5569 continue;
5570
5571 for (const MCPhysReg &PR : *RC) {
5572 if (RegName.equals_insensitive(RHS: RI->getRegAsmName(Reg: PR))) {
5573 std::pair<unsigned, const TargetRegisterClass *> S =
5574 std::make_pair(x: PR, y&: RC);
5575
        // If this register class has the requested value type, return it;
        // otherwise keep searching, and fall back to the first class found
        // if none explicitly supports the requested type.
5579 if (RI->isTypeLegalForClass(RC: *RC, T: VT))
5580 return S;
5581 if (!R.second)
5582 R = S;
5583 }
5584 }
5585 }
5586
5587 return R;
5588}
5589
5590//===----------------------------------------------------------------------===//
5591// Constraint Selection.
5592
/// Return true if this is an input operand that is a matching constraint,
/// like "4".
5595bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
5596 assert(!ConstraintCode.empty() && "No known constraint!");
5597 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5598}
5599
5600/// If this is an input matching constraint, this method returns the output
5601/// operand it matches.
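/// For example, an input with the constraint string "0" is tied to output
/// operand 0, and this returns 0.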
5602unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
5603 assert(!ConstraintCode.empty() && "No known constraint!");
5604 return atoi(nptr: ConstraintCode.c_str());
5605}
5606
5607/// Split up the constraint string from the inline assembly value into the
5608/// specific constraints and their prefixes, and also tie in the associated
5609/// operand values.
5610/// If this returns an empty vector, and if the constraint string itself
5611/// isn't empty, there was an error parsing.
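/// For example (illustrative), the constraint string "=r,0,~{memory}" yields
/// three AsmOperandInfos: a register output, an input tied to that output,
/// and a memory clobber.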
5612TargetLowering::AsmOperandInfoVector
5613TargetLowering::ParseConstraints(const DataLayout &DL,
5614 const TargetRegisterInfo *TRI,
5615 const CallBase &Call) const {
5616 /// Information about all of the constraints.
5617 AsmOperandInfoVector ConstraintOperands;
5618 const InlineAsm *IA = cast<InlineAsm>(Val: Call.getCalledOperand());
5619 unsigned maCount = 0; // Largest number of multiple alternative constraints.
5620
5621 // Do a prepass over the constraints, canonicalizing them, and building up the
5622 // ConstraintOperands list.
5623 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
5624 unsigned ResNo = 0; // ResNo - The result number of the next output.
5625 unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
5626
5627 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
5628 ConstraintOperands.emplace_back(args: std::move(CI));
5629 AsmOperandInfo &OpInfo = ConstraintOperands.back();
5630
5631 // Update multiple alternative constraint count.
5632 if (OpInfo.multipleAlternatives.size() > maCount)
5633 maCount = OpInfo.multipleAlternatives.size();
5634
5635 OpInfo.ConstraintVT = MVT::Other;
5636
5637 // Compute the value type for each operand.
5638 switch (OpInfo.Type) {
5639 case InlineAsm::isOutput:
5640 // Indirect outputs just consume an argument.
5641 if (OpInfo.isIndirect) {
5642 OpInfo.CallOperandVal = Call.getArgOperand(i: ArgNo);
5643 break;
5644 }
5645
5646 // The return value of the call is this value. As such, there is no
5647 // corresponding argument.
5648 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
5649 if (StructType *STy = dyn_cast<StructType>(Val: Call.getType())) {
5650 OpInfo.ConstraintVT =
5651 getSimpleValueType(DL, Ty: STy->getElementType(N: ResNo));
5652 } else {
5653 assert(ResNo == 0 && "Asm only has one result!");
5654 OpInfo.ConstraintVT =
5655 getAsmOperandValueType(DL, Ty: Call.getType()).getSimpleVT();
5656 }
5657 ++ResNo;
5658 break;
5659 case InlineAsm::isInput:
5660 OpInfo.CallOperandVal = Call.getArgOperand(i: ArgNo);
5661 break;
5662 case InlineAsm::isLabel:
5663 OpInfo.CallOperandVal = cast<CallBrInst>(Val: &Call)->getIndirectDest(i: LabelNo);
5664 ++LabelNo;
5665 continue;
5666 case InlineAsm::isClobber:
5667 // Nothing to do.
5668 break;
5669 }
5670
5671 if (OpInfo.CallOperandVal) {
5672 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
5673 if (OpInfo.isIndirect) {
5674 OpTy = Call.getParamElementType(ArgNo);
5675 assert(OpTy && "Indirect operand must have elementtype attribute");
5676 }
5677
      // Look for a vector wrapped in a struct, e.g. { <16 x i8> }.
5679 if (StructType *STy = dyn_cast<StructType>(Val: OpTy))
5680 if (STy->getNumElements() == 1)
5681 OpTy = STy->getElementType(N: 0);
5682
5683 // If OpTy is not a single value, it may be a struct/union that we
5684 // can tile with integers.
5685 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
5686 unsigned BitSize = DL.getTypeSizeInBits(Ty: OpTy);
5687 switch (BitSize) {
5688 default: break;
5689 case 1:
5690 case 8:
5691 case 16:
5692 case 32:
5693 case 64:
5694 case 128:
5695 OpTy = IntegerType::get(C&: OpTy->getContext(), NumBits: BitSize);
5696 break;
5697 }
5698 }
5699
5700 EVT VT = getAsmOperandValueType(DL, Ty: OpTy, AllowUnknown: true);
5701 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
5702 ArgNo++;
5703 }
5704 }
5705
5706 // If we have multiple alternative constraints, select the best alternative.
5707 if (!ConstraintOperands.empty()) {
5708 if (maCount) {
5709 unsigned bestMAIndex = 0;
5710 int bestWeight = -1;
      // weight: -1 = invalid match; 0 = so-so match, up to 5 = good match.
5712 int weight = -1;
5713 unsigned maIndex;
5714 // Compute the sums of the weights for each alternative, keeping track
5715 // of the best (highest weight) one so far.
5716 for (maIndex = 0; maIndex < maCount; ++maIndex) {
5717 int weightSum = 0;
5718 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
5719 cIndex != eIndex; ++cIndex) {
5720 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
5721 if (OpInfo.Type == InlineAsm::isClobber)
5722 continue;
5723
          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch (e.g. one
          // is an integer, the other is floating point) or their sizes
          // differ, flag this alternative as impossible to match.
5728 if (OpInfo.hasMatchingInput()) {
5729 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
5730 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
5731 if ((OpInfo.ConstraintVT.isInteger() !=
5732 Input.ConstraintVT.isInteger()) ||
5733 (OpInfo.ConstraintVT.getSizeInBits() !=
5734 Input.ConstraintVT.getSizeInBits())) {
5735 weightSum = -1; // Can't match.
5736 break;
5737 }
5738 }
5739 }
5740 weight = getMultipleConstraintMatchWeight(info&: OpInfo, maIndex);
5741 if (weight == -1) {
5742 weightSum = -1;
5743 break;
5744 }
5745 weightSum += weight;
5746 }
5747 // Update best.
5748 if (weightSum > bestWeight) {
5749 bestWeight = weightSum;
5750 bestMAIndex = maIndex;
5751 }
5752 }
5753
5754 // Now select chosen alternative in each constraint.
5755 for (AsmOperandInfo &cInfo : ConstraintOperands)
5756 if (cInfo.Type != InlineAsm::isClobber)
5757 cInfo.selectAlternative(index: bestMAIndex);
5758 }
5759 }
5760
5761 // Check and hook up tied operands, choose constraint code to use.
5762 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
5763 cIndex != eIndex; ++cIndex) {
5764 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
5765
5766 // If this is an output operand with a matching input operand, look up the
5767 // matching input. If their types mismatch, e.g. one is an integer, the
5768 // other is floating point, or their sizes are different, flag it as an
5769 // error.
5770 if (OpInfo.hasMatchingInput()) {
5771 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
5772
5773 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
5774 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
5775 getRegForInlineAsmConstraint(RI: TRI, Constraint: OpInfo.ConstraintCode,
5776 VT: OpInfo.ConstraintVT);
5777 std::pair<unsigned, const TargetRegisterClass *> InputRC =
5778 getRegForInlineAsmConstraint(RI: TRI, Constraint: Input.ConstraintCode,
5779 VT: Input.ConstraintVT);
5780 if ((OpInfo.ConstraintVT.isInteger() !=
5781 Input.ConstraintVT.isInteger()) ||
5782 (MatchRC.second != InputRC.second)) {
5783 report_fatal_error(reason: "Unsupported asm: input constraint"
5784 " with a matching output constraint of"
5785 " incompatible type!");
5786 }
5787 }
5788 }
5789 }
5790
5791 return ConstraintOperands;
5792}
5793
/// Return a number indicating our preference for choosing a type of constraint
/// over another, for the purpose of sorting them. Immediates are almost always
/// preferable (when they can be emitted). A higher return value means a
5797/// stronger preference for one constraint type relative to another.
5798/// FIXME: We should prefer registers over memory but doing so may lead to
5799/// unrecoverable register exhaustion later.
5800/// https://github.com/llvm/llvm-project/issues/20571
static unsigned getConstraintPriority(TargetLowering::ConstraintType CT) {
5802 switch (CT) {
5803 case TargetLowering::C_Immediate:
5804 case TargetLowering::C_Other:
5805 return 4;
5806 case TargetLowering::C_Memory:
5807 case TargetLowering::C_Address:
5808 return 3;
5809 case TargetLowering::C_RegisterClass:
5810 return 2;
5811 case TargetLowering::C_Register:
5812 return 1;
5813 case TargetLowering::C_Unknown:
5814 return 0;
5815 }
5816 llvm_unreachable("Invalid constraint type");
5817}
5818
5819/// Examine constraint type and operand type and determine a weight value.
5820/// This object must already have been set up with the operand type
5821/// and the current alternative constraint selected.
5822TargetLowering::ConstraintWeight
5823 TargetLowering::getMultipleConstraintMatchWeight(
5824 AsmOperandInfo &info, int maIndex) const {
5825 InlineAsm::ConstraintCodeVector *rCodes;
5826 if (maIndex >= (int)info.multipleAlternatives.size())
5827 rCodes = &info.Codes;
5828 else
5829 rCodes = &info.multipleAlternatives[maIndex].Codes;
5830 ConstraintWeight BestWeight = CW_Invalid;
5831
  // Loop over the options, keeping track of the highest-weight one.
5833 for (const std::string &rCode : *rCodes) {
5834 ConstraintWeight weight =
5835 getSingleConstraintMatchWeight(info, constraint: rCode.c_str());
5836 if (weight > BestWeight)
5837 BestWeight = weight;
5838 }
5839
5840 return BestWeight;
5841}
5842
5843/// Examine constraint type and operand type and determine a weight value.
5844/// This object must already have been set up with the operand type
5845/// and the current alternative constraint selected.
5846TargetLowering::ConstraintWeight
5847 TargetLowering::getSingleConstraintMatchWeight(
5848 AsmOperandInfo &info, const char *constraint) const {
5849 ConstraintWeight weight = CW_Invalid;
5850 Value *CallOperandVal = info.CallOperandVal;
5851 // If we don't have a value, we can't do a match,
5852 // but allow it at the lowest weight.
5853 if (!CallOperandVal)
5854 return CW_Default;
5855 // Look at the constraint type.
5856 switch (*constraint) {
5857 case 'i': // immediate integer.
5858 case 'n': // immediate integer with a known value.
5859 if (isa<ConstantInt>(Val: CallOperandVal))
5860 weight = CW_Constant;
5861 break;
  case 's': // non-explicit integral immediate.
5863 if (isa<GlobalValue>(Val: CallOperandVal))
5864 weight = CW_Constant;
5865 break;
  case 'E': // immediate float, if in host format.
5867 case 'F': // immediate float.
5868 if (isa<ConstantFP>(Val: CallOperandVal))
5869 weight = CW_Constant;
5870 break;
5871 case '<': // memory operand with autodecrement.
5872 case '>': // memory operand with autoincrement.
5873 case 'm': // memory operand.
5874 case 'o': // offsettable memory operand
5875 case 'V': // non-offsettable memory operand
5876 weight = CW_Memory;
5877 break;
5878 case 'r': // general register.
5879 case 'g': // general register, memory operand or immediate integer.
5880 // note: Clang converts "g" to "imr".
5881 if (CallOperandVal->getType()->isIntegerTy())
5882 weight = CW_Register;
5883 break;
5884 case 'X': // any operand.
5885 default:
5886 weight = CW_Default;
5887 break;
5888 }
5889 return weight;
5890}
5891
5892/// If there are multiple different constraints that we could pick for this
5893/// operand (e.g. "imr") try to pick the 'best' one.
5894/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
5895/// into seven classes:
5896/// Register -> one specific register
5897/// RegisterClass -> a group of regs
5898/// Memory -> memory
5899/// Address -> a symbolic memory reference
5900/// Immediate -> immediate values
5901/// Other -> magic values (such as "Flag Output Operands")
5902/// Unknown -> something we don't recognize yet and can't handle
5903/// Ideally, we would pick the most specific constraint possible: if we have
5904/// something that fits into a register, we would pick it. The problem here
5905/// is that if we have something that could either be in a register or in
/// memory, then using the register could cause selection of *other*
/// operands to fail: they might succeed only if we pick memory. Because of
/// this, the heuristic we use is:
5909///
5910/// 1) If there is an 'other' constraint, and if the operand is valid for
5911/// that constraint, use it. This makes us take advantage of 'i'
5912/// constraints when available.
5913/// 2) Otherwise, pick the most general constraint present. This prefers
5914/// 'm' over 'r', for example.
5915///
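/// For example (illustrative), given "imr" and a constant integer operand,
/// rule 1 picks 'i'; given "mr" and a non-constant operand, rule 2 picks 'm'
/// over 'r'.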
5916TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
5917 TargetLowering::AsmOperandInfo &OpInfo) const {
5918 ConstraintGroup Ret;
5919
5920 Ret.reserve(N: OpInfo.Codes.size());
5921 for (StringRef Code : OpInfo.Codes) {
5922 TargetLowering::ConstraintType CType = getConstraintType(Constraint: Code);
5923
5924 // Indirect 'other' or 'immediate' constraints are not allowed.
5925 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
5926 CType == TargetLowering::C_Register ||
5927 CType == TargetLowering::C_RegisterClass))
5928 continue;
5929
5930 // Things with matching constraints can only be registers, per gcc
5931 // documentation. This mainly affects "g" constraints.
5932 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
5933 continue;
5934
5935 Ret.emplace_back(Args&: Code, Args&: CType);
5936 }
5937
5938 std::stable_sort(
5939 first: Ret.begin(), last: Ret.end(), comp: [](ConstraintPair a, ConstraintPair b) {
        return getConstraintPriority(CT: a.second) >
               getConstraintPriority(CT: b.second);
5941 });
5942
5943 return Ret;
5944}
5945
5946/// If we have an immediate, see if we can lower it. Return true if we can,
5947/// false otherwise.
5948static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
5949 SDValue Op, SelectionDAG *DAG,
5950 const TargetLowering &TLI) {
5951
5952 assert((P.second == TargetLowering::C_Other ||
5953 P.second == TargetLowering::C_Immediate) &&
5954 "need immediate or other");
5955
5956 if (!Op.getNode())
5957 return false;
5958
5959 std::vector<SDValue> ResultOps;
5960 TLI.LowerAsmOperandForConstraint(Op, Constraint: P.first, Ops&: ResultOps, DAG&: *DAG);
5961 return !ResultOps.empty();
5962}
5963
5964/// Determines the constraint code and constraint type to use for the specific
5965/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
5966void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
5967 SDValue Op,
5968 SelectionDAG *DAG) const {
5969 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
5970
5971 // Single-letter constraints ('r') are very common.
5972 if (OpInfo.Codes.size() == 1) {
5973 OpInfo.ConstraintCode = OpInfo.Codes[0];
5974 OpInfo.ConstraintType = getConstraintType(Constraint: OpInfo.ConstraintCode);
5975 } else {
5976 ConstraintGroup G = getConstraintPreferences(OpInfo);
5977 if (G.empty())
5978 return;
5979
5980 unsigned BestIdx = 0;
5981 for (const unsigned E = G.size();
5982 BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
5983 G[BestIdx].second == TargetLowering::C_Immediate);
5984 ++BestIdx) {
5985 if (lowerImmediateIfPossible(P&: G[BestIdx], Op, DAG, TLI: *this))
5986 break;
5987 // If we're out of constraints, just pick the first one.
5988 if (BestIdx + 1 == E) {
5989 BestIdx = 0;
5990 break;
5991 }
5992 }
5993
5994 OpInfo.ConstraintCode = G[BestIdx].first;
5995 OpInfo.ConstraintType = G[BestIdx].second;
5996 }
5997
5998 // 'X' matches anything.
5999 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6000 // Constants are handled elsewhere. For Functions, the type here is the
6001 // type of the result, which is not what we want to look at; leave them
6002 // alone.
6003 Value *v = OpInfo.CallOperandVal;
6004 if (isa<ConstantInt>(Val: v) || isa<Function>(Val: v)) {
6005 return;
6006 }
6007
6008 if (isa<BasicBlock>(Val: v) || isa<BlockAddress>(Val: v)) {
6009 OpInfo.ConstraintCode = "i";
6010 return;
6011 }
6012
6013 // Otherwise, try to resolve it to something we know about by looking at
6014 // the actual operand type.
6015 if (const char *Repl = LowerXConstraint(ConstraintVT: OpInfo.ConstraintVT)) {
6016 OpInfo.ConstraintCode = Repl;
6017 OpInfo.ConstraintType = getConstraintType(Constraint: OpInfo.ConstraintCode);
6018 }
6019 }
6020}
6021
6022/// Given an exact SDIV by a constant, create a multiplication
6023/// with the multiplicative inverse of the constant.
6024static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
6025 const SDLoc &dl, SelectionDAG &DAG,
6026 SmallVectorImpl<SDNode *> &Created) {
6027 SDValue Op0 = N->getOperand(Num: 0);
6028 SDValue Op1 = N->getOperand(Num: 1);
6029 EVT VT = N->getValueType(ResNo: 0);
6030 EVT SVT = VT.getScalarType();
6031 EVT ShVT = TLI.getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6032 EVT ShSVT = ShVT.getScalarType();
6033
6034 bool UseSRA = false;
6035 SmallVector<SDValue, 16> Shifts, Factors;
6036
6037 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6038 if (C->isZero())
6039 return false;
6040 APInt Divisor = C->getAPIntValue();
6041 unsigned Shift = Divisor.countr_zero();
6042 if (Shift) {
6043 Divisor.ashrInPlace(ShiftAmt: Shift);
6044 UseSRA = true;
6045 }
6046 // Calculate the multiplicative inverse, using Newton's method.
6047 APInt t;
6048 APInt Factor = Divisor;
6049 while ((t = Divisor * Factor) != 1)
6050 Factor *= APInt(Divisor.getBitWidth(), 2) - t;
6051 Shifts.push_back(Elt: DAG.getConstant(Val: Shift, DL: dl, VT: ShSVT));
6052 Factors.push_back(Elt: DAG.getConstant(Val: Factor, DL: dl, VT: SVT));
6053 return true;
6054 };
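  // Worked example (illustrative): an exact i8 sdiv by 6. Since 6 = 3 * 2^1,
  // Shift = 1 and Factor = inv(3, 2^8) = 171 (3 * 171 = 513 = 2 * 256 + 1).
  // For x = 42: (42 >>exact 1) = 21, and 21 * 171 = 3591 = 7 (mod 256), which
  // is indeed 42 / 6.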
6055
6056 // Collect all magic values from the build vector.
6057 if (!ISD::matchUnaryPredicate(Op: Op1, Match: BuildSDIVPattern))
6058 return SDValue();
6059
6060 SDValue Shift, Factor;
6061 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6062 Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
6063 Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
6064 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6065 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6066 "Expected matchUnaryPredicate to return one element for scalable "
6067 "vectors");
6068 Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts[0]);
6069 Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors[0]);
6070 } else {
6071 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6072 Shift = Shifts[0];
6073 Factor = Factors[0];
6074 }
6075
6076 SDValue Res = Op0;
6077
  // If the divisor was even, shift the dividend right upfront; the factors
  // computed above invert only the odd part of the divisor, whose LSB is one.
6079 if (UseSRA) {
6080 // TODO: For UDIV use SRL instead of SRA.
6081 SDNodeFlags Flags;
6082 Flags.setExact(true);
6083 Res = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Res, N2: Shift, Flags);
6084 Created.push_back(Elt: Res.getNode());
6085 }
6086
6087 return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Res, N2: Factor);
6088}
6089
6090SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
6091 SelectionDAG &DAG,
6092 SmallVectorImpl<SDNode *> &Created) const {
6093 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6094 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6095 if (TLI.isIntDivCheap(VT: N->getValueType(ResNo: 0), Attr))
6096 return SDValue(N, 0); // Lower SDIV as SDIV
6097 return SDValue();
6098}
6099
6100SDValue
6101TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
6102 SelectionDAG &DAG,
6103 SmallVectorImpl<SDNode *> &Created) const {
6104 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6105 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6106 if (TLI.isIntDivCheap(VT: N->getValueType(ResNo: 0), Attr))
6107 return SDValue(N, 0); // Lower SREM as SREM
6108 return SDValue();
6109}
6110
6111/// Build sdiv by power-of-2 with conditional move instructions
6112/// Ref: "Hacker's Delight" by Henry Warren 10-1
6113/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6114/// bgez x, label
6115/// add x, x, 2**k-1
6116/// label:
6117/// sra res, x, k
6118/// neg res, res (when the divisor is negative)
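/// For example (illustrative), sdiv -5, 4: since -5 < 0 we add 2**2-1 = 3 to
/// get -2, and -2 >> 2 (arithmetic) = -1, matching truncating division.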
6119SDValue TargetLowering::buildSDIVPow2WithCMov(
6120 SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6121 SmallVectorImpl<SDNode *> &Created) const {
6122 unsigned Lg2 = Divisor.countr_zero();
6123 EVT VT = N->getValueType(ResNo: 0);
6124
6125 SDLoc DL(N);
6126 SDValue N0 = N->getOperand(Num: 0);
6127 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
6128 APInt Lg2Mask = APInt::getLowBitsSet(numBits: VT.getSizeInBits(), loBitsSet: Lg2);
6129 SDValue Pow2MinusOne = DAG.getConstant(Val: Lg2Mask, DL, VT);
6130
6131 // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6132 EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
6133 SDValue Cmp = DAG.getSetCC(DL, VT: CCVT, LHS: N0, RHS: Zero, Cond: ISD::SETLT);
6134 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: N0, N2: Pow2MinusOne);
6135 SDValue CMov = DAG.getNode(Opcode: ISD::SELECT, DL, VT, N1: Cmp, N2: Add, N3: N0);
6136
6137 Created.push_back(Elt: Cmp.getNode());
6138 Created.push_back(Elt: Add.getNode());
6139 Created.push_back(Elt: CMov.getNode());
6140
6141 // Divide by pow2.
6142 SDValue SRA =
6143 DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: CMov, N2: DAG.getConstant(Val: Lg2, DL, VT));
6144
6145 // If we're dividing by a positive value, we're done. Otherwise, we must
6146 // negate the result.
6147 if (Divisor.isNonNegative())
6148 return SRA;
6149
6150 Created.push_back(Elt: SRA.getNode());
6151 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Zero, N2: SRA);
6152}
6153
6154/// Given an ISD::SDIV node expressing a divide by constant,
6155/// return a DAG expression to select that will generate the same value by
6156/// multiplying by a magic number.
6157/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
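/// For example (illustrative), a 32-bit sdiv by 7 uses the magic constant
/// 0x92492493 with a post-shift of 2: q = mulhs(n, M); q += n; q >>= 2;
/// q += q >>u 31. The final add is the sign-bit fixup built below.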
6158SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
6159 bool IsAfterLegalization,
6160 SmallVectorImpl<SDNode *> &Created) const {
6161 SDLoc dl(N);
6162 EVT VT = N->getValueType(ResNo: 0);
6163 EVT SVT = VT.getScalarType();
6164 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6165 EVT ShSVT = ShVT.getScalarType();
6166 unsigned EltBits = VT.getScalarSizeInBits();
6167 EVT MulVT;
6168
6169 // Check to see if we can do this.
6170 // FIXME: We should be more aggressive here.
6171 if (!isTypeLegal(VT)) {
6172 // Limit this to simple scalars for now.
6173 if (VT.isVector() || !VT.isSimple())
6174 return SDValue();
6175
6176 // If this type will be promoted to a large enough type with a legal
6177 // multiply operation, we can go ahead and do this transform.
6178 if (getTypeAction(VT: VT.getSimpleVT()) != TypePromoteInteger)
6179 return SDValue();
6180
6181 MulVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT);
6182 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6183 !isOperationLegal(Op: ISD::MUL, VT: MulVT))
6184 return SDValue();
6185 }
6186
6187 // If the sdiv has an 'exact' bit we can use a simpler lowering.
6188 if (N->getFlags().hasExact())
6189 return BuildExactSDIV(TLI: *this, N, dl, DAG, Created);
6190
6191 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6192
6193 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6194 if (C->isZero())
6195 return false;
6196
6197 const APInt &Divisor = C->getAPIntValue();
6198 SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(D: Divisor);
6199 int NumeratorFactor = 0;
6200 int ShiftMask = -1;
6201
6202 if (Divisor.isOne() || Divisor.isAllOnes()) {
6203 // If d is +1/-1, we just multiply the numerator by +1/-1.
6204 NumeratorFactor = Divisor.getSExtValue();
6205 magics.Magic = 0;
6206 magics.ShiftAmount = 0;
6207 ShiftMask = 0;
6208 } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6209 // If d > 0 and m < 0, add the numerator.
6210 NumeratorFactor = 1;
6211 } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6212 // If d < 0 and m > 0, subtract the numerator.
6213 NumeratorFactor = -1;
6214 }
6215
6216 MagicFactors.push_back(Elt: DAG.getConstant(Val: magics.Magic, DL: dl, VT: SVT));
6217 Factors.push_back(Elt: DAG.getConstant(Val: NumeratorFactor, DL: dl, VT: SVT));
6218 Shifts.push_back(Elt: DAG.getConstant(Val: magics.ShiftAmount, DL: dl, VT: ShSVT));
6219 ShiftMasks.push_back(Elt: DAG.getConstant(Val: ShiftMask, DL: dl, VT: SVT));
6220 return true;
6221 };
6222
6223 SDValue N0 = N->getOperand(Num: 0);
6224 SDValue N1 = N->getOperand(Num: 1);
6225
6226 // Collect the shifts / magic values from each element.
6227 if (!ISD::matchUnaryPredicate(Op: N1, Match: BuildSDIVPattern))
6228 return SDValue();
6229
6230 SDValue MagicFactor, Factor, Shift, ShiftMask;
6231 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6232 MagicFactor = DAG.getBuildVector(VT, DL: dl, Ops: MagicFactors);
6233 Factor = DAG.getBuildVector(VT, DL: dl, Ops: Factors);
6234 Shift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: Shifts);
6235 ShiftMask = DAG.getBuildVector(VT, DL: dl, Ops: ShiftMasks);
6236 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6237 assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6238 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6239 "Expected matchUnaryPredicate to return one element for scalable "
6240 "vectors");
6241 MagicFactor = DAG.getSplatVector(VT, DL: dl, Op: MagicFactors[0]);
6242 Factor = DAG.getSplatVector(VT, DL: dl, Op: Factors[0]);
6243 Shift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: Shifts[0]);
6244 ShiftMask = DAG.getSplatVector(VT, DL: dl, Op: ShiftMasks[0]);
6245 } else {
6246 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6247 MagicFactor = MagicFactors[0];
6248 Factor = Factors[0];
6249 Shift = Shifts[0];
6250 ShiftMask = ShiftMasks[0];
6251 }
6252
6253 // Multiply the numerator (operand 0) by the magic value.
6254 // FIXME: We should support doing a MUL in a wider type.
6255 auto GetMULHS = [&](SDValue X, SDValue Y) {
6256 // If the type isn't legal, use a wider mul of the type calculated
6257 // earlier.
6258 if (!isTypeLegal(VT)) {
6259 X = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MulVT, Operand: X);
6260 Y = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MulVT, Operand: Y);
6261 Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MulVT, N1: X, N2: Y);
6262 Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MulVT, N1: Y,
6263 N2: DAG.getShiftAmountConstant(Val: EltBits, VT: MulVT, DL: dl));
6264 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6265 }
6266
6267 if (isOperationLegalOrCustom(Op: ISD::MULHS, VT, LegalOnly: IsAfterLegalization))
6268 return DAG.getNode(Opcode: ISD::MULHS, DL: dl, VT, N1: X, N2: Y);
6269 if (isOperationLegalOrCustom(Op: ISD::SMUL_LOHI, VT, LegalOnly: IsAfterLegalization)) {
6270 SDValue LoHi =
6271 DAG.getNode(Opcode: ISD::SMUL_LOHI, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: X, N2: Y);
6272 return SDValue(LoHi.getNode(), 1);
6273 }
    // If a type twice as wide is legal, widen and use a mul plus a shift.
6275 unsigned Size = VT.getScalarSizeInBits();
6276 EVT WideVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: Size * 2);
6277 if (VT.isVector())
6278 WideVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT,
6279 EC: VT.getVectorElementCount());
6280 if (isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT)) {
6281 X = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: WideVT, Operand: X);
6282 Y = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: WideVT, Operand: Y);
6283 Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: X, N2: Y);
6284 Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideVT, N1: Y,
6285 N2: DAG.getShiftAmountConstant(Val: EltBits, VT: WideVT, DL: dl));
6286 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6287 }
6288 return SDValue();
6289 };
6290
6291 SDValue Q = GetMULHS(N0, MagicFactor);
6292 if (!Q)
6293 return SDValue();
6294
6295 Created.push_back(Elt: Q.getNode());
6296
6297 // (Optionally) Add/subtract the numerator using Factor.
6298 Factor = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: N0, N2: Factor);
6299 Created.push_back(Elt: Factor.getNode());
6300 Q = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Q, N2: Factor);
6301 Created.push_back(Elt: Q.getNode());
6302
6303 // Shift right algebraic by shift value.
6304 Q = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Q, N2: Shift);
6305 Created.push_back(Elt: Q.getNode());
6306
6307 // Extract the sign bit, mask it and add it to the quotient.
6308 SDValue SignShift = DAG.getConstant(Val: EltBits - 1, DL: dl, VT: ShVT);
6309 SDValue T = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: SignShift);
6310 Created.push_back(Elt: T.getNode());
6311 T = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: T, N2: ShiftMask);
6312 Created.push_back(Elt: T.getNode());
6313 return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Q, N2: T);
6314}
6315
6316/// Given an ISD::UDIV node expressing a divide by constant,
6317/// return a DAG expression to select that will generate the same value by
6318/// multiplying by a magic number.
6319/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
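/// For example (illustrative), a 32-bit udiv by 7 uses the magic constant
/// 0x24924925 with the NPQ fixup built below (UseNPQ): q = mulhu(n, M);
/// npq = (n - q) >> 1; result = (npq + q) >> 2.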
6320SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
6321 bool IsAfterLegalization,
6322 SmallVectorImpl<SDNode *> &Created) const {
6323 SDLoc dl(N);
6324 EVT VT = N->getValueType(ResNo: 0);
6325 EVT SVT = VT.getScalarType();
6326 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
6327 EVT ShSVT = ShVT.getScalarType();
6328 unsigned EltBits = VT.getScalarSizeInBits();
6329 EVT MulVT;
6330
6331 // Check to see if we can do this.
6332 // FIXME: We should be more aggressive here.
6333 if (!isTypeLegal(VT)) {
6334 // Limit this to simple scalars for now.
6335 if (VT.isVector() || !VT.isSimple())
6336 return SDValue();
6337
6338 // If this type will be promoted to a large enough type with a legal
6339 // multiply operation, we can go ahead and do this transform.
6340 if (getTypeAction(VT: VT.getSimpleVT()) != TypePromoteInteger)
6341 return SDValue();
6342
6343 MulVT = getTypeToTransformTo(Context&: *DAG.getContext(), VT);
6344 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6345 !isOperationLegal(Op: ISD::MUL, VT: MulVT))
6346 return SDValue();
6347 }
6348
6349 SDValue N0 = N->getOperand(Num: 0);
6350 SDValue N1 = N->getOperand(Num: 1);
6351
6352 // Try to use leading zeros of the dividend to reduce the multiplier and
6353 // avoid expensive fixups.
6354 // TODO: Support vectors.
6355 unsigned LeadingZeros = 0;
6356 if (!VT.isVector() && isa<ConstantSDNode>(Val: N1)) {
6357 assert(!isOneConstant(N1) && "Unexpected divisor");
6358 LeadingZeros = DAG.computeKnownBits(Op: N0).countMinLeadingZeros();
    // UnsignedDivisionByConstantInfo doesn't work correctly if the number of
    // leading zeros in the dividend exceeds that of the divisor.
6361 LeadingZeros = std::min(a: LeadingZeros, b: N1->getAsAPIntVal().countl_zero());
6362 }
6363
6364 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6365 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6366
6367 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6368 if (C->isZero())
6369 return false;
6370 const APInt& Divisor = C->getAPIntValue();
6371
6372 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6373
    // The magic algorithm doesn't work for division by 1, so we need to emit a
    // select at the end.
6376 if (Divisor.isOne()) {
6377 PreShift = PostShift = DAG.getUNDEF(VT: ShSVT);
6378 MagicFactor = NPQFactor = DAG.getUNDEF(VT: SVT);
6379 } else {
6380 UnsignedDivisionByConstantInfo magics =
6381 UnsignedDivisionByConstantInfo::get(D: Divisor, LeadingZeros);
6382
6383 MagicFactor = DAG.getConstant(Val: magics.Magic, DL: dl, VT: SVT);
6384
6385 assert(magics.PreShift < Divisor.getBitWidth() &&
6386 "We shouldn't generate an undefined shift!");
6387 assert(magics.PostShift < Divisor.getBitWidth() &&
6388 "We shouldn't generate an undefined shift!");
6389 assert((!magics.IsAdd || magics.PreShift == 0) &&
6390 "Unexpected pre-shift");
6391 PreShift = DAG.getConstant(Val: magics.PreShift, DL: dl, VT: ShSVT);
6392 PostShift = DAG.getConstant(Val: magics.PostShift, DL: dl, VT: ShSVT);
6393 NPQFactor = DAG.getConstant(
6394 Val: magics.IsAdd ? APInt::getOneBitSet(numBits: EltBits, BitNo: EltBits - 1)
6395 : APInt::getZero(numBits: EltBits),
6396 DL: dl, VT: SVT);
6397 UseNPQ |= magics.IsAdd;
6398 UsePreShift |= magics.PreShift != 0;
6399 UsePostShift |= magics.PostShift != 0;
6400 }
6401
6402 PreShifts.push_back(Elt: PreShift);
6403 MagicFactors.push_back(Elt: MagicFactor);
6404 NPQFactors.push_back(Elt: NPQFactor);
6405 PostShifts.push_back(Elt: PostShift);
6406 return true;
6407 };
6408
6409 // Collect the shifts/magic values from each element.
6410 if (!ISD::matchUnaryPredicate(Op: N1, Match: BuildUDIVPattern))
6411 return SDValue();
6412
6413 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6414 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6415 PreShift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: PreShifts);
6416 MagicFactor = DAG.getBuildVector(VT, DL: dl, Ops: MagicFactors);
6417 NPQFactor = DAG.getBuildVector(VT, DL: dl, Ops: NPQFactors);
6418 PostShift = DAG.getBuildVector(VT: ShVT, DL: dl, Ops: PostShifts);
6419 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6420 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6421 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
6423 PreShift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: PreShifts[0]);
6424 MagicFactor = DAG.getSplatVector(VT, DL: dl, Op: MagicFactors[0]);
6425 NPQFactor = DAG.getSplatVector(VT, DL: dl, Op: NPQFactors[0]);
6426 PostShift = DAG.getSplatVector(VT: ShVT, DL: dl, Op: PostShifts[0]);
6427 } else {
6428 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6429 PreShift = PreShifts[0];
6430 MagicFactor = MagicFactors[0];
6431 PostShift = PostShifts[0];
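    // NPQFactor is deliberately left unset here: for scalars, the NPQ fixup
    // below uses an explicit SRL-by-1 instead of a MULHU.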
6432 }
6433
6434 SDValue Q = N0;
6435 if (UsePreShift) {
6436 Q = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: PreShift);
6437 Created.push_back(Elt: Q.getNode());
6438 }
6439
6440 // FIXME: We should support doing a MUL in a wider type.
6441 auto GetMULHU = [&](SDValue X, SDValue Y) {
6442 // If the type isn't legal, use a wider mul of the type calculated
6443 // earlier.
6444 if (!isTypeLegal(VT)) {
6445 X = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MulVT, Operand: X);
6446 Y = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: MulVT, Operand: Y);
6447 Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MulVT, N1: X, N2: Y);
6448 Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MulVT, N1: Y,
6449 N2: DAG.getShiftAmountConstant(Val: EltBits, VT: MulVT, DL: dl));
6450 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6451 }
6452
6453 if (isOperationLegalOrCustom(Op: ISD::MULHU, VT, LegalOnly: IsAfterLegalization))
6454 return DAG.getNode(Opcode: ISD::MULHU, DL: dl, VT, N1: X, N2: Y);
6455 if (isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT, LegalOnly: IsAfterLegalization)) {
6456 SDValue LoHi =
6457 DAG.getNode(Opcode: ISD::UMUL_LOHI, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: X, N2: Y);
6458 return SDValue(LoHi.getNode(), 1);
6459 }
    // If a type twice as wide is legal, widen and use a mul plus a shift.
6461 unsigned Size = VT.getScalarSizeInBits();
6462 EVT WideVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: Size * 2);
6463 if (VT.isVector())
6464 WideVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: WideVT,
6465 EC: VT.getVectorElementCount());
6466 if (isOperationLegalOrCustom(Op: ISD::MUL, VT: WideVT)) {
6467 X = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: WideVT, Operand: X);
6468 Y = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT: WideVT, Operand: Y);
6469 Y = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: WideVT, N1: X, N2: Y);
6470 Y = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: WideVT, N1: Y,
6471 N2: DAG.getShiftAmountConstant(Val: EltBits, VT: WideVT, DL: dl));
6472 return DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Y);
6473 }
6474 return SDValue(); // No mulhu or equivalent
6475 };
6476
6477 // Multiply the numerator (operand 0) by the magic value.
6478 Q = GetMULHU(Q, MagicFactor);
6479 if (!Q)
6480 return SDValue();
6481
6482 Created.push_back(Elt: Q.getNode());
6483
6484 if (UseNPQ) {
6485 SDValue NPQ = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: N0, N2: Q);
6486 Created.push_back(Elt: NPQ.getNode());
6487
6488 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
6489 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
6490 if (VT.isVector())
6491 NPQ = GetMULHU(NPQ, NPQFactor);
6492 else
6493 NPQ = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: NPQ, N2: DAG.getConstant(Val: 1, DL: dl, VT: ShVT));
6494
6495 Created.push_back(Elt: NPQ.getNode());
6496
6497 Q = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: NPQ, N2: Q);
6498 Created.push_back(Elt: Q.getNode());
6499 }
6500
6501 if (UsePostShift) {
6502 Q = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Q, N2: PostShift);
6503 Created.push_back(Elt: Q.getNode());
6504 }
6505
6506 EVT SetCCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
6507
6508 SDValue One = DAG.getConstant(Val: 1, DL: dl, VT);
6509 SDValue IsOne = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: N1, RHS: One, Cond: ISD::SETEQ);
6510 return DAG.getSelect(DL: dl, VT, Cond: IsOne, LHS: N0, RHS: Q);
6511}
6512
/// If all values in Values that *don't* match the predicate are the same
/// 'splat' value, then replace all values with that splat value.
6515/// Else, if AlternativeReplacement was provided, then replace all values that
6516/// do match predicate with AlternativeReplacement value.
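/// For example (illustrative), {5, 0, 5} with an is-zero predicate becomes
/// {5, 5, 5}; for {5, 0, 7} no common splat value exists, so the matching
/// lane is replaced only if AlternativeReplacement is provided.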
6517static void
6518turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
6519 std::function<bool(SDValue)> Predicate,
6520 SDValue AlternativeReplacement = SDValue()) {
6521 SDValue Replacement;
6522 // Is there a value for which the Predicate does *NOT* match? What is it?
6523 auto SplatValue = llvm::find_if_not(Range&: Values, P: Predicate);
6524 if (SplatValue != Values.end()) {
6525 // Does Values consist only of SplatValue's and values matching Predicate?
6526 if (llvm::all_of(Range&: Values, P: [Predicate, SplatValue](SDValue Value) {
6527 return Value == *SplatValue || Predicate(Value);
6528 })) // Then we shall replace values matching predicate with SplatValue.
6529 Replacement = *SplatValue;
6530 }
6531 if (!Replacement) {
6532 // Oops, we did not find the "baseline" splat value.
6533 if (!AlternativeReplacement)
6534 return; // Nothing to do.
6535 // Let's replace with provided value then.
6536 Replacement = AlternativeReplacement;
6537 }
6538 std::replace_if(first: Values.begin(), last: Values.end(), pred: Predicate, new_value: Replacement);
6539}
6540
6541/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6542/// where the divisor is constant and the comparison target is zero,
6543/// return a DAG expression that will generate the same comparison result
6544/// using only multiplications, additions and shifts/rotations.
6545/// Ref: "Hacker's Delight" 10-17.
6546SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6547 SDValue CompTargetNode,
6548 ISD::CondCode Cond,
6549 DAGCombinerInfo &DCI,
6550 const SDLoc &DL) const {
6551 SmallVector<SDNode *, 5> Built;
6552 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6553 DCI, DL, Created&: Built)) {
6554 for (SDNode *N : Built)
6555 DCI.AddToWorklist(N);
6556 return Folded;
6557 }
6558
6559 return SDValue();
6560}
6561
6562SDValue
6563TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
6564 SDValue CompTargetNode, ISD::CondCode Cond,
6565 DAGCombinerInfo &DCI, const SDLoc &DL,
6566 SmallVectorImpl<SDNode *> &Created) const {
6567 // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
6568 // - D must be constant, with D = D0 * 2^K where D0 is odd
6569 // - P is the multiplicative inverse of D0 modulo 2^W
6570 // - Q = floor(((2^W) - 1) / D)
6571 // where W is the width of the common type of N and D.
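  // Worked example (illustrative, W = 8): for (seteq (urem x, 6), 0) we get
  // D0 = 3, K = 1, P = inv(3, 2^8) = 171 and Q = floor(255 / 6) = 42. For
  // x = 12: 12 * 171 = 4 (mod 256) and rotr(4, 1) = 2 u<= 42, so true; for
  // x = 13: 13 * 171 = 175 (mod 256) and rotr(175, 1) = 215 u> 42, so false.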
6572 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6573 "Only applicable for (in)equality comparisons.");
6574
6575 SelectionDAG &DAG = DCI.DAG;
6576
6577 EVT VT = REMNode.getValueType();
6578 EVT SVT = VT.getScalarType();
6579 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout(), LegalTypes: !DCI.isBeforeLegalize());
6580 EVT ShSVT = ShVT.getScalarType();
6581
6582 // If MUL is unavailable, we cannot proceed in any case.
6583 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::MUL, VT))
6584 return SDValue();
6585
6586 bool ComparingWithAllZeros = true;
6587 bool AllComparisonsWithNonZerosAreTautological = true;
6588 bool HadTautologicalLanes = false;
6589 bool AllLanesAreTautological = true;
6590 bool HadEvenDivisor = false;
6591 bool AllDivisorsArePowerOfTwo = true;
6592 bool HadTautologicalInvertedLanes = false;
6593 SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
6594
6595 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
6596 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6597 if (CDiv->isZero())
6598 return false;
6599
6600 const APInt &D = CDiv->getAPIntValue();
6601 const APInt &Cmp = CCmp->getAPIntValue();
6602
6603 ComparingWithAllZeros &= Cmp.isZero();
6604
    // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
    // if C2 is not less than C1, the comparison is always false.
    // But we will only be able to produce the comparison that will give the
    // opposite tautological answer. So this lane would need to be fixed up.
6609 bool TautologicalInvertedLane = D.ule(RHS: Cmp);
6610 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
6611
6612 // If all lanes are tautological (either all divisors are ones, or divisor
6613 // is not greater than the constant we are comparing with),
6614 // we will prefer to avoid the fold.
6615 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
6616 HadTautologicalLanes |= TautologicalLane;
6617 AllLanesAreTautological &= TautologicalLane;
6618
    // If we are comparing with non-zero, we'll need to subtract said
    // comparison value from the LHS. But there is no point in doing that if
    // every lane where we are comparing with non-zero is tautological.
6622 if (!Cmp.isZero())
6623 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
6624
6625 // Decompose D into D0 * 2^K
6626 unsigned K = D.countr_zero();
6627 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6628 APInt D0 = D.lshr(shiftAmt: K);
6629
6630 // D is even if it has trailing zeros.
6631 HadEvenDivisor |= (K != 0);
6632 // D is a power-of-two if D0 is one.
6633 // If all divisors are power-of-two, we will prefer to avoid the fold.
6634 AllDivisorsArePowerOfTwo &= D0.isOne();
6635
6636 // P = inv(D0, 2^W)
6637 // 2^W requires W + 1 bits, so we have to extend and then truncate.
6638 unsigned W = D.getBitWidth();
6639 APInt P = D0.zext(width: W + 1)
6640 .multiplicativeInverse(modulo: APInt::getSignedMinValue(numBits: W + 1))
6641 .trunc(width: W);
6642 assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
6643 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6644
6645 // Q = floor((2^W - 1) u/ D)
6646 // R = ((2^W - 1) u% D)
6647 APInt Q, R;
6648 APInt::udivrem(LHS: APInt::getAllOnes(numBits: W), RHS: D, Quotient&: Q, Remainder&: R);
6649
6650 // If we are comparing with zero, then that comparison constant is okay,
6651 // else it may need to be one less than that.
6652 if (Cmp.ugt(RHS: R))
6653 Q -= 1;
6654
6655 assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
6656 "We are expecting that K is always less than all-ones for ShSVT");
6657
6658 // If the lane is tautological the result can be constant-folded.
6659 if (TautologicalLane) {
      // Set P and K to bogus values so we can try to splat them.
      P = 0;
      K = -1;
      // And ensure that the comparison constant is tautological, i.e. it will
      // always compare true/false.
      Q = -1;
6666 }
6667
6668 PAmts.push_back(Elt: DAG.getConstant(Val: P, DL, VT: SVT));
6669 KAmts.push_back(
6670 Elt: DAG.getConstant(Val: APInt(ShSVT.getSizeInBits(), K), DL, VT: ShSVT));
6671 QAmts.push_back(Elt: DAG.getConstant(Val: Q, DL, VT: SVT));
6672 return true;
6673 };
6674
6675 SDValue N = REMNode.getOperand(i: 0);
6676 SDValue D = REMNode.getOperand(i: 1);
6677
6678 // Collect the values from each element.
6679 if (!ISD::matchBinaryPredicate(LHS: D, RHS: CompTargetNode, Match: BuildUREMPattern))
6680 return SDValue();
6681
6682 // If all lanes are tautological, the result can be constant-folded.
6683 if (AllLanesAreTautological)
6684 return SDValue();
6685
  // If this is a urem by a power of two, avoid the fold since it can be
  // best implemented as a bit test.
6688 if (AllDivisorsArePowerOfTwo)
6689 return SDValue();
6690
6691 SDValue PVal, KVal, QVal;
6692 if (D.getOpcode() == ISD::BUILD_VECTOR) {
6693 if (HadTautologicalLanes) {
6694 // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep the '0's.
6696 turnVectorIntoSplatVector(Values: PAmts, Predicate: isNullConstant);
6697 // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0's.
6699 turnVectorIntoSplatVector(Values: KAmts, Predicate: isAllOnesConstant,
6700 AlternativeReplacement: DAG.getConstant(Val: 0, DL, VT: ShSVT));
6701 }
6702
6703 PVal = DAG.getBuildVector(VT, DL, Ops: PAmts);
6704 KVal = DAG.getBuildVector(VT: ShVT, DL, Ops: KAmts);
6705 QVal = DAG.getBuildVector(VT, DL, Ops: QAmts);
6706 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
6707 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
6708 "Expected matchBinaryPredicate to return one element for "
6709 "SPLAT_VECTORs");
6710 PVal = DAG.getSplatVector(VT, DL, Op: PAmts[0]);
6711 KVal = DAG.getSplatVector(VT: ShVT, DL, Op: KAmts[0]);
6712 QVal = DAG.getSplatVector(VT, DL, Op: QAmts[0]);
6713 } else {
6714 PVal = PAmts[0];
6715 KVal = KAmts[0];
6716 QVal = QAmts[0];
6717 }
6718
6719 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
6720 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::SUB, VT))
6721 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
6722 assert(CompTargetNode.getValueType() == N.getValueType() &&
6723 "Expecting that the types on LHS and RHS of comparisons match.");
6724 N = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: N, N2: CompTargetNode);
6725 }
6726
6727 // (mul N, P)
6728 SDValue Op0 = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N, N2: PVal);
6729 Created.push_back(Elt: Op0.getNode());
6730
6731 // Rotate right only if any divisor was even. We avoid rotates for all-odd
6732 // divisors as a performance improvement, since rotating by 0 is a no-op.
6733 if (HadEvenDivisor) {
6734 // We need ROTR to do this.
6735 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ROTR, VT))
6736 return SDValue();
6737 // UREM: (rotr (mul N, P), K)
6738 Op0 = DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: Op0, N2: KVal);
6739 Created.push_back(Elt: Op0.getNode());
6740 }
6741
6742 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
6743 SDValue NewCC =
6744 DAG.getSetCC(DL, VT: SETCCVT, LHS: Op0, RHS: QVal,
6745 Cond: ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
6746 if (!HadTautologicalInvertedLanes)
6747 return NewCC;
6748
  // If any lanes previously compared always-false, the NewCC will give an
  // always-true result for them, so we need to fix up those lanes.
  // Or the other way around for the inequality predicate.
6752 assert(VT.isVector() && "Can/should only get here for vectors.");
6753 Created.push_back(Elt: NewCC.getNode());
6754
  // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
  // if C2 is not less than C1, the comparison is always false.
  // But we have produced the comparison that will give the
  // opposite tautological answer. So these lanes would need to be fixed up.
6759 SDValue TautologicalInvertedChannels =
6760 DAG.getSetCC(DL, VT: SETCCVT, LHS: D, RHS: CompTargetNode, Cond: ISD::SETULE);
6761 Created.push_back(Elt: TautologicalInvertedChannels.getNode());
6762
  // NOTE: we avoid letting illegal types through even if we're before legalize
  // ops; legalization has a hard time producing good code for this.
6765 if (isOperationLegalOrCustom(Op: ISD::VSELECT, VT: SETCCVT)) {
6766 // If we have a vector select, let's replace the comparison results in the
6767 // affected lanes with the correct tautological result.
    SDValue Replacement = DAG.getBoolConstant(V: Cond == ISD::SETNE, DL,
                                              VT: SETCCVT, OpVT: SETCCVT);
6770 return DAG.getNode(Opcode: ISD::VSELECT, DL, VT: SETCCVT, N1: TautologicalInvertedChannels,
6771 N2: Replacement, N3: NewCC);
6772 }
6773
6774 // Else, we can just invert the comparison result in the appropriate lanes.
6775 //
  // NOTE: see the note above the VSELECT case above.
6777 if (isOperationLegalOrCustom(Op: ISD::XOR, VT: SETCCVT))
6778 return DAG.getNode(Opcode: ISD::XOR, DL, VT: SETCCVT, N1: NewCC,
6779 N2: TautologicalInvertedChannels);
6780
6781 return SDValue(); // Don't know how to lower.
6782}
6783
6784/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
6785/// where the divisor is constant and the comparison target is zero,
6786/// return a DAG expression that will generate the same comparison result
6787/// using only multiplications, additions and shifts/rotations.
6788/// Ref: "Hacker's Delight" 10-17.
6789SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
6790 SDValue CompTargetNode,
6791 ISD::CondCode Cond,
6792 DAGCombinerInfo &DCI,
6793 const SDLoc &DL) const {
6794 SmallVector<SDNode *, 7> Built;
6795 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6796 DCI, DL, Created&: Built)) {
6797 assert(Built.size() <= 7 && "Max size prediction failed.");
6798 for (SDNode *N : Built)
6799 DCI.AddToWorklist(N);
6800 return Folded;
6801 }
6802
6803 return SDValue();
6804}
6805
6806SDValue
6807TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
6808 SDValue CompTargetNode, ISD::CondCode Cond,
6809 DAGCombinerInfo &DCI, const SDLoc &DL,
6810 SmallVectorImpl<SDNode *> &Created) const {
6811 // Fold:
6812 // (seteq/ne (srem N, D), 0)
6813 // To:
6814 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
6815 //
6816 // - D must be constant, with D = D0 * 2^K where D0 is odd
6817 // - P is the multiplicative inverse of D0 modulo 2^W
6818 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
6819 // - Q = floor((2 * A) / (2^K))
6820 // where W is the width of the common type of N and D.
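  // Worked example (illustrative, W = 8): for (seteq (srem x, 6), 0) we get
  // D0 = 3, K = 1, P = 171, A = floor(127 / 3) & -2 = 42 and Q = 42. For
  // x = 12: 12 * 171 + 42 = 46 (mod 256) and rotr(46, 1) = 23 u<= 42 (true);
  // for x = -3: -3 * 171 + 42 = 41 (mod 256) and rotr(41, 1) = 148 u> 42
  // (false).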
6821 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6822 "Only applicable for (in)equality comparisons.");
6823
6824 SelectionDAG &DAG = DCI.DAG;
6825
6826 EVT VT = REMNode.getValueType();
6827 EVT SVT = VT.getScalarType();
6828 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout(), LegalTypes: !DCI.isBeforeLegalize());
6829 EVT ShSVT = ShVT.getScalarType();
6830
  // If we are after ops legalization and MUL is unavailable, we cannot
  // proceed.
6833 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::MUL, VT))
6834 return SDValue();
6835
6836 // TODO: Could support comparing with non-zero too.
6837 ConstantSDNode *CompTarget = isConstOrConstSplat(N: CompTargetNode);
6838 if (!CompTarget || !CompTarget->isZero())
6839 return SDValue();
6840
6841 bool HadIntMinDivisor = false;
6842 bool HadOneDivisor = false;
6843 bool AllDivisorsAreOnes = true;
6844 bool HadEvenDivisor = false;
6845 bool NeedToApplyOffset = false;
6846 bool AllDivisorsArePowerOfTwo = true;
6847 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
6848
6849 auto BuildSREMPattern = [&](ConstantSDNode *C) {
6850 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6851 if (C->isZero())
6852 return false;
6853
6854 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
6855
6856 // WARNING: this fold is only valid for positive divisors!
6857 APInt D = C->getAPIntValue();
6858 if (D.isNegative())
6859 D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
6860
6861 HadIntMinDivisor |= D.isMinSignedValue();
6862
6863 // If all divisors are ones, we will prefer to avoid the fold.
6864 HadOneDivisor |= D.isOne();
6865 AllDivisorsAreOnes &= D.isOne();
6866
6867 // Decompose D into D0 * 2^K
6868 unsigned K = D.countr_zero();
6869 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6870 APInt D0 = D.lshr(shiftAmt: K);
6871
6872 if (!D.isMinSignedValue()) {
      // D is even if it has trailing zeros; but if it's INT_MIN we don't care
      // about this lane in this fold, since we'll special-handle it.
6875 HadEvenDivisor |= (K != 0);
6876 }
6877
6878 // D is a power-of-two if D0 is one. This includes INT_MIN.
6879 // If all divisors are power-of-two, we will prefer to avoid the fold.
6880 AllDivisorsArePowerOfTwo &= D0.isOne();
6881
6882 // P = inv(D0, 2^W)
6883 // 2^W requires W + 1 bits, so we have to extend and then truncate.
6884 unsigned W = D.getBitWidth();
6885 APInt P = D0.zext(width: W + 1)
6886 .multiplicativeInverse(modulo: APInt::getSignedMinValue(numBits: W + 1))
6887 .trunc(width: W);
6888 assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
6889 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6890
6891 // A = floor((2^(W - 1) - 1) / D0) & -2^K
6892 APInt A = APInt::getSignedMaxValue(numBits: W).udiv(RHS: D0);
6893 A.clearLowBits(loBits: K);
6894
6895 if (!D.isMinSignedValue()) {
      // If the divisor is INT_MIN, we don't care about this lane in this
      // fold; we'll special-handle it.
6898 NeedToApplyOffset |= A != 0;
6899 }
6900
6901 // Q = floor((2 * A) / (2^K))
6902 APInt Q = (2 * A).udiv(RHS: APInt::getOneBitSet(numBits: W, BitNo: K));
6903
6904 assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
6905 "We are expecting that A is always less than all-ones for SVT");
6906 assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
6907 "We are expecting that K is always less than all-ones for ShSVT");
6908
    // If the divisor is 1 the result can be constant-folded. Likewise, we
    // don't care about INT_MIN lanes; those can be set to undef if appropriate.
    if (D.isOne()) {
      // Set P, A and K to bogus values so we can try to splat them.
6913 P = 0;
6914 A = -1;
6915 K = -1;
6916
6917 // x ?% 1 == 0 <--> true <--> x u<= -1
6918 Q = -1;
6919 }
6920
6921 PAmts.push_back(Elt: DAG.getConstant(Val: P, DL, VT: SVT));
6922 AAmts.push_back(Elt: DAG.getConstant(Val: A, DL, VT: SVT));
6923 KAmts.push_back(
6924 Elt: DAG.getConstant(Val: APInt(ShSVT.getSizeInBits(), K), DL, VT: ShSVT));
6925 QAmts.push_back(Elt: DAG.getConstant(Val: Q, DL, VT: SVT));
6926 return true;
6927 };
6928
6929 SDValue N = REMNode.getOperand(i: 0);
6930 SDValue D = REMNode.getOperand(i: 1);
6931
6932 // Collect the values from each element.
6933 if (!ISD::matchUnaryPredicate(Op: D, Match: BuildSREMPattern))
6934 return SDValue();
6935
  // If this is an srem by one, avoid the fold since it can be constant-folded.
6937 if (AllDivisorsAreOnes)
6938 return SDValue();
6939
  // If this is an srem by a power-of-two (including INT_MIN), avoid the fold
  // since it is best implemented as a bit test.
6942 if (AllDivisorsArePowerOfTwo)
6943 return SDValue();
6944
6945 SDValue PVal, AVal, KVal, QVal;
6946 if (D.getOpcode() == ISD::BUILD_VECTOR) {
6947 if (HadOneDivisor) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0's.
      turnVectorIntoSplatVector(Values: PAmts, Predicate: isNullConstant);
      // Try to turn AAmts into a splat, since we don't care about the
      // values that are currently '-1'. If we can't, change them to '0's.
      turnVectorIntoSplatVector(Values: AAmts, Predicate: isAllOnesConstant,
                                AlternativeReplacement: DAG.getConstant(Val: 0, DL, VT: SVT));
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0's.
      turnVectorIntoSplatVector(Values: KAmts, Predicate: isAllOnesConstant,
                                AlternativeReplacement: DAG.getConstant(Val: 0, DL, VT: ShSVT));
6959 }
6960
6961 PVal = DAG.getBuildVector(VT, DL, Ops: PAmts);
6962 AVal = DAG.getBuildVector(VT, DL, Ops: AAmts);
6963 KVal = DAG.getBuildVector(VT: ShVT, DL, Ops: KAmts);
6964 QVal = DAG.getBuildVector(VT, DL, Ops: QAmts);
6965 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
6966 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
6967 QAmts.size() == 1 &&
6968 "Expected matchUnaryPredicate to return one element for scalable "
6969 "vectors");
6970 PVal = DAG.getSplatVector(VT, DL, Op: PAmts[0]);
6971 AVal = DAG.getSplatVector(VT, DL, Op: AAmts[0]);
6972 KVal = DAG.getSplatVector(VT: ShVT, DL, Op: KAmts[0]);
6973 QVal = DAG.getSplatVector(VT, DL, Op: QAmts[0]);
6974 } else {
6975 assert(isa<ConstantSDNode>(D) && "Expected a constant");
6976 PVal = PAmts[0];
6977 AVal = AAmts[0];
6978 KVal = KAmts[0];
6979 QVal = QAmts[0];
6980 }
6981
6982 // (mul N, P)
6983 SDValue Op0 = DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: N, N2: PVal);
6984 Created.push_back(Elt: Op0.getNode());
6985
6986 if (NeedToApplyOffset) {
6987 // We need ADD to do this.
6988 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ADD, VT))
6989 return SDValue();
6990
6991 // (add (mul N, P), A)
6992 Op0 = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0, N2: AVal);
6993 Created.push_back(Elt: Op0.getNode());
6994 }
6995
6996 // Rotate right only if any divisor was even. We avoid rotates for all-odd
6997 // divisors as a performance improvement, since rotating by 0 is a no-op.
6998 if (HadEvenDivisor) {
6999 // We need ROTR to do this.
7000 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(Op: ISD::ROTR, VT))
7001 return SDValue();
7002 // SREM: (rotr (add (mul N, P), A), K)
7003 Op0 = DAG.getNode(Opcode: ISD::ROTR, DL, VT, N1: Op0, N2: KVal);
7004 Created.push_back(Elt: Op0.getNode());
7005 }
7006
7007 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7008 SDValue Fold =
7009 DAG.getSetCC(DL, VT: SETCCVT, LHS: Op0, RHS: QVal,
7010 Cond: ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7011
7012 // If we didn't have lanes with INT_MIN divisor, then we're done.
7013 if (!HadIntMinDivisor)
7014 return Fold;
7015
  // That fold is only valid for positive divisors, which effectively means it
  // is invalid for INT_MIN divisors. So if we have such a lane, we must
  // fix-up the results for said lanes.
7019 assert(VT.isVector() && "Can/should only get here for vectors.");
7020
  // NOTE: we avoid letting illegal types through even if we're before legalize
  // ops; legalization has a hard time producing good code for the code that
  // follows.
7024 if (!isOperationLegalOrCustom(Op: ISD::SETCC, VT: SETCCVT) ||
7025 !isOperationLegalOrCustom(Op: ISD::AND, VT) ||
7026 !isCondCodeLegalOrCustom(CC: Cond, VT: VT.getSimpleVT()) ||
7027 !isOperationLegalOrCustom(Op: ISD::VSELECT, VT: SETCCVT))
7028 return SDValue();
7029
7030 Created.push_back(Elt: Fold.getNode());
7031
7032 SDValue IntMin = DAG.getConstant(
7033 Val: APInt::getSignedMinValue(numBits: SVT.getScalarSizeInBits()), DL, VT);
7034 SDValue IntMax = DAG.getConstant(
7035 Val: APInt::getSignedMaxValue(numBits: SVT.getScalarSizeInBits()), DL, VT);
7036 SDValue Zero =
7037 DAG.getConstant(Val: APInt::getZero(numBits: SVT.getScalarSizeInBits()), DL, VT);
7038
7039 // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7040 SDValue DivisorIsIntMin = DAG.getSetCC(DL, VT: SETCCVT, LHS: D, RHS: IntMin, Cond: ISD::SETEQ);
7041 Created.push_back(Elt: DivisorIsIntMin.getNode());
7042
7043 // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
7044 SDValue Masked = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: N, N2: IntMax);
7045 Created.push_back(Elt: Masked.getNode());
7046 SDValue MaskedIsZero = DAG.getSetCC(DL, VT: SETCCVT, LHS: Masked, RHS: Zero, Cond);
7047 Created.push_back(Elt: MaskedIsZero.getNode());
7048
  // To produce the final result we need to blend 2 vectors: 'Fold' and
  // 'MaskedIsZero'. If the divisor for a channel was *NOT* INT_MIN, we pick
  // from 'Fold', else we pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
  // constant-folded, the select can get lowered to a shuffle with constant mask.
7053 SDValue Blended = DAG.getNode(Opcode: ISD::VSELECT, DL, VT: SETCCVT, N1: DivisorIsIntMin,
7054 N2: MaskedIsZero, N3: Fold);
7055
7056 return Blended;
7057}
7058
7059bool TargetLowering::
7060verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
7061 if (!isa<ConstantSDNode>(Val: Op.getOperand(i: 0))) {
7062 DAG.getContext()->emitError(ErrorStr: "argument to '__builtin_return_address' must "
7063 "be a constant integer");
7064 return true;
7065 }
7066
7067 return false;
7068}
7069
7070SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
7071 const DenormalMode &Mode) const {
7072 SDLoc DL(Op);
7073 EVT VT = Op.getValueType();
7074 EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
7075 SDValue FPZero = DAG.getConstantFP(Val: 0.0, DL, VT);
7076
7077 // This is specifically a check for the handling of denormal inputs, not the
7078 // result.
7079 if (Mode.Input == DenormalMode::PreserveSign ||
7080 Mode.Input == DenormalMode::PositiveZero) {
7081 // Test = X == 0.0
7082 return DAG.getSetCC(DL, VT: CCVT, LHS: Op, RHS: FPZero, Cond: ISD::SETEQ);
7083 }
7084
  // Test for denormal inputs to avoid a wrong estimate.
7086 //
7087 // Test = fabs(X) < SmallestNormal
7088 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
7089 APFloat SmallestNorm = APFloat::getSmallestNormalized(Sem: FltSem);
7090 SDValue NormC = DAG.getConstantFP(Val: SmallestNorm, DL, VT);
7091 SDValue Fabs = DAG.getNode(Opcode: ISD::FABS, DL, VT, Operand: Op);
7092 return DAG.getSetCC(DL, VT: CCVT, LHS: Fabs, RHS: NormC, Cond: ISD::SETLT);
7093}
7094
7095SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
7096 bool LegalOps, bool OptForSize,
7097 NegatibleCost &Cost,
7098 unsigned Depth) const {
7099 // fneg is removable even if it has multiple uses.
7100 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
7101 Cost = NegatibleCost::Cheaper;
7102 return Op.getOperand(i: 0);
7103 }
7104
7105 // Don't recurse exponentially.
7106 if (Depth > SelectionDAG::MaxRecursionDepth)
7107 return SDValue();
7108
7109 // Pre-increment recursion depth for use in recursive calls.
7110 ++Depth;
7111 const SDNodeFlags Flags = Op->getFlags();
7112 const TargetOptions &Options = DAG.getTarget().Options;
7113 EVT VT = Op.getValueType();
7114 unsigned Opcode = Op.getOpcode();
7115
7116 // Don't allow anything with multiple uses unless we know it is free.
7117 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7118 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7119 isFPExtFree(DestVT: VT, SrcVT: Op.getOperand(i: 0).getValueType());
7120 if (!IsFreeExtend)
7121 return SDValue();
7122 }
7123
7124 auto RemoveDeadNode = [&](SDValue N) {
7125 if (N && N.getNode()->use_empty())
7126 DAG.RemoveDeadNode(N: N.getNode());
7127 };
7128
7129 SDLoc DL(Op);
7130
7131 // Because getNegatedExpression can delete nodes we need a handle to keep
7132 // temporary nodes alive in case the recursion manages to create an identical
7133 // node.
7134 std::list<HandleSDNode> Handles;
7135
7136 switch (Opcode) {
7137 case ISD::ConstantFP: {
7138 // Don't invert constant FP values after legalization unless the target says
7139 // the negated constant is legal.
7140 bool IsOpLegal =
7141 isOperationLegal(Op: ISD::ConstantFP, VT) ||
7142 isFPImmLegal(neg(X: cast<ConstantFPSDNode>(Val&: Op)->getValueAPF()), VT,
7143 ForCodeSize: OptForSize);
7144
7145 if (LegalOps && !IsOpLegal)
7146 break;
7147
7148 APFloat V = cast<ConstantFPSDNode>(Val&: Op)->getValueAPF();
7149 V.changeSign();
7150 SDValue CFP = DAG.getConstantFP(Val: V, DL, VT);
7151
    // If we already have the use of the negated floating constant, it is free
    // to negate it even if it has multiple uses.
7154 if (!Op.hasOneUse() && CFP.use_empty())
7155 break;
7156 Cost = NegatibleCost::Neutral;
7157 return CFP;
7158 }
7159 case ISD::BUILD_VECTOR: {
7160 // Only permit BUILD_VECTOR of constants.
7161 if (llvm::any_of(Range: Op->op_values(), P: [&](SDValue N) {
7162 return !N.isUndef() && !isa<ConstantFPSDNode>(Val: N);
7163 }))
7164 break;
7165
7166 bool IsOpLegal =
7167 (isOperationLegal(Op: ISD::ConstantFP, VT) &&
7168 isOperationLegal(Op: ISD::BUILD_VECTOR, VT)) ||
7169 llvm::all_of(Range: Op->op_values(), P: [&](SDValue N) {
7170 return N.isUndef() ||
7171 isFPImmLegal(neg(X: cast<ConstantFPSDNode>(Val&: N)->getValueAPF()), VT,
7172 ForCodeSize: OptForSize);
7173 });
7174
7175 if (LegalOps && !IsOpLegal)
7176 break;
7177
7178 SmallVector<SDValue, 4> Ops;
7179 for (SDValue C : Op->op_values()) {
7180 if (C.isUndef()) {
7181 Ops.push_back(Elt: C);
7182 continue;
7183 }
7184 APFloat V = cast<ConstantFPSDNode>(Val&: C)->getValueAPF();
7185 V.changeSign();
7186 Ops.push_back(Elt: DAG.getConstantFP(Val: V, DL, VT: C.getValueType()));
7187 }
7188 Cost = NegatibleCost::Neutral;
7189 return DAG.getBuildVector(VT, DL, Ops);
7190 }
7191 case ISD::FADD: {
7192 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7193 break;
7194
7195 // After operation legalization, it might not be legal to create new FSUBs.
7196 if (LegalOps && !isOperationLegalOrCustom(Op: ISD::FSUB, VT))
7197 break;
7198 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1);
7199
7200 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7201 NegatibleCost CostX = NegatibleCost::Expensive;
7202 SDValue NegX =
7203 getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7204 // Prevent this node from being deleted by the next call.
7205 if (NegX)
7206 Handles.emplace_back(args&: NegX);
7207
7208 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7209 NegatibleCost CostY = NegatibleCost::Expensive;
7210 SDValue NegY =
7211 getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7212
7213 // We're done with the handles.
7214 Handles.clear();
7215
    // Negate X if its cost is less than or equal to the cost of Y.
7217 if (NegX && (CostX <= CostY)) {
7218 Cost = CostX;
7219 SDValue N = DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: NegX, N2: Y, Flags);
7220 if (NegY != N)
7221 RemoveDeadNode(NegY);
7222 return N;
7223 }
7224
    // Negate Y if it is not expensive.
7226 if (NegY) {
7227 Cost = CostY;
7228 SDValue N = DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: NegY, N2: X, Flags);
7229 if (NegX != N)
7230 RemoveDeadNode(NegX);
7231 return N;
7232 }
7233 break;
7234 }
7235 case ISD::FSUB: {
7236 // We can't turn -(A-B) into B-A when we honor signed zeros.
7237 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7238 break;
7239
7240 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1);
7241 // fold (fneg (fsub 0, Y)) -> Y
7242 if (ConstantFPSDNode *C = isConstOrConstSplatFP(N: X, /*AllowUndefs*/ true))
7243 if (C->isZero()) {
7244 Cost = NegatibleCost::Cheaper;
7245 return Y;
7246 }
7247
7248 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7249 Cost = NegatibleCost::Neutral;
7250 return DAG.getNode(Opcode: ISD::FSUB, DL, VT, N1: Y, N2: X, Flags);
7251 }
7252 case ISD::FMUL:
7253 case ISD::FDIV: {
7254 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1);
7255
7256 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7257 NegatibleCost CostX = NegatibleCost::Expensive;
7258 SDValue NegX =
7259 getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7260 // Prevent this node from being deleted by the next call.
7261 if (NegX)
7262 Handles.emplace_back(args&: NegX);
7263
7264 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7265 NegatibleCost CostY = NegatibleCost::Expensive;
7266 SDValue NegY =
7267 getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7268
7269 // We're done with the handles.
7270 Handles.clear();
7271
    // Negate X if its cost is less than or equal to the cost of Y.
7273 if (NegX && (CostX <= CostY)) {
7274 Cost = CostX;
7275 SDValue N = DAG.getNode(Opcode, DL, VT, N1: NegX, N2: Y, Flags);
7276 if (NegY != N)
7277 RemoveDeadNode(NegY);
7278 return N;
7279 }
7280
7281 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7282 if (auto *C = isConstOrConstSplatFP(N: Op.getOperand(i: 1)))
7283 if (C->isExactlyValue(V: 2.0) && Op.getOpcode() == ISD::FMUL)
7284 break;
7285
    // Negate Y if it is not expensive.
7287 if (NegY) {
7288 Cost = CostY;
7289 SDValue N = DAG.getNode(Opcode, DL, VT, N1: X, N2: NegY, Flags);
7290 if (NegX != N)
7291 RemoveDeadNode(NegX);
7292 return N;
7293 }
7294 break;
7295 }
7296 case ISD::FMA:
7297 case ISD::FMAD: {
7298 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7299 break;
7300
7301 SDValue X = Op.getOperand(i: 0), Y = Op.getOperand(i: 1), Z = Op.getOperand(i: 2);
7302 NegatibleCost CostZ = NegatibleCost::Expensive;
7303 SDValue NegZ =
7304 getNegatedExpression(Op: Z, DAG, LegalOps, OptForSize, Cost&: CostZ, Depth);
    // Give up if we fail to negate Z.
7306 if (!NegZ)
7307 break;
7308
7309 // Prevent this node from being deleted by the next two calls.
7310 Handles.emplace_back(args&: NegZ);
7311
7312 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7313 NegatibleCost CostX = NegatibleCost::Expensive;
7314 SDValue NegX =
7315 getNegatedExpression(Op: X, DAG, LegalOps, OptForSize, Cost&: CostX, Depth);
7316 // Prevent this node from being deleted by the next call.
7317 if (NegX)
7318 Handles.emplace_back(args&: NegX);
7319
7320 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7321 NegatibleCost CostY = NegatibleCost::Expensive;
7322 SDValue NegY =
7323 getNegatedExpression(Op: Y, DAG, LegalOps, OptForSize, Cost&: CostY, Depth);
7324
7325 // We're done with the handles.
7326 Handles.clear();
7327
    // Negate X if its cost is less than or equal to the cost of Y.
7329 if (NegX && (CostX <= CostY)) {
7330 Cost = std::min(a: CostX, b: CostZ);
7331 SDValue N = DAG.getNode(Opcode, DL, VT, N1: NegX, N2: Y, N3: NegZ, Flags);
7332 if (NegY != N)
7333 RemoveDeadNode(NegY);
7334 return N;
7335 }
7336
    // Negate Y if it is not expensive.
7338 if (NegY) {
7339 Cost = std::min(a: CostY, b: CostZ);
7340 SDValue N = DAG.getNode(Opcode, DL, VT, N1: X, N2: NegY, N3: NegZ, Flags);
7341 if (NegX != N)
7342 RemoveDeadNode(NegX);
7343 return N;
7344 }
7345 break;
7346 }
7347
7348 case ISD::FP_EXTEND:
7349 case ISD::FSIN:
7350 if (SDValue NegV = getNegatedExpression(Op: Op.getOperand(i: 0), DAG, LegalOps,
7351 OptForSize, Cost, Depth))
7352 return DAG.getNode(Opcode, DL, VT, Operand: NegV);
7353 break;
7354 case ISD::FP_ROUND:
7355 if (SDValue NegV = getNegatedExpression(Op: Op.getOperand(i: 0), DAG, LegalOps,
7356 OptForSize, Cost, Depth))
7357 return DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT, N1: NegV, N2: Op.getOperand(i: 1));
7358 break;
7359 case ISD::SELECT:
7360 case ISD::VSELECT: {
7361 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7362 // iff at least one cost is cheaper and the other is neutral/cheaper
7363 SDValue LHS = Op.getOperand(i: 1);
7364 NegatibleCost CostLHS = NegatibleCost::Expensive;
7365 SDValue NegLHS =
7366 getNegatedExpression(Op: LHS, DAG, LegalOps, OptForSize, Cost&: CostLHS, Depth);
7367 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7368 RemoveDeadNode(NegLHS);
7369 break;
7370 }
7371
7372 // Prevent this node from being deleted by the next call.
7373 Handles.emplace_back(args&: NegLHS);
7374
7375 SDValue RHS = Op.getOperand(i: 2);
7376 NegatibleCost CostRHS = NegatibleCost::Expensive;
7377 SDValue NegRHS =
7378 getNegatedExpression(Op: RHS, DAG, LegalOps, OptForSize, Cost&: CostRHS, Depth);
7379
7380 // We're done with the handles.
7381 Handles.clear();
7382
7383 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7384 (CostLHS != NegatibleCost::Cheaper &&
7385 CostRHS != NegatibleCost::Cheaper)) {
7386 RemoveDeadNode(NegLHS);
7387 RemoveDeadNode(NegRHS);
7388 break;
7389 }
7390
7391 Cost = std::min(a: CostLHS, b: CostRHS);
7392 return DAG.getSelect(DL, VT, Cond: Op.getOperand(i: 0), LHS: NegLHS, RHS: NegRHS);
7393 }
7394 }
7395
7396 return SDValue();
7397}
7398
7399//===----------------------------------------------------------------------===//
7400// Legalization Utilities
7401//===----------------------------------------------------------------------===//
7402
7403bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7404 SDValue LHS, SDValue RHS,
7405 SmallVectorImpl<SDValue> &Result,
7406 EVT HiLoVT, SelectionDAG &DAG,
7407 MulExpansionKind Kind, SDValue LL,
7408 SDValue LH, SDValue RL, SDValue RH) const {
7409 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7410 Opcode == ISD::SMUL_LOHI);
7411
7412 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7413 isOperationLegalOrCustom(Op: ISD::MULHS, VT: HiLoVT);
7414 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7415 isOperationLegalOrCustom(Op: ISD::MULHU, VT: HiLoVT);
7416 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7417 isOperationLegalOrCustom(Op: ISD::SMUL_LOHI, VT: HiLoVT);
7418 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7419 isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT: HiLoVT);
7420
7421 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7422 return false;
7423
7424 unsigned OuterBitSize = VT.getScalarSizeInBits();
7425 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7426
7427 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7428 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7429 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7430
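  // In outline, the expansion below uses the schoolbook decomposition: with
  // half-width n and L = LH * 2^n + LL, R = RH * 2^n + RL, the full product is
  //   L * R = LH*RH * 2^(2n) + (LH*RL + LL*RH) * 2^n + LL*RL,
  // assembled from half-width multiplies plus carry propagation.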
7431 SDVTList VTs = DAG.getVTList(VT1: HiLoVT, VT2: HiLoVT);
7432 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7433 bool Signed) -> bool {
7434 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7435 Lo = DAG.getNode(Opcode: Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, DL: dl, VTList: VTs, N1: L, N2: R);
7436 Hi = SDValue(Lo.getNode(), 1);
7437 return true;
7438 }
7439 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7440 Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: L, N2: R);
7441 Hi = DAG.getNode(Opcode: Signed ? ISD::MULHS : ISD::MULHU, DL: dl, VT: HiLoVT, N1: L, N2: R);
7442 return true;
7443 }
7444 return false;
7445 };
7446
7447 SDValue Lo, Hi;
7448
7449 if (!LL.getNode() && !RL.getNode() &&
7450 isOperationLegalOrCustom(Op: ISD::TRUNCATE, VT: HiLoVT)) {
7451 LL = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: LHS);
7452 RL = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: RHS);
7453 }
7454
7455 if (!LL.getNode())
7456 return false;
7457
7458 APInt HighMask = APInt::getHighBitsSet(numBits: OuterBitSize, hiBitsSet: InnerBitSize);
7459 if (DAG.MaskedValueIsZero(Op: LHS, Mask: HighMask) &&
7460 DAG.MaskedValueIsZero(Op: RHS, Mask: HighMask)) {
7461 // The inputs are both zero-extended.
7462 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7463 Result.push_back(Elt: Lo);
7464 Result.push_back(Elt: Hi);
7465 if (Opcode != ISD::MUL) {
7466 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT);
7467 Result.push_back(Elt: Zero);
7468 Result.push_back(Elt: Zero);
7469 }
7470 return true;
7471 }
7472 }
7473
7474 if (!VT.isVector() && Opcode == ISD::MUL &&
7475 DAG.ComputeMaxSignificantBits(Op: LHS) <= InnerBitSize &&
7476 DAG.ComputeMaxSignificantBits(Op: RHS) <= InnerBitSize) {
7477 // The input values are both sign-extended.
7478 // TODO non-MUL case?
7479 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
7480 Result.push_back(Elt: Lo);
7481 Result.push_back(Elt: Hi);
7482 return true;
7483 }
7484 }
7485
7486 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7487 SDValue Shift = DAG.getShiftAmountConstant(Val: ShiftAmount, VT, DL: dl);
7488
7489 if (!LH.getNode() && !RH.getNode() &&
7490 isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
7491 isOperationLegalOrCustom(Op: ISD::TRUNCATE, VT: HiLoVT)) {
7492 LH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: LHS, N2: Shift);
7493 LH = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: LH);
7494 RH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: RHS, N2: Shift);
7495 RH = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: RH);
7496 }
7497
7498 if (!LH.getNode())
7499 return false;
7500
7501 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
7502 return false;
7503
7504 Result.push_back(Elt: Lo);
7505
7506 if (Opcode == ISD::MUL) {
7507 RH = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: LL, N2: RH);
7508 LH = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: HiLoVT, N1: LH, N2: RL);
7509 Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Hi, N2: RH);
7510 Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Hi, N2: LH);
7511 Result.push_back(Elt: Hi);
7512 return true;
7513 }
7514
7515 // Compute the full width result.
7516 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
7517 Lo = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Lo);
7518 Hi = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Hi);
7519 Hi = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Hi, N2: Shift);
7520 return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Lo, N2: Hi);
7521 };
7522
7523 SDValue Next = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Hi);
7524 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
7525 return false;
7526
7527 // This is effectively the add part of a multiply-add of half-sized operands,
7528 // so it cannot overflow.
7529 Next = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Next, N2: Merge(Lo, Hi));
7530
7531 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
7532 return false;
7533
7534 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT);
7535 EVT BoolType = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
7536
7537 bool UseGlue = (isOperationLegalOrCustom(Op: ISD::ADDC, VT) &&
7538 isOperationLegalOrCustom(Op: ISD::ADDE, VT));
7539 if (UseGlue)
7540 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
7541 Merge(Lo, Hi));
7542 else
7543 Next = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolType), N1: Next,
7544 N2: Merge(Lo, Hi), N3: DAG.getConstant(Val: 0, DL: dl, VT: BoolType));
7545
7546 SDValue Carry = Next.getValue(R: 1);
7547 Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
7548 Next = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Next, N2: Shift);
7549
7550 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
7551 return false;
7552
7553 if (UseGlue)
7554 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
7555 Carry);
7556 else
7557 Hi = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList: DAG.getVTList(VT1: HiLoVT, VT2: BoolType), N1: Hi,
7558 N2: Zero, N3: Carry);
7559
7560 Next = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Next, N2: Merge(Lo, Hi));
7561
7562 if (Opcode == ISD::SMUL_LOHI) {
7563 SDValue NextSub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Next,
7564 N2: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: RL));
7565 Next = DAG.getSelectCC(DL: dl, LHS: LH, RHS: Zero, True: NextSub, False: Next, Cond: ISD::SETLT);
7566
7567 NextSub = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Next,
7568 N2: DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: LL));
7569 Next = DAG.getSelectCC(DL: dl, LHS: RH, RHS: Zero, True: NextSub, False: Next, Cond: ISD::SETLT);
7570 }
7571
7572 Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
7573 Next = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Next, N2: Shift);
7574 Result.push_back(Elt: DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: HiLoVT, Operand: Next));
7575 return true;
7576}
7577
7578bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
7579 SelectionDAG &DAG, MulExpansionKind Kind,
7580 SDValue LL, SDValue LH, SDValue RL,
7581 SDValue RH) const {
7582 SmallVector<SDValue, 2> Result;
7583 bool Ok = expandMUL_LOHI(Opcode: N->getOpcode(), VT: N->getValueType(ResNo: 0), dl: SDLoc(N),
7584 LHS: N->getOperand(Num: 0), RHS: N->getOperand(Num: 1), Result, HiLoVT,
7585 DAG, Kind, LL, LH, RL, RH);
7586 if (Ok) {
7587 assert(Result.size() == 2);
7588 Lo = Result[0];
7589 Hi = Result[1];
7590 }
7591 return Ok;
7592}
7593
7594// Optimize unsigned division or remainder by constants for types twice as large
7595// as a legal VT.
7596//
7597// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
7598// can be computed
7599// as:
7600// Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7601// Remainder = Sum % Constant
7602// This is based on "Remainder by Summing Digits" from Hacker's Delight.
7603//
7604// For division, we can compute the remainder using the algorithm described
7605// above, subtract it from the dividend to get an exact multiple of Constant.
7606// Then multiply that extact multiply by the multiplicative inverse modulo
7607// (1 << (BitWidth / 2)) to get the quotient.
7608
7609// If Constant is even, we can shift right the dividend and the divisor by the
7610// number of trailing zeros in Constant before applying the remainder algorithm.
7611// If we're after the quotient, we can subtract this value from the shifted
7612// dividend and multiply by the multiplicative inverse of the shifted divisor.
7613// If we want the remainder, we shift the value left by the number of trailing
7614// zeros and add the bits that were shifted out of the dividend.
7615bool TargetLowering::expandDIVREMByConstant(SDNode *N,
7616 SmallVectorImpl<SDValue> &Result,
7617 EVT HiLoVT, SelectionDAG &DAG,
7618 SDValue LL, SDValue LH) const {
7619 unsigned Opcode = N->getOpcode();
7620 EVT VT = N->getValueType(ResNo: 0);
7621
7622 // TODO: Support signed division/remainder.
7623 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
7624 return false;
7625 assert(
7626 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
7627 "Unexpected opcode");
7628
7629 auto *CN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
7630 if (!CN)
7631 return false;
7632
7633 APInt Divisor = CN->getAPIntValue();
7634 unsigned BitWidth = Divisor.getBitWidth();
7635 unsigned HBitWidth = BitWidth / 2;
7636 assert(VT.getScalarSizeInBits() == BitWidth &&
7637 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
7638
  // Divisor needs to be less than (1 << HBitWidth).
7640 APInt HalfMaxPlus1 = APInt::getOneBitSet(numBits: BitWidth, BitNo: HBitWidth);
7641 if (Divisor.uge(RHS: HalfMaxPlus1))
7642 return false;
7643
  // We depend on the UREM by constant optimization in DAGCombiner that
  // requires a high multiply.
7646 if (!isOperationLegalOrCustom(Op: ISD::MULHU, VT: HiLoVT) &&
7647 !isOperationLegalOrCustom(Op: ISD::UMUL_LOHI, VT: HiLoVT))
7648 return false;
7649
7650 // Don't expand if optimizing for size.
7651 if (DAG.shouldOptForSize())
7652 return false;
7653
7654 // Early out for 0 or 1 divisors.
7655 if (Divisor.ule(RHS: 1))
7656 return false;
7657
7658 // If the divisor is even, shift it until it becomes odd.
7659 unsigned TrailingZeros = 0;
7660 if (!Divisor[0]) {
7661 TrailingZeros = Divisor.countr_zero();
7662 Divisor.lshrInPlace(ShiftAmt: TrailingZeros);
7663 }
7664
7665 SDLoc dl(N);
7666 SDValue Sum;
7667 SDValue PartialRem;
7668
7669 // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
7670 // then add in the carry.
7671 // TODO: If we can't split it in half, we might be able to split into 3 or
7672 // more pieces using a smaller bit width.
7673 if (HalfMaxPlus1.urem(RHS: Divisor).isOne()) {
7674 assert(!LL == !LH && "Expected both input halves or no input halves!");
7675 if (!LL)
7676 std::tie(args&: LL, args&: LH) = DAG.SplitScalar(N: N->getOperand(Num: 0), DL: dl, LoVT: HiLoVT, HiVT: HiLoVT);
7677
7678 // Shift the input by the number of TrailingZeros in the divisor. The
7679 // shifted out bits will be added to the remainder later.
7680 if (TrailingZeros) {
7681 // Save the shifted off bits if we need the remainder.
7682 if (Opcode != ISD::UDIV) {
7683 APInt Mask = APInt::getLowBitsSet(numBits: HBitWidth, loBitsSet: TrailingZeros);
7684 PartialRem = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: HiLoVT, N1: LL,
7685 N2: DAG.getConstant(Val: Mask, DL: dl, VT: HiLoVT));
7686 }
7687
7688 LL = DAG.getNode(
7689 Opcode: ISD::OR, DL: dl, VT: HiLoVT,
7690 N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HiLoVT, N1: LL,
7691 N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl)),
7692 N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HiLoVT, N1: LH,
7693 N2: DAG.getShiftAmountConstant(Val: HBitWidth - TrailingZeros,
7694 VT: HiLoVT, DL: dl)));
7695 LH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: HiLoVT, N1: LH,
7696 N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl));
7697 }
7698
7699 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
7700 EVT SetCCType =
7701 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: HiLoVT);
7702 if (isOperationLegalOrCustom(Op: ISD::UADDO_CARRY, VT: HiLoVT)) {
7703 SDVTList VTList = DAG.getVTList(VT1: HiLoVT, VT2: SetCCType);
7704 Sum = DAG.getNode(Opcode: ISD::UADDO, DL: dl, VTList, N1: LL, N2: LH);
7705 Sum = DAG.getNode(Opcode: ISD::UADDO_CARRY, DL: dl, VTList, N1: Sum,
7706 N2: DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT), N3: Sum.getValue(R: 1));
7707 } else {
7708 Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: LL, N2: LH);
7709 SDValue Carry = DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Sum, RHS: LL, Cond: ISD::SETULT);
7710 // If the boolean for the target is 0 or 1, we can add the setcc result
7711 // directly.
7712 if (getBooleanContents(Type: HiLoVT) ==
7713 TargetLoweringBase::ZeroOrOneBooleanContent)
7714 Carry = DAG.getZExtOrTrunc(Op: Carry, DL: dl, VT: HiLoVT);
7715 else
7716 Carry = DAG.getSelect(DL: dl, VT: HiLoVT, Cond: Carry, LHS: DAG.getConstant(Val: 1, DL: dl, VT: HiLoVT),
7717 RHS: DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT));
7718 Sum = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: Sum, N2: Carry);
7719 }
7720 }
7721
7722 // If we didn't find a sum, we can't do the expansion.
7723 if (!Sum)
7724 return false;
7725
7726 // Perform a HiLoVT urem on the Sum using truncated divisor.
7727 SDValue RemL =
7728 DAG.getNode(Opcode: ISD::UREM, DL: dl, VT: HiLoVT, N1: Sum,
7729 N2: DAG.getConstant(Val: Divisor.trunc(width: HBitWidth), DL: dl, VT: HiLoVT));
7730 SDValue RemH = DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT);
7731
7732 if (Opcode != ISD::UREM) {
7733 // Subtract the remainder from the shifted dividend.
7734 SDValue Dividend = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT, N1: LL, N2: LH);
7735 SDValue Rem = DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT, N1: RemL, N2: RemH);
7736
7737 Dividend = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Dividend, N2: Rem);
7738
7739 // Multiply by the multiplicative inverse of the divisor modulo
7740 // (1 << BitWidth).
7741 APInt Mod = APInt::getSignedMinValue(numBits: BitWidth + 1);
7742 APInt MulFactor = Divisor.zext(width: BitWidth + 1);
7743 MulFactor = MulFactor.multiplicativeInverse(modulo: Mod);
7744 MulFactor = MulFactor.trunc(width: BitWidth);
7745
7746 SDValue Quotient = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Dividend,
7747 N2: DAG.getConstant(Val: MulFactor, DL: dl, VT));
7748
7749 // Split the quotient into low and high parts.
7750 SDValue QuotL, QuotH;
7751 std::tie(args&: QuotL, args&: QuotH) = DAG.SplitScalar(N: Quotient, DL: dl, LoVT: HiLoVT, HiVT: HiLoVT);
7752 Result.push_back(Elt: QuotL);
7753 Result.push_back(Elt: QuotH);
7754 }
7755
7756 if (Opcode != ISD::UDIV) {
7757 // If we shifted the input, shift the remainder left and add the bits we
7758 // shifted off the input.
7759 if (TrailingZeros) {
7760 APInt Mask = APInt::getLowBitsSet(numBits: HBitWidth, loBitsSet: TrailingZeros);
7761 RemL = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: HiLoVT, N1: RemL,
7762 N2: DAG.getShiftAmountConstant(Val: TrailingZeros, VT: HiLoVT, DL: dl));
7763 RemL = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: HiLoVT, N1: RemL, N2: PartialRem);
7764 }
7765 Result.push_back(Elt: RemL);
7766 Result.push_back(Elt: DAG.getConstant(Val: 0, DL: dl, VT: HiLoVT));
7767 }
7768
7769 return true;
7770}
7771
7772// Check that (every element of) Z is undef or not an exact multiple of BW.
7773static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
7774 return ISD::matchUnaryPredicate(
7775 Op: Z,
7776 Match: [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(RHS: BW) != 0; },
7777 AllowUndefs: true);
7778}
7779
7780static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
7781 EVT VT = Node->getValueType(ResNo: 0);
7782 SDValue ShX, ShY;
7783 SDValue ShAmt, InvShAmt;
7784 SDValue X = Node->getOperand(Num: 0);
7785 SDValue Y = Node->getOperand(Num: 1);
7786 SDValue Z = Node->getOperand(Num: 2);
7787 SDValue Mask = Node->getOperand(Num: 3);
7788 SDValue VL = Node->getOperand(Num: 4);
7789
7790 unsigned BW = VT.getScalarSizeInBits();
7791 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
7792 SDLoc DL(SDValue(Node, 0));
7793
7794 EVT ShVT = Z.getValueType();
7795 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7796 // fshl: X << C | Y >> (BW - C)
7797 // fshr: X << (BW - C) | Y >> C
7798 // where C = Z % BW is not zero
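    // (e.g. for i8, fshl X, Y, 3 == (X << 3) | (Y >> 5))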
7799 SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
7800 ShAmt = DAG.getNode(Opcode: ISD::VP_UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC, N3: Mask, N4: VL);
7801 InvShAmt = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT: ShVT, N1: BitWidthC, N2: ShAmt, N3: Mask, N4: VL);
7802 ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: IsFSHL ? ShAmt : InvShAmt, N3: Mask,
7803 N4: VL);
7804 ShY = DAG.getNode(Opcode: ISD::VP_LSHR, DL, VT, N1: Y, N2: IsFSHL ? InvShAmt : ShAmt, N3: Mask,
7805 N4: VL);
7806 } else {
7807 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7808 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
7809 SDValue BitMask = DAG.getConstant(Val: BW - 1, DL, VT: ShVT);
7810 if (isPowerOf2_32(Value: BW)) {
7811 // Z % BW -> Z & (BW - 1)
7812 ShAmt = DAG.getNode(Opcode: ISD::VP_AND, DL, VT: ShVT, N1: Z, N2: BitMask, N3: Mask, N4: VL);
7813 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7814 SDValue NotZ = DAG.getNode(Opcode: ISD::VP_XOR, DL, VT: ShVT, N1: Z,
7815 N2: DAG.getAllOnesConstant(DL, VT: ShVT), N3: Mask, N4: VL);
7816 InvShAmt = DAG.getNode(Opcode: ISD::VP_AND, DL, VT: ShVT, N1: NotZ, N2: BitMask, N3: Mask, N4: VL);
7817 } else {
7818 SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
7819 ShAmt = DAG.getNode(Opcode: ISD::VP_UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC, N3: Mask, N4: VL);
7820 InvShAmt = DAG.getNode(Opcode: ISD::VP_SUB, DL, VT: ShVT, N1: BitMask, N2: ShAmt, N3: Mask, N4: VL);
7821 }
7822
7823 SDValue One = DAG.getConstant(Val: 1, DL, VT: ShVT);
7824 if (IsFSHL) {
7825 ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: ShAmt, N3: Mask, N4: VL);
7826 SDValue ShY1 = DAG.getNode(Opcode: ISD::VP_LSHR, DL, VT, N1: Y, N2: One, N3: Mask, N4: VL);
7827 ShY = DAG.getNode(Opcode: ISD::VP_LSHR, DL, VT, N1: ShY1, N2: InvShAmt, N3: Mask, N4: VL);
7828 } else {
7829 SDValue ShX1 = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: X, N2: One, N3: Mask, N4: VL);
7830 ShX = DAG.getNode(Opcode: ISD::VP_SHL, DL, VT, N1: ShX1, N2: InvShAmt, N3: Mask, N4: VL);
7831 ShY = DAG.getNode(Opcode: ISD::VP_LSHR, DL, VT, N1: Y, N2: ShAmt, N3: Mask, N4: VL);
7832 }
7833 }
7834 return DAG.getNode(Opcode: ISD::VP_OR, DL, VT, N1: ShX, N2: ShY, N3: Mask, N4: VL);
7835}
7836
7837SDValue TargetLowering::expandFunnelShift(SDNode *Node,
7838 SelectionDAG &DAG) const {
7839 if (Node->isVPOpcode())
7840 return expandVPFunnelShift(Node, DAG);
7841
7842 EVT VT = Node->getValueType(ResNo: 0);
7843
7844 if (VT.isVector() && (!isOperationLegalOrCustom(Op: ISD::SHL, VT) ||
7845 !isOperationLegalOrCustom(Op: ISD::SRL, VT) ||
7846 !isOperationLegalOrCustom(Op: ISD::SUB, VT) ||
7847 !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)))
7848 return SDValue();
7849
7850 SDValue X = Node->getOperand(Num: 0);
7851 SDValue Y = Node->getOperand(Num: 1);
7852 SDValue Z = Node->getOperand(Num: 2);
7853
7854 unsigned BW = VT.getScalarSizeInBits();
7855 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
7856 SDLoc DL(SDValue(Node, 0));
7857
7858 EVT ShVT = Z.getValueType();
7859
  // If a funnel shift in the other direction is better supported, use it.
7861 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
7862 if (!isOperationLegalOrCustom(Op: Node->getOpcode(), VT) &&
7863 isOperationLegalOrCustom(Op: RevOpcode, VT) && isPowerOf2_32(Value: BW)) {
7864 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7865 // fshl X, Y, Z -> fshr X, Y, -Z
7866 // fshr X, Y, Z -> fshl X, Y, -Z
7867 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: ShVT);
7868 Z = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Zero, N2: Z);
7869 } else {
7870 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7871 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7872 SDValue One = DAG.getConstant(Val: 1, DL, VT: ShVT);
7873 if (IsFSHL) {
7874 Y = DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: One);
7875 X = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: X, N2: One);
7876 } else {
7877 X = DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: One);
7878 Y = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Y, N2: One);
7879 }
7880 Z = DAG.getNOT(DL, Val: Z, VT: ShVT);
7881 }
7882 return DAG.getNode(Opcode: RevOpcode, DL, VT, N1: X, N2: Y, N3: Z);
7883 }
7884
7885 SDValue ShX, ShY;
7886 SDValue ShAmt, InvShAmt;
7887 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7888 // fshl: X << C | Y >> (BW - C)
7889 // fshr: X << (BW - C) | Y >> C
7890 // where C = Z % BW is not zero
7891 SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
7892 ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC);
7893 InvShAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: BitWidthC, N2: ShAmt);
7894 ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: IsFSHL ? ShAmt : InvShAmt);
7895 ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: IsFSHL ? InvShAmt : ShAmt);
7896 } else {
7897 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7898 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
7899 SDValue Mask = DAG.getConstant(Val: BW - 1, DL, VT: ShVT);
7900 if (isPowerOf2_32(Value: BW)) {
7901 // Z % BW -> Z & (BW - 1)
7902 ShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: Z, N2: Mask);
7903 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7904 InvShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: DAG.getNOT(DL, Val: Z, VT: ShVT), N2: Mask);
7905 } else {
7906 SDValue BitWidthC = DAG.getConstant(Val: BW, DL, VT: ShVT);
7907 ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Z, N2: BitWidthC);
7908 InvShAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Mask, N2: ShAmt);
7909 }
7910
7911 SDValue One = DAG.getConstant(Val: 1, DL, VT: ShVT);
7912 if (IsFSHL) {
7913 ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: ShAmt);
7914 SDValue ShY1 = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: One);
7915 ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: ShY1, N2: InvShAmt);
7916 } else {
7917 SDValue ShX1 = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: X, N2: One);
7918 ShX = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: ShX1, N2: InvShAmt);
7919 ShY = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Y, N2: ShAmt);
7920 }
7921 }
7922 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShX, N2: ShY);
7923}
7924
7925// TODO: Merge with expandFunnelShift.
7926SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
7927 SelectionDAG &DAG) const {
7928 EVT VT = Node->getValueType(ResNo: 0);
7929 unsigned EltSizeInBits = VT.getScalarSizeInBits();
7930 bool IsLeft = Node->getOpcode() == ISD::ROTL;
7931 SDValue Op0 = Node->getOperand(Num: 0);
7932 SDValue Op1 = Node->getOperand(Num: 1);
7933 SDLoc DL(SDValue(Node, 0));
7934
7935 EVT ShVT = Op1.getValueType();
7936 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: ShVT);
7937
  // If a rotate in the other direction is better supported, use it.
7939 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
7940 if (!isOperationLegalOrCustom(Op: Node->getOpcode(), VT) &&
7941 isOperationLegalOrCustom(Op: RevRot, VT) && isPowerOf2_32(Value: EltSizeInBits)) {
7942 SDValue Sub = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Zero, N2: Op1);
7943 return DAG.getNode(Opcode: RevRot, DL, VT, N1: Op0, N2: Sub);
7944 }
7945
7946 if (!AllowVectorOps && VT.isVector() &&
7947 (!isOperationLegalOrCustom(Op: ISD::SHL, VT) ||
7948 !isOperationLegalOrCustom(Op: ISD::SRL, VT) ||
7949 !isOperationLegalOrCustom(Op: ISD::SUB, VT) ||
7950 !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT) ||
7951 !isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT)))
7952 return SDValue();
7953
7954 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
7955 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
7956 SDValue BitWidthMinusOneC = DAG.getConstant(Val: EltSizeInBits - 1, DL, VT: ShVT);
7957 SDValue ShVal;
7958 SDValue HsVal;
7959 if (isPowerOf2_32(Value: EltSizeInBits)) {
7960 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
7961 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
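    // (e.g. for i8, rotl x, 3 == (x << 3) | (x >> 5), since -3 & 7 == 5)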
7962 SDValue NegOp1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: Zero, N2: Op1);
7963 SDValue ShAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: Op1, N2: BitWidthMinusOneC);
7964 ShVal = DAG.getNode(Opcode: ShOpc, DL, VT, N1: Op0, N2: ShAmt);
7965 SDValue HsAmt = DAG.getNode(Opcode: ISD::AND, DL, VT: ShVT, N1: NegOp1, N2: BitWidthMinusOneC);
7966 HsVal = DAG.getNode(Opcode: HsOpc, DL, VT, N1: Op0, N2: HsAmt);
7967 } else {
7968 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
7969 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
7970 SDValue BitWidthC = DAG.getConstant(Val: EltSizeInBits, DL, VT: ShVT);
7971 SDValue ShAmt = DAG.getNode(Opcode: ISD::UREM, DL, VT: ShVT, N1: Op1, N2: BitWidthC);
7972 ShVal = DAG.getNode(Opcode: ShOpc, DL, VT, N1: Op0, N2: ShAmt);
7973 SDValue HsAmt = DAG.getNode(Opcode: ISD::SUB, DL, VT: ShVT, N1: BitWidthMinusOneC, N2: ShAmt);
7974 SDValue One = DAG.getConstant(Val: 1, DL, VT: ShVT);
7975 HsVal =
7976 DAG.getNode(Opcode: HsOpc, DL, VT, N1: DAG.getNode(Opcode: HsOpc, DL, VT, N1: Op0, N2: One), N2: HsAmt);
7977 }
7978 return DAG.getNode(Opcode: ISD::OR, DL, VT, N1: ShVal, N2: HsVal);
7979}
7980
7981void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
7982 SelectionDAG &DAG) const {
7983 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
7984 EVT VT = Node->getValueType(ResNo: 0);
7985 unsigned VTBits = VT.getScalarSizeInBits();
7986 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
7987
7988 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
7989 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
7990 SDValue ShOpLo = Node->getOperand(Num: 0);
7991 SDValue ShOpHi = Node->getOperand(Num: 1);
7992 SDValue ShAmt = Node->getOperand(Num: 2);
7993 EVT ShAmtVT = ShAmt.getValueType();
7994 EVT ShAmtCCVT =
7995 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: ShAmtVT);
7996 SDLoc dl(Node);
7997
  // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
  // ISD::SRA/SRL nodes do not. Insert an AND to be safe; it's usually
  // optimized away during isel.
8001 SDValue SafeShAmt = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ShAmtVT, N1: ShAmt,
8002 N2: DAG.getConstant(Val: VTBits - 1, DL: dl, VT: ShAmtVT));
8003 SDValue Tmp1 = IsSRA ? DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: ShOpHi,
8004 N2: DAG.getConstant(Val: VTBits - 1, DL: dl, VT: ShAmtVT))
8005 : DAG.getConstant(Val: 0, DL: dl, VT);
8006
8007 SDValue Tmp2, Tmp3;
8008 if (IsSHL) {
8009 Tmp2 = DAG.getNode(Opcode: ISD::FSHL, DL: dl, VT, N1: ShOpHi, N2: ShOpLo, N3: ShAmt);
8010 Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: ShOpLo, N2: SafeShAmt);
8011 } else {
8012 Tmp2 = DAG.getNode(Opcode: ISD::FSHR, DL: dl, VT, N1: ShOpHi, N2: ShOpLo, N3: ShAmt);
8013 Tmp3 = DAG.getNode(Opcode: IsSRA ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: ShOpHi, N2: SafeShAmt);
8014 }
8015
  // If the shift amount is larger than or equal to the width of a part, we
  // don't use the result from the FSHL/FSHR. Insert a test and select the
  // appropriate values for large shift amounts.
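  // (For instance, with 32-bit parts and ShAmt == 40, SHL_PARTS produces
  // Hi = Lo << 8 and Lo = 0 via the selects below, rather than the
  // FSHL-based result for small shift amounts.)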
8019 SDValue AndNode = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ShAmtVT, N1: ShAmt,
8020 N2: DAG.getConstant(Val: VTBits, DL: dl, VT: ShAmtVT));
8021 SDValue Cond = DAG.getSetCC(DL: dl, VT: ShAmtCCVT, LHS: AndNode,
8022 RHS: DAG.getConstant(Val: 0, DL: dl, VT: ShAmtVT), Cond: ISD::SETNE);
8023
8024 if (IsSHL) {
8025 Hi = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp3, N3: Tmp2);
8026 Lo = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp1, N3: Tmp3);
8027 } else {
8028 Lo = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp3, N3: Tmp2);
8029 Hi = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT, N1: Cond, N2: Tmp1, N3: Tmp3);
8030 }
8031}
8032
8033bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
8034 SelectionDAG &DAG) const {
8035 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8036 SDValue Src = Node->getOperand(Num: OpNo);
8037 EVT SrcVT = Src.getValueType();
8038 EVT DstVT = Node->getValueType(ResNo: 0);
8039 SDLoc dl(SDValue(Node, 0));
8040
8041 // FIXME: Only f32 to i64 conversions are supported.
8042 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
8043 return false;
8044
8045 if (Node->isStrictFPOpcode())
    // When a NaN is converted to an integer, a trap is allowed. We can't
    // use this expansion here because it would eliminate that trap. Other
    // traps are also allowed and cannot be eliminated. See
    // IEEE 754-2008 sec 5.8.
8050 return false;
8051
8052 // Expand f32 -> i64 conversion
8053 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8054 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
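  //
  // As a worked instance, 1.5f has bits 0x3FC00000: the biased exponent is
  // 127 (unbiased 0) and the significand with the implicit bit restored is
  // 0x00C00000. Since the unbiased exponent is below 23, the result is
  // 0x00C00000 >> (23 - 0) == 1, matching truncation toward zero.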
8055 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8056 EVT IntVT = SrcVT.changeTypeToInteger();
8057 EVT IntShVT = getShiftAmountTy(LHSTy: IntVT, DL: DAG.getDataLayout());
8058
8059 SDValue ExponentMask = DAG.getConstant(Val: 0x7F800000, DL: dl, VT: IntVT);
8060 SDValue ExponentLoBit = DAG.getConstant(Val: 23, DL: dl, VT: IntVT);
8061 SDValue Bias = DAG.getConstant(Val: 127, DL: dl, VT: IntVT);
8062 SDValue SignMask = DAG.getConstant(Val: APInt::getSignMask(BitWidth: SrcEltBits), DL: dl, VT: IntVT);
8063 SDValue SignLowBit = DAG.getConstant(Val: SrcEltBits - 1, DL: dl, VT: IntVT);
8064 SDValue MantissaMask = DAG.getConstant(Val: 0x007FFFFF, DL: dl, VT: IntVT);
8065
8066 SDValue Bits = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: IntVT, Operand: Src);
8067
8068 SDValue ExponentBits = DAG.getNode(
8069 Opcode: ISD::SRL, DL: dl, VT: IntVT, N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: ExponentMask),
8070 N2: DAG.getZExtOrTrunc(Op: ExponentLoBit, DL: dl, VT: IntShVT));
8071 SDValue Exponent = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: ExponentBits, N2: Bias);
8072
8073 SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: IntVT,
8074 N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: SignMask),
8075 N2: DAG.getZExtOrTrunc(Op: SignLowBit, DL: dl, VT: IntShVT));
8076 Sign = DAG.getSExtOrTrunc(Op: Sign, DL: dl, VT: DstVT);
8077
8078 SDValue R = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: IntVT,
8079 N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntVT, N1: Bits, N2: MantissaMask),
8080 N2: DAG.getConstant(Val: 0x00800000, DL: dl, VT: IntVT));
8081
8082 R = DAG.getZExtOrTrunc(Op: R, DL: dl, VT: DstVT);
8083
8084 R = DAG.getSelectCC(
8085 DL: dl, LHS: Exponent, RHS: ExponentLoBit,
8086 True: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: DstVT, N1: R,
8087 N2: DAG.getZExtOrTrunc(
8088 Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: Exponent, N2: ExponentLoBit),
8089 DL: dl, VT: IntShVT)),
8090 False: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: DstVT, N1: R,
8091 N2: DAG.getZExtOrTrunc(
8092 Op: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: IntVT, N1: ExponentLoBit, N2: Exponent),
8093 DL: dl, VT: IntShVT)),
8094 Cond: ISD::SETGT);
8095
8096 SDValue Ret = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: DstVT,
8097 N1: DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: R, N2: Sign), N2: Sign);
8098
8099 Result = DAG.getSelectCC(DL: dl, LHS: Exponent, RHS: DAG.getConstant(Val: 0, DL: dl, VT: IntVT),
8100 True: DAG.getConstant(Val: 0, DL: dl, VT: DstVT), False: Ret, Cond: ISD::SETLT);
8101 return true;
8102}
8103
8104bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
8105 SDValue &Chain,
8106 SelectionDAG &DAG) const {
8107 SDLoc dl(SDValue(Node, 0));
8108 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8109 SDValue Src = Node->getOperand(Num: OpNo);
8110
8111 EVT SrcVT = Src.getValueType();
8112 EVT DstVT = Node->getValueType(ResNo: 0);
8113 EVT SetCCVT =
8114 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: SrcVT);
8115 EVT DstSetCCVT =
8116 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: DstVT);
8117
8118 // Only expand vector types if we have the appropriate vector bit operations.
8119 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
8120 ISD::FP_TO_SINT;
8121 if (DstVT.isVector() && (!isOperationLegalOrCustom(Op: SIntOpcode, VT: DstVT) ||
8122 !isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT: SrcVT)))
8123 return false;
8124
  // If the maximum float value is smaller than the signed integer range, the
  // destination signmask can't be represented by the float, so we can just
  // use FP_TO_SINT directly.
8128 const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(VT: SrcVT);
8129 APFloat APF(APFSem, APInt::getZero(numBits: SrcVT.getScalarSizeInBits()));
8130 APInt SignMask = APInt::getSignMask(BitWidth: DstVT.getScalarSizeInBits());
8131 if (APFloat::opOverflow &
8132 APF.convertFromAPInt(Input: SignMask, IsSigned: false, RM: APFloat::rmNearestTiesToEven)) {
8133 if (Node->isStrictFPOpcode()) {
8134 Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8135 { Node->getOperand(0), Src });
8136 Chain = Result.getValue(R: 1);
8137 } else
8138 Result = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Src);
8139 return true;
8140 }
8141
  // Don't expand it if there isn't a cheap fsub instruction.
8143 if (!isOperationLegalOrCustom(
8144 Op: Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, VT: SrcVT))
8145 return false;
8146
8147 SDValue Cst = DAG.getConstantFP(Val: APF, DL: dl, VT: SrcVT);
8148 SDValue Sel;
8149
8150 if (Node->isStrictFPOpcode()) {
8151 Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT,
8152 Chain: Node->getOperand(Num: 0), /*IsSignaling*/ true);
8153 Chain = Sel.getValue(R: 1);
8154 } else {
8155 Sel = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Src, RHS: Cst, Cond: ISD::SETLT);
8156 }
8157
8158 bool Strict = Node->isStrictFPOpcode() ||
8159 shouldUseStrictFP_TO_INT(FpVT: SrcVT, IntVT: DstVT, /*IsSigned*/ false);
8160
8161 if (Strict) {
    // Expand based on maximum range of FP_TO_SINT: if the value exceeds the
    // signmask, offset it first (the result of which should be fully
    // representable).
8164 // Sel = Src < 0x8000000000000000
8165 // FltOfs = select Sel, 0, 0x8000000000000000
8166 // IntOfs = select Sel, 0, 0x8000000000000000
8167 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8168
8169 // TODO: Should any fast-math-flags be set for the FSUB?
8170 SDValue FltOfs = DAG.getSelect(DL: dl, VT: SrcVT, Cond: Sel,
8171 LHS: DAG.getConstantFP(Val: 0.0, DL: dl, VT: SrcVT), RHS: Cst);
8172 Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT);
8173 SDValue IntOfs = DAG.getSelect(DL: dl, VT: DstVT, Cond: Sel,
8174 LHS: DAG.getConstant(Val: 0, DL: dl, VT: DstVT),
8175 RHS: DAG.getConstant(Val: SignMask, DL: dl, VT: DstVT));
8176 SDValue SInt;
8177 if (Node->isStrictFPOpcode()) {
8178 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
8179 { Chain, Src, FltOfs });
8180 SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8181 { Val.getValue(1), Val });
8182 Chain = SInt.getValue(R: 1);
8183 } else {
8184 SDValue Val = DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: SrcVT, N1: Src, N2: FltOfs);
8185 SInt = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Val);
8186 }
8187 Result = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: SInt, N2: IntOfs);
8188 } else {
8189 // Expand based on maximum range of FP_TO_SINT:
8190 // True = fp_to_sint(Src)
8191 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
8192 // Result = select (Src < 0x8000000000000000), True, False
8193
8194 SDValue True = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT, Operand: Src);
8195 // TODO: Should any fast-math-flags be set for the FSUB?
8196 SDValue False = DAG.getNode(Opcode: ISD::FP_TO_SINT, DL: dl, VT: DstVT,
8197 Operand: DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: SrcVT, N1: Src, N2: Cst));
8198 False = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: DstVT, N1: False,
8199 N2: DAG.getConstant(Val: SignMask, DL: dl, VT: DstVT));
8200 Sel = DAG.getBoolExtOrTrunc(Op: Sel, SL: dl, VT: DstSetCCVT, OpVT: DstVT);
8201 Result = DAG.getSelect(DL: dl, VT: DstVT, Cond: Sel, LHS: True, RHS: False);
8202 }
8203 return true;
8204}
8205
8206bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
8207 SDValue &Chain,
8208 SelectionDAG &DAG) const {
  // This transform is not correct for converting 0 when the rounding mode is
  // set to round toward negative infinity, as it will produce -0.0. So
  // disable it under strictfp.
8212 if (Node->isStrictFPOpcode())
8213 return false;
8214
8215 SDValue Src = Node->getOperand(Num: 0);
8216 EVT SrcVT = Src.getValueType();
8217 EVT DstVT = Node->getValueType(ResNo: 0);
8218
8219 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
8220 return false;
8221
8222 // Only expand vector types if we have the appropriate vector bit operations.
8223 if (SrcVT.isVector() && (!isOperationLegalOrCustom(Op: ISD::SRL, VT: SrcVT) ||
8224 !isOperationLegalOrCustom(Op: ISD::FADD, VT: DstVT) ||
8225 !isOperationLegalOrCustom(Op: ISD::FSUB, VT: DstVT) ||
8226 !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT: SrcVT) ||
8227 !isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT: SrcVT)))
8228 return false;
8229
8230 SDLoc dl(SDValue(Node, 0));
8231 EVT ShiftVT = getShiftAmountTy(LHSTy: SrcVT, DL: DAG.getDataLayout());
8232
8233 // Implementation of unsigned i64 to f64 following the algorithm in
8234 // __floatundidf in compiler_rt. This implementation performs rounding
8235 // correctly in all rounding modes with the exception of converting 0
8236 // when rounding toward negative infinity. In that case the fsub will produce
  // -0.0. This will be added to +0.0 and produce -0.0, which is incorrect.
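  //
  // In scalar terms the expansion below computes:
  //   LoFlt = bitcast<double>(0x4330000000000000 | (x & 0xFFFFFFFF))
  //   HiFlt = bitcast<double>(0x4530000000000000 | (x >> 32))
  //   result = (HiFlt - (2^84 + 2^52)) + LoFlt
  // where 0x4330000000000000 is the bit pattern of 2^52 and
  // 0x4530000000000000 that of 2^84: OR'ing a 32-bit value into their zero
  // mantissas yields exactly 2^52 + Lo and 2^84 + Hi * 2^32, so the FSUB
  // leaves Hi * 2^32 - 2^52 and the final FADD computes Hi * 2^32 + Lo with
  // a single rounding.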
8238 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), DL: dl, VT: SrcVT);
8239 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
8240 Val: llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), DL: dl, VT: DstVT);
8241 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), DL: dl, VT: SrcVT);
8242 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), DL: dl, VT: SrcVT);
8243 SDValue HiShift = DAG.getConstant(Val: 32, DL: dl, VT: ShiftVT);
8244
8245 SDValue Lo = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: SrcVT, N1: Src, N2: LoMask);
8246 SDValue Hi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: SrcVT, N1: Src, N2: HiShift);
8247 SDValue LoOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: SrcVT, N1: Lo, N2: TwoP52);
8248 SDValue HiOr = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: SrcVT, N1: Hi, N2: TwoP84);
8249 SDValue LoFlt = DAG.getBitcast(VT: DstVT, V: LoOr);
8250 SDValue HiFlt = DAG.getBitcast(VT: DstVT, V: HiOr);
8251 SDValue HiSub =
8252 DAG.getNode(Opcode: ISD::FSUB, DL: dl, VT: DstVT, N1: HiFlt, N2: TwoP84PlusTwoP52);
8253 Result = DAG.getNode(Opcode: ISD::FADD, DL: dl, VT: DstVT, N1: LoFlt, N2: HiSub);
8254 return true;
8255}
8256
8257SDValue
8258TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
8259 SelectionDAG &DAG) const {
8260 unsigned Opcode = Node->getOpcode();
8261 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8262 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8263 "Wrong opcode");
8264
8265 if (Node->getFlags().hasNoNaNs()) {
8266 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8267 SDValue Op1 = Node->getOperand(Num: 0);
8268 SDValue Op2 = Node->getOperand(Num: 1);
8269 SDValue SelCC = DAG.getSelectCC(DL: SDLoc(Node), LHS: Op1, RHS: Op2, True: Op1, False: Op2, Cond: Pred);
8270 // Copy FMF flags, but always set the no-signed-zeros flag
8271 // as this is implied by the FMINNUM/FMAXNUM semantics.
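    // (For instance, with Op1 = +0.0 and Op2 = -0.0 the select may return
    // either zero; FMINNUM/FMAXNUM likewise leave the sign of a zero result
    // unspecified, so advertising no-signed-zeros here is sound.)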
8272 SDNodeFlags Flags = Node->getFlags();
8273 Flags.setNoSignedZeros(true);
8274 SelCC->setFlags(Flags);
8275 return SelCC;
8276 }
8277
8278 return SDValue();
8279}
8280
8281SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
8282 SelectionDAG &DAG) const {
8283 SDLoc dl(Node);
8284 unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
8285 ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8286 EVT VT = Node->getValueType(ResNo: 0);
8287
8288 if (VT.isScalableVector())
8289 report_fatal_error(
8290 reason: "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
8291
8292 if (isOperationLegalOrCustom(Op: NewOp, VT)) {
8293 SDValue Quiet0 = Node->getOperand(Num: 0);
8294 SDValue Quiet1 = Node->getOperand(Num: 1);
8295
8296 if (!Node->getFlags().hasNoNaNs()) {
      // Insert FCANONICALIZE nodes if we may need to quiet a signaling NaN to
      // get correct sNaN behavior.
8299 if (!DAG.isKnownNeverSNaN(Op: Quiet0)) {
8300 Quiet0 = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL: dl, VT, Operand: Quiet0,
8301 Flags: Node->getFlags());
8302 }
8303 if (!DAG.isKnownNeverSNaN(Op: Quiet1)) {
8304 Quiet1 = DAG.getNode(Opcode: ISD::FCANONICALIZE, DL: dl, VT, Operand: Quiet1,
8305 Flags: Node->getFlags());
8306 }
8307 }
8308
8309 return DAG.getNode(Opcode: NewOp, DL: dl, VT, N1: Quiet0, N2: Quiet1, Flags: Node->getFlags());
8310 }
8311
  // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM, use those
  // instead if there are no NaNs and there can't be an incompatible zero
  // compare: at least one operand isn't +/-0, or there are no signed zeros.
8315 if ((Node->getFlags().hasNoNaNs() ||
8316 (DAG.isKnownNeverNaN(Op: Node->getOperand(Num: 0)) &&
8317 DAG.isKnownNeverNaN(Op: Node->getOperand(Num: 1)))) &&
8318 (Node->getFlags().hasNoSignedZeros() ||
8319 DAG.isKnownNeverZeroFloat(Op: Node->getOperand(Num: 0)) ||
8320 DAG.isKnownNeverZeroFloat(Op: Node->getOperand(Num: 1)))) {
8321 unsigned IEEE2018Op =
8322 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8323 if (isOperationLegalOrCustom(Op: IEEE2018Op, VT))
8324 return DAG.getNode(Opcode: IEEE2018Op, DL: dl, VT, N1: Node->getOperand(Num: 0),
8325 N2: Node->getOperand(Num: 1), Flags: Node->getFlags());
8326 }
8327
8328 if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
8329 return SelCC;
8330
8331 return SDValue();
8332}
8333
/// Returns a true value if this FPClassTest can be performed with an ordered
/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
/// std::nullopt if it cannot be performed as a compare with 0.
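/// For example, Test = "zero|nan" lowers to an unordered equality compare
/// against +0.0 (SETUEQ in the caller), while a plain "zero" test maps to the
/// ordered SETOEQ form, provided denormal inputs are not flushed to zero.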
8337static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8338 const fltSemantics &Semantics,
8339 const MachineFunction &MF) {
8340 FPClassTest OrderedMask = Test & ~fcNan;
8341 FPClassTest NanTest = Test & fcNan;
8342 bool IsOrdered = NanTest == fcNone;
8343 bool IsUnordered = NanTest == fcNan;
8344
8345 // Skip cases that are testing for only a qnan or snan.
8346 if (!IsOrdered && !IsUnordered)
8347 return std::nullopt;
8348
8349 if (OrderedMask == fcZero &&
8350 MF.getDenormalMode(FPType: Semantics).Input == DenormalMode::IEEE)
8351 return IsOrdered;
8352 if (OrderedMask == (fcZero | fcSubnormal) &&
8353 MF.getDenormalMode(FPType: Semantics).inputsAreZero())
8354 return IsOrdered;
8355 return std::nullopt;
8356}
8357
8358SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
8359 FPClassTest Test, SDNodeFlags Flags,
8360 const SDLoc &DL,
8361 SelectionDAG &DAG) const {
8362 EVT OperandVT = Op.getValueType();
8363 assert(OperandVT.isFloatingPoint());
8364
  // Degenerate cases.
8366 if (Test == fcNone)
8367 return DAG.getBoolConstant(V: false, DL, VT: ResultVT, OpVT: OperandVT);
8368 if ((Test & fcAllFlags) == fcAllFlags)
8369 return DAG.getBoolConstant(V: true, DL, VT: ResultVT, OpVT: OperandVT);
8370
8371 // PPC double double is a pair of doubles, of which the higher part determines
8372 // the value class.
8373 if (OperandVT == MVT::ppcf128) {
8374 Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
8375 DAG.getConstant(1, DL, MVT::i32));
8376 OperandVT = MVT::f64;
8377 }
8378
  // Some checks may be represented as the inversion of a simpler check, for
  // example "inf|normal|subnormal|zero" => !"nan".
8381 bool IsInverted = false;
8382 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test)) {
8383 IsInverted = true;
8384 Test = InvertedCheck;
8385 }
8386
8387 // Floating-point type properties.
8388 EVT ScalarFloatVT = OperandVT.getScalarType();
8389 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(Context&: *DAG.getContext());
8390 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
8391 bool IsF80 = (ScalarFloatVT == MVT::f80);
8392
8393 // Some checks can be implemented using float comparisons, if floating point
8394 // exceptions are ignored.
8395 if (Flags.hasNoFPExcept() &&
8396 isOperationLegalOrCustom(Op: ISD::SETCC, VT: OperandVT.getScalarType())) {
8397 ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
8398 ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;
8399
8400 if (std::optional<bool> IsCmp0 =
8401 isFCmpEqualZero(Test, Semantics, MF: DAG.getMachineFunction());
8402 IsCmp0 && (isCondCodeLegalOrCustom(
8403 CC: *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
8404 VT: OperandVT.getScalarType().getSimpleVT()))) {
8405
      // If denormals were implicitly treated as 0, this would not be
      // equivalent to a compare with 0, since it would also be true for
      // denormals.
8408 return DAG.getSetCC(DL, VT: ResultVT, LHS: Op,
8409 RHS: DAG.getConstantFP(Val: 0.0, DL, VT: OperandVT),
8410 Cond: *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
8411 }
8412
8413 if (Test == fcNan &&
8414 isCondCodeLegalOrCustom(CC: IsInverted ? ISD::SETO : ISD::SETUO,
8415 VT: OperandVT.getScalarType().getSimpleVT())) {
8416 return DAG.getSetCC(DL, VT: ResultVT, LHS: Op, RHS: Op,
8417 Cond: IsInverted ? ISD::SETO : ISD::SETUO);
8418 }
8419
8420 if (Test == fcInf &&
8421 isCondCodeLegalOrCustom(CC: IsInverted ? ISD::SETUNE : ISD::SETOEQ,
8422 VT: OperandVT.getScalarType().getSimpleVT()) &&
8423 isOperationLegalOrCustom(Op: ISD::FABS, VT: OperandVT.getScalarType())) {
8424 // isinf(x) --> fabs(x) == inf
8425 SDValue Abs = DAG.getNode(Opcode: ISD::FABS, DL, VT: OperandVT, Operand: Op);
8426 SDValue Inf =
8427 DAG.getConstantFP(Val: APFloat::getInf(Sem: Semantics), DL, VT: OperandVT);
8428 return DAG.getSetCC(DL, VT: ResultVT, LHS: Abs, RHS: Inf,
8429 Cond: IsInverted ? ISD::SETUNE : ISD::SETOEQ);
8430 }
8431 }
8432
8433 // In the general case use integer operations.
8434 unsigned BitSize = OperandVT.getScalarSizeInBits();
8435 EVT IntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: BitSize);
8436 if (OperandVT.isVector())
8437 IntVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: IntVT,
8438 EC: OperandVT.getVectorElementCount());
8439 SDValue OpAsInt = DAG.getBitcast(VT: IntVT, V: Op);
8440
8441 // Various masks.
8442 APInt SignBit = APInt::getSignMask(BitWidth: BitSize);
8443 APInt ValueMask = APInt::getSignedMaxValue(numBits: BitSize); // All bits but sign.
8444 APInt Inf = APFloat::getInf(Sem: Semantics).bitcastToAPInt(); // Exp and int bit.
8445 const unsigned ExplicitIntBitInF80 = 63;
8446 APInt ExpMask = Inf;
8447 if (IsF80)
8448 ExpMask.clearBit(BitPosition: ExplicitIntBitInF80);
8449 APInt AllOneMantissa = APFloat::getLargest(Sem: Semantics).bitcastToAPInt() & ~Inf;
8450 APInt QNaNBitMask =
8451 APInt::getOneBitSet(numBits: BitSize, BitNo: AllOneMantissa.getActiveBits() - 1);
8452 APInt InvertionMask = APInt::getAllOnes(numBits: ResultVT.getScalarSizeInBits());
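  // For f32, for example: SignBit = 0x80000000, ValueMask = 0x7fffffff,
  // Inf = ExpMask = 0x7f800000, AllOneMantissa = 0x007fffff and
  // QNaNBitMask = 0x00400000.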
8453
8454 SDValue ValueMaskV = DAG.getConstant(Val: ValueMask, DL, VT: IntVT);
8455 SDValue SignBitV = DAG.getConstant(Val: SignBit, DL, VT: IntVT);
8456 SDValue ExpMaskV = DAG.getConstant(Val: ExpMask, DL, VT: IntVT);
8457 SDValue ZeroV = DAG.getConstant(Val: 0, DL, VT: IntVT);
8458 SDValue InfV = DAG.getConstant(Val: Inf, DL, VT: IntVT);
8459 SDValue ResultInvertionMask = DAG.getConstant(Val: InvertionMask, DL, VT: ResultVT);
8460
8461 SDValue Res;
8462 const auto appendResult = [&](SDValue PartialRes) {
8463 if (PartialRes) {
8464 if (Res)
8465 Res = DAG.getNode(Opcode: ISD::OR, DL, VT: ResultVT, N1: Res, N2: PartialRes);
8466 else
8467 Res = PartialRes;
8468 }
8469 };
8470
8471 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
8472 const auto getIntBitIsSet = [&]() -> SDValue {
8473 if (!IntBitIsSetV) {
8474 APInt IntBitMask(BitSize, 0);
8475 IntBitMask.setBit(ExplicitIntBitInF80);
8476 SDValue IntBitMaskV = DAG.getConstant(Val: IntBitMask, DL, VT: IntVT);
8477 SDValue IntBitV = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: IntBitMaskV);
8478 IntBitIsSetV = DAG.getSetCC(DL, VT: ResultVT, LHS: IntBitV, RHS: ZeroV, Cond: ISD::SETNE);
8479 }
8480 return IntBitIsSetV;
8481 };
8482
8483 // Split the value into sign bit and absolute value.
8484 SDValue AbsV = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: ValueMaskV);
  SDValue SignV = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt,
                               RHS: DAG.getConstant(Val: 0, DL, VT: IntVT), Cond: ISD::SETLT);
8487
8488 // Tests that involve more than one class should be processed first.
8489 SDValue PartialRes;
8490
8491 if (IsF80)
8492 ; // Detect finite numbers of f80 by checking individual classes because
8493 // they have different settings of the explicit integer bit.
8494 else if ((Test & fcFinite) == fcFinite) {
8495 // finite(V) ==> abs(V) < exp_mask
8496 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ExpMaskV, Cond: ISD::SETLT);
8497 Test &= ~fcFinite;
8498 } else if ((Test & fcFinite) == fcPosFinite) {
8499 // finite(V) && V > 0 ==> V < exp_mask
8500 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: ExpMaskV, Cond: ISD::SETULT);
8501 Test &= ~fcPosFinite;
8502 } else if ((Test & fcFinite) == fcNegFinite) {
8503 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
8504 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ExpMaskV, Cond: ISD::SETLT);
8505 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
8506 Test &= ~fcNegFinite;
8507 }
8508 appendResult(PartialRes);
8509
8510 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
8511 // fcZero | fcSubnormal => test all exponent bits are 0
8512 // TODO: Handle sign bit specific cases
8513 if (PartialCheck == (fcZero | fcSubnormal)) {
8514 SDValue ExpBits = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: OpAsInt, N2: ExpMaskV);
8515 SDValue ExpIsZero =
8516 DAG.getSetCC(DL, VT: ResultVT, LHS: ExpBits, RHS: ZeroV, Cond: ISD::SETEQ);
8517 appendResult(ExpIsZero);
8518 Test &= ~PartialCheck & fcAllFlags;
8519 }
8520 }
8521
8522 // Check for individual classes.
8523
8524 if (unsigned PartialCheck = Test & fcZero) {
8525 if (PartialCheck == fcPosZero)
8526 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: ZeroV, Cond: ISD::SETEQ);
8527 else if (PartialCheck == fcZero)
8528 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: ZeroV, Cond: ISD::SETEQ);
8529 else // ISD::fcNegZero
8530 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: SignBitV, Cond: ISD::SETEQ);
8531 appendResult(PartialRes);
8532 }
8533
8534 if (unsigned PartialCheck = Test & fcSubnormal) {
8535 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
8536 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
8537 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
8538 SDValue MantissaV = DAG.getConstant(Val: AllOneMantissa, DL, VT: IntVT);
8539 SDValue VMinusOneV =
8540 DAG.getNode(Opcode: ISD::SUB, DL, VT: IntVT, N1: V, N2: DAG.getConstant(Val: 1, DL, VT: IntVT));
8541 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: VMinusOneV, RHS: MantissaV, Cond: ISD::SETULT);
8542 if (PartialCheck == fcNegSubnormal)
8543 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
8544 appendResult(PartialRes);
8545 }
8546
8547 if (unsigned PartialCheck = Test & fcInf) {
8548 if (PartialCheck == fcPosInf)
8549 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: InfV, Cond: ISD::SETEQ);
8550 else if (PartialCheck == fcInf)
8551 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETEQ);
8552 else { // ISD::fcNegInf
8553 APInt NegInf = APFloat::getInf(Sem: Semantics, Negative: true).bitcastToAPInt();
8554 SDValue NegInfV = DAG.getConstant(Val: NegInf, DL, VT: IntVT);
8555 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: OpAsInt, RHS: NegInfV, Cond: ISD::SETEQ);
8556 }
8557 appendResult(PartialRes);
8558 }
8559
8560 if (unsigned PartialCheck = Test & fcNan) {
8561 APInt InfWithQnanBit = Inf | QNaNBitMask;
8562 SDValue InfWithQnanBitV = DAG.getConstant(Val: InfWithQnanBit, DL, VT: IntVT);
8563 if (PartialCheck == fcNan) {
8564 // isnan(V) ==> abs(V) > int(inf)
8565 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETGT);
8566 if (IsF80) {
        // Recognize unsupported values as NaNs for compatibility with glibc.
        // For such values, (exp(V) == 0) == int_bit.
8569 SDValue ExpBits = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: AbsV, N2: ExpMaskV);
8570 SDValue ExpIsZero =
8571 DAG.getSetCC(DL, VT: ResultVT, LHS: ExpBits, RHS: ZeroV, Cond: ISD::SETEQ);
8572 SDValue IsPseudo =
8573 DAG.getSetCC(DL, VT: ResultVT, LHS: getIntBitIsSet(), RHS: ExpIsZero, Cond: ISD::SETEQ);
8574 PartialRes = DAG.getNode(Opcode: ISD::OR, DL, VT: ResultVT, N1: PartialRes, N2: IsPseudo);
8575 }
8576 } else if (PartialCheck == fcQNan) {
8577 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
8578 PartialRes =
8579 DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfWithQnanBitV, Cond: ISD::SETGE);
8580 } else { // ISD::fcSNan
8581 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
8582 // abs(V) < (unsigned(Inf) | quiet_bit)
8583 SDValue IsNan = DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfV, Cond: ISD::SETGT);
8584 SDValue IsNotQnan =
8585 DAG.getSetCC(DL, VT: ResultVT, LHS: AbsV, RHS: InfWithQnanBitV, Cond: ISD::SETLT);
8586 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: IsNan, N2: IsNotQnan);
8587 }
8588 appendResult(PartialRes);
8589 }
8590
8591 if (unsigned PartialCheck = Test & fcNormal) {
8592 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
8593 APInt ExpLSB = ExpMask & ~(ExpMask.shl(shiftAmt: 1));
8594 SDValue ExpLSBV = DAG.getConstant(Val: ExpLSB, DL, VT: IntVT);
8595 SDValue ExpMinus1 = DAG.getNode(Opcode: ISD::SUB, DL, VT: IntVT, N1: AbsV, N2: ExpLSBV);
8596 APInt ExpLimit = ExpMask - ExpLSB;
8597 SDValue ExpLimitV = DAG.getConstant(Val: ExpLimit, DL, VT: IntVT);
8598 PartialRes = DAG.getSetCC(DL, VT: ResultVT, LHS: ExpMinus1, RHS: ExpLimitV, Cond: ISD::SETULT);
8599 if (PartialCheck == fcNegNormal)
8600 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: SignV);
8601 else if (PartialCheck == fcPosNormal) {
8602 SDValue PosSignV =
8603 DAG.getNode(Opcode: ISD::XOR, DL, VT: ResultVT, N1: SignV, N2: ResultInvertionMask);
8604 PartialRes = DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: PosSignV);
8605 }
8606 if (IsF80)
8607 PartialRes =
8608 DAG.getNode(Opcode: ISD::AND, DL, VT: ResultVT, N1: PartialRes, N2: getIntBitIsSet());
8609 appendResult(PartialRes);
8610 }
8611
8612 if (!Res)
8613 return DAG.getConstant(Val: IsInverted, DL, VT: ResultVT);
8614 if (IsInverted)
8615 Res = DAG.getNode(Opcode: ISD::XOR, DL, VT: ResultVT, N1: Res, N2: ResultInvertionMask);
8616 return Res;
8617}
8618
8619// Only expand vector types if we have the appropriate vector bit operations.
8620static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
8621 assert(VT.isVector() && "Expected vector type");
8622 unsigned Len = VT.getScalarSizeInBits();
8623 return TLI.isOperationLegalOrCustom(Op: ISD::ADD, VT) &&
8624 TLI.isOperationLegalOrCustom(Op: ISD::SUB, VT) &&
8625 TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
8626 (Len == 8 || TLI.isOperationLegalOrCustom(Op: ISD::MUL, VT)) &&
8627 TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT);
8628}
8629
8630SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
8631 SDLoc dl(Node);
8632 EVT VT = Node->getValueType(ResNo: 0);
8633 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
8634 SDValue Op = Node->getOperand(Num: 0);
8635 unsigned Len = VT.getScalarSizeInBits();
8636 assert(VT.isInteger() && "CTPOP not implemented for this type.");
8637
8638 // TODO: Add support for irregular type lengths.
8639 if (!(Len <= 128 && Len % 8 == 0))
8640 return SDValue();
8641
8642 // Only expand vector types if we have the appropriate vector bit operations.
8643 if (VT.isVector() && !canExpandVectorCTPOP(TLI: *this, VT))
8644 return SDValue();
8645
8646 // This is the "best" algorithm from
8647 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
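  //
  // Worked example on the single byte 0xDA (0b11011010, popcount 5):
  //   v - ((v >> 1) & 0x55)           -> 0x95 (pair counts 10|01|01|01)
  //   (v & 0x33) + ((v >> 2) & 0x33)  -> 0x32 (nibble counts 3 and 2)
  //   (v + (v >> 4)) & 0x0F           -> 0x05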
8648 SDValue Mask55 =
8649 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x55)), DL: dl, VT);
8650 SDValue Mask33 =
8651 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x33)), DL: dl, VT);
8652 SDValue Mask0F =
8653 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x0F)), DL: dl, VT);
8654
8655 // v = v - ((v >> 1) & 0x55555555...)
8656 Op = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Op,
8657 N2: DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
8658 N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
8659 N2: DAG.getConstant(Val: 1, DL: dl, VT: ShVT)),
8660 N2: Mask55));
8661 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
8662 Op = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op, N2: Mask33),
8663 N2: DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
8664 N1: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
8665 N2: DAG.getConstant(Val: 2, DL: dl, VT: ShVT)),
8666 N2: Mask33));
8667 // v = (v + (v >> 4)) & 0x0F0F0F0F...
8668 Op = DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
8669 N1: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Op,
8670 N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
8671 N2: DAG.getConstant(Val: 4, DL: dl, VT: ShVT))),
8672 N2: Mask0F);
8673
8674 if (Len <= 8)
8675 return Op;
8676
8677 // Avoid the multiply if we only have 2 bytes to add.
8678 // TODO: Only doing this for scalars because vectors weren't as obviously
8679 // improved.
8680 if (Len == 16 && !VT.isVector()) {
8681 // v = (v + (v >> 8)) & 0x00FF;
8682 return DAG.getNode(Opcode: ISD::AND, DL: dl, VT,
8683 N1: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Op,
8684 N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op,
8685 N2: DAG.getConstant(Val: 8, DL: dl, VT: ShVT))),
8686 N2: DAG.getConstant(Val: 0xFF, DL: dl, VT));
8687 }
8688
8689 // v = (v * 0x01010101...) >> (Len - 8)
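  // Multiplying by the splatted 0x01 byte sums every byte-wise count into the
  // most significant byte (no byte can overflow, since the total is at most
  // 128 for a 128-bit input), and the shift then extracts that byte.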
8690 SDValue Mask01 =
8691 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x01)), DL: dl, VT);
8692 return DAG.getNode(Opcode: ISD::SRL, DL: dl, VT,
8693 N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: Op, N2: Mask01),
8694 N2: DAG.getConstant(Val: Len - 8, DL: dl, VT: ShVT));
8695}
8696
8697SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
8698 SDLoc dl(Node);
8699 EVT VT = Node->getValueType(ResNo: 0);
8700 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
8701 SDValue Op = Node->getOperand(Num: 0);
8702 SDValue Mask = Node->getOperand(Num: 1);
8703 SDValue VL = Node->getOperand(Num: 2);
8704 unsigned Len = VT.getScalarSizeInBits();
8705 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
8706
8707 // TODO: Add support for irregular type lengths.
8708 if (!(Len <= 128 && Len % 8 == 0))
8709 return SDValue();
8710
  // This is the same algorithm as expandCTPOP, from
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
8713 SDValue Mask55 =
8714 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x55)), DL: dl, VT);
8715 SDValue Mask33 =
8716 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x33)), DL: dl, VT);
8717 SDValue Mask0F =
8718 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x0F)), DL: dl, VT);
8719
8720 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
8721
8722 // v = v - ((v >> 1) & 0x55555555...)
8723 Tmp1 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT,
8724 N1: DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op,
8725 N2: DAG.getConstant(Val: 1, DL: dl, VT: ShVT), N3: Mask, N4: VL),
8726 N2: Mask55, N3: Mask, N4: VL);
8727 Op = DAG.getNode(Opcode: ISD::VP_SUB, DL: dl, VT, N1: Op, N2: Tmp1, N3: Mask, N4: VL);
8728
8729 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
8730 Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op, N2: Mask33, N3: Mask, N4: VL);
8731 Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT,
8732 N1: DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op,
8733 N2: DAG.getConstant(Val: 2, DL: dl, VT: ShVT), N3: Mask, N4: VL),
8734 N2: Mask33, N3: Mask, N4: VL);
8735 Op = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: VL);
8736
8737 // v = (v + (v >> 4)) & 0x0F0F0F0F...
  Tmp4 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 4, DL: dl, VT: ShVT),
                     N3: Mask, N4: VL);
  Tmp5 = DAG.getNode(Opcode: ISD::VP_ADD, DL: dl, VT, N1: Op, N2: Tmp4, N3: Mask, N4: VL);
8741 Op = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp5, N2: Mask0F, N3: Mask, N4: VL);
8742
8743 if (Len <= 8)
8744 return Op;
8745
8746 // v = (v * 0x01010101...) >> (Len - 8)
8747 SDValue Mask01 =
8748 DAG.getConstant(Val: APInt::getSplat(NewLen: Len, V: APInt(8, 0x01)), DL: dl, VT);
8749 return DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT,
8750 N1: DAG.getNode(Opcode: ISD::VP_MUL, DL: dl, VT, N1: Op, N2: Mask01, N3: Mask, N4: VL),
8751 N2: DAG.getConstant(Val: Len - 8, DL: dl, VT: ShVT), N3: Mask, N4: VL);
8752}
8753
8754SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
8755 SDLoc dl(Node);
8756 EVT VT = Node->getValueType(ResNo: 0);
8757 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
8758 SDValue Op = Node->getOperand(Num: 0);
8759 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
8760
8761 // If the non-ZERO_UNDEF version is supported we can use that instead.
8762 if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
8763 isOperationLegalOrCustom(Op: ISD::CTLZ, VT))
8764 return DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Op);
8765
8766 // If the ZERO_UNDEF version is supported use that and handle the zero case.
8767 if (isOperationLegalOrCustom(Op: ISD::CTLZ_ZERO_UNDEF, VT)) {
8768 EVT SetCCVT =
8769 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
8770 SDValue CTLZ = DAG.getNode(Opcode: ISD::CTLZ_ZERO_UNDEF, DL: dl, VT, Operand: Op);
8771 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
8772 SDValue SrcIsZero = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
8773 return DAG.getSelect(DL: dl, VT, Cond: SrcIsZero,
8774 LHS: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT), RHS: CTLZ);
8775 }
8776
8777 // Only expand vector types if we have the appropriate vector bit operations.
8778 // This includes the operations needed to expand CTPOP if it isn't supported.
8779 if (VT.isVector() && (!isPowerOf2_32(Value: NumBitsPerElt) ||
8780 (!isOperationLegalOrCustom(Op: ISD::CTPOP, VT) &&
8781 !canExpandVectorCTPOP(TLI: *this, VT)) ||
8782 !isOperationLegalOrCustom(Op: ISD::SRL, VT) ||
8783 !isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)))
8784 return SDValue();
8785
8786 // for now, we do this:
8787 // x = x | (x >> 1);
8788 // x = x | (x >> 2);
8789 // ...
8790 // x = x | (x >>16);
8791 // x = x | (x >>32); // for 64-bit input
8792 // return popcount(~x);
8793 //
8794 // Ref: "Hacker's Delight" by Henry Warren
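  //
  // For example, with a 16-bit x = 0x0F00 the shifts smear the leading one
  // downwards into 0x0FFF, so popcount(~x) = popcount(0xF000) = 4 = ctlz(x).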
8795 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
8796 SDValue Tmp = DAG.getConstant(Val: 1ULL << i, DL: dl, VT: ShVT);
8797 Op = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Op,
8798 N2: DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: Tmp));
8799 }
8800 Op = DAG.getNOT(DL: dl, Val: Op, VT);
8801 return DAG.getNode(Opcode: ISD::CTPOP, DL: dl, VT, Operand: Op);
8802}
8803
8804SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
8805 SDLoc dl(Node);
8806 EVT VT = Node->getValueType(ResNo: 0);
8807 EVT ShVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
8808 SDValue Op = Node->getOperand(Num: 0);
8809 SDValue Mask = Node->getOperand(Num: 1);
8810 SDValue VL = Node->getOperand(Num: 2);
8811 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
8812
8813 // do this:
8814 // x = x | (x >> 1);
8815 // x = x | (x >> 2);
8816 // ...
8817 // x = x | (x >>16);
8818 // x = x | (x >>32); // for 64-bit input
8819 // return popcount(~x);
8820 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
8821 SDValue Tmp = DAG.getConstant(Val: 1ULL << i, DL: dl, VT: ShVT);
8822 Op = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Op,
8823 N2: DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: Tmp, N3: Mask, N4: VL), N3: Mask,
8824 N4: VL);
8825 }
8826 Op = DAG.getNode(Opcode: ISD::VP_XOR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: -1, DL: dl, VT), N3: Mask,
8827 N4: VL);
8828 return DAG.getNode(Opcode: ISD::VP_CTPOP, DL: dl, VT, N1: Op, N2: Mask, N3: VL);
8829}
8830
8831SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
8832 const SDLoc &DL, EVT VT, SDValue Op,
8833 unsigned BitWidth) const {
8834 if (BitWidth != 32 && BitWidth != 64)
8835 return SDValue();
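  // CTTZ via a de Bruijn multiplication: Op & -Op isolates the lowest set bit
  // 2^i, multiplying the de Bruijn constant by 2^i shifts it left by i, and
  // the top Log2(BitWidth) bits of the product then index a table (built in
  // the loop below) that maps each distinct bit pattern back to i.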
8836 APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
8837 : APInt(64, 0x0218A392CD3D5DBFULL);
8838 const DataLayout &TD = DAG.getDataLayout();
8839 MachinePointerInfo PtrInfo =
8840 MachinePointerInfo::getConstantPool(MF&: DAG.getMachineFunction());
8841 unsigned ShiftAmt = BitWidth - Log2_32(Value: BitWidth);
8842 SDValue Neg = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: DAG.getConstant(Val: 0, DL, VT), N2: Op);
8843 SDValue Lookup = DAG.getNode(
8844 Opcode: ISD::SRL, DL, VT,
8845 N1: DAG.getNode(Opcode: ISD::MUL, DL, VT, N1: DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op, N2: Neg),
8846 N2: DAG.getConstant(Val: DeBruijn, DL, VT)),
8847 N2: DAG.getConstant(Val: ShiftAmt, DL, VT));
8848 Lookup = DAG.getSExtOrTrunc(Op: Lookup, DL, VT: getPointerTy(DL: TD));
8849
8850 SmallVector<uint8_t> Table(BitWidth, 0);
8851 for (unsigned i = 0; i < BitWidth; i++) {
8852 APInt Shl = DeBruijn.shl(shiftAmt: i);
8853 APInt Lshr = Shl.lshr(shiftAmt: ShiftAmt);
8854 Table[Lshr.getZExtValue()] = i;
8855 }
8856
  // Create a ConstantDataArray in the constant pool.
8858 auto *CA = ConstantDataArray::get(Context&: *DAG.getContext(), Elts&: Table);
8859 SDValue CPIdx = DAG.getConstantPool(C: CA, VT: getPointerTy(DL: TD),
8860 Align: TD.getPrefTypeAlign(Ty: CA->getType()));
8861 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
8862 DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
8863 PtrInfo, MVT::i8);
8864 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
8865 return ExtLoad;
8866
8867 EVT SetCCVT =
8868 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
8869 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
8870 SDValue SrcIsZero = DAG.getSetCC(DL, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
8871 return DAG.getSelect(DL, VT, Cond: SrcIsZero,
8872 LHS: DAG.getConstant(Val: BitWidth, DL, VT), RHS: ExtLoad);
8873}
8874
8875SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
8876 SDLoc dl(Node);
8877 EVT VT = Node->getValueType(ResNo: 0);
8878 SDValue Op = Node->getOperand(Num: 0);
8879 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
8880
8881 // If the non-ZERO_UNDEF version is supported we can use that instead.
8882 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
8883 isOperationLegalOrCustom(Op: ISD::CTTZ, VT))
8884 return DAG.getNode(Opcode: ISD::CTTZ, DL: dl, VT, Operand: Op);
8885
8886 // If the ZERO_UNDEF version is supported use that and handle the zero case.
8887 if (isOperationLegalOrCustom(Op: ISD::CTTZ_ZERO_UNDEF, VT)) {
8888 EVT SetCCVT =
8889 getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
8890 SDValue CTTZ = DAG.getNode(Opcode: ISD::CTTZ_ZERO_UNDEF, DL: dl, VT, Operand: Op);
8891 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
8892 SDValue SrcIsZero = DAG.getSetCC(DL: dl, VT: SetCCVT, LHS: Op, RHS: Zero, Cond: ISD::SETEQ);
8893 return DAG.getSelect(DL: dl, VT, Cond: SrcIsZero,
8894 LHS: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT), RHS: CTTZ);
8895 }
8896
8897 // Only expand vector types if we have the appropriate vector bit operations.
8898 // This includes the operations needed to expand CTPOP if it isn't supported.
8899 if (VT.isVector() && (!isPowerOf2_32(Value: NumBitsPerElt) ||
8900 (!isOperationLegalOrCustom(Op: ISD::CTPOP, VT) &&
8901 !isOperationLegalOrCustom(Op: ISD::CTLZ, VT) &&
8902 !canExpandVectorCTPOP(TLI: *this, VT)) ||
8903 !isOperationLegalOrCustom(Op: ISD::SUB, VT) ||
8904 !isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) ||
8905 !isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT)))
8906 return SDValue();
8907
8908 // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
8909 if (!VT.isVector() && isOperationExpand(Op: ISD::CTPOP, VT) &&
8910 !isOperationLegal(Op: ISD::CTLZ, VT))
8911 if (SDValue V = CTTZTableLookup(Node, DAG, DL: dl, VT, Op, BitWidth: NumBitsPerElt))
8912 return V;
8913
8914 // for now, we use: { return popcount(~x & (x - 1)); }
8915 // unless the target has ctlz but not ctpop, in which case we use:
8916 // { return 32 - nlz(~x & (x-1)); }
8917 // Ref: "Hacker's Delight" by Henry Warren
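  //
  // For example, x = 0b01101000: ~x & (x - 1) = 0b10010111 & 0b01100111 =
  // 0b00000111, and popcount(0b00000111) = 3 = cttz(x).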
8918 SDValue Tmp = DAG.getNode(
8919 Opcode: ISD::AND, DL: dl, VT, N1: DAG.getNOT(DL: dl, Val: Op, VT),
8920 N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 1, DL: dl, VT)));
8921
8922 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
8923 if (isOperationLegal(Op: ISD::CTLZ, VT) && !isOperationLegal(Op: ISD::CTPOP, VT)) {
8924 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: DAG.getConstant(Val: NumBitsPerElt, DL: dl, VT),
8925 N2: DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Tmp));
8926 }
8927
8928 return DAG.getNode(Opcode: ISD::CTPOP, DL: dl, VT, Operand: Tmp);
8929}
8930
8931SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
8932 SDValue Op = Node->getOperand(Num: 0);
8933 SDValue Mask = Node->getOperand(Num: 1);
8934 SDValue VL = Node->getOperand(Num: 2);
8935 SDLoc dl(Node);
8936 EVT VT = Node->getValueType(ResNo: 0);
8937
8938 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
8939 SDValue Not = DAG.getNode(Opcode: ISD::VP_XOR, DL: dl, VT, N1: Op,
8940 N2: DAG.getConstant(Val: -1, DL: dl, VT), N3: Mask, N4: VL);
8941 SDValue MinusOne = DAG.getNode(Opcode: ISD::VP_SUB, DL: dl, VT, N1: Op,
8942 N2: DAG.getConstant(Val: 1, DL: dl, VT), N3: Mask, N4: VL);
8943 SDValue Tmp = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Not, N2: MinusOne, N3: Mask, N4: VL);
8944 return DAG.getNode(Opcode: ISD::VP_CTPOP, DL: dl, VT, N1: Tmp, N2: Mask, N3: VL);
8945}
8946
8947SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
8948 bool IsNegative) const {
8949 SDLoc dl(N);
8950 EVT VT = N->getValueType(ResNo: 0);
8951 SDValue Op = N->getOperand(Num: 0);
8952
8953 // abs(x) -> smax(x,sub(0,x))
8954 if (!IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
8955 isOperationLegal(Op: ISD::SMAX, VT)) {
8956 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
8957 return DAG.getNode(Opcode: ISD::SMAX, DL: dl, VT, N1: Op,
8958 N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
8959 }
8960
8961 // abs(x) -> umin(x,sub(0,x))
8962 if (!IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
8963 isOperationLegal(Op: ISD::UMIN, VT)) {
8964 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
8965 Op = DAG.getFreeze(V: Op);
8966 return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT, N1: Op,
8967 N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
8968 }
8969
8970 // 0 - abs(x) -> smin(x, sub(0,x))
8971 if (IsNegative && isOperationLegal(Op: ISD::SUB, VT) &&
8972 isOperationLegal(Op: ISD::SMIN, VT)) {
8973 Op = DAG.getFreeze(V: Op);
8974 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
8975 return DAG.getNode(Opcode: ISD::SMIN, DL: dl, VT, N1: Op,
8976 N2: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Zero, N2: Op));
8977 }
8978
8979 // Only expand vector types if we have the appropriate vector operations.
8980 if (VT.isVector() &&
8981 (!isOperationLegalOrCustom(Op: ISD::SRA, VT) ||
8982 (!IsNegative && !isOperationLegalOrCustom(Op: ISD::ADD, VT)) ||
8983 (IsNegative && !isOperationLegalOrCustom(Op: ISD::SUB, VT)) ||
8984 !isOperationLegalOrCustomOrPromote(Op: ISD::XOR, VT)))
8985 return SDValue();
8986
8987 Op = DAG.getFreeze(V: Op);
8988 SDValue Shift = DAG.getNode(
8989 Opcode: ISD::SRA, DL: dl, VT, N1: Op,
8990 N2: DAG.getShiftAmountConstant(Val: VT.getScalarSizeInBits() - 1, VT, DL: dl));
8991 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Op, N2: Shift);
8992
8993 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
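  // E.g. for i8 x = -5: Shift = -1, Xor = x ^ -1 = 4, and Xor - Shift =
  // 4 - (-1) = 5 = abs(x).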
8994 if (!IsNegative)
8995 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Xor, N2: Shift);
8996
8997 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
8998 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Shift, N2: Xor);
8999}
9000
9001SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
9002 SDLoc dl(N);
9003 EVT VT = N->getValueType(ResNo: 0);
9004 SDValue LHS = DAG.getFreeze(V: N->getOperand(Num: 0));
9005 SDValue RHS = DAG.getFreeze(V: N->getOperand(Num: 1));
9006 bool IsSigned = N->getOpcode() == ISD::ABDS;
9007
9008 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
9009 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
9010 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
9011 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
9012 if (isOperationLegal(Op: MaxOpc, VT) && isOperationLegal(Op: MinOpc, VT)) {
9013 SDValue Max = DAG.getNode(Opcode: MaxOpc, DL: dl, VT, N1: LHS, N2: RHS);
9014 SDValue Min = DAG.getNode(Opcode: MinOpc, DL: dl, VT, N1: LHS, N2: RHS);
9015 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Max, N2: Min);
9016 }
9017
9018 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
9019 if (!IsSigned && isOperationLegal(Op: ISD::USUBSAT, VT))
9020 return DAG.getNode(Opcode: ISD::OR, DL: dl, VT,
9021 N1: DAG.getNode(Opcode: ISD::USUBSAT, DL: dl, VT, N1: LHS, N2: RHS),
9022 N2: DAG.getNode(Opcode: ISD::USUBSAT, DL: dl, VT, N1: RHS, N2: LHS));
9023
9024 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9025 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9026 EVT CCVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
9027 ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
9028 SDValue Cmp = DAG.getSetCC(DL: dl, VT: CCVT, LHS, RHS, Cond: CC);
9029 return DAG.getSelect(DL: dl, VT, Cond: Cmp, LHS: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: LHS, N2: RHS),
9030 RHS: DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: RHS, N2: LHS));
9031}
9032
9033SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
9034 SDLoc dl(N);
9035 EVT VT = N->getValueType(ResNo: 0);
9036 SDValue Op = N->getOperand(Num: 0);
9037
9038 if (!VT.isSimple())
9039 return SDValue();
9040
9041 EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9042 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9043 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9044 default:
9045 return SDValue();
9046 case MVT::i16:
9047 // Use a rotate by 8. This can be further expanded if necessary.
9048 return DAG.getNode(Opcode: ISD::ROTL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
9049 case MVT::i32:
9050 Tmp4 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
9051 Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
9052 N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT));
9053 Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
9054 Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
9055 Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT));
9056 Tmp1 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
9057 Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp3);
9058 Tmp2 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp1);
9059 return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp2);
9060 case MVT::i64:
9061 Tmp8 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT));
9062 Tmp7 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
9063 N2: DAG.getConstant(Val: 255ULL<<8, DL: dl, VT));
9064 Tmp7 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp7, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT));
9065 Tmp6 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
9066 N2: DAG.getConstant(Val: 255ULL<<16, DL: dl, VT));
9067 Tmp6 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp6, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
9068 Tmp5 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Op,
9069 N2: DAG.getConstant(Val: 255ULL<<24, DL: dl, VT));
9070 Tmp5 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp5, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
9071 Tmp4 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT));
9072 Tmp4 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp4,
9073 N2: DAG.getConstant(Val: 255ULL<<24, DL: dl, VT));
9074 Tmp3 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT));
9075 Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp3,
9076 N2: DAG.getConstant(Val: 255ULL<<16, DL: dl, VT));
9077 Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT));
9078 Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2,
9079 N2: DAG.getConstant(Val: 255ULL<<8, DL: dl, VT));
9080 Tmp1 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT));
9081 Tmp8 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp7);
9082 Tmp6 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp6, N2: Tmp5);
9083 Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp3);
9084 Tmp2 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp1);
9085 Tmp8 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp6);
9086 Tmp4 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp4, N2: Tmp2);
9087 return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp8, N2: Tmp4);
9088 }
9089}
9090
9091SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
9092 SDLoc dl(N);
9093 EVT VT = N->getValueType(ResNo: 0);
9094 SDValue Op = N->getOperand(Num: 0);
9095 SDValue Mask = N->getOperand(Num: 1);
9096 SDValue EVL = N->getOperand(Num: 2);
9097
9098 if (!VT.isSimple())
9099 return SDValue();
9100
9101 EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9102 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9103 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9104 default:
9105 return SDValue();
9106 case MVT::i16:
9107 Tmp1 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
9108 N3: Mask, N4: EVL);
9109 Tmp2 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
9110 N3: Mask, N4: EVL);
9111 return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp1, N2: Tmp2, N3: Mask, N4: EVL);
9112 case MVT::i32:
9113 Tmp4 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
9114 N3: Mask, N4: EVL);
9115 Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT),
9116 N3: Mask, N4: EVL);
9117 Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
9118 N3: Mask, N4: EVL);
9119 Tmp2 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
9120 N3: Mask, N4: EVL);
9121 Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
9122 N2: DAG.getConstant(Val: 0xFF00, DL: dl, VT), N3: Mask, N4: EVL);
9123 Tmp1 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
9124 N3: Mask, N4: EVL);
9125 Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp3, N3: Mask, N4: EVL);
9126 Tmp2 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp1, N3: Mask, N4: EVL);
9127 return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp2, N3: Mask, N4: EVL);
9128 case MVT::i64:
9129 Tmp8 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT),
9130 N3: Mask, N4: EVL);
9131 Tmp7 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
9132 N2: DAG.getConstant(Val: 255ULL << 8, DL: dl, VT), N3: Mask, N4: EVL);
9133 Tmp7 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp7, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT),
9134 N3: Mask, N4: EVL);
9135 Tmp6 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
9136 N2: DAG.getConstant(Val: 255ULL << 16, DL: dl, VT), N3: Mask, N4: EVL);
9137 Tmp6 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp6, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
9138 N3: Mask, N4: EVL);
9139 Tmp5 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Op,
9140 N2: DAG.getConstant(Val: 255ULL << 24, DL: dl, VT), N3: Mask, N4: EVL);
9141 Tmp5 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp5, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
9142 N3: Mask, N4: EVL);
9143 Tmp4 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 8, DL: dl, VT: SHVT),
9144 N3: Mask, N4: EVL);
9145 Tmp4 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp4,
9146 N2: DAG.getConstant(Val: 255ULL << 24, DL: dl, VT), N3: Mask, N4: EVL);
9147 Tmp3 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 24, DL: dl, VT: SHVT),
9148 N3: Mask, N4: EVL);
9149 Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp3,
9150 N2: DAG.getConstant(Val: 255ULL << 16, DL: dl, VT), N3: Mask, N4: EVL);
9151 Tmp2 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 40, DL: dl, VT: SHVT),
9152 N3: Mask, N4: EVL);
9153 Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
9154 N2: DAG.getConstant(Val: 255ULL << 8, DL: dl, VT), N3: Mask, N4: EVL);
9155 Tmp1 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: 56, DL: dl, VT: SHVT),
9156 N3: Mask, N4: EVL);
9157 Tmp8 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp7, N3: Mask, N4: EVL);
9158 Tmp6 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp6, N2: Tmp5, N3: Mask, N4: EVL);
9159 Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp3, N3: Mask, N4: EVL);
9160 Tmp2 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp1, N3: Mask, N4: EVL);
9161 Tmp8 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp6, N3: Mask, N4: EVL);
9162 Tmp4 = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp4, N2: Tmp2, N3: Mask, N4: EVL);
9163 return DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp8, N2: Tmp4, N3: Mask, N4: EVL);
9164 }
9165}
9166
9167SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
9168 SDLoc dl(N);
9169 EVT VT = N->getValueType(ResNo: 0);
9170 SDValue Op = N->getOperand(Num: 0);
9171 EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9172 unsigned Sz = VT.getScalarSizeInBits();
9173
9174 SDValue Tmp, Tmp2, Tmp3;
9175
  // If we can, perform BSWAP first, then mask+swap the i4 pairs, then the i2
  // pairs and finally the i1 pairs.
9178 // TODO: We can easily support i4/i2 legal types if any target ever does.
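  //
  // E.g. reversing the byte 0xB1 (0b10110001): the i4 swap gives 0x1B, the
  // i2 swap gives 0x4E, and the i1 swap gives 0x8D (0b10001101), which is
  // 0xB1 with its bits reversed.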
9179 if (Sz >= 8 && isPowerOf2_32(Value: Sz)) {
9180 // Create the masks - repeating the pattern every byte.
9181 APInt Mask4 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x0F));
9182 APInt Mask2 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x33));
9183 APInt Mask1 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x55));
9184
9185 // BSWAP if the type is wider than a single byte.
9186 Tmp = (Sz > 8 ? DAG.getNode(Opcode: ISD::BSWAP, DL: dl, VT, Operand: Op) : Op);
9187
9188 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
9189 Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT));
9190 Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask4, DL: dl, VT));
9191 Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask4, DL: dl, VT));
9192 Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT));
9193 Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
9194
9195 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
9196 Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT));
9197 Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask2, DL: dl, VT));
9198 Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask2, DL: dl, VT));
9199 Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT));
9200 Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
9201
9202 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
9203 Tmp2 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT));
9204 Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Mask1, DL: dl, VT));
9205 Tmp3 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask1, DL: dl, VT));
9206 Tmp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT));
9207 Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp2, N2: Tmp3);
9208 return Tmp;
9209 }
9210
9211 Tmp = DAG.getConstant(Val: 0, DL: dl, VT);
9212 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
9213 if (I < J)
9214 Tmp2 =
9215 DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: J - I, DL: dl, VT: SHVT));
9216 else
9217 Tmp2 =
9218 DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Op, N2: DAG.getConstant(Val: I - J, DL: dl, VT: SHVT));
9219
9220 APInt Shift = APInt::getOneBitSet(numBits: Sz, BitNo: J);
9221 Tmp2 = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Tmp2, N2: DAG.getConstant(Val: Shift, DL: dl, VT));
9222 Tmp = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Tmp, N2: Tmp2);
9223 }
9224
9225 return Tmp;
9226}
9227
9228SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
9229 assert(N->getOpcode() == ISD::VP_BITREVERSE);
9230
9231 SDLoc dl(N);
9232 EVT VT = N->getValueType(ResNo: 0);
9233 SDValue Op = N->getOperand(Num: 0);
9234 SDValue Mask = N->getOperand(Num: 1);
9235 SDValue EVL = N->getOperand(Num: 2);
9236 EVT SHVT = getShiftAmountTy(LHSTy: VT, DL: DAG.getDataLayout());
9237 unsigned Sz = VT.getScalarSizeInBits();
9238
9239 SDValue Tmp, Tmp2, Tmp3;
9240
  // If we can, perform BSWAP first, then mask+swap the i4 pairs, then the i2
  // pairs and finally the i1 pairs.
9243 // TODO: We can easily support i4/i2 legal types if any target ever does.
9244 if (Sz >= 8 && isPowerOf2_32(Value: Sz)) {
9245 // Create the masks - repeating the pattern every byte.
9246 APInt Mask4 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x0F));
9247 APInt Mask2 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x33));
9248 APInt Mask1 = APInt::getSplat(NewLen: Sz, V: APInt(8, 0x55));
9249
9250 // BSWAP if the type is wider than a single byte.
9251 Tmp = (Sz > 8 ? DAG.getNode(Opcode: ISD::VP_BSWAP, DL: dl, VT, N1: Op, N2: Mask, N3: EVL) : Op);
9252
9253 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
9254 Tmp2 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT),
9255 N3: Mask, N4: EVL);
9256 Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
9257 N2: DAG.getConstant(Val: Mask4, DL: dl, VT), N3: Mask, N4: EVL);
9258 Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask4, DL: dl, VT),
9259 N3: Mask, N4: EVL);
9260 Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 4, DL: dl, VT: SHVT),
9261 N3: Mask, N4: EVL);
9262 Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);
9263
9264 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
9265 Tmp2 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT),
9266 N3: Mask, N4: EVL);
9267 Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
9268 N2: DAG.getConstant(Val: Mask2, DL: dl, VT), N3: Mask, N4: EVL);
9269 Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask2, DL: dl, VT),
9270 N3: Mask, N4: EVL);
9271 Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 2, DL: dl, VT: SHVT),
9272 N3: Mask, N4: EVL);
9273 Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);
9274
9275 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
9276 Tmp2 = DAG.getNode(Opcode: ISD::VP_LSHR, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT),
9277 N3: Mask, N4: EVL);
9278 Tmp2 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp2,
9279 N2: DAG.getConstant(Val: Mask1, DL: dl, VT), N3: Mask, N4: EVL);
9280 Tmp3 = DAG.getNode(Opcode: ISD::VP_AND, DL: dl, VT, N1: Tmp, N2: DAG.getConstant(Val: Mask1, DL: dl, VT),
9281 N3: Mask, N4: EVL);
9282 Tmp3 = DAG.getNode(Opcode: ISD::VP_SHL, DL: dl, VT, N1: Tmp3, N2: DAG.getConstant(Val: 1, DL: dl, VT: SHVT),
9283 N3: Mask, N4: EVL);
9284 Tmp = DAG.getNode(Opcode: ISD::VP_OR, DL: dl, VT, N1: Tmp2, N2: Tmp3, N3: Mask, N4: EVL);
9285 return Tmp;
9286 }
9287 return SDValue();
9288}
9289
9290std::pair<SDValue, SDValue>
9291TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
9292 SelectionDAG &DAG) const {
9293 SDLoc SL(LD);
9294 SDValue Chain = LD->getChain();
9295 SDValue BasePTR = LD->getBasePtr();
9296 EVT SrcVT = LD->getMemoryVT();
9297 EVT DstVT = LD->getValueType(ResNo: 0);
9298 ISD::LoadExtType ExtType = LD->getExtensionType();
9299
9300 if (SrcVT.isScalableVector())
9301 report_fatal_error(reason: "Cannot scalarize scalable vector loads");
9302
9303 unsigned NumElem = SrcVT.getVectorNumElements();
9304
9305 EVT SrcEltVT = SrcVT.getScalarType();
9306 EVT DstEltVT = DstVT.getScalarType();
9307
9308 // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depends on it, e.g. in the
9310 // handling of a bitcast of a vector type to int, which may be done with a
9311 // vector store followed by an integer load. A vector that does not have
9312 // elements that are byte-sized must therefore be stored as an integer
9313 // built out of the extracted vector elements.
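  //
  // For example, a <4 x i1> load becomes an i8 (the store size) extending
  // load, after which element Idx is recovered as trunc((Load >> Idx) & 1)
  // on little-endian targets.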
9314 if (!SrcEltVT.isByteSized()) {
9315 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
9316 EVT LoadVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumLoadBits);
9317
9318 unsigned NumSrcBits = SrcVT.getSizeInBits();
9319 EVT SrcIntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumSrcBits);
9320
9321 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
9322 SDValue SrcEltBitMask = DAG.getConstant(
9323 Val: APInt::getLowBitsSet(numBits: NumLoadBits, loBitsSet: SrcEltBits), DL: SL, VT: LoadVT);
9324
    // Load the whole vector and avoid masking off the top bits, as masking
    // makes the codegen worse.
9327 SDValue Load =
9328 DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl: SL, VT: LoadVT, Chain, Ptr: BasePTR,
9329 PtrInfo: LD->getPointerInfo(), MemVT: SrcIntVT, Alignment: LD->getOriginalAlign(),
9330 MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
9331
9332 SmallVector<SDValue, 8> Vals;
9333 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9334 unsigned ShiftIntoIdx =
9335 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
9336 SDValue ShiftAmount =
9337 DAG.getShiftAmountConstant(Val: ShiftIntoIdx * SrcEltVT.getSizeInBits(),
9338 VT: LoadVT, DL: SL, /*LegalTypes=*/false);
9339 SDValue ShiftedElt = DAG.getNode(Opcode: ISD::SRL, DL: SL, VT: LoadVT, N1: Load, N2: ShiftAmount);
9340 SDValue Elt =
9341 DAG.getNode(Opcode: ISD::AND, DL: SL, VT: LoadVT, N1: ShiftedElt, N2: SrcEltBitMask);
9342 SDValue Scalar = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SL, VT: SrcEltVT, Operand: Elt);
9343
9344 if (ExtType != ISD::NON_EXTLOAD) {
9345 unsigned ExtendOp = ISD::getExtForLoadExtType(IsFP: false, ExtType);
9346 Scalar = DAG.getNode(Opcode: ExtendOp, DL: SL, VT: DstEltVT, Operand: Scalar);
9347 }
9348
9349 Vals.push_back(Elt: Scalar);
9350 }
9351
9352 SDValue Value = DAG.getBuildVector(VT: DstVT, DL: SL, Ops: Vals);
9353 return std::make_pair(x&: Value, y: Load.getValue(R: 1));
9354 }
9355
  assert(SrcEltVT.isByteSized());
  unsigned Stride = SrcEltVT.getSizeInBits() / 8;
9358
9359 SmallVector<SDValue, 8> Vals;
9360 SmallVector<SDValue, 8> LoadChains;
9361
9362 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9363 SDValue ScalarLoad =
9364 DAG.getExtLoad(ExtType, dl: SL, VT: DstEltVT, Chain, Ptr: BasePTR,
9365 PtrInfo: LD->getPointerInfo().getWithOffset(O: Idx * Stride),
9366 MemVT: SrcEltVT, Alignment: LD->getOriginalAlign(),
9367 MMOFlags: LD->getMemOperand()->getFlags(), AAInfo: LD->getAAInfo());
9368
9369 BasePTR = DAG.getObjectPtrOffset(SL, Ptr: BasePTR, Offset: TypeSize::getFixed(ExactSize: Stride));
9370
9371 Vals.push_back(Elt: ScalarLoad.getValue(R: 0));
9372 LoadChains.push_back(Elt: ScalarLoad.getValue(R: 1));
9373 }
9374
9375 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
9376 SDValue Value = DAG.getBuildVector(VT: DstVT, DL: SL, Ops: Vals);
9377
9378 return std::make_pair(x&: Value, y&: NewChain);
9379}
9380
9381SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
9382 SelectionDAG &DAG) const {
9383 SDLoc SL(ST);
9384
9385 SDValue Chain = ST->getChain();
9386 SDValue BasePtr = ST->getBasePtr();
9387 SDValue Value = ST->getValue();
9388 EVT StVT = ST->getMemoryVT();
9389
9390 if (StVT.isScalableVector())
9391 report_fatal_error(reason: "Cannot scalarize scalable vector stores");
9392
9393 // The type of the data we want to save
9394 EVT RegVT = Value.getValueType();
9395 EVT RegSclVT = RegVT.getScalarType();
9396
9397 // The type of data as saved in memory.
9398 EVT MemSclVT = StVT.getScalarType();
9399
9400 unsigned NumElem = StVT.getVectorNumElements();
9401
9402 // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code paths depend on it, e.g. in the
9404 // handling of a bitcast of a vector type to int, which may be done with a
9405 // vector store followed by an integer load. A vector that does not have
9406 // elements that are byte-sized must therefore be stored as an integer
9407 // built out of the extracted vector elements.
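  // Illustrative example: storing a little-endian v4i1 builds an i4 value in
  // which bit Idx holds element Idx, i.e.
  //   CurrVal |= ZERO_EXTEND(TRUNCATE(Elt[Idx])) << Idx,
  // and that integer is then written with a single store.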
9408 if (!MemSclVT.isByteSized()) {
9409 unsigned NumBits = StVT.getSizeInBits();
9410 EVT IntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumBits);
9411
9412 SDValue CurrVal = DAG.getConstant(Val: 0, DL: SL, VT: IntVT);
9413
9414 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9415 SDValue Elt = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SL, VT: RegSclVT, N1: Value,
9416 N2: DAG.getVectorIdxConstant(Val: Idx, DL: SL));
9417 SDValue Trunc = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SL, VT: MemSclVT, Operand: Elt);
9418 SDValue ExtElt = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SL, VT: IntVT, Operand: Trunc);
9419 unsigned ShiftIntoIdx =
9420 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
9421 SDValue ShiftAmount =
9422 DAG.getConstant(Val: ShiftIntoIdx * MemSclVT.getSizeInBits(), DL: SL, VT: IntVT);
9423 SDValue ShiftedElt =
9424 DAG.getNode(Opcode: ISD::SHL, DL: SL, VT: IntVT, N1: ExtElt, N2: ShiftAmount);
9425 CurrVal = DAG.getNode(Opcode: ISD::OR, DL: SL, VT: IntVT, N1: CurrVal, N2: ShiftedElt);
9426 }
9427
9428 return DAG.getStore(Chain, dl: SL, Val: CurrVal, Ptr: BasePtr, PtrInfo: ST->getPointerInfo(),
9429 Alignment: ST->getOriginalAlign(), MMOFlags: ST->getMemOperand()->getFlags(),
9430 AAInfo: ST->getAAInfo());
9431 }
9432
9433 // Store Stride in bytes
9434 unsigned Stride = MemSclVT.getSizeInBits() / 8;
9435 assert(Stride && "Zero stride!");
9436 // Extract each of the elements from the original vector and save them into
9437 // memory individually.
9438 SmallVector<SDValue, 8> Stores;
9439 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9440 SDValue Elt = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SL, VT: RegSclVT, N1: Value,
9441 N2: DAG.getVectorIdxConstant(Val: Idx, DL: SL));
9442
9443 SDValue Ptr =
9444 DAG.getObjectPtrOffset(SL, Ptr: BasePtr, Offset: TypeSize::getFixed(ExactSize: Idx * Stride));
9445
9446 // This scalar TruncStore may be illegal, but we legalize it later.
9447 SDValue Store = DAG.getTruncStore(
9448 Chain, dl: SL, Val: Elt, Ptr, PtrInfo: ST->getPointerInfo().getWithOffset(O: Idx * Stride),
9449 SVT: MemSclVT, Alignment: ST->getOriginalAlign(), MMOFlags: ST->getMemOperand()->getFlags(),
9450 AAInfo: ST->getAAInfo());
9451
9452 Stores.push_back(Elt: Store);
9453 }
9454
9455 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
9456}
9457
9458std::pair<SDValue, SDValue>
9459TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
9460 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
9461 "unaligned indexed loads not implemented!");
9462 SDValue Chain = LD->getChain();
9463 SDValue Ptr = LD->getBasePtr();
9464 EVT VT = LD->getValueType(ResNo: 0);
9465 EVT LoadedVT = LD->getMemoryVT();
9466 SDLoc dl(LD);
9467 auto &MF = DAG.getMachineFunction();
9468
9469 if (VT.isFloatingPoint() || VT.isVector()) {
9470 EVT intVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: LoadedVT.getSizeInBits());
9471 if (isTypeLegal(VT: intVT) && isTypeLegal(VT: LoadedVT)) {
9472 if (!isOperationLegalOrCustom(Op: ISD::LOAD, VT: intVT) &&
9473 LoadedVT.isVector()) {
9474 // Scalarize the load and let the individual components be handled.
9475 return scalarizeVectorLoad(LD, DAG);
9476 }
9477
9478 // Expand to a (misaligned) integer load of the same size,
9479 // then bitconvert to floating point or vector.
9480 SDValue newLoad = DAG.getLoad(VT: intVT, dl, Chain, Ptr,
9481 MMO: LD->getMemOperand());
9482 SDValue Result = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: LoadedVT, Operand: newLoad);
9483 if (LoadedVT != VT)
9484 Result = DAG.getNode(Opcode: VT.isFloatingPoint() ? ISD::FP_EXTEND :
9485 ISD::ANY_EXTEND, DL: dl, VT, Operand: Result);
9486
9487 return std::make_pair(x&: Result, y: newLoad.getValue(R: 1));
9488 }
9489
    // Copy the value to an (aligned) stack slot using (unaligned) integer
    // loads and stores, then do an (aligned) load from the stack slot.
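    // Illustrative example: an unaligned f64 load on a target with 32-bit
    // registers becomes two (unaligned) i32 loads, two i32 stores into the
    // aligned stack slot, and one final aligned f64 load from that slot.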
9492 MVT RegVT = getRegisterType(Context&: *DAG.getContext(), VT: intVT);
9493 unsigned LoadedBytes = LoadedVT.getStoreSize();
9494 unsigned RegBytes = RegVT.getSizeInBits() / 8;
9495 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
9496
9497 // Make sure the stack slot is also aligned for the register type.
9498 SDValue StackBase = DAG.CreateStackTemporary(VT1: LoadedVT, VT2: RegVT);
9499 auto FrameIndex = cast<FrameIndexSDNode>(Val: StackBase.getNode())->getIndex();
9500 SmallVector<SDValue, 8> Stores;
9501 SDValue StackPtr = StackBase;
9502 unsigned Offset = 0;
9503
9504 EVT PtrVT = Ptr.getValueType();
9505 EVT StackPtrVT = StackPtr.getValueType();
9506
9507 SDValue PtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: PtrVT);
9508 SDValue StackPtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: StackPtrVT);
9509
    // Do all but the last copy using the full register width.
9511 for (unsigned i = 1; i < NumRegs; i++) {
9512 // Load one integer register's worth from the original location.
9513 SDValue Load = DAG.getLoad(
9514 VT: RegVT, dl, Chain, Ptr, PtrInfo: LD->getPointerInfo().getWithOffset(O: Offset),
9515 Alignment: LD->getOriginalAlign(), MMOFlags: LD->getMemOperand()->getFlags(),
9516 AAInfo: LD->getAAInfo());
9517 // Follow the load with a store to the stack slot. Remember the store.
9518 Stores.push_back(Elt: DAG.getStore(
9519 Chain: Load.getValue(R: 1), dl, Val: Load, Ptr: StackPtr,
9520 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset)));
9521 // Increment the pointers.
9522 Offset += RegBytes;
9523
9524 Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: PtrIncrement);
9525 StackPtr = DAG.getObjectPtrOffset(SL: dl, Ptr: StackPtr, Offset: StackPtrIncrement);
9526 }
9527
9528 // The last copy may be partial. Do an extending load.
9529 EVT MemVT = EVT::getIntegerVT(Context&: *DAG.getContext(),
9530 BitWidth: 8 * (LoadedBytes - Offset));
9531 SDValue Load =
9532 DAG.getExtLoad(ExtType: ISD::EXTLOAD, dl, VT: RegVT, Chain, Ptr,
9533 PtrInfo: LD->getPointerInfo().getWithOffset(O: Offset), MemVT,
9534 Alignment: LD->getOriginalAlign(), MMOFlags: LD->getMemOperand()->getFlags(),
9535 AAInfo: LD->getAAInfo());
9536 // Follow the load with a store to the stack slot. Remember the store.
9537 // On big-endian machines this requires a truncating store to ensure
9538 // that the bits end up in the right place.
9539 Stores.push_back(Elt: DAG.getTruncStore(
9540 Chain: Load.getValue(R: 1), dl, Val: Load, Ptr: StackPtr,
9541 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset), SVT: MemVT));
9542
9543 // The order of the stores doesn't matter - say it with a TokenFactor.
9544 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
9545
9546 // Finally, perform the original load only redirected to the stack slot.
9547 Load = DAG.getExtLoad(ExtType: LD->getExtensionType(), dl, VT, Chain: TF, Ptr: StackBase,
9548 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset: 0),
9549 MemVT: LoadedVT);
9550
9551 // Callers expect a MERGE_VALUES node.
9552 return std::make_pair(x&: Load, y&: TF);
9553 }
9554
9555 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
9556 "Unaligned load of unsupported type.");
9557
9558 // Compute the new VT that is half the size of the old one. This is an
9559 // integer MVT.
9560 unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT =
      EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: NumBits / 2);
  NumBits >>= 1;
9564
9565 Align Alignment = LD->getOriginalAlign();
9566 unsigned IncrementSize = NumBits / 8;
9567 ISD::LoadExtType HiExtType = LD->getExtensionType();
9568
9569 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
9570 if (HiExtType == ISD::NON_EXTLOAD)
9571 HiExtType = ISD::ZEXTLOAD;
9572
9573 // Load the value in two parts
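  // Illustrative example: an unaligned i32 load is split into two halfword
  // loads; on little-endian, Lo = zextload i16 [Ptr] and Hi = load i16
  // [Ptr + 2], combined below as (Hi << 16) | Lo.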
9574 SDValue Lo, Hi;
9575 if (DAG.getDataLayout().isLittleEndian()) {
9576 Lo = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT, Chain, Ptr, PtrInfo: LD->getPointerInfo(),
9577 MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
9578 AAInfo: LD->getAAInfo());
9579
9580 Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
9581 Hi = DAG.getExtLoad(ExtType: HiExtType, dl, VT, Chain, Ptr,
9582 PtrInfo: LD->getPointerInfo().getWithOffset(O: IncrementSize),
9583 MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
9584 AAInfo: LD->getAAInfo());
9585 } else {
9586 Hi = DAG.getExtLoad(ExtType: HiExtType, dl, VT, Chain, Ptr, PtrInfo: LD->getPointerInfo(),
9587 MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
9588 AAInfo: LD->getAAInfo());
9589
9590 Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
9591 Lo = DAG.getExtLoad(ExtType: ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
9592 PtrInfo: LD->getPointerInfo().getWithOffset(O: IncrementSize),
9593 MemVT: NewLoadedVT, Alignment, MMOFlags: LD->getMemOperand()->getFlags(),
9594 AAInfo: LD->getAAInfo());
9595 }
9596
  // Aggregate the two parts.
9598 SDValue ShiftAmount = DAG.getShiftAmountConstant(Val: NumBits, VT, DL: dl);
9599 SDValue Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: Hi, N2: ShiftAmount);
9600 Result = DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: Result, N2: Lo);
9601
9602 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
9603 Hi.getValue(1));
9604
9605 return std::make_pair(x&: Result, y&: TF);
9606}
9607
9608SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
9609 SelectionDAG &DAG) const {
9610 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
9611 "unaligned indexed stores not implemented!");
9612 SDValue Chain = ST->getChain();
9613 SDValue Ptr = ST->getBasePtr();
9614 SDValue Val = ST->getValue();
9615 EVT VT = Val.getValueType();
9616 Align Alignment = ST->getOriginalAlign();
9617 auto &MF = DAG.getMachineFunction();
9618 EVT StoreMemVT = ST->getMemoryVT();
9619
9620 SDLoc dl(ST);
9621 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
9622 EVT intVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: VT.getSizeInBits());
9623 if (isTypeLegal(VT: intVT)) {
9624 if (!isOperationLegalOrCustom(Op: ISD::STORE, VT: intVT) &&
9625 StoreMemVT.isVector()) {
9626 // Scalarize the store and let the individual components be handled.
9627 SDValue Result = scalarizeVectorStore(ST, DAG);
9628 return Result;
9629 }
9630 // Expand to a bitconvert of the value to the integer type of the
9631 // same size, then a (misaligned) int store.
9632 // FIXME: Does not handle truncating floating point stores!
9633 SDValue Result = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: intVT, Operand: Val);
9634 Result = DAG.getStore(Chain, dl, Val: Result, Ptr, PtrInfo: ST->getPointerInfo(),
9635 Alignment, MMOFlags: ST->getMemOperand()->getFlags());
9636 return Result;
9637 }
    // Do an (aligned) store to a stack slot, then copy from the stack slot
9639 // to the final destination using (unaligned) integer loads and stores.
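    // Illustrative example: an unaligned f64 store on a target with 32-bit
    // registers first stores the f64 to an aligned stack slot, then copies it
    // out with two i32 loads and two (unaligned) i32 stores.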
9640 MVT RegVT = getRegisterType(
9641 Context&: *DAG.getContext(),
9642 VT: EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: StoreMemVT.getSizeInBits()));
9643 EVT PtrVT = Ptr.getValueType();
9644 unsigned StoredBytes = StoreMemVT.getStoreSize();
9645 unsigned RegBytes = RegVT.getSizeInBits() / 8;
9646 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
9647
9648 // Make sure the stack slot is also aligned for the register type.
9649 SDValue StackPtr = DAG.CreateStackTemporary(VT1: StoreMemVT, VT2: RegVT);
9650 auto FrameIndex = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
9651
9652 // Perform the original store, only redirected to the stack slot.
9653 SDValue Store = DAG.getTruncStore(
9654 Chain, dl, Val, Ptr: StackPtr,
9655 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset: 0), SVT: StoreMemVT);
9656
9657 EVT StackPtrVT = StackPtr.getValueType();
9658
9659 SDValue PtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: PtrVT);
9660 SDValue StackPtrIncrement = DAG.getConstant(Val: RegBytes, DL: dl, VT: StackPtrVT);
9661 SmallVector<SDValue, 8> Stores;
9662 unsigned Offset = 0;
9663
    // Do all but the last copy using the full register width.
9665 for (unsigned i = 1; i < NumRegs; i++) {
9666 // Load one integer register's worth from the stack slot.
9667 SDValue Load = DAG.getLoad(
9668 VT: RegVT, dl, Chain: Store, Ptr: StackPtr,
9669 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset));
9670 // Store it to the final location. Remember the store.
9671 Stores.push_back(Elt: DAG.getStore(Chain: Load.getValue(R: 1), dl, Val: Load, Ptr,
9672 PtrInfo: ST->getPointerInfo().getWithOffset(O: Offset),
9673 Alignment: ST->getOriginalAlign(),
9674 MMOFlags: ST->getMemOperand()->getFlags()));
9675 // Increment the pointers.
9676 Offset += RegBytes;
9677 StackPtr = DAG.getObjectPtrOffset(SL: dl, Ptr: StackPtr, Offset: StackPtrIncrement);
9678 Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: PtrIncrement);
9679 }
9680
9681 // The last store may be partial. Do a truncating store. On big-endian
9682 // machines this requires an extending load from the stack slot to ensure
9683 // that the bits are in the right place.
9684 EVT LoadMemVT =
9685 EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: 8 * (StoredBytes - Offset));
9686
9687 // Load from the stack slot.
9688 SDValue Load = DAG.getExtLoad(
9689 ExtType: ISD::EXTLOAD, dl, VT: RegVT, Chain: Store, Ptr: StackPtr,
9690 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex, Offset), MemVT: LoadMemVT);
9691
9692 Stores.push_back(
9693 Elt: DAG.getTruncStore(Chain: Load.getValue(R: 1), dl, Val: Load, Ptr,
9694 PtrInfo: ST->getPointerInfo().getWithOffset(O: Offset), SVT: LoadMemVT,
9695 Alignment: ST->getOriginalAlign(),
9696 MMOFlags: ST->getMemOperand()->getFlags(), AAInfo: ST->getAAInfo()));
9697 // The order of the stores doesn't matter - say it with a TokenFactor.
9698 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
9699 return Result;
9700 }
9701
9702 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
9703 "Unaligned store of unknown type.");
9704 // Get the half-size VT
9705 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(Context&: *DAG.getContext());
9706 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
9707 unsigned IncrementSize = NumBits / 8;
9708
9709 // Divide the stored value in two parts.
9710 SDValue ShiftAmount =
9711 DAG.getShiftAmountConstant(Val: NumBits, VT: Val.getValueType(), DL: dl);
9712 SDValue Lo = Val;
9713 // If Val is a constant, replace the upper bits with 0. The SRL will constant
9714 // fold and not use the upper bits. A smaller constant may be easier to
9715 // materialize.
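  // Illustrative example: storing the i32 constant 0x12345678 as two i16
  // halves masks Lo down to 0x5678 while the SRL folds Hi to 0x1234, so
  // neither half needs the full 32-bit constant materialized.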
9716 if (auto *C = dyn_cast<ConstantSDNode>(Val&: Lo); C && !C->isOpaque())
9717 Lo = DAG.getNode(
9718 Opcode: ISD::AND, DL: dl, VT, N1: Lo,
9719 N2: DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VT.getSizeInBits(), loBitsSet: NumBits), DL: dl,
9720 VT));
9721 SDValue Hi = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: Val, N2: ShiftAmount);
9722
9723 // Store the two parts
9724 SDValue Store1, Store2;
9725 Store1 = DAG.getTruncStore(Chain, dl,
9726 Val: DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
9727 Ptr, PtrInfo: ST->getPointerInfo(), SVT: NewStoredVT, Alignment,
9728 MMOFlags: ST->getMemOperand()->getFlags());
9729
9730 Ptr = DAG.getObjectPtrOffset(SL: dl, Ptr, Offset: TypeSize::getFixed(ExactSize: IncrementSize));
9731 Store2 = DAG.getTruncStore(
9732 Chain, dl, Val: DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
9733 PtrInfo: ST->getPointerInfo().getWithOffset(O: IncrementSize), SVT: NewStoredVT, Alignment,
9734 MMOFlags: ST->getMemOperand()->getFlags(), AAInfo: ST->getAAInfo());
9735
9736 SDValue Result =
9737 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
9738 return Result;
9739}
9740
9741SDValue
9742TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
9743 const SDLoc &DL, EVT DataVT,
9744 SelectionDAG &DAG,
9745 bool IsCompressedMemory) const {
9746 SDValue Increment;
9747 EVT AddrVT = Addr.getValueType();
9748 EVT MaskVT = Mask.getValueType();
9749 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
9750 "Incompatible types of Data and Mask");
9751 if (IsCompressedMemory) {
9752 if (DataVT.isScalableVector())
9753 report_fatal_error(
9754 reason: "Cannot currently handle compressed memory with scalable vectors");
    // Increment the pointer according to the number of '1's in the mask.
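    // Illustrative example: a compressed v4i32 store with mask 0b0101
    // advances the address by popcount(0b0101) * 4 = 8 bytes.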
9756 EVT MaskIntVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: MaskVT.getSizeInBits());
9757 SDValue MaskInIntReg = DAG.getBitcast(VT: MaskIntVT, V: Mask);
9758 if (MaskIntVT.getSizeInBits() < 32) {
9759 MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
9760 MaskIntVT = MVT::i32;
9761 }
9762
9763 // Count '1's with POPCNT.
9764 Increment = DAG.getNode(Opcode: ISD::CTPOP, DL, VT: MaskIntVT, Operand: MaskInIntReg);
9765 Increment = DAG.getZExtOrTrunc(Op: Increment, DL, VT: AddrVT);
9766 // Scale is an element size in bytes.
9767 SDValue Scale = DAG.getConstant(Val: DataVT.getScalarSizeInBits() / 8, DL,
9768 VT: AddrVT);
9769 Increment = DAG.getNode(Opcode: ISD::MUL, DL, VT: AddrVT, N1: Increment, N2: Scale);
9770 } else if (DataVT.isScalableVector()) {
9771 Increment = DAG.getVScale(DL, VT: AddrVT,
9772 MulImm: APInt(AddrVT.getFixedSizeInBits(),
9773 DataVT.getStoreSize().getKnownMinValue()));
9774 } else
9775 Increment = DAG.getConstant(Val: DataVT.getStoreSize(), DL, VT: AddrVT);
9776
9777 return DAG.getNode(Opcode: ISD::ADD, DL, VT: AddrVT, N1: Addr, N2: Increment);
9778}
9779
9780static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
9781 EVT VecVT, const SDLoc &dl,
9782 ElementCount SubEC) {
9783 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
9784 "Cannot index a scalable vector within a fixed-width vector");
9785
9786 unsigned NElts = VecVT.getVectorMinNumElements();
9787 unsigned NumSubElts = SubEC.getKnownMinValue();
9788 EVT IdxVT = Idx.getValueType();
9789
9790 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
    // If this is a constant index and we know that the index plus the number
    // of elements in the subvector minus one is less than the minimum number
    // of elements, then it's safe to return Idx.
9794 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Val&: Idx))
9795 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
9796 return Idx;
9797 SDValue VS =
9798 DAG.getVScale(DL: dl, VT: IdxVT, MulImm: APInt(IdxVT.getFixedSizeInBits(), NElts));
9799 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
9800 SDValue Sub = DAG.getNode(Opcode: SubOpcode, DL: dl, VT: IdxVT, N1: VS,
9801 N2: DAG.getConstant(Val: NumSubElts, DL: dl, VT: IdxVT));
9802 return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT: IdxVT, N1: Idx, N2: Sub);
9803 }
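  // For a single-element subvector of a power-of-two-length vector the clamp
  // reduces to a mask; e.g. indexing into v8i32 clamps Idx to Idx & 7.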
9804 if (isPowerOf2_32(Value: NElts) && NumSubElts == 1) {
9805 APInt Imm = APInt::getLowBitsSet(numBits: IdxVT.getSizeInBits(), loBitsSet: Log2_32(Value: NElts));
9806 return DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IdxVT, N1: Idx,
9807 N2: DAG.getConstant(Val: Imm, DL: dl, VT: IdxVT));
9808 }
9809 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
9810 return DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT: IdxVT, N1: Idx,
9811 N2: DAG.getConstant(Val: MaxIndex, DL: dl, VT: IdxVT));
9812}
9813
9814SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
9815 SDValue VecPtr, EVT VecVT,
9816 SDValue Index) const {
9817 return getVectorSubVecPointer(
9818 DAG, VecPtr, VecVT,
9819 SubVecVT: EVT::getVectorVT(Context&: *DAG.getContext(), VT: VecVT.getVectorElementType(), NumElements: 1),
9820 Index);
9821}
9822
9823SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
9824 SDValue VecPtr, EVT VecVT,
9825 EVT SubVecVT,
9826 SDValue Index) const {
9827 SDLoc dl(Index);
9828 // Make sure the index type is big enough to compute in.
9829 Index = DAG.getZExtOrTrunc(Op: Index, DL: dl, VT: VecPtr.getValueType());
9830
9831 EVT EltVT = VecVT.getVectorElementType();
9832
9833 // Calculate the element offset and add it to the pointer.
9834 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
9835 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
9836 "Converting bits to bytes lost precision");
9837 assert(SubVecVT.getVectorElementType() == EltVT &&
9838 "Sub-vector must be a vector with matching element type");
9839 Index = clampDynamicVectorIndex(DAG, Idx: Index, VecVT, dl,
9840 SubEC: SubVecVT.getVectorElementCount());
9841
9842 EVT IdxVT = Index.getValueType();
9843 if (SubVecVT.isScalableVector())
9844 Index =
9845 DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IdxVT, N1: Index,
9846 N2: DAG.getVScale(DL: dl, VT: IdxVT, MulImm: APInt(IdxVT.getSizeInBits(), 1)));
9847
9848 Index = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: IdxVT, N1: Index,
9849 N2: DAG.getConstant(Val: EltSize, DL: dl, VT: IdxVT));
9850 return DAG.getMemBasePlusOffset(Base: VecPtr, Offset: Index, DL: dl);
9851}
9852
9853//===----------------------------------------------------------------------===//
9854// Implementation of Emulated TLS Model
9855//===----------------------------------------------------------------------===//
9856
9857SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
9858 SelectionDAG &DAG) const {
  // Access to the address of TLS variable xyz is lowered to a function call:
9860 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
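  // For example, a reference to TLS variable xyz becomes a call to
  // __emutls_get_address(&__emutls_v.xyz), which returns the address of the
  // current thread's copy of xyz.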
9861 EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
9862 PointerType *VoidPtrType = PointerType::get(C&: *DAG.getContext(), AddressSpace: 0);
9863 SDLoc dl(GA);
9864
9865 ArgListTy Args;
9866 ArgListEntry Entry;
9867 std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
9868 Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
9869 StringRef EmuTlsVarName(NameString);
9870 GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(Name: EmuTlsVarName);
  assert(EmuTlsVar && "Cannot find EmuTlsVar");
9872 Entry.Node = DAG.getGlobalAddress(GV: EmuTlsVar, DL: dl, VT: PtrVT);
9873 Entry.Ty = VoidPtrType;
9874 Args.push_back(x: Entry);
9875
9876 SDValue EmuTlsGetAddr = DAG.getExternalSymbol(Sym: "__emutls_get_address", VT: PtrVT);
9877
9878 TargetLowering::CallLoweringInfo CLI(DAG);
9879 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
9880 CLI.setLibCallee(CC: CallingConv::C, ResultType: VoidPtrType, Target: EmuTlsGetAddr, ArgsList: std::move(Args));
9881 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
9882
  // TLSADDR will be codegen'ed as a call. Inform MFI that the function has
  // calls. At least for X86 targets; maybe good for other targets too?
9885 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setAdjustsStack(true); // Is this needed only for X86 targets?
9887 MFI.setHasCalls(true);
9888
9889 assert((GA->getOffset() == 0) &&
9890 "Emulated TLS must have zero offset in GlobalAddressSDNode");
9891 return CallResult.first;
9892}
9893
9894SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
9895 SelectionDAG &DAG) const {
9896 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
9897 if (!isCtlzFast())
9898 return SDValue();
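  // The expansion below relies on ctlz(x) == BitWidth iff x == 0, so
  // (ctlz(x) >> log2(BitWidth)) is 1 exactly when x == 0; e.g. for i32,
  // x == 0 gives ctlz = 32 and 32 >> 5 == 1, while any nonzero x gives
  // ctlz <= 31 and therefore 0 after the shift.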
9899 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get();
9900 SDLoc dl(Op);
9901 if (isNullConstant(V: Op.getOperand(i: 1)) && CC == ISD::SETEQ) {
9902 EVT VT = Op.getOperand(i: 0).getValueType();
9903 SDValue Zext = Op.getOperand(i: 0);
9904 if (VT.bitsLT(MVT::i32)) {
9905 VT = MVT::i32;
9906 Zext = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: dl, VT, Operand: Op.getOperand(i: 0));
9907 }
9908 unsigned Log2b = Log2_32(Value: VT.getSizeInBits());
9909 SDValue Clz = DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT, Operand: Zext);
9910 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
9911 DAG.getConstant(Log2b, dl, MVT::i32));
9912 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
9913 }
9914 return SDValue();
9915}
9916
9917SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
9918 SDValue Op0 = Node->getOperand(Num: 0);
9919 SDValue Op1 = Node->getOperand(Num: 1);
9920 EVT VT = Op0.getValueType();
9921 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
9922 unsigned Opcode = Node->getOpcode();
9923 SDLoc DL(Node);
9924
9925 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
9926 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(V: Op1, AllowUndefs: true) && BoolVT == VT &&
9927 getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
9928 Op0 = DAG.getFreeze(V: Op0);
9929 SDValue Zero = DAG.getConstant(Val: 0, DL, VT);
9930 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0,
9931 N2: DAG.getSetCC(DL, VT, LHS: Op0, RHS: Zero, Cond: ISD::SETEQ));
9932 }
9933
9934 // umin(x,y) -> sub(x,usubsat(x,y))
9935 // TODO: Missing freeze(Op0)?
9936 if (Opcode == ISD::UMIN && isOperationLegal(Op: ISD::SUB, VT) &&
9937 isOperationLegal(Op: ISD::USUBSAT, VT)) {
9938 return DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Op0,
9939 N2: DAG.getNode(Opcode: ISD::USUBSAT, DL, VT, N1: Op0, N2: Op1));
9940 }
9941
9942 // umax(x,y) -> add(x,usubsat(y,x))
9943 // TODO: Missing freeze(Op0)?
9944 if (Opcode == ISD::UMAX && isOperationLegal(Op: ISD::ADD, VT) &&
9945 isOperationLegal(Op: ISD::USUBSAT, VT)) {
9946 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op0,
9947 N2: DAG.getNode(Opcode: ISD::USUBSAT, DL, VT, N1: Op1, N2: Op0));
9948 }
9949
9950 // FIXME: Should really try to split the vector in case it's legal on a
9951 // subvector.
9952 if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
9953 return DAG.UnrollVectorOp(N: Node);
9954
9955 // Attempt to find an existing SETCC node that we can reuse.
9956 // TODO: Do we need a generic doesSETCCNodeExist?
9957 // TODO: Missing freeze(Op0)/freeze(Op1)?
9958 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
9959 ISD::CondCode PrefCommuteCC,
9960 ISD::CondCode AltCommuteCC) {
9961 SDVTList BoolVTList = DAG.getVTList(VT: BoolVT);
9962 for (ISD::CondCode CC : {PrefCC, AltCC}) {
9963 if (DAG.doesNodeExist(Opcode: ISD::SETCC, VTList: BoolVTList,
9964 Ops: {Op0, Op1, DAG.getCondCode(Cond: CC)})) {
9965 SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: CC);
9966 return DAG.getSelect(DL, VT, Cond, LHS: Op0, RHS: Op1);
9967 }
9968 }
9969 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
9970 if (DAG.doesNodeExist(Opcode: ISD::SETCC, VTList: BoolVTList,
9971 Ops: {Op0, Op1, DAG.getCondCode(Cond: CC)})) {
9972 SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: CC);
9973 return DAG.getSelect(DL, VT, Cond, LHS: Op1, RHS: Op0);
9974 }
9975 }
9976 SDValue Cond = DAG.getSetCC(DL, VT: BoolVT, LHS: Op0, RHS: Op1, Cond: PrefCC);
9977 return DAG.getSelect(DL, VT, Cond, LHS: Op0, RHS: Op1);
9978 };
9979
9980 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
9981 // -> Y = (A < B) ? B : A
9982 // -> Y = (A >= B) ? A : B
9983 // -> Y = (A <= B) ? B : A
9984 switch (Opcode) {
9985 case ISD::SMAX:
9986 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
9987 case ISD::SMIN:
9988 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
9989 case ISD::UMAX:
9990 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
9991 case ISD::UMIN:
9992 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
9993 }
9994
9995 llvm_unreachable("How did we get here?");
9996}
9997
9998SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
9999 unsigned Opcode = Node->getOpcode();
10000 SDValue LHS = Node->getOperand(Num: 0);
10001 SDValue RHS = Node->getOperand(Num: 1);
10002 EVT VT = LHS.getValueType();
10003 SDLoc dl(Node);
10004
10005 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10006 assert(VT.isInteger() && "Expected operands to be integers");
10007
10008 // usub.sat(a, b) -> umax(a, b) - b
10009 if (Opcode == ISD::USUBSAT && isOperationLegal(Op: ISD::UMAX, VT)) {
10010 SDValue Max = DAG.getNode(Opcode: ISD::UMAX, DL: dl, VT, N1: LHS, N2: RHS);
10011 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Max, N2: RHS);
10012 }
10013
10014 // uadd.sat(a, b) -> umin(a, ~b) + b
10015 if (Opcode == ISD::UADDSAT && isOperationLegal(Op: ISD::UMIN, VT)) {
10016 SDValue InvRHS = DAG.getNOT(DL: dl, Val: RHS, VT);
10017 SDValue Min = DAG.getNode(Opcode: ISD::UMIN, DL: dl, VT, N1: LHS, N2: InvRHS);
10018 return DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: Min, N2: RHS);
10019 }
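  // Worked i8 example of the identity above: uadd.sat(200, 100) computes
  // umin(200, ~100 = 155) = 155 and then 155 + 100 = 255 (saturated), while
  // the non-overflowing uadd.sat(10, 20) computes umin(10, 235) = 10 and then
  // 10 + 20 = 30. The umin clamps LHS so the following ADD cannot wrap.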
10020
10021 unsigned OverflowOp;
10022 switch (Opcode) {
10023 case ISD::SADDSAT:
10024 OverflowOp = ISD::SADDO;
10025 break;
10026 case ISD::UADDSAT:
10027 OverflowOp = ISD::UADDO;
10028 break;
10029 case ISD::SSUBSAT:
10030 OverflowOp = ISD::SSUBO;
10031 break;
10032 case ISD::USUBSAT:
10033 OverflowOp = ISD::USUBO;
10034 break;
10035 default:
10036 llvm_unreachable("Expected method to receive signed or unsigned saturation "
10037 "addition or subtraction node.");
10038 }
10039
10040 // FIXME: Should really try to split the vector in case it's legal on a
10041 // subvector.
10042 if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
10043 return DAG.UnrollVectorOp(N: Node);
10044
10045 unsigned BitWidth = LHS.getScalarValueSizeInBits();
10046 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10047 SDValue Result = DAG.getNode(Opcode: OverflowOp, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
10048 SDValue SumDiff = Result.getValue(R: 0);
10049 SDValue Overflow = Result.getValue(R: 1);
10050 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
10051 SDValue AllOnes = DAG.getAllOnesConstant(DL: dl, VT);
10052
10053 if (Opcode == ISD::UADDSAT) {
10054 if (getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
10055 // (LHS + RHS) | OverflowMask
10056 SDValue OverflowMask = DAG.getSExtOrTrunc(Op: Overflow, DL: dl, VT);
10057 return DAG.getNode(Opcode: ISD::OR, DL: dl, VT, N1: SumDiff, N2: OverflowMask);
10058 }
10059 // Overflow ? 0xffff.... : (LHS + RHS)
10060 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: AllOnes, RHS: SumDiff);
10061 }
10062
10063 if (Opcode == ISD::USUBSAT) {
10064 if (getBooleanContents(Type: VT) == ZeroOrNegativeOneBooleanContent) {
10065 // (LHS - RHS) & ~OverflowMask
10066 SDValue OverflowMask = DAG.getSExtOrTrunc(Op: Overflow, DL: dl, VT);
10067 SDValue Not = DAG.getNOT(DL: dl, Val: OverflowMask, VT);
10068 return DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: SumDiff, N2: Not);
10069 }
10070 // Overflow ? 0 : (LHS - RHS)
10071 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Zero, RHS: SumDiff);
10072 }
10073
10074 if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
10075 APInt MinVal = APInt::getSignedMinValue(numBits: BitWidth);
10076 APInt MaxVal = APInt::getSignedMaxValue(numBits: BitWidth);
10077
10078 KnownBits KnownLHS = DAG.computeKnownBits(Op: LHS);
10079 KnownBits KnownRHS = DAG.computeKnownBits(Op: RHS);
10080
10081 // If either of the operand signs are known, then they are guaranteed to
10082 // only saturate in one direction. If non-negative they will saturate
10083 // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
10084 //
10085 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
10086 // sign of 'y' has to be flipped.
10087
10088 bool LHSIsNonNegative = KnownLHS.isNonNegative();
10089 bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
10090 : KnownRHS.isNegative();
10091 if (LHSIsNonNegative || RHSIsNonNegative) {
10092 SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
10093 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMax, RHS: SumDiff);
10094 }
10095
10096 bool LHSIsNegative = KnownLHS.isNegative();
10097 bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
10098 : KnownRHS.isNonNegative();
10099 if (LHSIsNegative || RHSIsNegative) {
10100 SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
10101 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMin, RHS: SumDiff);
10102 }
10103 }
10104
  // Overflow ? (SumDiff >>s (BW - 1)) ^ MinVal : SumDiff
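  // On overflow, SumDiff's sign is flipped relative to the infinitely wide
  // result, so the arithmetic shift yields all-ones when the true result was
  // positive (and XOR with MinVal gives MaxVal), and all-zeros when it was
  // negative (XOR gives MinVal).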
10106 APInt MinVal = APInt::getSignedMinValue(numBits: BitWidth);
10107 SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
10108 SDValue Shift = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: SumDiff,
10109 N2: DAG.getConstant(Val: BitWidth - 1, DL: dl, VT));
10110 Result = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: Shift, N2: SatMin);
10111 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Result, RHS: SumDiff);
10112}
10113
10114SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
10115 unsigned Opcode = Node->getOpcode();
10116 bool IsSigned = Opcode == ISD::SSHLSAT;
10117 SDValue LHS = Node->getOperand(Num: 0);
10118 SDValue RHS = Node->getOperand(Num: 1);
10119 EVT VT = LHS.getValueType();
10120 SDLoc dl(Node);
10121
10122 assert((Node->getOpcode() == ISD::SSHLSAT ||
10123 Node->getOpcode() == ISD::USHLSAT) &&
10124 "Expected a SHLSAT opcode");
10125 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10126 assert(VT.isInteger() && "Expected operands to be integers");
10127
10128 if (VT.isVector() && !isOperationLegalOrCustom(Op: ISD::VSELECT, VT))
10129 return DAG.UnrollVectorOp(N: Node);
10130
10131 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
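  // Worked i8 example: ushl.sat(0x40, 2) computes Result = 0x00 (the shift
  // wraps), and since (0x00 >> 2) != 0x40 the value saturates to 0xFF.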
10132
10133 unsigned BW = VT.getScalarSizeInBits();
10134 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10135 SDValue Result = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS, N2: RHS);
10136 SDValue Orig =
10137 DAG.getNode(Opcode: IsSigned ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: Result, N2: RHS);
10138
10139 SDValue SatVal;
10140 if (IsSigned) {
10141 SDValue SatMin = DAG.getConstant(Val: APInt::getSignedMinValue(numBits: BW), DL: dl, VT);
10142 SDValue SatMax = DAG.getConstant(Val: APInt::getSignedMaxValue(numBits: BW), DL: dl, VT);
10143 SDValue Cond =
10144 DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: DAG.getConstant(Val: 0, DL: dl, VT), Cond: ISD::SETLT);
10145 SatVal = DAG.getSelect(DL: dl, VT, Cond, LHS: SatMin, RHS: SatMax);
10146 } else {
10147 SatVal = DAG.getConstant(Val: APInt::getMaxValue(numBits: BW), DL: dl, VT);
10148 }
10149 SDValue Cond = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: Orig, Cond: ISD::SETNE);
10150 return DAG.getSelect(DL: dl, VT, Cond, LHS: SatVal, RHS: Result);
10151}
10152
10153void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
10154 bool Signed, EVT WideVT,
10155 const SDValue LL, const SDValue LH,
10156 const SDValue RL, const SDValue RH,
10157 SDValue &Lo, SDValue &Hi) const {
10158 // We can fall back to a libcall with an illegal type for the MUL if we
10159 // have a libcall big enough.
10160 // Also, we can fall back to a division in some cases, but that's a big
10161 // performance hit in the general case.
10162 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
10163 if (WideVT == MVT::i16)
10164 LC = RTLIB::MUL_I16;
10165 else if (WideVT == MVT::i32)
10166 LC = RTLIB::MUL_I32;
10167 else if (WideVT == MVT::i64)
10168 LC = RTLIB::MUL_I64;
10169 else if (WideVT == MVT::i128)
10170 LC = RTLIB::MUL_I128;
10171
10172 if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(Call: LC)) {
10173 // We'll expand the multiplication by brute force because we have no other
10174 // options. This is a trivially-generalized version of the code from
10175 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
10176 // 4.3.1).
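    // Writing LL = LLH * 2^h + LLL and RL = RLH * 2^h + RLL with h = HalfBits,
    // the low product expands to
    //   LL * RL = LLH*RLH * 2^(2h) + (LLH*RLL + LLL*RLH) * 2^h + LLL*RLL,
    // so the half-width partial products below build Lo while the carries out
    // of the low word accumulate into W; the cross terms RH*LL and RL*LH are
    // then added into Hi.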
10177 EVT VT = LL.getValueType();
10178 unsigned Bits = VT.getSizeInBits();
10179 unsigned HalfBits = Bits >> 1;
10180 SDValue Mask =
10181 DAG.getConstant(Val: APInt::getLowBitsSet(numBits: Bits, loBitsSet: HalfBits), DL: dl, VT);
10182 SDValue LLL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: LL, N2: Mask);
10183 SDValue RLL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: RL, N2: Mask);
10184
10185 SDValue T = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LLL, N2: RLL);
10186 SDValue TL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: T, N2: Mask);
10187
10188 SDValue Shift = DAG.getShiftAmountConstant(Val: HalfBits, VT, DL: dl);
10189 SDValue TH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: T, N2: Shift);
10190 SDValue LLH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: LL, N2: Shift);
10191 SDValue RLH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: RL, N2: Shift);
10192
10193 SDValue U = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT,
10194 N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LLH, N2: RLL), N2: TH);
10195 SDValue UL = DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: U, N2: Mask);
10196 SDValue UH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: U, N2: Shift);
10197
10198 SDValue V = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT,
10199 N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LLL, N2: RLH), N2: UL);
10200 SDValue VH = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT, N1: V, N2: Shift);
10201
10202 SDValue W =
10203 DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LLH, N2: RLH),
10204 N2: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: UH, N2: VH));
10205 Lo = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: TL,
10206 N2: DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: V, N2: Shift));
10207
10208 Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT, N1: W,
10209 N2: DAG.getNode(Opcode: ISD::ADD, DL: dl, VT,
10210 N1: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: RH, N2: LL),
10211 N2: DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: RL, N2: LH)));
10212 } else {
10213 // Attempt a libcall.
10214 SDValue Ret;
10215 TargetLowering::MakeLibCallOptions CallOptions;
10216 CallOptions.setSExt(Signed);
10217 CallOptions.setIsPostTypeLegalization(true);
10218 if (shouldSplitFunctionArgumentsAsLittleEndian(DL: DAG.getDataLayout())) {
10219 // Halves of WideVT are packed into registers in different order
10220 // depending on platform endianness. This is usually handled by
10221 // the C calling convention, but we can't defer to it in
10222 // the legalizer.
10223 SDValue Args[] = {LL, LH, RL, RH};
10224 Ret = makeLibCall(DAG, LC, RetVT: WideVT, Ops: Args, CallOptions, dl).first;
10225 } else {
10226 SDValue Args[] = {LH, LL, RH, RL};
10227 Ret = makeLibCall(DAG, LC, RetVT: WideVT, Ops: Args, CallOptions, dl).first;
10228 }
10229 assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
10230 "Ret value is a collection of constituent nodes holding result.");
10231 if (DAG.getDataLayout().isLittleEndian()) {
10232 // Same as above.
10233 Lo = Ret.getOperand(i: 0);
10234 Hi = Ret.getOperand(i: 1);
10235 } else {
10236 Lo = Ret.getOperand(i: 1);
10237 Hi = Ret.getOperand(i: 0);
10238 }
10239 }
10240}
10241
10242void TargetLowering::forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl,
10243 bool Signed, const SDValue LHS,
10244 const SDValue RHS, SDValue &Lo,
10245 SDValue &Hi) const {
10246 EVT VT = LHS.getValueType();
10247 assert(RHS.getValueType() == VT && "Mismatching operand types");
10248
10249 SDValue HiLHS;
10250 SDValue HiRHS;
10251 if (Signed) {
    // The high part is obtained by SRA'ing all but one of the bits of the low
    // part.
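    // For example, for i32 a negative LHS gives HiLHS = LHS >> 31 (all ones),
    // i.e. exactly the high word of LHS sign-extended to the wide type.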
10254 unsigned LoSize = VT.getFixedSizeInBits();
10255 HiLHS = DAG.getNode(
10256 Opcode: ISD::SRA, DL: dl, VT, N1: LHS,
10257 N2: DAG.getConstant(Val: LoSize - 1, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
10258 HiRHS = DAG.getNode(
10259 Opcode: ISD::SRA, DL: dl, VT, N1: RHS,
10260 N2: DAG.getConstant(Val: LoSize - 1, DL: dl, VT: getPointerTy(DL: DAG.getDataLayout())));
10261 } else {
10262 HiLHS = DAG.getConstant(Val: 0, DL: dl, VT);
10263 HiRHS = DAG.getConstant(Val: 0, DL: dl, VT);
10264 }
10265 EVT WideVT = EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: VT.getSizeInBits() * 2);
10266 forceExpandWideMUL(DAG, dl, Signed, WideVT, LL: LHS, LH: HiLHS, RL: RHS, RH: HiRHS, Lo, Hi);
10267}
10268
10269SDValue
10270TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
10271 assert((Node->getOpcode() == ISD::SMULFIX ||
10272 Node->getOpcode() == ISD::UMULFIX ||
10273 Node->getOpcode() == ISD::SMULFIXSAT ||
10274 Node->getOpcode() == ISD::UMULFIXSAT) &&
10275 "Expected a fixed point multiplication opcode");
10276
10277 SDLoc dl(Node);
10278 SDValue LHS = Node->getOperand(Num: 0);
10279 SDValue RHS = Node->getOperand(Num: 1);
10280 EVT VT = LHS.getValueType();
10281 unsigned Scale = Node->getConstantOperandVal(Num: 2);
10282 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
10283 Node->getOpcode() == ISD::UMULFIXSAT);
10284 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
10285 Node->getOpcode() == ISD::SMULFIXSAT);
10286 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10287 unsigned VTSize = VT.getScalarSizeInBits();
10288
10289 if (!Scale) {
10290 // [us]mul.fix(a, b, 0) -> mul(a, b)
10291 if (!Saturating) {
10292 if (isOperationLegalOrCustom(Op: ISD::MUL, VT))
10293 return DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
10294 } else if (Signed && isOperationLegalOrCustom(Op: ISD::SMULO, VT)) {
10295 SDValue Result =
10296 DAG.getNode(Opcode: ISD::SMULO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
10297 SDValue Product = Result.getValue(R: 0);
10298 SDValue Overflow = Result.getValue(R: 1);
10299 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
10300
10301 APInt MinVal = APInt::getSignedMinValue(numBits: VTSize);
10302 APInt MaxVal = APInt::getSignedMaxValue(numBits: VTSize);
10303 SDValue SatMin = DAG.getConstant(Val: MinVal, DL: dl, VT);
10304 SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
10305 // Xor the inputs, if resulting sign bit is 0 the product will be
10306 // positive, else negative.
10307 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT, N1: LHS, N2: RHS);
10308 SDValue ProdNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Xor, RHS: Zero, Cond: ISD::SETLT);
10309 Result = DAG.getSelect(DL: dl, VT, Cond: ProdNeg, LHS: SatMin, RHS: SatMax);
10310 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: Result, RHS: Product);
10311 } else if (!Signed && isOperationLegalOrCustom(Op: ISD::UMULO, VT)) {
10312 SDValue Result =
10313 DAG.getNode(Opcode: ISD::UMULO, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: BoolVT), N1: LHS, N2: RHS);
10314 SDValue Product = Result.getValue(R: 0);
10315 SDValue Overflow = Result.getValue(R: 1);
10316
10317 APInt MaxVal = APInt::getMaxValue(numBits: VTSize);
10318 SDValue SatMax = DAG.getConstant(Val: MaxVal, DL: dl, VT);
10319 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: SatMax, RHS: Product);
10320 }
10321 }
10322
10323 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
10324 "Expected scale to be less than the number of bits if signed or at "
10325 "most the number of bits if unsigned.");
10326 assert(LHS.getValueType() == RHS.getValueType() &&
10327 "Expected both operands to be the same type");
10328
10329 // Get the upper and lower bits of the result.
10330 SDValue Lo, Hi;
10331 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10332 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
10333 if (isOperationLegalOrCustom(Op: LoHiOp, VT)) {
10334 SDValue Result = DAG.getNode(Opcode: LoHiOp, DL: dl, VTList: DAG.getVTList(VT1: VT, VT2: VT), N1: LHS, N2: RHS);
10335 Lo = Result.getValue(R: 0);
10336 Hi = Result.getValue(R: 1);
10337 } else if (isOperationLegalOrCustom(Op: HiOp, VT)) {
10338 Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT, N1: LHS, N2: RHS);
10339 Hi = DAG.getNode(Opcode: HiOp, DL: dl, VT, N1: LHS, N2: RHS);
10340 } else if (VT.isVector()) {
10341 return SDValue();
10342 } else {
10343 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
10344 }
10345
10346 if (Scale == VTSize)
10347 // Result is just the top half since we'd be shifting by the width of the
    // operand. Overflow is impossible, so this works for both UMULFIX and
10349 // UMULFIXSAT.
10350 return Hi;
10351
10352 // The result will need to be shifted right by the scale since both operands
10353 // are scaled. The result is given to us in 2 halves, so we only want part of
10354 // both in the result.
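  // For example, an i8 [us]mul.fix with Scale == 4 takes the middle 8 bits of
  // the 16-bit product: fshr(Hi, Lo, 4) == (Hi << 4) | (Lo >> 4).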
10355 SDValue Result = DAG.getNode(Opcode: ISD::FSHR, DL: dl, VT, N1: Hi, N2: Lo,
10356 N3: DAG.getShiftAmountConstant(Val: Scale, VT, DL: dl));
10357 if (!Saturating)
10358 return Result;
10359
10360 if (!Signed) {
10361 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
10362 // widened multiplication) aren't all zeroes.
10363
10364 // Saturate to max if ((Hi >> Scale) != 0),
10365 // which is the same as if (Hi > ((1 << Scale) - 1))
10366 APInt MaxVal = APInt::getMaxValue(numBits: VTSize);
10367 SDValue LowMask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VTSize, loBitsSet: Scale),
10368 DL: dl, VT);
10369 Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: LowMask,
10370 True: DAG.getConstant(Val: MaxVal, DL: dl, VT), False: Result,
10371 Cond: ISD::SETUGT);
10372
10373 return Result;
10374 }
10375
10376 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
10377 // widened multiplication) aren't all ones or all zeroes.
10378
10379 SDValue SatMin = DAG.getConstant(Val: APInt::getSignedMinValue(numBits: VTSize), DL: dl, VT);
10380 SDValue SatMax = DAG.getConstant(Val: APInt::getSignedMaxValue(numBits: VTSize), DL: dl, VT);
10381
10382 if (Scale == 0) {
10383 SDValue Sign = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT, N1: Lo,
10384 N2: DAG.getShiftAmountConstant(Val: VTSize - 1, VT, DL: dl));
10385 SDValue Overflow = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Hi, RHS: Sign, Cond: ISD::SETNE);
    // Saturate to SatMin if the wide product is negative, and to SatMax if it
    // is positive ...
10388 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
10389 SDValue ResultIfOverflow = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: Zero, True: SatMin, False: SatMax,
10390 Cond: ISD::SETLT);
10391 // ... but only if we overflowed.
10392 return DAG.getSelect(DL: dl, VT, Cond: Overflow, LHS: ResultIfOverflow, RHS: Result);
10393 }
10394
  // We handled Scale == 0 above, so all the bits to examine are in Hi.
10396
10397 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
10398 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
10399 SDValue LowMask = DAG.getConstant(Val: APInt::getLowBitsSet(numBits: VTSize, loBitsSet: Scale - 1),
10400 DL: dl, VT);
10401 Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: LowMask, True: SatMax, False: Result, Cond: ISD::SETGT);
  // Saturate to min if ((Hi >> (Scale - 1)) < -1),
  // which is the same as if (Hi < (-1 << (Scale - 1))).
10404 SDValue HighMask =
10405 DAG.getConstant(Val: APInt::getHighBitsSet(numBits: VTSize, hiBitsSet: VTSize - Scale + 1),
10406 DL: dl, VT);
10407 Result = DAG.getSelectCC(DL: dl, LHS: Hi, RHS: HighMask, True: SatMin, False: Result, Cond: ISD::SETLT);
10408 return Result;
10409}
10410
10411SDValue
10412TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
10413 SDValue LHS, SDValue RHS,
10414 unsigned Scale, SelectionDAG &DAG) const {
10415 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
10416 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
10417 "Expected a fixed point division opcode");
10418
10419 EVT VT = LHS.getValueType();
10420 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
10421 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
10422 EVT BoolVT = getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT);
10423
10424 // If there is enough room in the type to upscale the LHS or downscale the
10425 // RHS before the division, we can perform it in this type without having to
10426 // resize. For signed operations, the LHS headroom is the number of
10427 // redundant sign bits, and for unsigned ones it is the number of zeroes.
10428 // The headroom for the RHS is the number of trailing zeroes.
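  // For example, an i8 udiv.fix with Scale == 4 whose LHS is known to have at
  // least four leading zeroes can shift the LHS up by 4 and emit a plain UDIV
  // in the original type, with no need for a wider division.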
10429 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(Op: LHS) - 1
10430 : DAG.computeKnownBits(Op: LHS).countMinLeadingZeros();
10431 unsigned RHSTrail = DAG.computeKnownBits(Op: RHS).countMinTrailingZeros();
10432
10433 // For signed saturating operations, we need to be able to detect true integer
10434 // division overflow; that is, when you have MIN / -EPS. However, this
  // is undefined behavior, and if we emit divisions that could take such
10436 // values it may cause undesired behavior (arithmetic exceptions on x86, for
10437 // example).
10438 // Avoid this by requiring an extra bit so that we never get this case.
10439 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
10440 // signed saturating division, we need to emit a whopping 32-bit division.
10441 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
10442 return SDValue();
10443
10444 unsigned LHSShift = std::min(a: LHSLead, b: Scale);
10445 unsigned RHSShift = Scale - LHSShift;
10446
10447 // At this point, we know that if we shift the LHS up by LHSShift and the
10448 // RHS down by RHSShift, we can emit a regular division with a final scaling
10449 // factor of Scale.
10450
10451 if (LHSShift)
10452 LHS = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT, N1: LHS,
10453 N2: DAG.getShiftAmountConstant(Val: LHSShift, VT, DL: dl));
10454 if (RHSShift)
10455 RHS = DAG.getNode(Opcode: Signed ? ISD::SRA : ISD::SRL, DL: dl, VT, N1: RHS,
10456 N2: DAG.getShiftAmountConstant(Val: RHSShift, VT, DL: dl));
10457
10458 SDValue Quot;
10459 if (Signed) {
10460 // For signed operations, if the resulting quotient is negative and the
10461 // remainder is nonzero, subtract 1 from the quotient to round towards
10462 // negative infinity.
10463 SDValue Rem;
10464 // FIXME: Ideally we would always produce an SDIVREM here, but if the
10465 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
10466 // we couldn't just form a libcall, but the type legalizer doesn't do it.
10467 if (isTypeLegal(VT) &&
10468 isOperationLegalOrCustom(Op: ISD::SDIVREM, VT)) {
10469 Quot = DAG.getNode(Opcode: ISD::SDIVREM, DL: dl,
10470 VTList: DAG.getVTList(VT1: VT, VT2: VT),
10471 N1: LHS, N2: RHS);
10472 Rem = Quot.getValue(R: 1);
10473 Quot = Quot.getValue(R: 0);
10474 } else {
10475 Quot = DAG.getNode(Opcode: ISD::SDIV, DL: dl, VT,
10476 N1: LHS, N2: RHS);
10477 Rem = DAG.getNode(Opcode: ISD::SREM, DL: dl, VT,
10478 N1: LHS, N2: RHS);
10479 }
10480 SDValue Zero = DAG.getConstant(Val: 0, DL: dl, VT);
10481 SDValue RemNonZero = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: Rem, RHS: Zero, Cond: ISD::SETNE);
10482 SDValue LHSNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS, RHS: Zero, Cond: ISD::SETLT);
10483 SDValue RHSNeg = DAG.getSetCC(DL: dl, VT: BoolVT, LHS: RHS, RHS: Zero, Cond: ISD::SETLT);
10484 SDValue QuotNeg = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: BoolVT, N1: LHSNeg, N2: RHSNeg);
10485 SDValue Sub1 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT, N1: Quot,
10486 N2: DAG.getConstant(Val: 1, DL: dl, VT));
10487 Quot = DAG.getSelect(DL: dl, VT,
10488 Cond: DAG.getNode(Opcode: ISD::AND, DL: dl, VT: BoolVT, N1: RemNonZero, N2: QuotNeg),
10489 LHS: Sub1, RHS: Quot);
10490 } else
10491 Quot = DAG.getNode(Opcode: ISD::UDIV, DL: dl, VT,
10492 N1: LHS, N2: RHS);
10493
10494 return Quot;
10495}
10496
10497void TargetLowering::expandUADDSUBO(
10498 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
10499 SDLoc dl(Node);
10500 SDValue LHS = Node->getOperand(Num: 0);
10501 SDValue RHS = Node->getOperand(Num: 1);
10502 bool IsAdd = Node->getOpcode() == ISD::UADDO;
10503
10504 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
10505 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
10506 if (isOperationLegalOrCustom(Op: OpcCarry, VT: Node->getValueType(ResNo: 0))) {
10507 SDValue CarryIn = DAG.getConstant(Val: 0, DL: dl, VT: Node->getValueType(ResNo: 1));
10508 SDValue NodeCarry = DAG.getNode(Opcode: OpcCarry, DL: dl, VTList: Node->getVTList(),
10509 Ops: { LHS, RHS, CarryIn });
10510 Result = SDValue(NodeCarry.getNode(), 0);
10511 Overflow = SDValue(NodeCarry.getNode(), 1);
10512 return;
10513 }
10514
10515 Result = DAG.getNode(Opcode: IsAdd ? ISD::ADD : ISD::SUB, DL: dl,
10516 VT: LHS.getValueType(), N1: LHS, N2: RHS);
10517
10518 EVT ResultType = Node->getValueType(ResNo: 1);
10519 EVT SetCCType = getSetCCResultType(
10520 DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: 0));
10521 SDValue SetCC;
10522 if (IsAdd && isOneConstant(V: RHS)) {
    // Special case: uaddo X, 1 overflows if X+1 is 0. This potentially reduces
    // the live range of X. We assume comparing with 0 is cheap.
10525 // The general case (X + C) < C is not necessarily beneficial. Although we
10526 // reduce the live range of X, we may introduce the materialization of
10527 // constant C.
10528 SetCC =
10529 DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Result,
10530 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Node->getValueType(ResNo: 0)), Cond: ISD::SETEQ);
10531 } else if (IsAdd && isAllOnesConstant(V: RHS)) {
10532 // Special case: uaddo X, -1 overflows if X != 0.
10533 SetCC =
10534 DAG.getSetCC(DL: dl, VT: SetCCType, LHS,
10535 RHS: DAG.getConstant(Val: 0, DL: dl, VT: Node->getValueType(ResNo: 0)), Cond: ISD::SETNE);
10536 } else {
10537 ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
10538 SetCC = DAG.getSetCC(DL: dl, VT: SetCCType, LHS: Result, RHS: LHS, Cond: CC);
10539 }
10540 Overflow = DAG.getBoolExtOrTrunc(Op: SetCC, SL: dl, VT: ResultType, OpVT: ResultType);
10541}
10542
void TargetLowering::expandSADDSUBO(
    SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool IsAdd = Node->getOpcode() == ISD::SADDO;

  Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
                       LHS.getValueType(), LHS, RHS);

  EVT ResultType = Node->getValueType(1);
  EVT OType = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));

  // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
  unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
  if (isOperationLegal(OpcSat, LHS.getValueType())) {
    SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
    SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
    Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
    return;
  }

  SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());

  // For an addition, the result should be less than one of the operands (LHS)
  // if and only if the other operand (RHS) is negative, otherwise there will
  // be overflow.
  // For a subtraction, the result should be less than one of the operands
  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
  // otherwise there will be overflow.
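  // For example, with i8 operands, 100 + 28 wraps to -128: RHS is positive
  // (so the result should not be less than LHS), yet -128 < 100, and the XOR
  // below flags the mismatch as overflow.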
  SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
  SDValue ConditionRHS =
      DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);

  Overflow = DAG.getBoolExtOrTrunc(
      DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
      ResultType, ResultType);
}

bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
                                SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool isSigned = Node->getOpcode() == ISD::SMULO;

  // For power-of-two multiplications we can use a simpler shift expansion.
  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
    const APInt &C = RHSC->getAPIntValue();
    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
    if (C.isPowerOf2()) {
      // smulo(x, signed_min) is the same as umulo(x, signed_min).
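      // The shift round-trip below recovers X exactly when the product fits
      // in VT; an arithmetic shift checks the signed case, a logical shift
      // the unsigned (and signed_min) case, where only X in {0, 1} fits.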
      bool UseArithShift = isSigned && !C.isMinSignedValue();
      SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
      Overflow = DAG.getSetCC(dl, SetCCVT,
                              DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                                          dl, VT, Result, ShiftAmt),
                              LHS, ISD::SETNE);
      return true;
    }
  }

  EVT WideVT =
      EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
  if (VT.isVector())
    WideVT =
        EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());

  SDValue BottomHalf;
  SDValue TopHalf;
  static const unsigned Ops[2][3] =
      { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
        { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
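  // Prefer a same-width high-half multiply, then a lo/hi multiply pair, then
  // a double-width multiply; fall back to a manual expansion only when none
  // of those is available.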
  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
    BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                             RHS);
    TopHalf = BottomHalf.getValue(1);
  } else if (isTypeLegal(WideVT)) {
    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
    SDValue ShiftAmt =
        DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
  } else {
    if (VT.isVector())
      return false;

    forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
  }

  Result = BottomHalf;
  if (isSigned) {
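    // A signed product fits in VT iff the top half is the sign-extension of
    // the bottom half, i.e. every bit of TopHalf equals BottomHalf's sign bit.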
    SDValue ShiftAmt = DAG.getShiftAmountConstant(
        VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
  } else {
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                            DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(1);
  if (RType.bitsLT(Overflow.getValueType()))
    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}

SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
  SDValue Op = Node->getOperand(0);
  EVT VT = Op.getValueType();

  if (VT.isScalableVector())
    report_fatal_error(
        "Expanding reductions for scalable vectors is undefined.");

  // Try to use a shuffle reduction for power of two vectors.
  if (VT.isPow2VectorType()) {
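    // Halve the vector repeatedly, e.g. v8i32 -> v4i32 -> v2i32, combining
    // the two halves elementwise each step, giving a log2(N)-deep tree
    // instead of a linear chain of scalar ops.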
    while (VT.getVectorNumElements() > 1) {
      EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
      if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
        break;

      SDValue Lo, Hi;
      std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
      Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
      VT = HalfVT;
    }
  }

  EVT EltVT = VT.getVectorElementType();
  unsigned NumElts = VT.getVectorNumElements();

  SmallVector<SDValue, 8> Ops;
  DAG.ExtractVectorElements(Op, Ops, 0, NumElts);

  SDValue Res = Ops[0];
  for (unsigned i = 1; i < NumElts; i++)
    Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());

  // Result type may be wider than element type.
  if (EltVT != Node->getValueType(0))
    Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
  return Res;
}

SDValue TargetLowering::expandVecReduceSeq(SDNode *Node,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Node);
  SDValue AccOp = Node->getOperand(0);
  SDValue VecOp = Node->getOperand(1);
  SDNodeFlags Flags = Node->getFlags();

  EVT VT = VecOp.getValueType();
  EVT EltVT = VT.getVectorElementType();

  if (VT.isScalableVector())
    report_fatal_error(
        "Expanding reductions for scalable vectors is undefined.");

  unsigned NumElts = VT.getVectorNumElements();

  SmallVector<SDValue, 8> Ops;
  DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);

  unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());

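  // Sequential reductions are ordered, so emit a linear chain of scalar ops
  // seeded with the accumulator rather than a reassociated tree.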
  SDValue Res = AccOp;
  for (unsigned i = 0; i < NumElts; i++)
    Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);

  return Res;
}

bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
                               SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(0);
  SDLoc dl(Node);
  bool isSigned = Node->getOpcode() == ISD::SREM;
  unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
  SDValue Dividend = Node->getOperand(0);
  SDValue Divisor = Node->getOperand(1);
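  // Prefer a combined divrem node and take its remainder result; otherwise
  // rebuild the remainder from a plain divide.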
  if (isOperationLegalOrCustom(DivRemOpc, VT)) {
    SDVTList VTs = DAG.getVTList(VT, VT);
    Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
    return true;
  }
  if (isOperationLegalOrCustom(DivOpc, VT)) {
    // X % Y -> X-X/Y*Y
    SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
    Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
    return true;
  }
  return false;
}

SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
                                            SelectionDAG &DAG) const {
  bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
  SDLoc dl(SDValue(Node, 0));
  SDValue Src = Node->getOperand(0);

  // DstVT is the result type, while SatVT is the size to which we saturate.
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
  unsigned SatWidth = SatVT.getScalarSizeInBits();
  unsigned DstWidth = DstVT.getScalarSizeInBits();
  assert(SatWidth <= DstWidth &&
         "Expected saturation width no wider than result width");

  // Determine minimum and maximum integer values and their corresponding
  // floating-point values.
  APInt MinInt, MaxInt;
  if (IsSigned) {
    MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
    MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
  } else {
    MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
    MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
  }

  // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
  // libcall emission cannot handle this. Large result types will fail.
  if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
    SrcVT = Src.getValueType();
  }

  APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
  APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));

  APFloat::opStatus MinStatus =
      MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
  APFloat::opStatus MaxStatus =
      MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
                             !(MaxStatus & APFloat::opStatus::opInexact);
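  // If a bound rounded during the int->FP conversion, clamping in the FP
  // domain could let values just outside [MinInt, MaxInt] reach the
  // conversion, so the min+max fast path below requires exact bounds.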

  SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
  SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);

  // If the integer bounds are exactly representable as floats and min/max are
  // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
  // of comparisons and selects.
  bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
                     isOperationLegal(ISD::FMAXNUM, SrcVT);
  if (AreExactFloatBounds && MinMaxLegal) {
    SDValue Clamped = Src;

    // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
    Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
    // Clamp by MaxFloat from above. NaN cannot occur.
    Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
    // Convert clamped value to integer.
    SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
                                  dl, DstVT, Clamped);

    // In the unsigned case we're done, because we mapped NaN to MinFloat,
    // which will cast to zero.
    if (!IsSigned)
      return FpToInt;

    // Otherwise, select 0 if Src is NaN.
    SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
    SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
    return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
  }

  SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
  SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);

  // Result of direct conversion. The assumption here is that the operation is
  // non-trapping and it's fine to apply it to an out-of-range value if we
  // select it away later.
  SDValue FpToInt =
      DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);

  SDValue Select = FpToInt;

  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);

  // If Src ULT MinFloat, select MinInt. In particular, this also selects
  // MinInt if Src is NaN.
  SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
  Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
  // If Src OGT MaxFloat, select MaxInt.
  SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
  Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);

  // In the unsigned case we are done, because we mapped NaN to MinInt, which
  // is already zero.
  if (!IsSigned)
    return Select;

  // Otherwise, select 0 if Src is NaN.
  SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
  SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
  return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
}

SDValue TargetLowering::expandVectorSplice(SDNode *Node,
                                           SelectionDAG &DAG) const {
  assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
  assert(Node->getValueType(0).isScalableVector() &&
         "Fixed length vector types expected to use SHUFFLE_VECTOR!");

  EVT VT = Node->getValueType(0);
  SDValue V1 = Node->getOperand(0);
  SDValue V2 = Node->getOperand(1);
  int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
  SDLoc DL(Node);

  // Expand through memory as follows:
  //   Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
  //   Store V1, Ptr
  //   Store V2, Ptr + sizeof(V1)
  //   If (Imm < 0)
  //     TrailingElts = -Imm
  //     Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
  //   else
  //     Ptr = Ptr + (Imm * sizeof(VT.Elt))
  //   Res = Load Ptr

  Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);

  EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
                               VT.getVectorElementCount() * 2);
  SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
  EVT PtrVT = StackPtr.getValueType();
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

  // Store the lo part of CONCAT_VECTORS(V1, V2).
  SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
  // Store the hi part of CONCAT_VECTORS(V1, V2).
  SDValue OffsetToV2 = DAG.getVScale(
      DL, PtrVT,
      APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue()));
  SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
  SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);

  if (Imm >= 0) {
    // Load back the required element. getVectorElementPointer takes care of
    // clamping the index if it's out-of-bounds.
    StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
    // Load the spliced result.
    return DAG.getLoad(VT, DL, StoreV2, StackPtr,
                       MachinePointerInfo::getUnknownStack(MF));
  }

  uint64_t TrailingElts = -Imm;

  // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
  TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
  SDValue TrailingBytes =
      DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);

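  // For scalable vectors the compile-time element count is only a minimum,
  // so when TrailingElts could exceed the runtime length, clamp the byte
  // offset against the runtime size of V1 computed from vscale.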
  if (TrailingElts > VT.getVectorMinNumElements()) {
    SDValue VLBytes =
        DAG.getVScale(DL, PtrVT,
                      APInt(PtrVT.getFixedSizeInBits(),
                            VT.getStoreSize().getKnownMinValue()));
    TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
  }

  // Calculate the start address of the spliced result.
  StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);

  // Load the spliced result.
  return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
                     MachinePointerInfo::getUnknownStack(MF));
}

bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
                                           SDValue &LHS, SDValue &RHS,
                                           SDValue &CC, SDValue Mask,
                                           SDValue EVL, bool &NeedInvert,
                                           const SDLoc &dl, SDValue &Chain,
                                           bool IsSignaling) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  MVT OpVT = LHS.getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
  NeedInvert = false;
  assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
  bool IsNonVP = !EVL;
  switch (TLI.getCondCodeAction(CCCode, OpVT)) {
  default:
    llvm_unreachable("Unknown condition code action!");
  case TargetLowering::Legal:
    // Nothing to do.
    break;
  case TargetLowering::Expand: {
    ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
    if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(InvCC);
      return true;
    }
    // Swapping operands didn't work. Try inverting the condition.
    bool NeedSwap = false;
    InvCC = getSetCCInverse(CCCode, OpVT);
    if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      // If inverting the condition is not enough, try swapping operands
      // on top of it.
      InvCC = ISD::getSetCCSwappedOperands(InvCC);
      NeedSwap = true;
    }
    if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      CC = DAG.getCondCode(InvCC);
      NeedInvert = true;
      if (NeedSwap)
        std::swap(LHS, RHS);
      return true;
    }

    ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
    unsigned Opc = 0;
    switch (CCCode) {
    default:
      llvm_unreachable("Don't know how to expand this condition!");
    case ISD::SETUO:
      if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
        CC1 = ISD::SETUNE;
        CC2 = ISD::SETUNE;
        Opc = ISD::OR;
        break;
      }
      assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETUO is expanded, SETOEQ or SETUNE must be legal!");
      NeedInvert = true;
      [[fallthrough]];
    case ISD::SETO:
      assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETO is expanded, SETOEQ must be legal!");
      CC1 = ISD::SETOEQ;
      CC2 = ISD::SETOEQ;
      Opc = ISD::AND;
      break;
    case ISD::SETONE:
    case ISD::SETUEQ:
      // If the SETUO or SETO CC isn't legal, we might be able to use
      // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
      // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
      // the operands.
      CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
      if (!TLI.isCondCodeLegal(CC2, OpVT) &&
          (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
           TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
        CC1 = ISD::SETOGT;
        CC2 = ISD::SETOLT;
        Opc = ISD::OR;
        NeedInvert = ((unsigned)CCCode & 0x8U);
        break;
      }
      [[fallthrough]];
    case ISD::SETOEQ:
    case ISD::SETOGT:
    case ISD::SETOGE:
    case ISD::SETOLT:
    case ISD::SETOLE:
    case ISD::SETUNE:
    case ISD::SETUGT:
    case ISD::SETUGE:
    case ISD::SETULT:
    case ISD::SETULE:
      // If we are floating point, assign and break, otherwise fall through.
      if (!OpVT.isInteger()) {
        // We can use the 4th bit to tell if we are the unordered
        // or ordered version of the opcode.
        CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
        Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
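        // Mask the low three bits (the bare relation) and set bit 4 to get
        // the integer-style "don't care about NaNs" form of the same
        // comparison, e.g. SETOLT/SETULT -> SETLT.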
        CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
        break;
      }
      // Fall through if we are an unsigned integer.
      [[fallthrough]];
    case ISD::SETLE:
    case ISD::SETGT:
    case ISD::SETGE:
    case ISD::SETLT:
    case ISD::SETNE:
    case ISD::SETEQ:
      // If all combinations of inverting the condition and swapping operands
      // didn't work then we have no means to expand the condition.
      llvm_unreachable("Don't know how to expand this condition!");
    }

    SDValue SetCC1, SetCC2;
    if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
      // If we aren't the ordered or unordered operation,
      // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
      }
    } else {
      // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS).
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
      }
    }
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
                          SetCC2.getValue(1));
    if (IsNonVP)
      LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
    else {
      // Transform the binary opcode to the VP equivalent.
      assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
      Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
      LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
    }
    RHS = SDValue();
    CC = SDValue();
    return true;
  }
  }
  return false;
}

