1 | //===- lib/CodeGen/GlobalISel/GISelKnownBits.cpp --------------*- C++ *-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// Provides analysis for querying information about KnownBits during GISel |
10 | /// passes. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" |
14 | #include "llvm/ADT/StringExtras.h" |
15 | #include "llvm/Analysis/ValueTracking.h" |
16 | #include "llvm/CodeGen/GlobalISel/Utils.h" |
17 | #include "llvm/CodeGen/MachineFrameInfo.h" |
18 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
19 | #include "llvm/CodeGen/TargetLowering.h" |
20 | #include "llvm/CodeGen/TargetOpcodes.h" |
21 | #include "llvm/IR/Module.h" |
22 | #include "llvm/Target/TargetMachine.h" |
23 | |
24 | #define DEBUG_TYPE "gisel-known-bits" |
25 | |
26 | using namespace llvm; |
27 | |
28 | char llvm::GISelKnownBitsAnalysis::ID = 0; |
29 | |
30 | INITIALIZE_PASS(GISelKnownBitsAnalysis, DEBUG_TYPE, |
31 | "Analysis for ComputingKnownBits" , false, true) |
32 | |
33 | GISelKnownBits::GISelKnownBits(MachineFunction &MF, unsigned MaxDepth) |
34 | : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()), |
35 | DL(MF.getFunction().getParent()->getDataLayout()), MaxDepth(MaxDepth) {} |
36 | |
37 | Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) { |
38 | const MachineInstr *MI = MRI.getVRegDef(Reg: R); |
39 | switch (MI->getOpcode()) { |
40 | case TargetOpcode::COPY: |
41 | return computeKnownAlignment(R: MI->getOperand(i: 1).getReg(), Depth); |
42 | case TargetOpcode::G_ASSERT_ALIGN: { |
43 | // TODO: Min with source |
44 | return Align(MI->getOperand(i: 2).getImm()); |
45 | } |
46 | case TargetOpcode::G_FRAME_INDEX: { |
47 | int FrameIdx = MI->getOperand(i: 1).getIndex(); |
48 | return MF.getFrameInfo().getObjectAlign(ObjectIdx: FrameIdx); |
49 | } |
50 | case TargetOpcode::G_INTRINSIC: |
51 | case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: |
52 | case TargetOpcode::G_INTRINSIC_CONVERGENT: |
53 | case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: |
54 | default: |
55 | return TL.computeKnownAlignForTargetInstr(Analysis&: *this, R, MRI, Depth: Depth + 1); |
56 | } |
57 | } |
58 | |
59 | KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) { |
60 | assert(MI.getNumExplicitDefs() == 1 && |
61 | "expected single return generic instruction" ); |
62 | return getKnownBits(R: MI.getOperand(i: 0).getReg()); |
63 | } |
64 | |
65 | KnownBits GISelKnownBits::getKnownBits(Register R) { |
66 | const LLT Ty = MRI.getType(Reg: R); |
67 | APInt DemandedElts = |
68 | Ty.isVector() ? APInt::getAllOnes(numBits: Ty.getNumElements()) : APInt(1, 1); |
69 | return getKnownBits(R, DemandedElts); |
70 | } |
71 | |
72 | KnownBits GISelKnownBits::getKnownBits(Register R, const APInt &DemandedElts, |
73 | unsigned Depth) { |
74 | // For now, we only maintain the cache during one request. |
75 | assert(ComputeKnownBitsCache.empty() && "Cache should have been cleared" ); |
76 | |
77 | KnownBits Known; |
78 | computeKnownBitsImpl(R, Known, DemandedElts, Depth); |
79 | ComputeKnownBitsCache.clear(); |
80 | return Known; |
81 | } |
82 | |
83 | bool GISelKnownBits::signBitIsZero(Register R) { |
84 | LLT Ty = MRI.getType(Reg: R); |
85 | unsigned BitWidth = Ty.getScalarSizeInBits(); |
86 | return maskedValueIsZero(Val: R, Mask: APInt::getSignMask(BitWidth)); |
87 | } |
88 | |
89 | APInt GISelKnownBits::getKnownZeroes(Register R) { |
90 | return getKnownBits(R).Zero; |
91 | } |
92 | |
93 | APInt GISelKnownBits::getKnownOnes(Register R) { return getKnownBits(R).One; } |
94 | |
95 | LLVM_ATTRIBUTE_UNUSED static void |
96 | dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) { |
97 | dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth |
98 | << "] Computed for: " << MI << "[" << Depth << "] Known: 0x" |
99 | << toString(I: Known.Zero | Known.One, Radix: 16, Signed: false) << "\n" |
100 | << "[" << Depth << "] Zero: 0x" << toString(I: Known.Zero, Radix: 16, Signed: false) |
101 | << "\n" |
102 | << "[" << Depth << "] One: 0x" << toString(I: Known.One, Radix: 16, Signed: false) |
103 | << "\n" ; |
104 | } |
105 | |
106 | /// Compute known bits for the intersection of \p Src0 and \p Src1 |
107 | void GISelKnownBits::computeKnownBitsMin(Register Src0, Register Src1, |
108 | KnownBits &Known, |
109 | const APInt &DemandedElts, |
110 | unsigned Depth) { |
111 | // Test src1 first, since we canonicalize simpler expressions to the RHS. |
112 | computeKnownBitsImpl(R: Src1, Known, DemandedElts, Depth); |
113 | |
114 | // If we don't know any bits, early out. |
115 | if (Known.isUnknown()) |
116 | return; |
117 | |
118 | KnownBits Known2; |
119 | computeKnownBitsImpl(R: Src0, Known&: Known2, DemandedElts, Depth); |
120 | |
121 | // Only known if known in both the LHS and RHS. |
122 | Known = Known.intersectWith(RHS: Known2); |
123 | } |
124 | |
125 | // Bitfield extract is computed as (Src >> Offset) & Mask, where Mask is |
126 | // created using Width. Use this function when the inputs are KnownBits |
127 | // objects. TODO: Move this KnownBits.h if this is usable in more cases. |
128 | static KnownBits (unsigned BitWidth, const KnownBits &SrcOpKnown, |
129 | const KnownBits &OffsetKnown, |
130 | const KnownBits &WidthKnown) { |
131 | KnownBits Mask(BitWidth); |
132 | Mask.Zero = APInt::getBitsSetFrom( |
133 | numBits: BitWidth, loBit: WidthKnown.getMaxValue().getLimitedValue(Limit: BitWidth)); |
134 | Mask.One = APInt::getLowBitsSet( |
135 | numBits: BitWidth, loBitsSet: WidthKnown.getMinValue().getLimitedValue(Limit: BitWidth)); |
136 | return KnownBits::lshr(LHS: SrcOpKnown, RHS: OffsetKnown) & Mask; |
137 | } |
138 | |
139 | void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, |
140 | const APInt &DemandedElts, |
141 | unsigned Depth) { |
142 | MachineInstr &MI = *MRI.getVRegDef(Reg: R); |
143 | unsigned Opcode = MI.getOpcode(); |
144 | LLT DstTy = MRI.getType(Reg: R); |
145 | |
146 | // Handle the case where this is called on a register that does not have a |
147 | // type constraint (i.e. it has a register class constraint instead). This is |
148 | // unlikely to occur except by looking through copies but it is possible for |
149 | // the initial register being queried to be in this state. |
150 | if (!DstTy.isValid()) { |
151 | Known = KnownBits(); |
152 | return; |
153 | } |
154 | |
155 | unsigned BitWidth = DstTy.getScalarSizeInBits(); |
156 | auto CacheEntry = ComputeKnownBitsCache.find(Val: R); |
157 | if (CacheEntry != ComputeKnownBitsCache.end()) { |
158 | Known = CacheEntry->second; |
159 | LLVM_DEBUG(dbgs() << "Cache hit at " ); |
160 | LLVM_DEBUG(dumpResult(MI, Known, Depth)); |
161 | assert(Known.getBitWidth() == BitWidth && "Cache entry size doesn't match" ); |
162 | return; |
163 | } |
164 | Known = KnownBits(BitWidth); // Don't know anything |
165 | |
166 | // Depth may get bigger than max depth if it gets passed to a different |
167 | // GISelKnownBits object. |
168 | // This may happen when say a generic part uses a GISelKnownBits object |
169 | // with some max depth, but then we hit TL.computeKnownBitsForTargetInstr |
170 | // which creates a new GISelKnownBits object with a different and smaller |
171 | // depth. If we just check for equality, we would never exit if the depth |
172 | // that is passed down to the target specific GISelKnownBits object is |
173 | // already bigger than its max depth. |
174 | if (Depth >= getMaxDepth()) |
175 | return; |
176 | |
177 | if (!DemandedElts) |
178 | return; // No demanded elts, better to assume we don't know anything. |
179 | |
180 | KnownBits Known2; |
181 | |
182 | switch (Opcode) { |
183 | default: |
184 | TL.computeKnownBitsForTargetInstr(Analysis&: *this, R, Known, DemandedElts, MRI, |
185 | Depth); |
186 | break; |
187 | case TargetOpcode::G_BUILD_VECTOR: { |
188 | // Collect the known bits that are shared by every demanded vector element. |
189 | Known.Zero.setAllBits(); Known.One.setAllBits(); |
190 | for (unsigned i = 0, e = MI.getNumOperands() - 1; i < e; ++i) { |
191 | if (!DemandedElts[i]) |
192 | continue; |
193 | |
194 | computeKnownBitsImpl(R: MI.getOperand(i: i + 1).getReg(), Known&: Known2, DemandedElts, |
195 | Depth: Depth + 1); |
196 | |
197 | // Known bits are the values that are shared by every demanded element. |
198 | Known = Known.intersectWith(RHS: Known2); |
199 | |
200 | // If we don't know any bits, early out. |
201 | if (Known.isUnknown()) |
202 | break; |
203 | } |
204 | break; |
205 | } |
206 | case TargetOpcode::COPY: |
207 | case TargetOpcode::G_PHI: |
208 | case TargetOpcode::PHI: { |
209 | Known.One = APInt::getAllOnes(numBits: BitWidth); |
210 | Known.Zero = APInt::getAllOnes(numBits: BitWidth); |
211 | // Destination registers should not have subregisters at this |
212 | // point of the pipeline, otherwise the main live-range will be |
213 | // defined more than once, which is against SSA. |
214 | assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?" ); |
215 | // Record in the cache that we know nothing for MI. |
216 | // This will get updated later and in the meantime, if we reach that |
217 | // phi again, because of a loop, we will cut the search thanks to this |
218 | // cache entry. |
219 | // We could actually build up more information on the phi by not cutting |
220 | // the search, but that additional information is more a side effect |
221 | // than an intended choice. |
222 | // Therefore, for now, save on compile time until we derive a proper way |
223 | // to derive known bits for PHIs within loops. |
224 | ComputeKnownBitsCache[R] = KnownBits(BitWidth); |
225 | // PHI's operand are a mix of registers and basic blocks interleaved. |
226 | // We only care about the register ones. |
227 | for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) { |
228 | const MachineOperand &Src = MI.getOperand(i: Idx); |
229 | Register SrcReg = Src.getReg(); |
230 | // Look through trivial copies and phis but don't look through trivial |
231 | // copies or phis of the form `%1:(s32) = OP %0:gpr32`, known-bits |
232 | // analysis is currently unable to determine the bit width of a |
233 | // register class. |
234 | // |
235 | // We can't use NoSubRegister by name as it's defined by each target but |
236 | // it's always defined to be 0 by tablegen. |
237 | if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ && |
238 | MRI.getType(Reg: SrcReg).isValid()) { |
239 | // For COPYs we don't do anything, don't increase the depth. |
240 | computeKnownBitsImpl(R: SrcReg, Known&: Known2, DemandedElts, |
241 | Depth: Depth + (Opcode != TargetOpcode::COPY)); |
242 | Known = Known.intersectWith(RHS: Known2); |
243 | // If we reach a point where we don't know anything |
244 | // just stop looking through the operands. |
245 | if (Known.isUnknown()) |
246 | break; |
247 | } else { |
248 | // We know nothing. |
249 | Known = KnownBits(BitWidth); |
250 | break; |
251 | } |
252 | } |
253 | break; |
254 | } |
255 | case TargetOpcode::G_CONSTANT: { |
256 | auto CstVal = getIConstantVRegVal(VReg: R, MRI); |
257 | if (!CstVal) |
258 | break; |
259 | Known = KnownBits::makeConstant(C: *CstVal); |
260 | break; |
261 | } |
262 | case TargetOpcode::G_FRAME_INDEX: { |
263 | int FrameIdx = MI.getOperand(i: 1).getIndex(); |
264 | TL.computeKnownBitsForFrameIndex(FIOp: FrameIdx, Known, MF); |
265 | break; |
266 | } |
267 | case TargetOpcode::G_SUB: { |
268 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts, |
269 | Depth: Depth + 1); |
270 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: Known2, DemandedElts, |
271 | Depth: Depth + 1); |
272 | Known = KnownBits::computeForAddSub(/*Add*/ false, /*NSW*/ false, LHS: Known, |
273 | RHS: Known2); |
274 | break; |
275 | } |
276 | case TargetOpcode::G_XOR: { |
277 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts, |
278 | Depth: Depth + 1); |
279 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts, |
280 | Depth: Depth + 1); |
281 | |
282 | Known ^= Known2; |
283 | break; |
284 | } |
285 | case TargetOpcode::G_PTR_ADD: { |
286 | if (DstTy.isVector()) |
287 | break; |
288 | // G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets? |
289 | LLT Ty = MRI.getType(Reg: MI.getOperand(i: 1).getReg()); |
290 | if (DL.isNonIntegralAddressSpace(AddrSpace: Ty.getAddressSpace())) |
291 | break; |
292 | [[fallthrough]]; |
293 | } |
294 | case TargetOpcode::G_ADD: { |
295 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts, |
296 | Depth: Depth + 1); |
297 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: Known2, DemandedElts, |
298 | Depth: Depth + 1); |
299 | Known = |
300 | KnownBits::computeForAddSub(/*Add*/ true, /*NSW*/ false, LHS: Known, RHS: Known2); |
301 | break; |
302 | } |
303 | case TargetOpcode::G_AND: { |
304 | // If either the LHS or the RHS are Zero, the result is zero. |
305 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts, |
306 | Depth: Depth + 1); |
307 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts, |
308 | Depth: Depth + 1); |
309 | |
310 | Known &= Known2; |
311 | break; |
312 | } |
313 | case TargetOpcode::G_OR: { |
314 | // If either the LHS or the RHS are Zero, the result is zero. |
315 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts, |
316 | Depth: Depth + 1); |
317 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts, |
318 | Depth: Depth + 1); |
319 | |
320 | Known |= Known2; |
321 | break; |
322 | } |
323 | case TargetOpcode::G_MUL: { |
324 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known, DemandedElts, |
325 | Depth: Depth + 1); |
326 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts, |
327 | Depth: Depth + 1); |
328 | Known = KnownBits::mul(LHS: Known, RHS: Known2); |
329 | break; |
330 | } |
331 | case TargetOpcode::G_SELECT: { |
332 | computeKnownBitsMin(Src0: MI.getOperand(i: 2).getReg(), Src1: MI.getOperand(i: 3).getReg(), |
333 | Known, DemandedElts, Depth: Depth + 1); |
334 | break; |
335 | } |
336 | case TargetOpcode::G_SMIN: { |
337 | // TODO: Handle clamp pattern with number of sign bits |
338 | KnownBits KnownRHS; |
339 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts, |
340 | Depth: Depth + 1); |
341 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: KnownRHS, DemandedElts, |
342 | Depth: Depth + 1); |
343 | Known = KnownBits::smin(LHS: Known, RHS: KnownRHS); |
344 | break; |
345 | } |
346 | case TargetOpcode::G_SMAX: { |
347 | // TODO: Handle clamp pattern with number of sign bits |
348 | KnownBits KnownRHS; |
349 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts, |
350 | Depth: Depth + 1); |
351 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: KnownRHS, DemandedElts, |
352 | Depth: Depth + 1); |
353 | Known = KnownBits::smax(LHS: Known, RHS: KnownRHS); |
354 | break; |
355 | } |
356 | case TargetOpcode::G_UMIN: { |
357 | KnownBits KnownRHS; |
358 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, |
359 | DemandedElts, Depth: Depth + 1); |
360 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: KnownRHS, |
361 | DemandedElts, Depth: Depth + 1); |
362 | Known = KnownBits::umin(LHS: Known, RHS: KnownRHS); |
363 | break; |
364 | } |
365 | case TargetOpcode::G_UMAX: { |
366 | KnownBits KnownRHS; |
367 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, |
368 | DemandedElts, Depth: Depth + 1); |
369 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: KnownRHS, |
370 | DemandedElts, Depth: Depth + 1); |
371 | Known = KnownBits::umax(LHS: Known, RHS: KnownRHS); |
372 | break; |
373 | } |
374 | case TargetOpcode::G_FCMP: |
375 | case TargetOpcode::G_ICMP: { |
376 | if (DstTy.isVector()) |
377 | break; |
378 | if (TL.getBooleanContents(isVec: DstTy.isVector(), |
379 | isFloat: Opcode == TargetOpcode::G_FCMP) == |
380 | TargetLowering::ZeroOrOneBooleanContent && |
381 | BitWidth > 1) |
382 | Known.Zero.setBitsFrom(1); |
383 | break; |
384 | } |
385 | case TargetOpcode::G_SEXT: { |
386 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts, |
387 | Depth: Depth + 1); |
388 | // If the sign bit is known to be zero or one, then sext will extend |
389 | // it to the top bits, else it will just zext. |
390 | Known = Known.sext(BitWidth); |
391 | break; |
392 | } |
393 | case TargetOpcode::G_ASSERT_SEXT: |
394 | case TargetOpcode::G_SEXT_INREG: { |
395 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts, |
396 | Depth: Depth + 1); |
397 | Known = Known.sextInReg(SrcBitWidth: MI.getOperand(i: 2).getImm()); |
398 | break; |
399 | } |
400 | case TargetOpcode::G_ANYEXT: { |
401 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known, DemandedElts, |
402 | Depth: Depth + 1); |
403 | Known = Known.anyext(BitWidth); |
404 | break; |
405 | } |
406 | case TargetOpcode::G_LOAD: { |
407 | const MachineMemOperand *MMO = *MI.memoperands_begin(); |
408 | if (const MDNode *Ranges = MMO->getRanges()) { |
409 | computeKnownBitsFromRangeMetadata(Ranges: *Ranges, Known); |
410 | } |
411 | |
412 | break; |
413 | } |
414 | case TargetOpcode::G_ZEXTLOAD: { |
415 | if (DstTy.isVector()) |
416 | break; |
417 | // Everything above the retrieved bits is zero |
418 | Known.Zero.setBitsFrom((*MI.memoperands_begin())->getSizeInBits()); |
419 | break; |
420 | } |
421 | case TargetOpcode::G_ASHR: { |
422 | KnownBits LHSKnown, RHSKnown; |
423 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: LHSKnown, DemandedElts, |
424 | Depth: Depth + 1); |
425 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: RHSKnown, DemandedElts, |
426 | Depth: Depth + 1); |
427 | Known = KnownBits::ashr(LHS: LHSKnown, RHS: RHSKnown); |
428 | break; |
429 | } |
430 | case TargetOpcode::G_LSHR: { |
431 | KnownBits LHSKnown, RHSKnown; |
432 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: LHSKnown, DemandedElts, |
433 | Depth: Depth + 1); |
434 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: RHSKnown, DemandedElts, |
435 | Depth: Depth + 1); |
436 | Known = KnownBits::lshr(LHS: LHSKnown, RHS: RHSKnown); |
437 | break; |
438 | } |
439 | case TargetOpcode::G_SHL: { |
440 | KnownBits LHSKnown, RHSKnown; |
441 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: LHSKnown, DemandedElts, |
442 | Depth: Depth + 1); |
443 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: RHSKnown, DemandedElts, |
444 | Depth: Depth + 1); |
445 | Known = KnownBits::shl(LHS: LHSKnown, RHS: RHSKnown); |
446 | break; |
447 | } |
448 | case TargetOpcode::G_INTTOPTR: |
449 | case TargetOpcode::G_PTRTOINT: |
450 | if (DstTy.isVector()) |
451 | break; |
452 | // Fall through and handle them the same as zext/trunc. |
453 | [[fallthrough]]; |
454 | case TargetOpcode::G_ASSERT_ZEXT: |
455 | case TargetOpcode::G_ZEXT: |
456 | case TargetOpcode::G_TRUNC: { |
457 | Register SrcReg = MI.getOperand(i: 1).getReg(); |
458 | LLT SrcTy = MRI.getType(Reg: SrcReg); |
459 | unsigned SrcBitWidth; |
460 | |
461 | // G_ASSERT_ZEXT stores the original bitwidth in the immediate operand. |
462 | if (Opcode == TargetOpcode::G_ASSERT_ZEXT) |
463 | SrcBitWidth = MI.getOperand(i: 2).getImm(); |
464 | else { |
465 | SrcBitWidth = SrcTy.isPointer() |
466 | ? DL.getIndexSizeInBits(AS: SrcTy.getAddressSpace()) |
467 | : SrcTy.getSizeInBits(); |
468 | } |
469 | assert(SrcBitWidth && "SrcBitWidth can't be zero" ); |
470 | Known = Known.zextOrTrunc(BitWidth: SrcBitWidth); |
471 | computeKnownBitsImpl(R: SrcReg, Known, DemandedElts, Depth: Depth + 1); |
472 | Known = Known.zextOrTrunc(BitWidth); |
473 | if (BitWidth > SrcBitWidth) |
474 | Known.Zero.setBitsFrom(SrcBitWidth); |
475 | break; |
476 | } |
477 | case TargetOpcode::G_ASSERT_ALIGN: { |
478 | int64_t LogOfAlign = Log2_64(Value: MI.getOperand(i: 2).getImm()); |
479 | |
480 | // TODO: Should use maximum with source |
481 | // If a node is guaranteed to be aligned, set low zero bits accordingly as |
482 | // well as clearing one bits. |
483 | Known.Zero.setLowBits(LogOfAlign); |
484 | Known.One.clearLowBits(loBits: LogOfAlign); |
485 | break; |
486 | } |
487 | case TargetOpcode::G_MERGE_VALUES: { |
488 | unsigned NumOps = MI.getNumOperands(); |
489 | unsigned OpSize = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getSizeInBits(); |
490 | |
491 | for (unsigned I = 0; I != NumOps - 1; ++I) { |
492 | KnownBits SrcOpKnown; |
493 | computeKnownBitsImpl(R: MI.getOperand(i: I + 1).getReg(), Known&: SrcOpKnown, |
494 | DemandedElts, Depth: Depth + 1); |
495 | Known.insertBits(SubBits: SrcOpKnown, BitPosition: I * OpSize); |
496 | } |
497 | break; |
498 | } |
499 | case TargetOpcode::G_UNMERGE_VALUES: { |
500 | if (DstTy.isVector()) |
501 | break; |
502 | unsigned NumOps = MI.getNumOperands(); |
503 | Register SrcReg = MI.getOperand(i: NumOps - 1).getReg(); |
504 | if (MRI.getType(Reg: SrcReg).isVector()) |
505 | return; // TODO: Handle vectors. |
506 | |
507 | KnownBits SrcOpKnown; |
508 | computeKnownBitsImpl(R: SrcReg, Known&: SrcOpKnown, DemandedElts, Depth: Depth + 1); |
509 | |
510 | // Figure out the result operand index |
511 | unsigned DstIdx = 0; |
512 | for (; DstIdx != NumOps - 1 && MI.getOperand(i: DstIdx).getReg() != R; |
513 | ++DstIdx) |
514 | ; |
515 | |
516 | Known = SrcOpKnown.extractBits(NumBits: BitWidth, BitPosition: BitWidth * DstIdx); |
517 | break; |
518 | } |
519 | case TargetOpcode::G_BSWAP: { |
520 | Register SrcReg = MI.getOperand(i: 1).getReg(); |
521 | computeKnownBitsImpl(R: SrcReg, Known, DemandedElts, Depth: Depth + 1); |
522 | Known = Known.byteSwap(); |
523 | break; |
524 | } |
525 | case TargetOpcode::G_BITREVERSE: { |
526 | Register SrcReg = MI.getOperand(i: 1).getReg(); |
527 | computeKnownBitsImpl(R: SrcReg, Known, DemandedElts, Depth: Depth + 1); |
528 | Known = Known.reverseBits(); |
529 | break; |
530 | } |
531 | case TargetOpcode::G_CTPOP: { |
532 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: Known2, DemandedElts, |
533 | Depth: Depth + 1); |
534 | // We can bound the space the count needs. Also, bits known to be zero can't |
535 | // contribute to the population. |
536 | unsigned BitsPossiblySet = Known2.countMaxPopulation(); |
537 | unsigned LowBits = llvm::bit_width(Value: BitsPossiblySet); |
538 | Known.Zero.setBitsFrom(LowBits); |
539 | // TODO: we could bound Known.One using the lower bound on the number of |
540 | // bits which might be set provided by popcnt KnownOne2. |
541 | break; |
542 | } |
543 | case TargetOpcode::G_UBFX: { |
544 | KnownBits SrcOpKnown, OffsetKnown, WidthKnown; |
545 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: SrcOpKnown, DemandedElts, |
546 | Depth: Depth + 1); |
547 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: OffsetKnown, DemandedElts, |
548 | Depth: Depth + 1); |
549 | computeKnownBitsImpl(R: MI.getOperand(i: 3).getReg(), Known&: WidthKnown, DemandedElts, |
550 | Depth: Depth + 1); |
551 | Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown); |
552 | break; |
553 | } |
554 | case TargetOpcode::G_SBFX: { |
555 | KnownBits SrcOpKnown, OffsetKnown, WidthKnown; |
556 | computeKnownBitsImpl(R: MI.getOperand(i: 1).getReg(), Known&: SrcOpKnown, DemandedElts, |
557 | Depth: Depth + 1); |
558 | computeKnownBitsImpl(R: MI.getOperand(i: 2).getReg(), Known&: OffsetKnown, DemandedElts, |
559 | Depth: Depth + 1); |
560 | computeKnownBitsImpl(R: MI.getOperand(i: 3).getReg(), Known&: WidthKnown, DemandedElts, |
561 | Depth: Depth + 1); |
562 | Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown); |
563 | // Sign extend the extracted value using shift left and arithmetic shift |
564 | // right. |
565 | KnownBits ExtKnown = KnownBits::makeConstant(C: APInt(BitWidth, BitWidth)); |
566 | KnownBits ShiftKnown = KnownBits::computeForAddSub( |
567 | /*Add*/ false, /*NSW*/ false, LHS: ExtKnown, RHS: WidthKnown); |
568 | Known = KnownBits::ashr(LHS: KnownBits::shl(LHS: Known, RHS: ShiftKnown), RHS: ShiftKnown); |
569 | break; |
570 | } |
571 | case TargetOpcode::G_UADDO: |
572 | case TargetOpcode::G_UADDE: |
573 | case TargetOpcode::G_SADDO: |
574 | case TargetOpcode::G_SADDE: |
575 | case TargetOpcode::G_USUBO: |
576 | case TargetOpcode::G_USUBE: |
577 | case TargetOpcode::G_SSUBO: |
578 | case TargetOpcode::G_SSUBE: |
579 | case TargetOpcode::G_UMULO: |
580 | case TargetOpcode::G_SMULO: { |
581 | if (MI.getOperand(i: 1).getReg() == R) { |
582 | // If we know the result of a compare has the top bits zero, use this |
583 | // info. |
584 | if (TL.getBooleanContents(isVec: DstTy.isVector(), isFloat: false) == |
585 | TargetLowering::ZeroOrOneBooleanContent && |
586 | BitWidth > 1) |
587 | Known.Zero.setBitsFrom(1); |
588 | } |
589 | break; |
590 | } |
591 | } |
592 | |
593 | assert(!Known.hasConflict() && "Bits known to be one AND zero?" ); |
594 | LLVM_DEBUG(dumpResult(MI, Known, Depth)); |
595 | |
596 | // Update the cache. |
597 | ComputeKnownBitsCache[R] = Known; |
598 | } |
599 | |
600 | /// Compute number of sign bits for the intersection of \p Src0 and \p Src1 |
601 | unsigned GISelKnownBits::computeNumSignBitsMin(Register Src0, Register Src1, |
602 | const APInt &DemandedElts, |
603 | unsigned Depth) { |
604 | // Test src1 first, since we canonicalize simpler expressions to the RHS. |
605 | unsigned Src1SignBits = computeNumSignBits(R: Src1, DemandedElts, Depth); |
606 | if (Src1SignBits == 1) |
607 | return 1; |
608 | return std::min(a: computeNumSignBits(R: Src0, DemandedElts, Depth), b: Src1SignBits); |
609 | } |
610 | |
611 | unsigned GISelKnownBits::computeNumSignBits(Register R, |
612 | const APInt &DemandedElts, |
613 | unsigned Depth) { |
614 | MachineInstr &MI = *MRI.getVRegDef(Reg: R); |
615 | unsigned Opcode = MI.getOpcode(); |
616 | |
617 | if (Opcode == TargetOpcode::G_CONSTANT) |
618 | return MI.getOperand(i: 1).getCImm()->getValue().getNumSignBits(); |
619 | |
620 | if (Depth == getMaxDepth()) |
621 | return 1; |
622 | |
623 | if (!DemandedElts) |
624 | return 1; // No demanded elts, better to assume we don't know anything. |
625 | |
626 | LLT DstTy = MRI.getType(Reg: R); |
627 | const unsigned TyBits = DstTy.getScalarSizeInBits(); |
628 | |
629 | // Handle the case where this is called on a register that does not have a |
630 | // type constraint. This is unlikely to occur except by looking through copies |
631 | // but it is possible for the initial register being queried to be in this |
632 | // state. |
633 | if (!DstTy.isValid()) |
634 | return 1; |
635 | |
636 | unsigned FirstAnswer = 1; |
637 | switch (Opcode) { |
638 | case TargetOpcode::COPY: { |
639 | MachineOperand &Src = MI.getOperand(i: 1); |
640 | if (Src.getReg().isVirtual() && Src.getSubReg() == 0 && |
641 | MRI.getType(Reg: Src.getReg()).isValid()) { |
642 | // Don't increment Depth for this one since we didn't do any work. |
643 | return computeNumSignBits(R: Src.getReg(), DemandedElts, Depth); |
644 | } |
645 | |
646 | return 1; |
647 | } |
648 | case TargetOpcode::G_SEXT: { |
649 | Register Src = MI.getOperand(i: 1).getReg(); |
650 | LLT SrcTy = MRI.getType(Reg: Src); |
651 | unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits(); |
652 | return computeNumSignBits(R: Src, DemandedElts, Depth: Depth + 1) + Tmp; |
653 | } |
654 | case TargetOpcode::G_ASSERT_SEXT: |
655 | case TargetOpcode::G_SEXT_INREG: { |
656 | // Max of the input and what this extends. |
657 | Register Src = MI.getOperand(i: 1).getReg(); |
658 | unsigned SrcBits = MI.getOperand(i: 2).getImm(); |
659 | unsigned InRegBits = TyBits - SrcBits + 1; |
660 | return std::max(a: computeNumSignBits(R: Src, DemandedElts, Depth: Depth + 1), b: InRegBits); |
661 | } |
662 | case TargetOpcode::G_SEXTLOAD: { |
663 | // FIXME: We need an in-memory type representation. |
664 | if (DstTy.isVector()) |
665 | return 1; |
666 | |
667 | // e.g. i16->i32 = '17' bits known. |
668 | const MachineMemOperand *MMO = *MI.memoperands_begin(); |
669 | return TyBits - MMO->getSizeInBits() + 1; |
670 | } |
671 | case TargetOpcode::G_ZEXTLOAD: { |
672 | // FIXME: We need an in-memory type representation. |
673 | if (DstTy.isVector()) |
674 | return 1; |
675 | |
676 | // e.g. i16->i32 = '16' bits known. |
677 | const MachineMemOperand *MMO = *MI.memoperands_begin(); |
678 | return TyBits - MMO->getSizeInBits(); |
679 | } |
680 | case TargetOpcode::G_TRUNC: { |
681 | Register Src = MI.getOperand(i: 1).getReg(); |
682 | LLT SrcTy = MRI.getType(Reg: Src); |
683 | |
684 | // Check if the sign bits of source go down as far as the truncated value. |
685 | unsigned DstTyBits = DstTy.getScalarSizeInBits(); |
686 | unsigned NumSrcBits = SrcTy.getScalarSizeInBits(); |
687 | unsigned NumSrcSignBits = computeNumSignBits(R: Src, DemandedElts, Depth: Depth + 1); |
688 | if (NumSrcSignBits > (NumSrcBits - DstTyBits)) |
689 | return NumSrcSignBits - (NumSrcBits - DstTyBits); |
690 | break; |
691 | } |
692 | case TargetOpcode::G_SELECT: { |
693 | return computeNumSignBitsMin(Src0: MI.getOperand(i: 2).getReg(), |
694 | Src1: MI.getOperand(i: 3).getReg(), DemandedElts, |
695 | Depth: Depth + 1); |
696 | } |
697 | case TargetOpcode::G_SADDO: |
698 | case TargetOpcode::G_SADDE: |
699 | case TargetOpcode::G_UADDO: |
700 | case TargetOpcode::G_UADDE: |
701 | case TargetOpcode::G_SSUBO: |
702 | case TargetOpcode::G_SSUBE: |
703 | case TargetOpcode::G_USUBO: |
704 | case TargetOpcode::G_USUBE: |
705 | case TargetOpcode::G_SMULO: |
706 | case TargetOpcode::G_UMULO: { |
707 | // If compares returns 0/-1, all bits are sign bits. |
708 | // We know that we have an integer-based boolean since these operations |
709 | // are only available for integer. |
710 | if (MI.getOperand(i: 1).getReg() == R) { |
711 | if (TL.getBooleanContents(isVec: DstTy.isVector(), isFloat: false) == |
712 | TargetLowering::ZeroOrNegativeOneBooleanContent) |
713 | return TyBits; |
714 | } |
715 | |
716 | break; |
717 | } |
718 | case TargetOpcode::G_FCMP: |
719 | case TargetOpcode::G_ICMP: { |
720 | bool IsFP = Opcode == TargetOpcode::G_FCMP; |
721 | if (TyBits == 1) |
722 | break; |
723 | auto BC = TL.getBooleanContents(isVec: DstTy.isVector(), isFloat: IsFP); |
724 | if (BC == TargetLoweringBase::ZeroOrNegativeOneBooleanContent) |
725 | return TyBits; // All bits are sign bits. |
726 | if (BC == TargetLowering::ZeroOrOneBooleanContent) |
727 | return TyBits - 1; // Every always-zero bit is a sign bit. |
728 | break; |
729 | } |
730 | case TargetOpcode::G_INTRINSIC: |
731 | case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: |
732 | case TargetOpcode::G_INTRINSIC_CONVERGENT: |
733 | case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: |
734 | default: { |
735 | unsigned NumBits = |
736 | TL.computeNumSignBitsForTargetInstr(Analysis&: *this, R, DemandedElts, MRI, Depth); |
737 | if (NumBits > 1) |
738 | FirstAnswer = std::max(a: FirstAnswer, b: NumBits); |
739 | break; |
740 | } |
741 | } |
742 | |
743 | // Finally, if we can prove that the top bits of the result are 0's or 1's, |
744 | // use this information. |
745 | KnownBits Known = getKnownBits(R, DemandedElts, Depth); |
746 | APInt Mask; |
747 | if (Known.isNonNegative()) { // sign bit is 0 |
748 | Mask = Known.Zero; |
749 | } else if (Known.isNegative()) { // sign bit is 1; |
750 | Mask = Known.One; |
751 | } else { |
752 | // Nothing known. |
753 | return FirstAnswer; |
754 | } |
755 | |
756 | // Okay, we know that the sign bit in Mask is set. Use CLO to determine |
757 | // the number of identical bits in the top of the input value. |
758 | Mask <<= Mask.getBitWidth() - TyBits; |
759 | return std::max(a: FirstAnswer, b: Mask.countl_one()); |
760 | } |
761 | |
762 | unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Depth) { |
763 | LLT Ty = MRI.getType(Reg: R); |
764 | APInt DemandedElts = |
765 | Ty.isVector() ? APInt::getAllOnes(numBits: Ty.getNumElements()) : APInt(1, 1); |
766 | return computeNumSignBits(R, DemandedElts, Depth); |
767 | } |
768 | |
769 | void GISelKnownBitsAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { |
770 | AU.setPreservesAll(); |
771 | MachineFunctionPass::getAnalysisUsage(AU); |
772 | } |
773 | |
774 | bool GISelKnownBitsAnalysis::runOnMachineFunction(MachineFunction &MF) { |
775 | return false; |
776 | } |
777 | |
778 | GISelKnownBits &GISelKnownBitsAnalysis::get(MachineFunction &MF) { |
779 | if (!Info) { |
780 | unsigned MaxDepth = |
781 | MF.getTarget().getOptLevel() == CodeGenOptLevel::None ? 2 : 6; |
782 | Info = std::make_unique<GISelKnownBits>(args&: MF, args&: MaxDepth); |
783 | } |
784 | return *Info.get(); |
785 | } |
786 | |