1//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that X86 uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
15#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
16
17#include "llvm/CodeGen/MachineFunction.h"
18#include "llvm/CodeGen/TargetLowering.h"
19
20namespace llvm {
21 class X86Subtarget;
22 class X86TargetMachine;
23
24 namespace X86ISD {
25 // X86 Specific DAG Nodes
26 enum NodeType : unsigned {
27 // Start the numbering where the builtin ops leave off.
28 FIRST_NUMBER = ISD::BUILTIN_OP_END,
29
30 /// Bit scan forward.
31 BSF,
32 /// Bit scan reverse.
33 BSR,
34
35 /// X86 funnel/double shift i16 instructions. These correspond to
      /// X86::SHLDW and X86::SHRDW instructions, which have different
      /// shift-amount modulo rules from generic funnel shifts.
38 /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD.
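      /// (Illustrative note, based on the SHLD/SHRD count-masking rules rather
      /// than anything stated here: the instructions mask the count modulo 32,
      /// so counts of 16..31 are not reduced modulo 16 the way ISD::FSHL/FSHR
      /// on i16 would reduce them.)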
39 FSHL,
40 FSHR,
41
42 /// Bitwise logical AND of floating point values. This corresponds
43 /// to X86::ANDPS or X86::ANDPD.
44 FAND,
45
46 /// Bitwise logical OR of floating point values. This corresponds
47 /// to X86::ORPS or X86::ORPD.
48 FOR,
49
50 /// Bitwise logical XOR of floating point values. This corresponds
51 /// to X86::XORPS or X86::XORPD.
52 FXOR,
53
54 /// Bitwise logical ANDNOT of floating point values. This
55 /// corresponds to X86::ANDNPS or X86::ANDNPD.
56 FANDN,
57
58 /// These operations represent an abstract X86 call
59 /// instruction, which includes a bunch of information. In particular the
      /// operands of this node are:
61 ///
62 /// #0 - The incoming token chain
63 /// #1 - The callee
64 /// #2 - The number of arg bytes the caller pushes on the stack.
65 /// #3 - The number of arg bytes the callee pops off the stack.
66 /// #4 - The value to pass in AL/AX/EAX (optional)
67 /// #5 - The value to pass in DL/DX/EDX (optional)
68 ///
69 /// The result values of these nodes are:
70 ///
71 /// #0 - The outgoing token chain
72 /// #1 - The first register result value (optional)
73 /// #2 - The second register result value (optional)
74 ///
75 CALL,
76
77 /// Same as call except it adds the NoTrack prefix.
78 NT_CALL,
79
      // Pseudo for an Objective-C call that gets emitted together with a
      // special marker instruction.
82 CALL_RVMARKER,
83
84 /// X86 compare and logical compare instructions.
85 CMP,
86 FCMP,
87 COMI,
88 UCOMI,
89
90 /// X86 bit-test instructions.
91 BT,
92
93 /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
94 /// operand, usually produced by a CMP instruction.
95 SETCC,
96
97 /// X86 Select
98 SELECTS,
99
      // Same as SETCC except it's materialized with an SBB and the value is
      // all ones or all zeros.
102 SETCC_CARRY, // R = carry_bit ? ~0 : 0
103
104 /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
105 /// Operands are two FP values to compare; result is a mask of
106 /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
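      /// For example (illustrative, not taken from this header): CMPEQSS sets
      /// the low element to all ones (0xFFFFFFFF) when the inputs compare
      /// equal and to all zeros otherwise.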
107 FSETCC,
108
109 /// X86 FP SETCC, similar to above, but with output as an i1 mask and
      /// a version with SAE.
111 FSETCCM,
112 FSETCCM_SAE,
113
114 /// X86 conditional moves. Operand 0 and operand 1 are the two values
115 /// to select from. Operand 2 is the condition code, and operand 3 is the
116 /// flag operand produced by a CMP or TEST instruction.
117 CMOV,
118
119 /// X86 conditional branches. Operand 0 is the chain operand, operand 1
120 /// is the block to branch if condition is true, operand 2 is the
121 /// condition code, and operand 3 is the flag operand produced by a CMP
122 /// or TEST instruction.
123 BRCOND,
124
125 /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
126 /// operand 1 is the target address.
127 NT_BRIND,
128
129 /// Return with a glue operand. Operand 0 is the chain operand, operand
130 /// 1 is the number of bytes of stack to pop.
131 RET_GLUE,
132
133 /// Return from interrupt. Operand 0 is the number of bytes to pop.
134 IRET,
135
136 /// Repeat fill, corresponds to X86::REP_STOSx.
137 REP_STOS,
138
139 /// Repeat move, corresponds to X86::REP_MOVSx.
140 REP_MOVS,
141
142 /// On Darwin, this node represents the result of the popl
143 /// at function entry, used for PIC code.
144 GlobalBaseReg,
145
146 /// A wrapper node for TargetConstantPool, TargetJumpTable,
147 /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
148 /// MCSymbol and TargetBlockAddress.
149 Wrapper,
150
151 /// Special wrapper used under X86-64 PIC mode for RIP
152 /// relative displacements.
153 WrapperRIP,
154
155 /// Copies a 64-bit value from an MMX vector to the low word
156 /// of an XMM vector, with the high word zero filled.
157 MOVQ2DQ,
158
159 /// Copies a 64-bit value from the low word of an XMM vector
160 /// to an MMX vector.
161 MOVDQ2Q,
162
      /// Copies a 32-bit value from the low word of an MMX
164 /// vector to a GPR.
165 MMX_MOVD2W,
166
      /// Copies a GPR into the low 32-bit word of an MMX vector
      /// and zeroes out the high word.
169 MMX_MOVW2D,
170
171 /// Extract an 8-bit value from a vector and zero extend it to
172 /// i32, corresponds to X86::PEXTRB.
173 PEXTRB,
174
175 /// Extract a 16-bit value from a vector and zero extend it to
176 /// i32, corresponds to X86::PEXTRW.
177 PEXTRW,
178
179 /// Insert any element of a 4 x float vector into any element
      /// of a destination 4 x float vector.
181 INSERTPS,
182
      /// Insert the lower 8 bits of a 32-bit value into a vector,
184 /// corresponds to X86::PINSRB.
185 PINSRB,
186
      /// Insert the lower 16 bits of a 32-bit value into a vector,
188 /// corresponds to X86::PINSRW.
189 PINSRW,
190
191 /// Shuffle 16 8-bit values within a vector.
192 PSHUFB,
193
194 /// Compute Sum of Absolute Differences.
195 PSADBW,
196 /// Compute Double Block Packed Sum-Absolute-Differences
197 DBPSADBW,
198
199 /// Bitwise Logical AND NOT of Packed FP values.
200 ANDNP,
201
202 /// Blend where the selector is an immediate.
203 BLENDI,
204
205 /// Dynamic (non-constant condition) vector blend where only the sign bits
206 /// of the condition elements are used. This is used to enforce that the
207 /// condition mask is not valid for generic VSELECT optimizations. This
208 /// is also used to implement the intrinsics.
209 /// Operands are in VSELECT order: MASK, TRUE, FALSE
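      /// For example (an illustrative restatement, not new semantics):
      /// BLENDV(Mask, X, Y) takes the X element in each lane whose Mask
      /// element has its sign bit set and the Y element otherwise.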
210 BLENDV,
211
212 /// Combined add and sub on an FP vector.
213 ADDSUB,
214
215 // FP vector ops with rounding mode.
216 FADD_RND,
217 FADDS,
218 FADDS_RND,
219 FSUB_RND,
220 FSUBS,
221 FSUBS_RND,
222 FMUL_RND,
223 FMULS,
224 FMULS_RND,
225 FDIV_RND,
226 FDIVS,
227 FDIVS_RND,
228 FMAX_SAE,
229 FMAXS_SAE,
230 FMIN_SAE,
231 FMINS_SAE,
232 FSQRT_RND,
233 FSQRTS,
234 FSQRTS_RND,
235
236 // FP vector get exponent.
237 FGETEXP,
238 FGETEXP_SAE,
239 FGETEXPS,
240 FGETEXPS_SAE,
241 // Extract Normalized Mantissas.
242 VGETMANT,
243 VGETMANT_SAE,
244 VGETMANTS,
245 VGETMANTS_SAE,
246 // FP Scale.
247 SCALEF,
248 SCALEF_RND,
249 SCALEFS,
250 SCALEFS_RND,
251
252 /// Integer horizontal add/sub.
253 HADD,
254 HSUB,
255
256 /// Floating point horizontal add/sub.
257 FHADD,
258 FHSUB,
259
260 // Detect Conflicts Within a Vector
261 CONFLICT,
262
263 /// Floating point max and min.
264 FMAX,
265 FMIN,
266
267 /// Commutative FMIN and FMAX.
268 FMAXC,
269 FMINC,
270
271 /// Scalar intrinsic floating point max and min.
272 FMAXS,
273 FMINS,
274
275 /// Floating point reciprocal-sqrt and reciprocal approximation.
276 /// Note that these typically require refinement
277 /// in order to obtain suitable precision.
278 FRSQRT,
279 FRCP,
280
281 // AVX-512 reciprocal approximations with a little more precision.
282 RSQRT14,
283 RSQRT14S,
284 RCP14,
285 RCP14S,
286
287 // Thread Local Storage.
288 TLSADDR,
289
290 // Thread Local Storage. A call to get the start address
291 // of the TLS block for the current module.
292 TLSBASEADDR,
293
      // Thread Local Storage. A call to an OS-provided
      // thunk at the address from an earlier relocation.
296 TLSCALL,
297
      // Thread Local Storage. A descriptor containing a pointer to
      // code and to an argument, used to get the TLS offset for the symbol.
300 TLSDESC,
301
302 // Exception Handling helpers.
303 EH_RETURN,
304
305 // SjLj exception handling setjmp.
306 EH_SJLJ_SETJMP,
307
308 // SjLj exception handling longjmp.
309 EH_SJLJ_LONGJMP,
310
311 // SjLj exception handling dispatch.
312 EH_SJLJ_SETUP_DISPATCH,
313
314 /// Tail call return. See X86TargetLowering::LowerCall for
315 /// the list of operands.
316 TC_RETURN,
317
318 // Vector move to low scalar and zero higher vector elements.
319 VZEXT_MOVL,
320
321 // Vector integer truncate.
322 VTRUNC,
323 // Vector integer truncate with unsigned/signed saturation.
324 VTRUNCUS,
325 VTRUNCS,
326
327 // Masked version of the above. Used when less than a 128-bit result is
328 // produced since the mask only applies to the lower elements and can't
329 // be represented by a select.
330 // SRC, PASSTHRU, MASK
331 VMTRUNC,
332 VMTRUNCUS,
333 VMTRUNCS,
334
335 // Vector FP extend.
336 VFPEXT,
337 VFPEXT_SAE,
338 VFPEXTS,
339 VFPEXTS_SAE,
340
341 // Vector FP round.
342 VFPROUND,
343 VFPROUND_RND,
344 VFPROUNDS,
345 VFPROUNDS_RND,
346
347 // Masked version of above. Used for v2f64->v4f32.
348 // SRC, PASSTHRU, MASK
349 VMFPROUND,
350
351 // 128-bit vector logical left / right shift
352 VSHLDQ,
353 VSRLDQ,
354
355 // Vector shift elements
356 VSHL,
357 VSRL,
358 VSRA,
359
360 // Vector variable shift
361 VSHLV,
362 VSRLV,
363 VSRAV,
364
365 // Vector shift elements by immediate
366 VSHLI,
367 VSRLI,
368 VSRAI,
369
370 // Shifts of mask registers.
371 KSHIFTL,
372 KSHIFTR,
373
374 // Bit rotate by immediate
375 VROTLI,
376 VROTRI,
377
378 // Vector packed double/float comparison.
379 CMPP,
380
381 // Vector integer comparisons.
382 PCMPEQ,
383 PCMPGT,
384
385 // v8i16 Horizontal minimum and position.
386 PHMINPOS,
387
388 MULTISHIFT,
389
390 /// Vector comparison generating mask bits for fp and
391 /// integer signed and unsigned data types.
392 CMPM,
393 // Vector mask comparison generating mask bits for FP values.
394 CMPMM,
395 // Vector mask comparison with SAE for FP values.
396 CMPMM_SAE,
397
398 // Arithmetic operations with FLAGS results.
399 ADD,
400 SUB,
401 ADC,
402 SBB,
403 SMUL,
404 UMUL,
405 OR,
406 XOR,
407 AND,
408
409 // Bit field extract.
410 BEXTR,
411 BEXTRI,
412
413 // Zero High Bits Starting with Specified Bit Position.
414 BZHI,
415
416 // Parallel extract and deposit.
417 PDEP,
418 PEXT,
419
420 // X86-specific multiply by immediate.
421 MUL_IMM,
422
423 // Vector sign bit extraction.
424 MOVMSK,
425
426 // Vector bitwise comparisons.
427 PTEST,
428
429 // Vector packed fp sign bitwise comparisons.
430 TESTP,
431
432 // OR/AND test for masks.
433 KORTEST,
434 KTEST,
435
436 // ADD for masks.
437 KADD,
438
439 // Several flavors of instructions with vector shuffle behaviors.
      // Saturated signed/unsigned packing.
441 PACKSS,
442 PACKUS,
443 // Intra-lane alignr.
444 PALIGNR,
445 // AVX512 inter-lane alignr.
446 VALIGN,
447 PSHUFD,
448 PSHUFHW,
449 PSHUFLW,
450 SHUFP,
451 // VBMI2 Concat & Shift.
452 VSHLD,
453 VSHRD,
454 VSHLDV,
455 VSHRDV,
456 // Shuffle Packed Values at 128-bit granularity.
457 SHUF128,
458 MOVDDUP,
459 MOVSHDUP,
460 MOVSLDUP,
461 MOVLHPS,
462 MOVHLPS,
463 MOVSD,
464 MOVSS,
465 MOVSH,
466 UNPCKL,
467 UNPCKH,
468 VPERMILPV,
469 VPERMILPI,
470 VPERMI,
471 VPERM2X128,
472
473 // Variable Permute (VPERM).
474 // Res = VPERMV MaskV, V0
475 VPERMV,
476
477 // 3-op Variable Permute (VPERMT2).
478 // Res = VPERMV3 V0, MaskV, V1
479 VPERMV3,
480
481 // Bitwise ternary logic.
482 VPTERNLOG,
483 // Fix Up Special Packed Float32/64 values.
484 VFIXUPIMM,
485 VFIXUPIMM_SAE,
486 VFIXUPIMMS,
487 VFIXUPIMMS_SAE,
488 // Range Restriction Calculation For Packed Pairs of Float32/64 values.
489 VRANGE,
490 VRANGE_SAE,
491 VRANGES,
492 VRANGES_SAE,
      // Reduce - Perform Reduction Transformation on scalar/packed FP.
494 VREDUCE,
495 VREDUCE_SAE,
496 VREDUCES,
497 VREDUCES_SAE,
498 // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
499 // Also used by the legacy (V)ROUND intrinsics where we mask out the
500 // scaling part of the immediate.
501 VRNDSCALE,
502 VRNDSCALE_SAE,
503 VRNDSCALES,
504 VRNDSCALES_SAE,
      // Tests types of packed FP values.
506 VFPCLASS,
      // Tests types of scalar FP values.
508 VFPCLASSS,
509
510 // Broadcast (splat) scalar or element 0 of a vector. If the operand is
511 // a vector, this node may change the vector length as part of the splat.
512 VBROADCAST,
513 // Broadcast mask to vector.
514 VBROADCASTM,
515
516 /// SSE4A Extraction and Insertion.
517 EXTRQI,
518 INSERTQI,
519
520 // XOP arithmetic/logical shifts.
521 VPSHA,
522 VPSHL,
523 // XOP signed/unsigned integer comparisons.
524 VPCOM,
525 VPCOMU,
526 // XOP packed permute bytes.
527 VPPERM,
528 // XOP two source permutation.
529 VPERMIL2,
530
531 // Vector multiply packed unsigned doubleword integers.
532 PMULUDQ,
533 // Vector multiply packed signed doubleword integers.
534 PMULDQ,
      // Vector multiply packed signed integers with round and scale.
536 MULHRS,
537
538 // Multiply and Add Packed Integers.
539 VPMADDUBSW,
540 VPMADDWD,
541
542 // AVX512IFMA multiply and add.
      // NOTE: These differ from the instruction in that they perform
      // op0 x op1 + op2.
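      // For example (illustrative; the operand names are ours, not from this
      // header): per 64-bit element, VPMADD52L(X, Y, Acc) produces
      // Acc + low52(lo52(X) * lo52(Y)).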
545 VPMADD52L,
546 VPMADD52H,
547
548 // VNNI
549 VPDPBUSD,
550 VPDPBUSDS,
551 VPDPWSSD,
552 VPDPWSSDS,
553
554 // FMA nodes.
555 // We use the target independent ISD::FMA for the non-inverted case.
556 FNMADD,
557 FMSUB,
558 FNMSUB,
559 FMADDSUB,
560 FMSUBADD,
561
562 // FMA with rounding mode.
563 FMADD_RND,
564 FNMADD_RND,
565 FMSUB_RND,
566 FNMSUB_RND,
567 FMADDSUB_RND,
568 FMSUBADD_RND,
569
570 // AVX512-FP16 complex addition and multiplication.
571 VFMADDC,
572 VFMADDC_RND,
573 VFCMADDC,
574 VFCMADDC_RND,
575
576 VFMULC,
577 VFMULC_RND,
578 VFCMULC,
579 VFCMULC_RND,
580
581 VFMADDCSH,
582 VFMADDCSH_RND,
583 VFCMADDCSH,
584 VFCMADDCSH_RND,
585
586 VFMULCSH,
587 VFMULCSH_RND,
588 VFCMULCSH,
589 VFCMULCSH_RND,
590
591 VPDPBSUD,
592 VPDPBSUDS,
593 VPDPBUUD,
594 VPDPBUUDS,
595 VPDPBSSD,
596 VPDPBSSDS,
597
598 // Compress and expand.
599 COMPRESS,
600 EXPAND,
601
602 // Bits shuffle
603 VPSHUFBITQMB,
604
      // Convert Unsigned/Signed Integer to Floating-Point Value with rounding
      // mode.
606 SINT_TO_FP_RND,
607 UINT_TO_FP_RND,
608 SCALAR_SINT_TO_FP,
609 SCALAR_UINT_TO_FP,
610 SCALAR_SINT_TO_FP_RND,
611 SCALAR_UINT_TO_FP_RND,
612
613 // Vector float/double to signed/unsigned integer.
614 CVTP2SI,
615 CVTP2UI,
616 CVTP2SI_RND,
617 CVTP2UI_RND,
618 // Scalar float/double to signed/unsigned integer.
619 CVTS2SI,
620 CVTS2UI,
621 CVTS2SI_RND,
622 CVTS2UI_RND,
623
624 // Vector float/double to signed/unsigned integer with truncation.
625 CVTTP2SI,
626 CVTTP2UI,
627 CVTTP2SI_SAE,
628 CVTTP2UI_SAE,
629 // Scalar float/double to signed/unsigned integer with truncation.
630 CVTTS2SI,
631 CVTTS2UI,
632 CVTTS2SI_SAE,
633 CVTTS2UI_SAE,
634
635 // Vector signed/unsigned integer to float/double.
636 CVTSI2P,
637 CVTUI2P,
638
639 // Masked versions of above. Used for v2f64->v4f32.
640 // SRC, PASSTHRU, MASK
641 MCVTP2SI,
642 MCVTP2UI,
643 MCVTTP2SI,
644 MCVTTP2UI,
645 MCVTSI2P,
646 MCVTUI2P,
647
648 // Vector float to bfloat16.
      // Convert two packed single-precision vectors to one packed BF16 vector.
      CVTNE2PS2BF16,
      // Convert packed single-precision data to packed BF16 data.
652 CVTNEPS2BF16,
653 // Masked version of above.
654 // SRC, PASSTHRU, MASK
655 MCVTNEPS2BF16,
656
      // Dot product of BF16 pairs, accumulated into
      // packed single precision.
659 DPBF16PS,
660
      // A stack checking function call. On Windows it's a _chkstk call.
662 DYN_ALLOCA,
663
      // For allocating variable amounts of stack space when using
      // segmented stacks. Checks if the current stacklet has enough space,
      // and falls back to heap allocation if not.
667 SEG_ALLOCA,
668
669 // For allocating stack space when using stack clash protector.
670 // Allocation is performed by block, and each block is probed.
671 PROBED_ALLOCA,
672
673 // Memory barriers.
674 MFENCE,
675
676 // Get a random integer and indicate whether it is valid in CF.
677 RDRAND,
678
679 // Get a NIST SP800-90B & C compliant random integer and
680 // indicate whether it is valid in CF.
681 RDSEED,
682
683 // Protection keys
684 // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
685 // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
686 // value for ECX.
687 RDPKRU,
688 WRPKRU,
689
690 // SSE42 string comparisons.
      // These nodes produce 3 results: index, mask, and flags. X86ISelDAGToDAG
      // will emit one or two instructions based on which results are used. If
      // both flags and index/mask are used, this allows us to use a single
      // instruction since we won't have to pick an opcode for flags. Instead
      // we can rely on the DAG to CSE everything and decide at isel.
696 PCMPISTR,
697 PCMPESTR,
698
699 // Test if in transactional execution.
700 XTEST,
701
702 // ERI instructions.
703 RSQRT28,
704 RSQRT28_SAE,
705 RSQRT28S,
706 RSQRT28S_SAE,
707 RCP28,
708 RCP28_SAE,
709 RCP28S,
710 RCP28S_SAE,
711 EXP2,
712 EXP2_SAE,
713
714 // Conversions between float and half-float.
715 CVTPS2PH,
716 CVTPS2PH_SAE,
717 CVTPH2PS,
718 CVTPH2PS_SAE,
719
720 // Masked version of above.
721 // SRC, RND, PASSTHRU, MASK
722 MCVTPS2PH,
723 MCVTPS2PH_SAE,
724
725 // Galois Field Arithmetic Instructions
726 GF2P8AFFINEINVQB,
727 GF2P8AFFINEQB,
728 GF2P8MULB,
729
730 // LWP insert record.
731 LWPINS,
732
733 // User level wait
734 UMWAIT,
735 TPAUSE,
736
737 // Enqueue Stores Instructions
738 ENQCMD,
739 ENQCMDS,
740
741 // For avx512-vp2intersect
742 VP2INTERSECT,
743
744 // User level interrupts - testui
745 TESTUI,
746
747 // Perform an FP80 add after changing precision control in FPCW.
748 FP80_ADD,
749
750 /// X86 strict FP compare instructions.
751 STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
752 STRICT_FCMPS,
753
754 // Vector packed double/float comparison.
755 STRICT_CMPP,
756
757 /// Vector comparison generating mask bits for fp and
758 /// integer signed and unsigned data types.
759 STRICT_CMPM,
760
761 // Vector float/double to signed/unsigned integer with truncation.
762 STRICT_CVTTP2SI,
763 STRICT_CVTTP2UI,
764
765 // Vector FP extend.
766 STRICT_VFPEXT,
767
768 // Vector FP round.
769 STRICT_VFPROUND,
770
771 // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
772 // Also used by the legacy (V)ROUND intrinsics where we mask out the
773 // scaling part of the immediate.
774 STRICT_VRNDSCALE,
775
776 // Vector signed/unsigned integer to float/double.
777 STRICT_CVTSI2P,
778 STRICT_CVTUI2P,
779
780 // Strict FMA nodes.
781 STRICT_FNMADD,
782 STRICT_FMSUB,
783 STRICT_FNMSUB,
784
785 // Conversions between float and half-float.
786 STRICT_CVTPS2PH,
787 STRICT_CVTPH2PS,
788
789 // Perform an FP80 add after changing precision control in FPCW.
790 STRICT_FP80_ADD,
791
792 // WARNING: Only add nodes here if they are strict FP nodes. Non-memory and
793 // non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.
794
795 // Compare and swap.
796 LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
797 LCMPXCHG8_DAG,
798 LCMPXCHG16_DAG,
799 LCMPXCHG16_SAVE_RBX_DAG,
800
801 /// LOCK-prefixed arithmetic read-modify-write instructions.
802 /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
803 LADD,
804 LSUB,
805 LOR,
806 LXOR,
807 LAND,
808 LBTS,
809 LBTC,
810 LBTR,
811 LBTS_RM,
812 LBTC_RM,
813 LBTR_RM,
814
815 /// RAO arithmetic instructions.
816 /// OUTCHAIN = AADD(INCHAIN, PTR, RHS)
817 AADD,
818 AOR,
819 AXOR,
820 AAND,
821
822 // Load, scalar_to_vector, and zero extend.
823 VZEXT_LOAD,
824
825 // extract_vector_elt, store.
826 VEXTRACT_STORE,
827
828 // scalar broadcast from memory.
829 VBROADCAST_LOAD,
830
831 // subvector broadcast from memory.
832 SUBV_BROADCAST_LOAD,
833
834 // Store FP control word into i16 memory.
835 FNSTCW16m,
836
837 // Load FP control word from i16 memory.
838 FLDCW16m,
839
840 // Store x87 FPU environment into memory.
841 FNSTENVm,
842
843 // Load x87 FPU environment from memory.
844 FLDENVm,
845
846 /// This instruction implements FP_TO_SINT with the
847 /// integer destination in memory and a FP reg source. This corresponds
848 /// to the X86::FIST*m instructions and the rounding mode change stuff. It
849 /// has two inputs (token chain and address) and two outputs (int value
850 /// and token chain). Memory VT specifies the type to store to.
851 FP_TO_INT_IN_MEM,
852
853 /// This instruction implements SINT_TO_FP with the
854 /// integer source in memory and FP reg result. This corresponds to the
855 /// X86::FILD*m instructions. It has two inputs (token chain and address)
856 /// and two outputs (FP value and token chain). The integer source type is
857 /// specified by the memory VT.
858 FILD,
859
860 /// This instruction implements a fp->int store from FP stack
861 /// slots. This corresponds to the fist instruction. It takes a
862 /// chain operand, value to store, address, and glue. The memory VT
863 /// specifies the type to store as.
864 FIST,
865
866 /// This instruction implements an extending load to FP stack slots.
867 /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
868 /// operand, and ptr to load from. The memory VT specifies the type to
869 /// load from.
870 FLD,
871
872 /// This instruction implements a truncating store from FP stack
873 /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
874 /// chain operand, value to store, address, and glue. The memory VT
875 /// specifies the type to store as.
876 FST,
877
878 /// These instructions grab the address of the next argument
879 /// from a va_list. (reads and modifies the va_list in memory)
880 VAARG_64,
881 VAARG_X32,
882
883 // Vector truncating store with unsigned/signed saturation
884 VTRUNCSTOREUS,
885 VTRUNCSTORES,
886 // Vector truncating masked store with unsigned/signed saturation
887 VMTRUNCSTOREUS,
888 VMTRUNCSTORES,
889
890 // X86 specific gather and scatter
891 MGATHER,
892 MSCATTER,
893
894 // Key locker nodes that produce flags.
895 AESENC128KL,
896 AESDEC128KL,
897 AESENC256KL,
898 AESDEC256KL,
899 AESENCWIDE128KL,
900 AESDECWIDE128KL,
901 AESENCWIDE256KL,
902 AESDECWIDE256KL,
903
      /// Compare and Add if Condition is Met. Compare the value in operand 2
      /// with the value in memory at operand 1. If the condition of operand 4
      /// is met, add the value of operand 3 to m32 and write the new value
      /// back to operand 1. Operand 2 is always updated with the original
      /// value from operand 1.
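      /// A rough pseudo-code reading of the above (variable names are ours,
      /// not part of the node definition):
      ///   Tmp = mem[Op1]; Flags = compare(Tmp, Op2);
      ///   if (cond(Op4, Flags)) mem[Op1] = Tmp + Op3;
      ///   Op2 (the register result) = Tmp;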
908 CMPCCXADD,
909
910 // Save xmm argument registers to the stack, according to %al. An operator
911 // is needed so that this can be expanded with control flow.
912 VASTART_SAVE_XMM_REGS,
913
      // WARNING: Do not add anything at the end unless you want the node to
      // have a memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE, all
      // opcodes will be treated as target memory ops!
917 };
918 } // end namespace X86ISD
919
920 namespace X86 {
    /// The current rounding mode is represented in bits 11:10 of the FP
    /// control word (FPCW). These values are the same as the corresponding
    /// constants used for the rounding mode in glibc.
924 enum RoundingMode {
925 rmToNearest = 0, // FE_TONEAREST
926 rmDownward = 1 << 10, // FE_DOWNWARD
927 rmUpward = 2 << 10, // FE_UPWARD
928 rmTowardZero = 3 << 10, // FE_TOWARDZERO
929 rmMask = 3 << 10 // Bit mask selecting rounding mode
930 };
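    // A minimal usage sketch (illustrative only; CW stands for a control-word
    // value obtained elsewhere, e.g. via FNSTCW):
    //   unsigned RM = CW & X86::rmMask;
    //   bool RoundsToNearest = (RM == X86::rmToNearest);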
931 }
932
933 /// Define some predicates that are used for node matching.
934 namespace X86 {
935 /// Returns true if Elt is a constant zero or floating point constant +0.0.
936 bool isZeroNode(SDValue Elt);
937
    /// Returns true if the given offset can
    /// fit into the displacement field of the instruction.
940 bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
941 bool hasSymbolicDisplacement);
942
943 /// Determines whether the callee is required to pop its
944 /// own arguments. Callee pop is necessary to support tail calls.
945 bool isCalleePop(CallingConv::ID CallingConv,
946 bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
947
948 /// If Op is a constant whose elements are all the same constant or
949 /// undefined, return true and return the constant value in \p SplatVal.
950 /// If we have undef bits that don't cover an entire element, we treat these
951 /// as zero if AllowPartialUndefs is set, else we fail and return false.
952 bool isConstantSplat(SDValue Op, APInt &SplatVal,
953 bool AllowPartialUndefs = true);
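    // A typical use looks roughly like this (illustrative sketch only, not
    // code taken from the X86 backend):
    //   APInt SplatVal;
    //   if (X86::isConstantSplat(Op, SplatVal) && SplatVal.isSignMask())
    //     ...; // Op splats the sign-bit mask into every element.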
954
955 /// Check if Op is a load operation that could be folded into some other x86
956 /// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0.
957 bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
958 bool AssumeSingleUse = false);
959
960 /// Check if Op is a load operation that could be folded into a vector splat
961 /// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2.
962 bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
963 const X86Subtarget &Subtarget,
964 bool AssumeSingleUse = false);
965
966 /// Check if Op is a value that could be used to fold a store into some
967 /// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi).
968 bool mayFoldIntoStore(SDValue Op);
969
970 /// Check if Op is an operation that could be folded into a zero extend x86
971 /// instruction.
972 bool mayFoldIntoZeroExtend(SDValue Op);
973
974 /// True if the target supports the extended frame for async Swift
975 /// functions.
976 bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget,
977 const MachineFunction &MF);
978 } // end namespace X86
979
980 //===--------------------------------------------------------------------===//
981 // X86 Implementation of the TargetLowering interface
982 class X86TargetLowering final : public TargetLowering {
983 public:
984 explicit X86TargetLowering(const X86TargetMachine &TM,
985 const X86Subtarget &STI);
986
987 unsigned getJumpTableEncoding() const override;
988 bool useSoftFloat() const override;
989
990 void markLibCallAttributes(MachineFunction *MF, unsigned CC,
991 ArgListTy &Args) const override;
992
993 MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
994 return MVT::i8;
995 }
996
997 const MCExpr *
998 LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
999 const MachineBasicBlock *MBB, unsigned uid,
1000 MCContext &Ctx) const override;
1001
1002 /// Returns relocation base for the given PIC jumptable.
1003 SDValue getPICJumpTableRelocBase(SDValue Table,
1004 SelectionDAG &DAG) const override;
1005 const MCExpr *
1006 getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
1007 unsigned JTI, MCContext &Ctx) const override;
1008
    /// Return the desired alignment for ByVal aggregate
    /// function arguments in the caller parameter area. For X86, aggregates
    /// that contain SSE vectors are placed at 16-byte boundaries while the
    /// rest are at 4-byte boundaries.
1013 uint64_t getByValTypeAlignment(Type *Ty,
1014 const DataLayout &DL) const override;
1015
1016 EVT getOptimalMemOpType(const MemOp &Op,
1017 const AttributeList &FuncAttributes) const override;
1018
1019 /// Returns true if it's safe to use load / store of the
1020 /// specified type to expand memcpy / memset inline. This is mostly true
1021 /// for all types except for some special cases. For example, on X86
1022 /// targets without SSE2 f64 load / store are done with fldl / fstpl which
1023 /// also does type conversion. Note the specified type doesn't have to be
1024 /// legal as the hook is used before type legalization.
1025 bool isSafeMemOpType(MVT VT) const override;
1026
1027 bool isMemoryAccessFast(EVT VT, Align Alignment) const;
1028
1029 /// Returns true if the target allows unaligned memory accesses of the
1030 /// specified type. Returns whether it is "fast" in the last argument.
1031 bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
1032 MachineMemOperand::Flags Flags,
1033 unsigned *Fast) const override;
1034
1035 /// This function returns true if the memory access is aligned or if the
1036 /// target allows this specific unaligned memory access. If the access is
1037 /// allowed, the optional final parameter returns a relative speed of the
1038 /// access (as defined by the target).
1039 bool allowsMemoryAccess(
1040 LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
1041 Align Alignment,
1042 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1043 unsigned *Fast = nullptr) const override;
1044
1045 bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
1046 const MachineMemOperand &MMO,
1047 unsigned *Fast) const {
      return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(),
                                MMO.getAlign(), MMO.getFlags(), Fast);
1050 }
1051
1052 /// Provide custom lowering hooks for some operations.
1053 ///
1054 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
1055
    /// Replace the results of a node with an illegal result
    /// type with new values built out of custom code.
1058 ///
1059 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
1060 SelectionDAG &DAG) const override;
1061
1062 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
1063
1064 bool preferABDSToABSWithNSW(EVT VT) const override;
1065
1066 bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT,
1067 EVT ExtVT) const override;
1068
1069 bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond,
1070 EVT VT) const override;
1071
1072 /// Return true if the target has native support for
1073 /// the specified value type and it is 'desirable' to use the type for the
1074 /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
1075 /// instruction encodings are longer and some i16 instructions are slow.
1076 bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
1077
1078 /// Return true if the target has native support for the
1079 /// specified value type and it is 'desirable' to use the type. e.g. On x86
1080 /// i16 is legal, but undesirable since i16 instruction encodings are longer
1081 /// and some i16 instructions are slow.
1082 bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
1083
    /// Return the preferred fold type: Abs if this is a vector, AddAnd if it's
    /// an integer, None otherwise.
1086 TargetLowering::AndOrSETCCFoldKind
1087 isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp,
1088 const SDNode *SETCC0,
1089 const SDNode *SETCC1) const override;
1090
1091 /// Return the newly negated expression if the cost is not expensive and
1092 /// set the cost in \p Cost to indicate that if it is cheaper or neutral to
1093 /// do the negation.
1094 SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
1095 bool LegalOperations, bool ForCodeSize,
1096 NegatibleCost &Cost,
1097 unsigned Depth) const override;
1098
1099 MachineBasicBlock *
1100 EmitInstrWithCustomInserter(MachineInstr &MI,
1101 MachineBasicBlock *MBB) const override;
1102
1103 /// This method returns the name of a target specific DAG node.
1104 const char *getTargetNodeName(unsigned Opcode) const override;
1105
1106 /// Do not merge vector stores after legalization because that may conflict
1107 /// with x86-specific store splitting optimizations.
1108 bool mergeStoresAfterLegalization(EVT MemVT) const override {
1109 return !MemVT.isVector();
1110 }
1111
1112 bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
1113 const MachineFunction &MF) const override;
1114
1115 bool isCheapToSpeculateCttz(Type *Ty) const override;
1116
1117 bool isCheapToSpeculateCtlz(Type *Ty) const override;
1118
1119 bool isCtlzFast() const override;
1120
1121 bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
      // If the pair to store is a mixture of float and int values, we will
      // save two bitwise instructions and one float-to-int instruction and
      // add one store instruction. There is potentially a more
      // significant benefit because it avoids the float->int domain switch
      // for the input value. So it is more likely a win.
1127 if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
1128 (LTy.isInteger() && HTy.isFloatingPoint()))
1129 return true;
      // If the pair only contains int values, we will save two bitwise
      // instructions and add one store instruction (costing one more
      // store buffer). Since the benefit is less clear, we leave
      // such pairs out until we get a test case proving it is a win.
1134 return false;
1135 }
1136
1137 bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
1138
1139 bool hasAndNotCompare(SDValue Y) const override;
1140
1141 bool hasAndNot(SDValue Y) const override;
1142
1143 bool hasBitTest(SDValue X, SDValue Y) const override;
1144
1145 bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
1146 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
1147 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1148 SelectionDAG &DAG) const override;
1149
1150 unsigned preferedOpcodeForCmpEqPiecesOfOperand(
1151 EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
1152 const APInt &ShiftOrRotateAmt,
1153 const std::optional<APInt> &AndMask) const override;
1154
1155 bool preferScalarizeSplat(SDNode *N) const override;
1156
1157 CondMergingParams
1158 getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs,
1159 const Value *Rhs) const override;
1160
1161 bool shouldFoldConstantShiftPairToMask(const SDNode *N,
1162 CombineLevel Level) const override;
1163
1164 bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
1165
1166 bool
1167 shouldTransformSignedTruncationCheck(EVT XVT,
1168 unsigned KeptBits) const override {
      // For vectors, we don't have a preference.
1170 if (XVT.isVector())
1171 return false;
1172
1173 auto VTIsOk = [](EVT VT) -> bool {
1174 return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
1175 VT == MVT::i64;
1176 };
1177
      // We are ok with KeptBitsVT being byte/word/dword, which is what MOVSX
      // supports.
      // XVT will be larger than KeptBitsVT.
      MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
1181 return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
1182 }
1183
1184 ShiftLegalizationStrategy
1185 preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
1186 unsigned ExpansionFactor) const override;
1187
1188 bool shouldSplatInsEltVarIndex(EVT VT) const override;
1189
1190 bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override {
1191 // Converting to sat variants holds little benefit on X86 as we will just
      // need to saturate the value back using fp arithmetic.
1193 return Op != ISD::FP_TO_UINT_SAT && isOperationLegalOrCustom(Op, VT);
1194 }
1195
1196 bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
1197 return VT.isScalarInteger();
1198 }
1199
1200 /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
1201 MVT hasFastEqualityCompare(unsigned NumBits) const override;
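    // For example (an illustrative idiom, not literal output of this hook):
    // a 16-byte equality test can be lowered to something like
    //   pcmpeqb %xmm1, %xmm0 ; pmovmskb %xmm0, %eax ; cmp $0xffff, %eax
    // rather than a chain of scalar compares and branches.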
1202
1203 /// Return the value type to use for ISD::SETCC.
1204 EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
1205 EVT VT) const override;
1206
1207 bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
1208 const APInt &DemandedElts,
1209 TargetLoweringOpt &TLO) const override;
1210
1211 /// Determine which of the bits specified in Mask are known to be either
1212 /// zero or one and return them in the KnownZero/KnownOne bitsets.
1213 void computeKnownBitsForTargetNode(const SDValue Op,
1214 KnownBits &Known,
1215 const APInt &DemandedElts,
1216 const SelectionDAG &DAG,
1217 unsigned Depth = 0) const override;
1218
1219 /// Determine the number of bits in the operation that are sign bits.
1220 unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
1221 const APInt &DemandedElts,
1222 const SelectionDAG &DAG,
1223 unsigned Depth) const override;
1224
1225 bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
1226 const APInt &DemandedElts,
1227 APInt &KnownUndef,
1228 APInt &KnownZero,
1229 TargetLoweringOpt &TLO,
1230 unsigned Depth) const override;
1231
1232 bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
1233 const APInt &DemandedElts,
1234 unsigned MaskIndex,
1235 TargetLoweringOpt &TLO,
1236 unsigned Depth) const;
1237
1238 bool SimplifyDemandedBitsForTargetNode(SDValue Op,
1239 const APInt &DemandedBits,
1240 const APInt &DemandedElts,
1241 KnownBits &Known,
1242 TargetLoweringOpt &TLO,
1243 unsigned Depth) const override;
1244
1245 SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
1246 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
1247 SelectionDAG &DAG, unsigned Depth) const override;
1248
1249 bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(
1250 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
1251 bool PoisonOnly, unsigned Depth) const override;
1252
1253 bool canCreateUndefOrPoisonForTargetNode(
1254 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
1255 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override;
1256
1257 bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,
1258 APInt &UndefElts, const SelectionDAG &DAG,
1259 unsigned Depth) const override;
1260
1261 bool isTargetCanonicalConstantNode(SDValue Op) const override {
1262 // Peek through bitcasts/extracts/inserts to see if we have a broadcast
1263 // vector from memory.
1264 while (Op.getOpcode() == ISD::BITCAST ||
1265 Op.getOpcode() == ISD::EXTRACT_SUBVECTOR ||
1266 (Op.getOpcode() == ISD::INSERT_SUBVECTOR &&
             Op.getOperand(0).isUndef()))
        Op = Op.getOperand(Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 1 : 0);
1269
1270 return Op.getOpcode() == X86ISD::VBROADCAST_LOAD ||
1271 TargetLowering::isTargetCanonicalConstantNode(Op);
1272 }
1273
1274 const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
1275
1276 SDValue unwrapAddress(SDValue N) const override;
1277
1278 SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
1279
1280 bool ExpandInlineAsm(CallInst *CI) const override;
1281
1282 ConstraintType getConstraintType(StringRef Constraint) const override;
1283
1284 /// Examine constraint string and operand type and determine a weight value.
1285 /// The operand object must already have been set up with the operand type.
1286 ConstraintWeight
1287 getSingleConstraintMatchWeight(AsmOperandInfo &Info,
1288 const char *Constraint) const override;
1289
1290 const char *LowerXConstraint(EVT ConstraintVT) const override;
1291
1292 /// Lower the specified operand into the Ops vector. If it is invalid, don't
1293 /// add anything to Ops. If hasMemory is true it means one of the asm
    /// constraints of the inline asm instruction being processed is 'm'.
1295 void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
1296 std::vector<SDValue> &Ops,
1297 SelectionDAG &DAG) const override;
1298
1299 InlineAsm::ConstraintCode
1300 getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1301 if (ConstraintCode == "v")
1302 return InlineAsm::ConstraintCode::v;
1303 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
1304 }
1305
1306 /// Handle Lowering flag assembly outputs.
1307 SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
1308 const SDLoc &DL,
1309 const AsmOperandInfo &Constraint,
1310 SelectionDAG &DAG) const override;
1311
1312 /// Given a physical register constraint
1313 /// (e.g. {edx}), return the register number and the register class for the
1314 /// register. This should only be used for C_Register constraints. On
1315 /// error, this returns a register number of 0.
1316 std::pair<unsigned, const TargetRegisterClass *>
1317 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1318 StringRef Constraint, MVT VT) const override;
1319
1320 /// Return true if the addressing mode represented
1321 /// by AM is legal for this target, for a load/store of the specified type.
1322 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
1323 Type *Ty, unsigned AS,
1324 Instruction *I = nullptr) const override;
1325
1326 bool addressingModeSupportsTLS(const GlobalValue &GV) const override;
1327
    /// Return true if the specified immediate is a legal
    /// icmp immediate, that is, the target has icmp instructions which can
1330 /// compare a register against the immediate without having to materialize
1331 /// the immediate into a register.
1332 bool isLegalICmpImmediate(int64_t Imm) const override;
1333
    /// Return true if the specified immediate is a legal
    /// add immediate, that is, the target has add instructions which can
1336 /// add a register and the immediate without having to materialize
1337 /// the immediate into a register.
1338 bool isLegalAddImmediate(int64_t Imm) const override;
1339
1340 bool isLegalStoreImmediate(int64_t Imm) const override;
1341
1342 /// This is used to enable splatted operand transforms for vector shifts
1343 /// and vector funnel shifts.
1344 bool isVectorShiftByScalarCheap(Type *Ty) const override;
1345
1346 /// Add x86-specific opcodes to the default list.
1347 bool isBinOp(unsigned Opcode) const override;
1348
1349 /// Returns true if the opcode is a commutative binary operation.
1350 bool isCommutativeBinOp(unsigned Opcode) const override;
1351
1352 /// Return true if it's free to truncate a value of
    /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
1354 /// register EAX to i16 by referencing its sub-register AX.
1355 bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1356 bool isTruncateFree(EVT VT1, EVT VT2) const override;
1357
1358 bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1359
1360 /// Return true if any actual instruction that defines a
1361 /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
1362 /// register. This does not necessarily include registers defined in
1363 /// unknown ways, such as incoming arguments, or copies from unknown
1364 /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
1365 /// does not necessarily apply to truncate instructions. e.g. on x86-64,
1366 /// all instructions that define 32-bit values implicit zero-extend the
1367 /// result out to 64 bits.
1368 bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1369 bool isZExtFree(EVT VT1, EVT VT2) const override;
1370 bool isZExtFree(SDValue Val, EVT VT2) const override;
1371
1372 bool shouldSinkOperands(Instruction *I,
1373 SmallVectorImpl<Use *> &Ops) const override;
1374 bool shouldConvertPhiType(Type *From, Type *To) const override;
1375
1376 /// Return true if folding a vector load into ExtVal (a sign, zero, or any
1377 /// extend node) is profitable.
1378 bool isVectorLoadExtDesirable(SDValue) const override;
1379
1380 /// Return true if an FMA operation is faster than a pair of fmul and fadd
1381 /// instructions. fmuladd intrinsics will be expanded to FMAs when this
1382 /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1383 bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
1384 EVT VT) const override;
1385
1386 /// Return true if it's profitable to narrow operations of type SrcVT to
1387 /// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not
1388 /// from i32 to i16.
1389 bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const override;
1390
1391 bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
1392 EVT VT) const override;
1393
1394 /// Given an intrinsic, checks if on the target the intrinsic will need to map
1395 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1396 /// true and stores the intrinsic information into the IntrinsicInfo that was
1397 /// passed to the function.
1398 bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1399 MachineFunction &MF,
1400 unsigned Intrinsic) const override;
1401
1402 /// Returns true if the target can instruction select the
1403 /// specified FP immediate natively. If false, the legalizer will
1404 /// materialize the FP immediate as a load from a constant pool.
1405 bool isFPImmLegal(const APFloat &Imm, EVT VT,
1406 bool ForCodeSize) const override;
1407
1408 /// Targets can use this to indicate that they only support *some*
1409 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1410 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1411 /// be legal.
1412 bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1413
1414 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1415 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1416 /// constant pool entry.
1417 bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1418
1419 /// Returns true if lowering to a jump table is allowed.
1420 bool areJTsAllowed(const Function *Fn) const override;
1421
1422 MVT getPreferredSwitchConditionType(LLVMContext &Context,
1423 EVT ConditionVT) const override;
1424
1425 /// If true, then instruction selection should
1426 /// seek to shrink the FP constant of the specified type to a smaller type
1427 /// in order to save space and / or reduce runtime.
1428 bool ShouldShrinkFPConstant(EVT VT) const override;
1429
1430 /// Return true if we believe it is correct and profitable to reduce the
1431 /// load node to a smaller type.
1432 bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1433 EVT NewVT) const override;
1434
1435 /// Return true if the specified scalar FP type is computed in an SSE
1436 /// register, not on the X87 floating point stack.
1437 bool isScalarFPTypeInSSEReg(EVT VT) const;
1438
1439 /// Returns true if it is beneficial to convert a load of a constant
1440 /// to just the constant itself.
1441 bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1442 Type *Ty) const override;
1443
1444 bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
1445
1446 bool convertSelectOfConstantsToMath(EVT VT) const override;
1447
1448 bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
1449 SDValue C) const override;
1450
1451 /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1452 /// with this index.
1453 bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1454 unsigned Index) const override;
1455
1456 /// Scalar ops always have equal or better analysis/performance/power than
1457 /// the vector equivalent, so this always makes sense if the scalar op is
1458 /// supported.
1459 bool shouldScalarizeBinop(SDValue) const override;
1460
1461 /// Extract of a scalar FP value from index 0 of a vector is free.
1462 bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1463 EVT EltVT = VT.getScalarType();
1464 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1465 }
1466
1467 /// Overflow nodes should get combined/lowered to optimal instructions
1468 /// (they should allow eliminating explicit compares by getting flags from
1469 /// math ops).
1470 bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
1471 bool MathUsed) const override;
1472
1473 bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
1474 unsigned AddrSpace) const override {
1475 // If we can replace more than 2 scalar stores, there will be a reduction
1476 // in instructions even after we add a vector constant load.
1477 return IsZero || NumElem > 2;
1478 }
1479
1480 bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1481 const SelectionDAG &DAG,
1482 const MachineMemOperand &MMO) const override;
1483
1484 /// Intel processors have a unified instruction and data cache
1485 const char * getClearCacheBuiltinName() const override {
1486 return nullptr; // nothing to do, move along.
1487 }
1488
1489 Register getRegisterByName(const char* RegName, LLT VT,
1490 const MachineFunction &MF) const override;
1491
1492 /// If a physical register, this returns the register that receives the
1493 /// exception address on entry to an EH pad.
1494 Register
1495 getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1496
1497 /// If a physical register, this returns the register that receives the
1498 /// exception typeid on entry to a landing pad.
1499 Register
1500 getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1501
1502 bool needsFixedCatchObjects() const override;
1503
1504 /// This method returns a target specific FastISel object,
1505 /// or null if the target does not support "fast" ISel.
1506 FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1507 const TargetLibraryInfo *libInfo) const override;
1508
1509 /// If the target has a standard location for the stack protector cookie,
1510 /// returns the address of that location. Otherwise, returns nullptr.
1511 Value *getIRStackGuard(IRBuilderBase &IRB) const override;
1512
1513 bool useLoadStackGuardNode() const override;
1514 bool useStackGuardXorFP() const override;
1515 void insertSSPDeclarations(Module &M) const override;
1516 Value *getSDagStackGuard(const Module &M) const override;
1517 Function *getSSPStackGuardCheck(const Module &M) const override;
1518 SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1519 const SDLoc &DL) const override;
1520
1521
1522 /// Return true if the target stores SafeStack pointer at a fixed offset in
1523 /// some non-standard address space, and populates the address space and
1524 /// offset as appropriate.
1525 Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
1526
1527 std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
1528 SDValue Chain, SDValue Pointer,
1529 MachinePointerInfo PtrInfo,
1530 Align Alignment,
1531 SelectionDAG &DAG) const;
1532
1533 /// Customize the preferred legalization strategy for certain types.
1534 LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1535
1536 bool softPromoteHalfType() const override { return true; }
1537
1538 MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1539 EVT VT) const override;
1540
1541 unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1542 CallingConv::ID CC,
1543 EVT VT) const override;
1544
1545 unsigned getVectorTypeBreakdownForCallingConv(
1546 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1547 unsigned &NumIntermediates, MVT &RegisterVT) const override;
1548
1549 bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1550
1551 bool supportSwiftError() const override;
1552
1553 bool supportKCFIBundles() const override { return true; }
1554
1555 MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
1556 MachineBasicBlock::instr_iterator &MBBI,
1557 const TargetInstrInfo *TII) const override;
1558
1559 bool hasStackProbeSymbol(const MachineFunction &MF) const override;
1560 bool hasInlineStackProbe(const MachineFunction &MF) const override;
1561 StringRef getStackProbeSymbolName(const MachineFunction &MF) const override;
1562
1563 unsigned getStackProbeSize(const MachineFunction &MF) const;
1564
1565 bool hasVectorBlend() const override { return true; }
1566
1567 unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1568
1569 bool isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs,
1570 unsigned OpNo) const override;
1571
1572 /// Lower interleaved load(s) into target specific
1573 /// instructions/intrinsics.
1574 bool lowerInterleavedLoad(LoadInst *LI,
1575 ArrayRef<ShuffleVectorInst *> Shuffles,
1576 ArrayRef<unsigned> Indices,
1577 unsigned Factor) const override;
1578
1579 /// Lower interleaved store(s) into target specific
1580 /// instructions/intrinsics.
1581 bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1582 unsigned Factor) const override;
1583
1584 SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr,
1585 int JTI, SelectionDAG &DAG) const override;
1586
1587 Align getPrefLoopAlignment(MachineLoop *ML) const override;
1588
1589 EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override {
1590 if (VT == MVT::f80)
        return EVT::getIntegerVT(Context, 96);
1592 return TargetLoweringBase::getTypeToTransformTo(Context, VT);
1593 }
1594
1595 protected:
1596 std::pair<const TargetRegisterClass *, uint8_t>
1597 findRepresentativeClass(const TargetRegisterInfo *TRI,
1598 MVT VT) const override;
1599
1600 private:
1601 /// Keep a reference to the X86Subtarget around so that we can
1602 /// make the right decision when generating code for different targets.
1603 const X86Subtarget &Subtarget;
1604
1605 /// A list of legal FP immediates.
1606 std::vector<APFloat> LegalFPImmediates;
1607
1608 /// Indicate that this x86 target can instruction
1609 /// select the specified FP immediate natively.
1610 void addLegalFPImmediate(const APFloat& Imm) {
      LegalFPImmediates.push_back(Imm);
1612 }
1613
1614 SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
1615 CallingConv::ID CallConv, bool isVarArg,
1616 const SmallVectorImpl<ISD::InputArg> &Ins,
1617 const SDLoc &dl, SelectionDAG &DAG,
1618 SmallVectorImpl<SDValue> &InVals,
1619 uint32_t *RegMask) const;
1620 SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1621 const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1622 const SDLoc &dl, SelectionDAG &DAG,
1623 const CCValAssign &VA, MachineFrameInfo &MFI,
1624 unsigned i) const;
1625 SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1626 const SDLoc &dl, SelectionDAG &DAG,
1627 const CCValAssign &VA,
1628 ISD::ArgFlagsTy Flags, bool isByval) const;
1629
1630 // Call lowering helpers.
1631
1632 /// Check whether the call is eligible for tail call optimization. Targets
1633 /// that want to do tail call optimization should implement this function.
1634 bool IsEligibleForTailCallOptimization(
1635 SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleeStackStructRet,
1636 bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
1637 const SmallVectorImpl<SDValue> &OutVals,
1638 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
1639 SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1640 SDValue Chain, bool IsTailCall,
1641 bool Is64Bit, int FPDiff,
1642 const SDLoc &dl) const;
1643
1644 unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1645 SelectionDAG &DAG) const;
1646
1647 unsigned getAddressSpace() const;
1648
1649 SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
1650 SDValue &Chain) const;
1651 SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;
1652
1653 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1654 SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1655 SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1656 SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1657
    unsigned getGlobalWrapperKind(const GlobalValue *GV,
                                  const unsigned char OpFlags) const;
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;

    /// Creates target global address or external symbol nodes for calls or
    /// other uses.
    SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
                                  bool ForCall) const;

    SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRESET_FPENV(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,
                                    SDValue &Chain) const;
    SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;

    SDValue
    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         const SDLoc &dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;
    SDValue LowerCall(CallLoweringInfo &CLI,
                      SmallVectorImpl<SDValue> &InVals) const override;

    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SDLoc &dl, SelectionDAG &DAG) const override;

    bool supportSplitCSR(MachineFunction *MF) const override {
      return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
             MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
    }
    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
    void insertCopiesSplitCSR(
        MachineBasicBlock *Entry,
        const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

    bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;

    EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                            ISD::NodeType ExtendKind) const override;

    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                        bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        LLVMContext &Context) const override;

    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
    ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;

    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;
    void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
    void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;

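    /// Brief summary of the hook: an idempotent atomicrmw (e.g. a fetch_add
    /// of 0 used only for its ordering and loaded value) may be replaced by a
    /// suitable fence followed by an ordinary load.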
    LoadInst *
    lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

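    /// Returns true when an atomic operation on \p MemType is wider than the
    /// natively supported width and must be expanded via the double-width
    /// CMPXCHG8B/CMPXCHG16B instructions (brief summary).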
    bool needsCmpXchgNb(Type *MemType) const;

    void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
                                MachineBasicBlock *DispatchBB, int FI) const;

    // Utility function to emit the low-level va_arg code for X86-64.
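    // (Summary: on the SysV x86-64 ABI the va_list records gp_offset and
    // fp_offset plus pointers to the register save area and the overflow
    // area; the emitted control flow selects between those two sources.)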
    MachineBasicBlock *
    EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;

    /// Utility function to lower two dependent CMOV pseudos (where the second
    /// uses the result of the first) with a single shared branch diamond.
    MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                                 MachineInstr &MI2,
                                                 MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
                                         MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
                                               MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
                                                MachineBasicBlock *BB) const;

    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    void emitSetJmpShadowStackFix(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
                                                 MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                             MachineBasicBlock *MBB) const;

    /// Emit flags for the given setcc condition and operands. Also returns the
    /// corresponding X86 condition code constant in X86CC.
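    /// For example (illustrative), an integer (seteq a, b) is typically
    /// lowered to an X86ISD::CMP of the two operands with X86CC holding a
    /// constant for X86::COND_E.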
    SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SDValue &X86CC) const;

    bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst,
                                             SDValue IntPow2) const override;

    /// Check if replacement of SQRT with RSQRT should be disabled.
    bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;

    /// Use rsqrt* to speed up sqrt calculations.
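    /// (Note: the hardware estimate is only accurate to roughly 12 bits; each
    /// Newton-Raphson refinement step, x1 = x0 * (1.5 - 0.5 * a * x0 * x0),
    /// roughly doubles the number of correct bits.)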
    SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                            int &RefinementSteps, bool &UseOneConstNR,
                            bool Reciprocal) const override;

    /// Use rcp* to speed up fdiv calculations.
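    /// (Note: each Newton-Raphson refinement step for the reciprocal estimate
    /// is x1 = x0 * (2.0 - a * x0).)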
    SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps) const override;

    /// Reassociate floating point divisions into multiply by reciprocal.
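    /// The return value is the minimum number of divisions by the same
    /// divisor that makes the rewrite worthwhile, e.g. (illustrative)
    /// a/d + b/d --> t = 1.0/d; a*t + b*t.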
    unsigned combineRepeatedFPDivisors() const override;

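    /// Build a target-specific sequence for a signed divide by a power of
    /// two. As a generic illustration (not necessarily the exact X86
    /// expansion), an i32 x/8 can be computed with shifts and an add as
    /// (x + ((x >> 31) u>> 29)) >> 3.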
    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                          SmallVectorImpl<SDNode *> &Created) const override;

    SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
                    SDValue V2) const;
  };

  namespace X86 {
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  } // end namespace X86

  // X86 specific Gather/Scatter nodes.
  // The class has the same order of operands as MaskedGatherScatterSDNode for
  // convenience.
  class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
  public:
    // This is intended as a utility class and should never be created
    // directly.
    X86MaskedGatherScatterSDNode() = delete;
    ~X86MaskedGatherScatterSDNode() = delete;

    const SDValue &getBasePtr() const { return getOperand(3); }
    const SDValue &getIndex() const { return getOperand(4); }
    const SDValue &getMask() const { return getOperand(2); }
    const SDValue &getScale() const { return getOperand(5); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER ||
             N->getOpcode() == X86ISD::MSCATTER;
    }
  };
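
  // Illustrative use via the classof machinery above, e.g. from a DAG
  // combine (sketch, not code from this file):
  //   if (auto *Gather = dyn_cast<X86MaskedGatherSDNode>(N))
  //     SDValue Index = Gather->getIndex();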

  class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
  public:
    const SDValue &getPassThru() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER;
    }
  };

  class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
  public:
    const SDValue &getValue() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  /// Generate unpacklo/unpackhi shuffle mask.
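  /// e.g. (illustrative) for a single 128-bit lane of v4iX:
  ///   Lo, binary --> <0, 4, 1, 5>    Hi, binary --> <2, 6, 3, 7>
  ///   Lo, unary  --> <0, 0, 1, 1>    Hi, unary  --> <2, 2, 3, 3>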
  void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
                               bool Unary);

  /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
  /// imposed by AVX and specific to the unary pattern. Example:
  /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
  /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
  void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H