AVRISelLowering.cpp source code [llvm/lib/Target/AVR/AVRISelLowering.cpp]

1	//===-- AVRISelLowering.cpp - AVR DAG Lowering Implementation -------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file defines the interfaces that AVR uses to lower LLVM code into a
10	// selection DAG.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "AVRISelLowering.h"
15
16	#include "llvm/ADT/ArrayRef.h"
17	#include "llvm/ADT/STLExtras.h"
18	#include "llvm/ADT/StringSwitch.h"
19	#include "llvm/CodeGen/CallingConvLower.h"
20	#include "llvm/CodeGen/MachineFrameInfo.h"
21	#include "llvm/CodeGen/MachineInstrBuilder.h"
22	#include "llvm/CodeGen/MachineRegisterInfo.h"
23	#include "llvm/CodeGen/SelectionDAG.h"
24	#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
25	#include "llvm/IR/Function.h"
26	#include "llvm/Support/ErrorHandling.h"
27
28	#include "AVR.h"
29	#include "AVRMachineFunctionInfo.h"
30	#include "AVRSubtarget.h"
31	#include "AVRTargetMachine.h"
32	#include "MCTargetDesc/AVRMCTargetDesc.h"
33
34	namespace llvm {
35
36	AVRTargetLowering::AVRTargetLowering(const AVRTargetMachine &TM,
37	const AVRSubtarget &STI)
38	: TargetLowering(TM), Subtarget(STI) {
39	// Set up the register classes.
40	addRegisterClass(MVT::VT: i8, RC: &AVR::GPR8RegClass);
41	addRegisterClass(MVT::VT: i16, RC: &AVR::DREGSRegClass);
42
43	// Compute derived properties from the register classes.
44	computeRegisterProperties(Subtarget.getRegisterInfo());
45
46	setBooleanContents(ZeroOrOneBooleanContent);
47	setBooleanVectorContents(ZeroOrOneBooleanContent);
48	setSchedulingPreference(Sched::RegPressure);
49	setStackPointerRegisterToSaveRestore(AVR::SP);
50	setSupportsUnalignedAtomics(true);
51
52	setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);
53	setOperationAction(ISD::BlockAddress, MVT::i16, Custom);
54
55	setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
56	setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
57	setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i8, Expand);
58	setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i16, Expand);
59
60	setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
61
62	for (MVT VT : MVT::integer_valuetypes()) {
63	for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
64	setLoadExtAction(N, VT, MVT::i1, Promote);
65	setLoadExtAction(N, VT, MVT::i8, Expand);
66	}
67	}
68
69	setTruncStoreAction(MVT::ValVT: i16, MVT::MemVT: i8, Action: Expand);
70
71	for (MVT VT : MVT::integer_valuetypes()) {
72	setOperationAction(ISD::ADDC, VT, Legal);
73	setOperationAction(ISD::SUBC, VT, Legal);
74	setOperationAction(ISD::ADDE, VT, Legal);
75	setOperationAction(ISD::SUBE, VT, Legal);
76	}
77
78	// sub (x, imm) gets canonicalized to add (x, -imm), so for illegal types
79	// revert into a sub since we don't have an add with immediate instruction.
80	setOperationAction(ISD::ADD, MVT::i32, Custom);
81	setOperationAction(ISD::ADD, MVT::i64, Custom);
82
83	// our shift instructions are only able to shift 1 bit at a time, so handle
84	// this in a custom way.
85	setOperationAction(ISD::SRA, MVT::i8, Custom);
86	setOperationAction(ISD::SHL, MVT::i8, Custom);
87	setOperationAction(ISD::SRL, MVT::i8, Custom);
88	setOperationAction(ISD::SRA, MVT::i16, Custom);
89	setOperationAction(ISD::SHL, MVT::i16, Custom);
90	setOperationAction(ISD::SRL, MVT::i16, Custom);
91	setOperationAction(ISD::SRA, MVT::i32, Custom);
92	setOperationAction(ISD::SHL, MVT::i32, Custom);
93	setOperationAction(ISD::SRL, MVT::i32, Custom);
94	setOperationAction(ISD::SHL_PARTS, MVT::i16, Expand);
95	setOperationAction(ISD::SRA_PARTS, MVT::i16, Expand);
96	setOperationAction(ISD::SRL_PARTS, MVT::i16, Expand);
97
98	setOperationAction(ISD::ROTL, MVT::i8, Custom);
99	setOperationAction(ISD::ROTL, MVT::i16, Expand);
100	setOperationAction(ISD::ROTR, MVT::i8, Custom);
101	setOperationAction(ISD::ROTR, MVT::i16, Expand);
102
103	setOperationAction(ISD::BR_CC, MVT::i8, Custom);
104	setOperationAction(ISD::BR_CC, MVT::i16, Custom);
105	setOperationAction(ISD::BR_CC, MVT::i32, Custom);
106	setOperationAction(ISD::BR_CC, MVT::i64, Custom);
107	setOperationAction(ISD::BRCOND, MVT::Other, Expand);
108
109	setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
110	setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
111	setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
112	setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
113	setOperationAction(ISD::SETCC, MVT::i8, Custom);
114	setOperationAction(ISD::SETCC, MVT::i16, Custom);
115	setOperationAction(ISD::SETCC, MVT::i32, Custom);
116	setOperationAction(ISD::SETCC, MVT::i64, Custom);
117	setOperationAction(ISD::SELECT, MVT::i8, Expand);
118	setOperationAction(ISD::SELECT, MVT::i16, Expand);
119
120	setOperationAction(ISD::BSWAP, MVT::i16, Expand);
121
122	// Add support for postincrement and predecrement load/stores.
123	setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
124	setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
125	setIndexedLoadAction(ISD::PRE_DEC, MVT::i8, Legal);
126	setIndexedLoadAction(ISD::PRE_DEC, MVT::i16, Legal);
127	setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal);
128	setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal);
129	setIndexedStoreAction(ISD::PRE_DEC, MVT::i8, Legal);
130	setIndexedStoreAction(ISD::PRE_DEC, MVT::i16, Legal);
131
132	setOperationAction(ISD::BR_JT, MVT::Other, Expand);
133
134	setOperationAction(ISD::VASTART, MVT::Other, Custom);
135	setOperationAction(ISD::VAEND, MVT::Other, Expand);
136	setOperationAction(ISD::VAARG, MVT::Other, Expand);
137	setOperationAction(ISD::VACOPY, MVT::Other, Expand);
138
139	// Atomic operations which must be lowered to rtlib calls
140	for (MVT VT : MVT::integer_valuetypes()) {
141	setOperationAction(ISD::ATOMIC_SWAP, VT, Expand);
142	setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Expand);
143	setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
144	setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
145	setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
146	setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
147	setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
148	}
149
150	// Division/remainder
151	setOperationAction(ISD::UDIV, MVT::i8, Expand);
152	setOperationAction(ISD::UDIV, MVT::i16, Expand);
153	setOperationAction(ISD::UREM, MVT::i8, Expand);
154	setOperationAction(ISD::UREM, MVT::i16, Expand);
155	setOperationAction(ISD::SDIV, MVT::i8, Expand);
156	setOperationAction(ISD::SDIV, MVT::i16, Expand);
157	setOperationAction(ISD::SREM, MVT::i8, Expand);
158	setOperationAction(ISD::SREM, MVT::i16, Expand);
159
160	// Make division and modulus custom
161	setOperationAction(ISD::UDIVREM, MVT::i8, Custom);
162	setOperationAction(ISD::UDIVREM, MVT::i16, Custom);
163	setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
164	setOperationAction(ISD::SDIVREM, MVT::i8, Custom);
165	setOperationAction(ISD::SDIVREM, MVT::i16, Custom);
166	setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
167
168	// Do not use MUL. The AVR instructions are closer to SMUL_LOHI &co.
169	setOperationAction(ISD::MUL, MVT::i8, Expand);
170	setOperationAction(ISD::MUL, MVT::i16, Expand);
171
172	// Expand 16 bit multiplications.
173	setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
174	setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
175
176	// Expand multiplications to libcalls when there is
177	// no hardware MUL.
178	if (!Subtarget.supportsMultiplication()) {
179	setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
180	setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
181	}
182
183	for (MVT VT : MVT::integer_valuetypes()) {
184	setOperationAction(ISD::MULHS, VT, Expand);
185	setOperationAction(ISD::MULHU, VT, Expand);
186	}
187
188	for (MVT VT : MVT::integer_valuetypes()) {
189	setOperationAction(ISD::CTPOP, VT, Expand);
190	setOperationAction(ISD::CTLZ, VT, Expand);
191	setOperationAction(ISD::CTTZ, VT, Expand);
192	}
193
194	for (MVT VT : MVT::integer_valuetypes()) {
195	setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
196	// TODO: The generated code is pretty poor. Investigate using the
197	// same "shift and subtract with carry" trick that we do for
198	// extending 8-bit to 16-bit. This may require infrastructure
199	// improvements in how we treat 16-bit "registers" to be feasible.
200	}
201
202	// Division rtlib functions (not supported), use divmod functions instead
203	setLibcallName(Call: RTLIB::SDIV_I8, Name: nullptr);
204	setLibcallName(Call: RTLIB::SDIV_I16, Name: nullptr);
205	setLibcallName(Call: RTLIB::SDIV_I32, Name: nullptr);
206	setLibcallName(Call: RTLIB::UDIV_I8, Name: nullptr);
207	setLibcallName(Call: RTLIB::UDIV_I16, Name: nullptr);
208	setLibcallName(Call: RTLIB::UDIV_I32, Name: nullptr);
209
210	// Modulus rtlib functions (not supported), use divmod functions instead
211	setLibcallName(Call: RTLIB::SREM_I8, Name: nullptr);
212	setLibcallName(Call: RTLIB::SREM_I16, Name: nullptr);
213	setLibcallName(Call: RTLIB::SREM_I32, Name: nullptr);
214	setLibcallName(Call: RTLIB::UREM_I8, Name: nullptr);
215	setLibcallName(Call: RTLIB::UREM_I16, Name: nullptr);
216	setLibcallName(Call: RTLIB::UREM_I32, Name: nullptr);
217
218	// Division and modulus rtlib functions
219	setLibcallName(Call: RTLIB::SDIVREM_I8, Name: "__divmodqi4");
220	setLibcallName(Call: RTLIB::SDIVREM_I16, Name: "__divmodhi4");
221	setLibcallName(Call: RTLIB::SDIVREM_I32, Name: "__divmodsi4");
222	setLibcallName(Call: RTLIB::UDIVREM_I8, Name: "__udivmodqi4");
223	setLibcallName(Call: RTLIB::UDIVREM_I16, Name: "__udivmodhi4");
224	setLibcallName(Call: RTLIB::UDIVREM_I32, Name: "__udivmodsi4");
225
226	// Several of the runtime library functions use a special calling conv
227	setLibcallCallingConv(Call: RTLIB::SDIVREM_I8, CC: CallingConv::AVR_BUILTIN);
228	setLibcallCallingConv(Call: RTLIB::SDIVREM_I16, CC: CallingConv::AVR_BUILTIN);
229	setLibcallCallingConv(Call: RTLIB::UDIVREM_I8, CC: CallingConv::AVR_BUILTIN);
230	setLibcallCallingConv(Call: RTLIB::UDIVREM_I16, CC: CallingConv::AVR_BUILTIN);
231
232	// Trigonometric rtlib functions
233	setLibcallName(Call: RTLIB::SIN_F32, Name: "sin");
234	setLibcallName(Call: RTLIB::COS_F32, Name: "cos");
235
236	setMinFunctionAlignment(Align (`2`));
237	setMinimumJumpTableEntries(UINT_MAX);
238	}
239
240	const char AVRTargetLowering::getTargetNodeName(unsigned* Opcode) const {
241	#define NODE(name) \
242	case AVRISD::name: \
243	return #name
244
245	switch (Opcode) {
246	default:
247	return nullptr;
248	NODE(RET_GLUE);
249	NODE(RETI_GLUE);
250	NODE(CALL);
251	NODE(WRAPPER);
252	NODE(LSL);
253	NODE(LSLW);
254	NODE(LSR);
255	NODE(LSRW);
256	NODE(ROL);
257	NODE(ROR);
258	NODE(ASR);
259	NODE(ASRW);
260	NODE(LSLLOOP);
261	NODE(LSRLOOP);
262	NODE(ROLLOOP);
263	NODE(RORLOOP);
264	NODE(ASRLOOP);
265	NODE(BRCOND);
266	NODE(CMP);
267	NODE(CMPC);
268	NODE(TST);
269	NODE(SELECT_CC);
270	#undef NODE
271	}
272	}
273
274	EVT AVRTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
275	EVT VT) const {
276	assert(!VT.isVector() && "No AVR SetCC type for vectors!");
277	return MVT::i8;
278	}
279
280	SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
281	unsigned Opc8;
282	const SDNode *N = Op.getNode();
283	EVT VT = Op.getValueType();
284	SDLoc dl(N);
285	assert(llvm::has_single_bit<uint32_t>(VT.getSizeInBits()) &&
286	"Expected power-of-2 shift amount");
287
288	if (VT.getSizeInBits() == `32`) {
289	if (!isa<ConstantSDNode>(Val: N->getOperand(Num: `1`))) {
290	// 32-bit shifts are converted to a loop in IR.
291	// This should be unreachable.
292	report_fatal_error(reason: "Expected a constant shift amount!");
293	}
294	SDVTList ResTys = DAG.getVTList(MVT::i16, MVT::i16);
295	SDValue SrcLo =
296	DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i16, Op.getOperand(`0`),
297	DAG.getConstant(`0`, dl, MVT::i16));
298	SDValue SrcHi =
299	DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i16, Op.getOperand(`0`),
300	DAG.getConstant(`1`, dl, MVT::i16));
301	uint64_t ShiftAmount = N->getConstantOperandVal(Num: `1`);
302	if (ShiftAmount == `16`) {
303	// Special case these two operations because they appear to be used by the
304	// generic codegen parts to lower 32-bit numbers.
305	// TODO: perhaps we can lower shift amounts bigger than 16 to a 16-bit
306	// shift of a part of the 32-bit value?
307	switch (Op.getOpcode()) {
308	case ISD::SHL: {
309	SDValue Zero = DAG.getConstant(`0`, dl, MVT::i16);
310	return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, Zero, SrcLo);
311	}
312	case ISD::SRL: {
313	SDValue Zero = DAG.getConstant(`0`, dl, MVT::i16);
314	return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, SrcHi, Zero);
315	}
316	}
317	}
318	SDValue Cnt = DAG.getTargetConstant(ShiftAmount, dl, MVT::i8);
319	unsigned Opc;
320	switch (Op.getOpcode()) {
321	default:
322	llvm_unreachable("Invalid 32-bit shift opcode!");
323	case ISD::SHL:
324	Opc = AVRISD::LSLW;
325	break;
326	case ISD::SRL:
327	Opc = AVRISD::LSRW;
328	break;
329	case ISD::SRA:
330	Opc = AVRISD::ASRW;
331	break;
332	}
333	SDValue Result = DAG.getNode(Opcode: Opc, DL: dl, VTList: ResTys, N1: SrcLo, N2: SrcHi, N3: Cnt);
334	return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, Result.getValue(`0`),
335	Result.getValue(`1`));
336	}
337
338	// Expand non-constant shifts to loops.
339	if (!isa<ConstantSDNode>(Val: N->getOperand(Num: `1`))) {
340	switch (Op.getOpcode()) {
341	default:
342	llvm_unreachable("Invalid shift opcode!");
343	case ISD::SHL:
344	return DAG.getNode(Opcode: AVRISD::LSLLOOP, DL: dl, VT, N1: N->getOperand(Num: `0`),
345	N2: N->getOperand(Num: `1`));
346	case ISD::SRL:
347	return DAG.getNode(Opcode: AVRISD::LSRLOOP, DL: dl, VT, N1: N->getOperand(Num: `0`),
348	N2: N->getOperand(Num: `1`));
349	case ISD::ROTL: {
350	SDValue Amt = N->getOperand(Num: `1`);
351	EVT AmtVT = Amt.getValueType();
352	Amt = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: AmtVT, N1: Amt,
353	N2: DAG.getConstant(Val: VT.getSizeInBits() - `1`, DL: dl, VT: AmtVT));
354	return DAG.getNode(Opcode: AVRISD::ROLLOOP, DL: dl, VT, N1: N->getOperand(Num: `0`), N2: Amt);
355	}
356	case ISD::ROTR: {
357	SDValue Amt = N->getOperand(Num: `1`);
358	EVT AmtVT = Amt.getValueType();
359	Amt = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: AmtVT, N1: Amt,
360	N2: DAG.getConstant(Val: VT.getSizeInBits() - `1`, DL: dl, VT: AmtVT));
361	return DAG.getNode(Opcode: AVRISD::RORLOOP, DL: dl, VT, N1: N->getOperand(Num: `0`), N2: Amt);
362	}
363	case ISD::SRA:
364	return DAG.getNode(Opcode: AVRISD::ASRLOOP, DL: dl, VT, N1: N->getOperand(Num: `0`),
365	N2: N->getOperand(Num: `1`));
366	}
367	}
368
369	uint64_t ShiftAmount = N->getConstantOperandVal(Num: `1`);
370	SDValue Victim = N->getOperand(Num: `0`);
371
372	switch (Op.getOpcode()) {
373	case ISD::SRA:
374	Opc8 = AVRISD::ASR;
375	break;
376	case ISD::ROTL:
377	Opc8 = AVRISD::ROL;
378	ShiftAmount = ShiftAmount % VT.getSizeInBits();
379	break;
380	case ISD::ROTR:
381	Opc8 = AVRISD::ROR;
382	ShiftAmount = ShiftAmount % VT.getSizeInBits();
383	break;
384	case ISD::SRL:
385	Opc8 = AVRISD::LSR;
386	break;
387	case ISD::SHL:
388	Opc8 = AVRISD::LSL;
389	break;
390	default:
391	llvm_unreachable("Invalid shift opcode");
392	}
393
394	// Optimize int8/int16 shifts.
395	if (VT.getSizeInBits() == `8`) {
396	if (Op.getOpcode() == ISD::SHL && `4` <= ShiftAmount && ShiftAmount < `7`) {
397	// Optimize LSL when 4 <= ShiftAmount <= 6.
398	Victim = DAG.getNode(Opcode: AVRISD::SWAP, DL: dl, VT, Operand: Victim);
399	Victim =
400	DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Victim, N2: DAG.getConstant(Val: `0xf0`, DL: dl, VT));
401	ShiftAmount -= `4`;
402	} else if (Op.getOpcode() == ISD::SRL && `4` <= ShiftAmount &&
403	ShiftAmount < `7`) {
404	// Optimize LSR when 4 <= ShiftAmount <= 6.
405	Victim = DAG.getNode(Opcode: AVRISD::SWAP, DL: dl, VT, Operand: Victim);
406	Victim =
407	DAG.getNode(Opcode: ISD::AND, DL: dl, VT, N1: Victim, N2: DAG.getConstant(Val: `0x0f`, DL: dl, VT));
408	ShiftAmount -= `4`;
409	} else if (Op.getOpcode() == ISD::SHL && ShiftAmount == `7`) {
410	// Optimize LSL when ShiftAmount == 7.
411	Victim = DAG.getNode(Opcode: AVRISD::LSLBN, DL: dl, VT, N1: Victim,
412	N2: DAG.getConstant(Val: `7`, DL: dl, VT));
413	ShiftAmount = `0`;
414	} else if (Op.getOpcode() == ISD::SRL && ShiftAmount == `7`) {
415	// Optimize LSR when ShiftAmount == 7.
416	Victim = DAG.getNode(Opcode: AVRISD::LSRBN, DL: dl, VT, N1: Victim,
417	N2: DAG.getConstant(Val: `7`, DL: dl, VT));
418	ShiftAmount = `0`;
419	} else if (Op.getOpcode() == ISD::SRA && ShiftAmount == `6`) {
420	// Optimize ASR when ShiftAmount == 6.
421	Victim = DAG.getNode(Opcode: AVRISD::ASRBN, DL: dl, VT, N1: Victim,
422	N2: DAG.getConstant(Val: `6`, DL: dl, VT));
423	ShiftAmount = `0`;
424	} else if (Op.getOpcode() == ISD::SRA && ShiftAmount == `7`) {
425	// Optimize ASR when ShiftAmount == 7.
426	Victim = DAG.getNode(Opcode: AVRISD::ASRBN, DL: dl, VT, N1: Victim,
427	N2: DAG.getConstant(Val: `7`, DL: dl, VT));
428	ShiftAmount = `0`;
429	} else if (Op.getOpcode() == ISD::ROTL && ShiftAmount == `3`) {
430	// Optimize left rotation 3 bits to swap then right rotation 1 bit.
431	Victim = DAG.getNode(Opcode: AVRISD::SWAP, DL: dl, VT, Operand: Victim);
432	Victim =
433	DAG.getNode(Opcode: AVRISD::ROR, DL: dl, VT, N1: Victim, N2: DAG.getConstant(Val: `1`, DL: dl, VT));
434	ShiftAmount = `0`;
435	} else if (Op.getOpcode() == ISD::ROTR && ShiftAmount == `3`) {
436	// Optimize right rotation 3 bits to swap then left rotation 1 bit.
437	Victim = DAG.getNode(Opcode: AVRISD::SWAP, DL: dl, VT, Operand: Victim);
438	Victim =
439	DAG.getNode(Opcode: AVRISD::ROL, DL: dl, VT, N1: Victim, N2: DAG.getConstant(Val: `1`, DL: dl, VT));
440	ShiftAmount = `0`;
441	} else if (Op.getOpcode() == ISD::ROTL && ShiftAmount == `7`) {
442	// Optimize left rotation 7 bits to right rotation 1 bit.
443	Victim =
444	DAG.getNode(Opcode: AVRISD::ROR, DL: dl, VT, N1: Victim, N2: DAG.getConstant(Val: `1`, DL: dl, VT));
445	ShiftAmount = `0`;
446	} else if (Op.getOpcode() == ISD::ROTR && ShiftAmount == `7`) {
447	// Optimize right rotation 7 bits to left rotation 1 bit.
448	Victim =
449	DAG.getNode(Opcode: AVRISD::ROL, DL: dl, VT, N1: Victim, N2: DAG.getConstant(Val: `1`, DL: dl, VT));
450	ShiftAmount = `0`;
451	} else if ((Op.getOpcode() == ISD::ROTR \|\| Op.getOpcode() == ISD::ROTL) &&
452	ShiftAmount >= `4`) {
453	// Optimize left/right rotation with the SWAP instruction.
454	Victim = DAG.getNode(Opcode: AVRISD::SWAP, DL: dl, VT, Operand: Victim);
455	ShiftAmount -= `4`;
456	}
457	} else if (VT.getSizeInBits() == `16`) {
458	if (Op.getOpcode() == ISD::SRA)
459	// Special optimization for int16 arithmetic right shift.
460	switch (ShiftAmount) {
461	case `15`:
462	Victim = DAG.getNode(Opcode: AVRISD::ASRWN, DL: dl, VT, N1: Victim,
463	N2: DAG.getConstant(Val: `15`, DL: dl, VT));
464	ShiftAmount = `0`;
465	break;
466	case `14`:
467	Victim = DAG.getNode(Opcode: AVRISD::ASRWN, DL: dl, VT, N1: Victim,
468	N2: DAG.getConstant(Val: `14`, DL: dl, VT));
469	ShiftAmount = `0`;
470	break;
471	case `7`:
472	Victim = DAG.getNode(Opcode: AVRISD::ASRWN, DL: dl, VT, N1: Victim,
473	N2: DAG.getConstant(Val: `7`, DL: dl, VT));
474	ShiftAmount = `0`;
475	break;
476	default:
477	break;
478	}
479	if (`4` <= ShiftAmount && ShiftAmount < `8`)
480	switch (Op.getOpcode()) {
481	case ISD::SHL:
482	Victim = DAG.getNode(Opcode: AVRISD::LSLWN, DL: dl, VT, N1: Victim,
483	N2: DAG.getConstant(Val: `4`, DL: dl, VT));
484	ShiftAmount -= `4`;
485	break;
486	case ISD::SRL:
487	Victim = DAG.getNode(Opcode: AVRISD::LSRWN, DL: dl, VT, N1: Victim,
488	N2: DAG.getConstant(Val: `4`, DL: dl, VT));
489	ShiftAmount -= `4`;
490	break;
491	default:
492	break;
493	}
494	else if (`8` <= ShiftAmount && ShiftAmount < `12`)
495	switch (Op.getOpcode()) {
496	case ISD::SHL:
497	Victim = DAG.getNode(Opcode: AVRISD::LSLWN, DL: dl, VT, N1: Victim,
498	N2: DAG.getConstant(Val: `8`, DL: dl, VT));
499	ShiftAmount -= `8`;
500	// Only operate on the higher byte for remaining shift bits.
501	Opc8 = AVRISD::LSLHI;
502	break;
503	case ISD::SRL:
504	Victim = DAG.getNode(Opcode: AVRISD::LSRWN, DL: dl, VT, N1: Victim,
505	N2: DAG.getConstant(Val: `8`, DL: dl, VT));
506	ShiftAmount -= `8`;
507	// Only operate on the lower byte for remaining shift bits.
508	Opc8 = AVRISD::LSRLO;
509	break;
510	case ISD::SRA:
511	Victim = DAG.getNode(Opcode: AVRISD::ASRWN, DL: dl, VT, N1: Victim,
512	N2: DAG.getConstant(Val: `8`, DL: dl, VT));
513	ShiftAmount -= `8`;
514	// Only operate on the lower byte for remaining shift bits.
515	Opc8 = AVRISD::ASRLO;
516	break;
517	default:
518	break;
519	}
520	else if (`12` <= ShiftAmount)
521	switch (Op.getOpcode()) {
522	case ISD::SHL:
523	Victim = DAG.getNode(Opcode: AVRISD::LSLWN, DL: dl, VT, N1: Victim,
524	N2: DAG.getConstant(Val: `12`, DL: dl, VT));
525	ShiftAmount -= `12`;
526	// Only operate on the higher byte for remaining shift bits.
527	Opc8 = AVRISD::LSLHI;
528	break;
529	case ISD::SRL:
530	Victim = DAG.getNode(Opcode: AVRISD::LSRWN, DL: dl, VT, N1: Victim,
531	N2: DAG.getConstant(Val: `12`, DL: dl, VT));
532	ShiftAmount -= `12`;
533	// Only operate on the lower byte for remaining shift bits.
534	Opc8 = AVRISD::LSRLO;
535	break;
536	case ISD::SRA:
537	Victim = DAG.getNode(Opcode: AVRISD::ASRWN, DL: dl, VT, N1: Victim,
538	N2: DAG.getConstant(Val: `8`, DL: dl, VT));
539	ShiftAmount -= `8`;
540	// Only operate on the lower byte for remaining shift bits.
541	Opc8 = AVRISD::ASRLO;
542	break;
543	default:
544	break;
545	}
546	}
547
548	while (ShiftAmount--) {
549	Victim = DAG.getNode(Opcode: Opc8, DL: dl, VT, Operand: Victim);
550	}
551
552	return Victim;
553	}
554
555	SDValue AVRTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
556	unsigned Opcode = Op ->getOpcode();
557	assert((Opcode == ISD::SDIVREM \|\| Opcode == ISD::UDIVREM) &&
558	"Invalid opcode for Div/Rem lowering");
559	bool IsSigned = (Opcode == ISD::SDIVREM);
560	EVT VT = Op ->getValueType(ResNo: `0`);
561	Type Ty = VT.getTypeForEVT(Context&: DAG.getContext());
562
563	RTLIB::Libcall LC;
564	switch (VT.getSimpleVT().SimpleTy) {
565	default:
566	llvm_unreachable("Unexpected request for libcall!");
567	case MVT::i8:
568	LC = IsSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8;
569	break;
570	case MVT::i16:
571	LC = IsSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16;
572	break;
573	case MVT::i32:
574	LC = IsSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;
575	break;
576	}
577
578	SDValue InChain = DAG.getEntryNode();
579
580	TargetLowering::ArgListTy Args;
581	TargetLowering::ArgListEntry Entry;
582	for (SDValue const &Value : Op ->op_values()) {
583	Entry.Node = Value;
584	Entry.Ty = Value.getValueType().getTypeForEVT(Context&: *DAG.getContext());
585	Entry.IsSExt = IsSigned;
586	Entry.IsZExt = !IsSigned;
587	Args.push_back(x: Entry);
588	}
589
590	SDValue Callee = DAG.getExternalSymbol(Sym: getLibcallName(Call: LC),
591	VT: getPointerTy(DL: DAG.getDataLayout()));
592
593	Type RetTy = (Type )StructType::get(elt1: Ty, elts: Ty);
594
595	SDLoc dl(Op);
596	TargetLowering::CallLoweringInfo CLI(DAG);
597	CLI.setDebugLoc(dl)
598	.setChain(InChain)
599	.setLibCallee(CC: getLibcallCallingConv(Call: LC), ResultType: RetTy, Target: Callee, ArgsList: std::move(Args))
600	.setInRegister()
601	.setSExtResult(IsSigned)
602	.setZExtResult(!IsSigned);
603
604	std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
605	return CallInfo.first;
606	}
607
608	SDValue AVRTargetLowering::LowerGlobalAddress(SDValue Op,
609	SelectionDAG &DAG) const {
610	auto DL = DAG.getDataLayout();
611
612	const GlobalValue *GV = cast<GlobalAddressSDNode>(Val&: Op)->getGlobal();
613	int64_t Offset = cast<GlobalAddressSDNode>(Val&: Op)->getOffset();
614
615	// Create the TargetGlobalAddress node, folding in the constant offset.
616	SDValue Result =
617	DAG.getTargetGlobalAddress(GV, DL: SDLoc (Op), VT: getPointerTy(DL), offset: Offset);
618	return DAG.getNode(Opcode: AVRISD::WRAPPER, DL: SDLoc (Op), VT: getPointerTy(DL), Operand: Result);
619	}
620
621	SDValue AVRTargetLowering::LowerBlockAddress(SDValue Op,
622	SelectionDAG &DAG) const {
623	auto DL = DAG.getDataLayout();
624	const BlockAddress *BA = cast<BlockAddressSDNode>(Val&: Op)->getBlockAddress();
625
626	SDValue Result = DAG.getTargetBlockAddress(BA, VT: getPointerTy(DL));
627
628	return DAG.getNode(Opcode: AVRISD::WRAPPER, DL: SDLoc (Op), VT: getPointerTy(DL), Operand: Result);
629	}
630
631	/// IntCCToAVRCC - Convert a DAG integer condition code to an AVR CC.
632	static AVRCC::CondCodes intCCToAVRCC(ISD::CondCode CC) {
633	switch (CC) {
634	default:
635	llvm_unreachable("Unknown condition code!");
636	case ISD::SETEQ:
637	return AVRCC::COND_EQ;
638	case ISD::SETNE:
639	return AVRCC::COND_NE;
640	case ISD::SETGE:
641	return AVRCC::COND_GE;
642	case ISD::SETLT:
643	return AVRCC::COND_LT;
644	case ISD::SETUGE:
645	return AVRCC::COND_SH;
646	case ISD::SETULT:
647	return AVRCC::COND_LO;
648	}
649	}
650
651	/// Returns appropriate CP/CPI/CPC nodes code for the given 8/16-bit operands.
652	SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS,
653	SelectionDAG &DAG, SDLoc DL) const {
654	assert((LHS.getSimpleValueType() == RHS.getSimpleValueType()) &&
655	"LHS and RHS have different types");
656	assert(((LHS.getSimpleValueType() == MVT::i16) \|\|
657	(LHS.getSimpleValueType() == MVT::i8)) &&
658	"invalid comparison type");
659
660	SDValue Cmp;
661
662	if (LHS.getSimpleValueType() == MVT::i16 && isa<ConstantSDNode>(RHS)) {
663	uint64_t Imm = RHS ->getAsZExtVal();
664	// Generate a CPI/CPC pair if RHS is a 16-bit constant. Use the zero
665	// register for the constant RHS if its lower or higher byte is zero.
666	SDValue LHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
667	DAG.getIntPtrConstant(`0`, DL));
668	SDValue LHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
669	DAG.getIntPtrConstant(`1`, DL));
670	SDValue RHSlo = (Imm & `0xff`) == `0`
671	? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)
672	: DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
673	DAG.getIntPtrConstant(`0`, DL));
674	SDValue RHShi = (Imm & `0xff00`) == `0`
675	? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)
676	: DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
677	DAG.getIntPtrConstant(`1`, DL));
678	Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHSlo, RHSlo);
679	Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp);
680	} else if (RHS.getSimpleValueType() == MVT::i16 && isa<ConstantSDNode>(LHS)) {
681	// Generate a CPI/CPC pair if LHS is a 16-bit constant. Use the zero
682	// register for the constant LHS if its lower or higher byte is zero.
683	uint64_t Imm = LHS ->getAsZExtVal();
684	SDValue LHSlo = (Imm & `0xff`) == `0`
685	? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)
686	: DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
687	DAG.getIntPtrConstant(`0`, DL));
688	SDValue LHShi = (Imm & `0xff00`) == `0`
689	? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)
690	: DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
691	DAG.getIntPtrConstant(`1`, DL));
692	SDValue RHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
693	DAG.getIntPtrConstant(`0`, DL));
694	SDValue RHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
695	DAG.getIntPtrConstant(`1`, DL));
696	Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHSlo, RHSlo);
697	Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp);
698	} else {
699	// Generate ordinary 16-bit comparison.
700	Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHS, RHS);
701	}
702
703	return Cmp;
704	}
705
706	/// Returns appropriate AVR CMP/CMPC nodes and corresponding condition code for
707	/// the given operands.
708	SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
709	SDValue &AVRcc, SelectionDAG &DAG,
710	SDLoc DL) const {
711	SDValue Cmp;
712	EVT VT = LHS.getValueType();
713	bool UseTest = false;
714
715	switch (CC) {
716	default:
717	break;
718	case ISD::SETLE: {
719	// Swap operands and reverse the branching condition.
720	std::swap(a&: LHS, b&: RHS);
721	CC = ISD::SETGE;
722	break;
723	}
724	case ISD::SETGT: {
725	if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: RHS)) {
726	switch (C->getSExtValue()) {
727	case -`1`: {
728	// When doing lhs > -1 use a tst instruction on the top part of lhs
729	// and use brpl instead of using a chain of cp/cpc.
730	UseTest = true;
731	AVRcc = DAG.getConstant(AVRCC::COND_PL, DL, MVT::i8);
732	break;
733	}
734	case `0`: {
735	// Turn lhs > 0 into 0 < lhs since 0 can be materialized with
736	// __zero_reg__ in lhs.
737	RHS = LHS;
738	LHS = DAG.getConstant(Val: `0`, DL, VT);
739	CC = ISD::SETLT;
740	break;
741	}
742	default: {
743	// Turn lhs < rhs with lhs constant into rhs >= lhs+1, this allows
744	// us to fold the constant into the cmp instruction.
745	RHS = DAG.getConstant(Val: C->getSExtValue() + `1`, DL, VT);
746	CC = ISD::SETGE;
747	break;
748	}
749	}
750	break;
751	}
752	// Swap operands and reverse the branching condition.
753	std::swap(a&: LHS, b&: RHS);
754	CC = ISD::SETLT;
755	break;
756	}
757	case ISD::SETLT: {
758	if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: RHS)) {
759	switch (C->getSExtValue()) {
760	case `1`: {
761	// Turn lhs < 1 into 0 >= lhs since 0 can be materialized with
762	// __zero_reg__ in lhs.
763	RHS = LHS;
764	LHS = DAG.getConstant(Val: `0`, DL, VT);
765	CC = ISD::SETGE;
766	break;
767	}
768	case `0`: {
769	// When doing lhs < 0 use a tst instruction on the top part of lhs
770	// and use brmi instead of using a chain of cp/cpc.
771	UseTest = true;
772	AVRcc = DAG.getConstant(AVRCC::COND_MI, DL, MVT::i8);
773	break;
774	}
775	}
776	}
777	break;
778	}
779	case ISD::SETULE: {
780	// Swap operands and reverse the branching condition.
781	std::swap(a&: LHS, b&: RHS);
782	CC = ISD::SETUGE;
783	break;
784	}
785	case ISD::SETUGT: {
786	// Turn lhs < rhs with lhs constant into rhs >= lhs+1, this allows us to
787	// fold the constant into the cmp instruction.
788	if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val&: RHS)) {
789	RHS = DAG.getConstant(Val: C->getSExtValue() + `1`, DL, VT);
790	CC = ISD::SETUGE;
791	break;
792	}
793	// Swap operands and reverse the branching condition.
794	std::swap(a&: LHS, b&: RHS);
795	CC = ISD::SETULT;
796	break;
797	}
798	}
799
800	// Expand 32 and 64 bit comparisons with custom CMP and CMPC nodes instead of
801	// using the default and/or/xor expansion code which is much longer.
802	if (VT == MVT::i32) {
803	SDValue LHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS,
804	DAG.getIntPtrConstant(`0`, DL));
805	SDValue LHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS,
806	DAG.getIntPtrConstant(`1`, DL));
807	SDValue RHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS,
808	DAG.getIntPtrConstant(`0`, DL));
809	SDValue RHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS,
810	DAG.getIntPtrConstant(`1`, DL));
811
812	if (UseTest) {
813	// When using tst we only care about the highest part.
814	SDValue Top = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHShi,
815	DAG.getIntPtrConstant(`1`, DL));
816	Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue, Top);
817	} else {
818	Cmp = getAVRCmp(LHS: LHSlo, RHS: RHSlo, DAG, DL);
819	Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp);
820	}
821	} else if (VT == MVT::i64) {
822	SDValue LHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS,
823	DAG.getIntPtrConstant(`0`, DL));
824	SDValue LHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS,
825	DAG.getIntPtrConstant(`1`, DL));
826
827	SDValue LHS0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_0,
828	DAG.getIntPtrConstant(`0`, DL));
829	SDValue LHS1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_0,
830	DAG.getIntPtrConstant(`1`, DL));
831	SDValue LHS2 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_1,
832	DAG.getIntPtrConstant(`0`, DL));
833	SDValue LHS3 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_1,
834	DAG.getIntPtrConstant(`1`, DL));
835
836	SDValue RHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS,
837	DAG.getIntPtrConstant(`0`, DL));
838	SDValue RHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS,
839	DAG.getIntPtrConstant(`1`, DL));
840
841	SDValue RHS0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_0,
842	DAG.getIntPtrConstant(`0`, DL));
843	SDValue RHS1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_0,
844	DAG.getIntPtrConstant(`1`, DL));
845	SDValue RHS2 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_1,
846	DAG.getIntPtrConstant(`0`, DL));
847	SDValue RHS3 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_1,
848	DAG.getIntPtrConstant(`1`, DL));
849
850	if (UseTest) {
851	// When using tst we only care about the highest part.
852	SDValue Top = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS3,
853	DAG.getIntPtrConstant(`1`, DL));
854	Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue, Top);
855	} else {
856	Cmp = getAVRCmp(LHS: LHS0, RHS: RHS0, DAG, DL);
857	Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS1, RHS1, Cmp);
858	Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS2, RHS2, Cmp);
859	Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS3, RHS3, Cmp);
860	}
861	} else if (VT == MVT::i8 \|\| VT == MVT::i16) {
862	if (UseTest) {
863	// When using tst we only care about the highest part.
864	Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue,
865	(VT == MVT::i8)
866	? LHS
867	: DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8,
868	LHS, DAG.getIntPtrConstant(`1`, DL)));
869	} else {
870	Cmp = getAVRCmp(LHS, RHS, DAG, DL);
871	}
872	} else {
873	llvm_unreachable("Invalid comparison size");
874	}
875
876	// When using a test instruction AVRcc is already set.
877	if (!UseTest) {
878	AVRcc = DAG.getConstant(intCCToAVRCC(CC), DL, MVT::i8);
879	}
880
881	return Cmp;
882	}
883
884	SDValue AVRTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
885	SDValue Chain = Op.getOperand(i: `0`);
886	ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: `1`))->get();
887	SDValue LHS = Op.getOperand(i: `2`);
888	SDValue RHS = Op.getOperand(i: `3`);
889	SDValue Dest = Op.getOperand(i: `4`);
890	SDLoc dl(Op);
891
892	SDValue TargetCC;
893	SDValue Cmp = getAVRCmp(LHS, RHS, CC, AVRcc&: TargetCC, DAG, DL: dl);
894
895	return DAG.getNode(AVRISD::BRCOND, dl, MVT::Other, Chain, Dest, TargetCC,
896	Cmp);
897	}
898
899	SDValue AVRTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
900	SDValue LHS = Op.getOperand(i: `0`);
901	SDValue RHS = Op.getOperand(i: `1`);
902	SDValue TrueV = Op.getOperand(i: `2`);
903	SDValue FalseV = Op.getOperand(i: `3`);
904	ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: `4`))->get();
905	SDLoc dl(Op);
906
907	SDValue TargetCC;
908	SDValue Cmp = getAVRCmp(LHS, RHS, CC, AVRcc&: TargetCC, DAG, DL: dl);
909
910	SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
911	SDValue Ops[] = {TrueV, FalseV, TargetCC, Cmp};
912
913	return DAG.getNode(Opcode: AVRISD::SELECT_CC, DL: dl, VTList: VTs, Ops);
914	}
915
916	SDValue AVRTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
917	SDValue LHS = Op.getOperand(i: `0`);
918	SDValue RHS = Op.getOperand(i: `1`);
919	ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: `2`))->get();
920	SDLoc DL(Op);
921
922	SDValue TargetCC;
923	SDValue Cmp = getAVRCmp(LHS, RHS, CC, AVRcc&: TargetCC, DAG, DL);
924
925	SDValue TrueV = DAG.getConstant(Val: `1`, DL, VT: Op.getValueType());
926	SDValue FalseV = DAG.getConstant(Val: `0`, DL, VT: Op.getValueType());
927	SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
928	SDValue Ops[] = {TrueV, FalseV, TargetCC, Cmp};
929
930	return DAG.getNode(Opcode: AVRISD::SELECT_CC, DL, VTList: VTs, Ops);
931	}
932
933	SDValue AVRTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
934	const MachineFunction &MF = DAG.getMachineFunction();
935	const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
936	const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: `2`))->getValue();
937	auto DL = DAG.getDataLayout();
938	SDLoc dl(Op);
939
940	// Vastart just stores the address of the VarArgsFrameIndex slot into the
941	// memory location argument.
942	SDValue FI = DAG.getFrameIndex(FI: AFI->getVarArgsFrameIndex(), VT: getPointerTy(DL));
943
944	return DAG.getStore(Chain: Op.getOperand(i: `0`), dl, Val: FI, Ptr: Op.getOperand(i: `1`),
945	PtrInfo: MachinePointerInfo (SV));
946	}
947
948	// Modify the existing ISD::INLINEASM node to add the implicit zero register.
949	SDValue AVRTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
950	SDValue ZeroReg = DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8);
951	if (Op.getOperand(i: Op.getNumOperands() - `1`) == ZeroReg \|\|
952	Op.getOperand(i: Op.getNumOperands() - `2`) == ZeroReg) {
953	// Zero register has already been added. Don't add it again.
954	// If this isn't handled, we get called over and over again.
955	return Op;
956	}
957
958	// Get a list of operands to the new INLINEASM node. This is mostly a copy,
959	// with some edits.
960	// Add the following operands at the end (but before the glue node, if it's
961	// there):
962	// - The flags of the implicit zero register operand.
963	// - The implicit zero register operand itself.
964	SDLoc dl(Op);
965	SmallVector<SDValue, `8`> Ops;
966	SDNode *N = Op.getNode();
967	SDValue Glue;
968	for (unsigned I = `0`; I < N->getNumOperands(); I++) {
969	SDValue Operand = N->getOperand(Num: I);
970	if (Operand.getValueType() == MVT::Glue) {
971	// The glue operand always needs to be at the end, so we need to treat it
972	// specially.
973	Glue = Operand;
974	} else {
975	Ops.push_back(Elt: Operand);
976	}
977	}
978	InlineAsm::Flag Flags(InlineAsm::Kind::RegUse, `1`);
979	Ops.push_back(DAG.getTargetConstant(Flags, dl, MVT::i32));
980	Ops.push_back(Elt: ZeroReg);
981	if (Glue) {
982	Ops.push_back(Elt: Glue);
983	}
984
985	// Replace the current INLINEASM node with a new one that has the zero
986	// register as implicit parameter.
987	SDValue New = DAG.getNode(Opcode: N->getOpcode(), DL: dl, VTList: N->getVTList(), Ops);
988	DAG.ReplaceAllUsesOfValueWith(From: Op, To: New);
989	DAG.ReplaceAllUsesOfValueWith(From: Op.getValue(R: `1`), To: New.getValue(R: `1`));
990
991	return New;
992	}
993
994	SDValue AVRTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
995	switch (Op.getOpcode()) {
996	default:
997	llvm_unreachable("Don't know how to custom lower this!");
998	case ISD::SHL:
999	case ISD::SRA:
1000	case ISD::SRL:
1001	case ISD::ROTL:
1002	case ISD::ROTR:
1003	return LowerShifts(Op, DAG);
1004	case ISD::GlobalAddress:
1005	return LowerGlobalAddress(Op, DAG);
1006	case ISD::BlockAddress:
1007	return LowerBlockAddress(Op, DAG);
1008	case ISD::BR_CC:
1009	return LowerBR_CC(Op, DAG);
1010	case ISD::SELECT_CC:
1011	return LowerSELECT_CC(Op, DAG);
1012	case ISD::SETCC:
1013	return LowerSETCC(Op, DAG);
1014	case ISD::VASTART:
1015	return LowerVASTART(Op, DAG);
1016	case ISD::SDIVREM:
1017	case ISD::UDIVREM:
1018	return LowerDivRem(Op, DAG);
1019	case ISD::INLINEASM:
1020	return LowerINLINEASM(Op, DAG);
1021	}
1022
1023	return SDValue ();
1024	}
1025
1026	/// Replace a node with an illegal result type
1027	/// with a new node built out of custom code.
1028	void AVRTargetLowering::ReplaceNodeResults(SDNode *N,
1029	SmallVectorImpl<SDValue> &Results,
1030	SelectionDAG &DAG) const {
1031	SDLoc DL(N);
1032
1033	switch (N->getOpcode()) {
1034	case ISD::ADD: {
1035	// Convert add (x, imm) into sub (x, -imm).
1036	if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: `1`))) {
1037	SDValue Sub = DAG.getNode(
1038	Opcode: ISD::SUB, DL, VT: N->getValueType(ResNo: `0`), N1: N->getOperand(Num: `0`),
1039	N2: DAG.getConstant(Val: -C->getAPIntValue(), DL, VT: C->getValueType(ResNo: `0`)));
1040	Results.push_back(Elt: Sub);
1041	}
1042	break;
1043	}
1044	default: {
1045	SDValue Res = LowerOperation(Op: SDValue (N, `0`), DAG);
1046
1047	for (unsigned I = `0`, E = Res ->getNumValues(); I != E; ++I)
1048	Results.push_back(Elt: Res.getValue(R: I));
1049
1050	break;
1051	}
1052	}
1053	}
1054
1055	/// Return true if the addressing mode represented
1056	/// by AM is legal for this target, for a load/store of the specified type.
1057	bool AVRTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1058	const AddrMode &AM, Type *Ty,
1059	unsigned AS,
1060	Instruction I) const* {
1061	int64_t Offs = AM.BaseOffs;
1062
1063	// Allow absolute addresses.
1064	if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == `0` && Offs == `0`) {
1065	return true;
1066	}
1067
1068	// Flash memory instructions only allow zero offsets.
1069	if (isa<PointerType>(Val: Ty) && AS == AVR::ProgramMemory) {
1070	return false;
1071	}
1072
1073	// Allow reg+<6bit> offset.
1074	if (Offs < `0`)
1075	Offs = -Offs;
1076	if (AM.BaseGV == nullptr && AM.HasBaseReg && AM.Scale == `0` &&
1077	isUInt<`6`>(x: Offs)) {
1078	return true;
1079	}
1080
1081	return false;
1082	}
1083
1084	/// Returns true by value, base pointer and
1085	/// offset pointer and addressing mode by reference if the node's address
1086	/// can be legally represented as pre-indexed load / store address.
1087	bool AVRTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
1088	SDValue &Offset,
1089	ISD::MemIndexedMode &AM,
1090	SelectionDAG &DAG) const {
1091	EVT VT;
1092	const SDNode *Op;
1093	SDLoc DL(N);
1094
1095	if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) {
1096	VT = LD->getMemoryVT();
1097	Op = LD->getBasePtr().getNode();
1098	if (LD->getExtensionType() != ISD::NON_EXTLOAD)
1099	return false;
1100	if (AVR::isProgramMemoryAccess(N: LD)) {
1101	return false;
1102	}
1103	} else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(Val: N)) {
1104	VT = ST->getMemoryVT();
1105	Op = ST->getBasePtr().getNode();
1106	if (AVR::isProgramMemoryAccess(N: ST)) {
1107	return false;
1108	}
1109	} else {
1110	return false;
1111	}
1112
1113	if (VT != MVT::i8 && VT != MVT::i16) {
1114	return false;
1115	}
1116
1117	if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) {
1118	return false;
1119	}
1120
1121	if (const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: Op->getOperand(Num: `1`))) {
1122	int RHSC = RHS->getSExtValue();
1123	if (Op->getOpcode() == ISD::SUB)
1124	RHSC = -RHSC;
1125
1126	if ((VT == MVT::i16 && RHSC != -`2`) \|\| (VT == MVT::i8 && RHSC != -`1`)) {
1127	return false;
1128	}
1129
1130	Base = Op->getOperand(Num: `0`);
1131	Offset = DAG.getConstant(RHSC, DL, MVT::i8);
1132	AM = ISD::PRE_DEC;
1133
1134	return true;
1135	}
1136
1137	return false;
1138	}
1139
1140	/// Returns true by value, base pointer and
1141	/// offset pointer and addressing mode by reference if this node can be
1142	/// combined with a load / store to form a post-indexed load / store.
1143	bool AVRTargetLowering::getPostIndexedAddressParts(SDNode N, SDNode Op,
1144	SDValue &Base,
1145	SDValue &Offset,
1146	ISD::MemIndexedMode &AM,
1147	SelectionDAG &DAG) const {
1148	EVT VT;
1149	SDLoc DL(N);
1150
1151	if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N)) {
1152	VT = LD->getMemoryVT();
1153	if (LD->getExtensionType() != ISD::NON_EXTLOAD)
1154	return false;
1155	} else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(Val: N)) {
1156	VT = ST->getMemoryVT();
1157	// We can not store to program memory.
1158	if (AVR::isProgramMemoryAccess(N: ST))
1159	return false;
1160	// Since the high byte need to be stored first, we can not emit
1161	// i16 post increment store like:
1162	// st X+, r24
1163	// st X+, r25
1164	if (VT == MVT::i16 && !Subtarget.hasLowByteFirst())
1165	return false;
1166	} else {
1167	return false;
1168	}
1169
1170	if (VT != MVT::i8 && VT != MVT::i16) {
1171	return false;
1172	}
1173
1174	if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) {
1175	return false;
1176	}
1177
1178	if (const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Val: Op->getOperand(Num: `1`))) {
1179	int RHSC = RHS->getSExtValue();
1180	if (Op->getOpcode() == ISD::SUB)
1181	RHSC = -RHSC;
1182	if ((VT == MVT::i16 && RHSC != `2`) \|\| (VT == MVT::i8 && RHSC != `1`)) {
1183	return false;
1184	}
1185
1186	// FIXME: We temporarily disable post increment load from program memory,
1187	// due to bug https://github.com/llvm/llvm-project/issues/59914.
1188	if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(Val: N))
1189	if (AVR::isProgramMemoryAccess(N: LD))
1190	return false;
1191
1192	Base = Op->getOperand(Num: `0`);
1193	Offset = DAG.getConstant(RHSC, DL, MVT::i8);
1194	AM = ISD::POST_INC;
1195
1196	return true;
1197	}
1198
1199	return false;
1200	}
1201
1202	bool AVRTargetLowering::isOffsetFoldingLegal(
1203	const GlobalAddressSDNode GA) const* {
1204	return true;
1205	}
1206
1207	//===----------------------------------------------------------------------===//
1208	// Formal Arguments Calling Convention Implementation
1209	//===----------------------------------------------------------------------===//
1210
1211	#include "AVRGenCallingConv.inc"
1212
1213	/// Registers for calling conventions, ordered in reverse as required by ABI.
1214	/// Both arrays must be of the same length.
1215	static const MCPhysReg RegList8AVR[] = {
1216	AVR::R25, AVR::R24, AVR::R23, AVR::R22, AVR::R21, AVR::R20,
1217	AVR::R19, AVR::R18, AVR::R17, AVR::R16, AVR::R15, AVR::R14,
1218	AVR::R13, AVR::R12, AVR::R11, AVR::R10, AVR::R9, AVR::R8};
1219	static const MCPhysReg RegList8Tiny[] = {AVR::R25, AVR::R24, AVR::R23,
1220	AVR::R22, AVR::R21, AVR::R20};
1221	static const MCPhysReg RegList16AVR[] = {
1222	AVR::R26R25, AVR::R25R24, AVR::R24R23, AVR::R23R22, AVR::R22R21,
1223	AVR::R21R20, AVR::R20R19, AVR::R19R18, AVR::R18R17, AVR::R17R16,
1224	AVR::R16R15, AVR::R15R14, AVR::R14R13, AVR::R13R12, AVR::R12R11,
1225	AVR::R11R10, AVR::R10R9, AVR::R9R8};
1226	static const MCPhysReg RegList16Tiny[] = {AVR::R26R25, AVR::R25R24,
1227	AVR::R24R23, AVR::R23R22,
1228	AVR::R22R21, AVR::R21R20};
1229
1230	static_assert(std::size(RegList8AVR) == std::size(RegList16AVR),
1231	"8-bit and 16-bit register arrays must be of equal length");
1232	static_assert(std::size(RegList8Tiny) == std::size(RegList16Tiny),
1233	"8-bit and 16-bit register arrays must be of equal length");
1234
1235	/// Analyze incoming and outgoing function arguments. We need custom C++ code
1236	/// to handle special constraints in the ABI.
1237	/// In addition, all pieces of a certain argument have to be passed either
1238	/// using registers or the stack but never mixing both.
1239	template <typename ArgT>
1240	static void analyzeArguments(TargetLowering::CallLoweringInfo *CLI,
1241	const Function F, const* DataLayout *TD,
1242	const SmallVectorImpl<ArgT> &Args,
1243	SmallVectorImpl<CCValAssign> &ArgLocs,
1244	CCState &CCInfo, bool Tiny) {
1245	// Choose the proper register list for argument passing according to the ABI.
1246	ArrayRef<MCPhysReg> RegList8;
1247	ArrayRef<MCPhysReg> RegList16;
1248	if (Tiny) {
1249	RegList8 = ArrayRef(RegList8Tiny);
1250	RegList16 = ArrayRef(RegList16Tiny);
1251	} else {
1252	RegList8 = ArrayRef(RegList8AVR);
1253	RegList16 = ArrayRef(RegList16AVR);
1254	}
1255
1256	unsigned NumArgs = Args.size();
1257	// This is the index of the last used register, in RegList.*
1258	// -1 means R26 (R26 is never actually used in CC).
1259	int RegLastIdx = -`1`;
1260	// Once a value is passed to the stack it will always be used
1261	bool UseStack = false;
1262	for (unsigned i = `0`; i != NumArgs;) {
1263	MVT VT = Args[i].VT;
1264	// We have to count the number of bytes for each function argument, that is
1265	// those Args with the same OrigArgIndex. This is important in case the
1266	// function takes an aggregate type.
1267	// Current argument will be between [i..j).
1268	unsigned ArgIndex = Args[i].OrigArgIndex;
1269	unsigned TotalBytes = VT.getStoreSize();
1270	unsigned j = i + `1`;
1271	for (; j != NumArgs; ++j) {
1272	if (Args[j].OrigArgIndex != ArgIndex)
1273	break;
1274	TotalBytes += Args[j].VT.getStoreSize();
1275	}
1276	// Round up to even number of bytes.
1277	TotalBytes = alignTo(Value: TotalBytes, Align: `2`);
1278	// Skip zero sized arguments
1279	if (TotalBytes == `0`)
1280	continue;
1281	// The index of the first register to be used
1282	unsigned RegIdx = RegLastIdx + TotalBytes;
1283	RegLastIdx = RegIdx;
1284	// If there are not enough registers, use the stack
1285	if (RegIdx >= RegList8.size()) {
1286	UseStack = true;
1287	}
1288	for (; i != j; ++i) {
1289	MVT VT = Args[i].VT;
1290
1291	if (UseStack) {
1292	auto evt = EVT (VT).getTypeForEVT(Context&: CCInfo.getContext());
1293	unsigned Offset = CCInfo.AllocateStack(Size: TD->getTypeAllocSize(Ty: evt),
1294	Alignment: TD->getABITypeAlign(Ty: evt));
1295	CCInfo.addLoc(
1296	V: CCValAssign::getMem(ValNo: i, ValVT: VT, Offset, LocVT: VT, HTP: CCValAssign::Full));
1297	} else {
1298	unsigned Reg;
1299	if (VT == MVT::i8) {
1300	Reg = CCInfo.AllocateReg(Reg: RegList8 [RegIdx]);
1301	} else if (VT == MVT::i16) {
1302	Reg = CCInfo.AllocateReg(Reg: RegList16 [RegIdx]);
1303	} else {
1304	llvm_unreachable(
1305	"calling convention can only manage i8 and i16 types");
1306	}
1307	assert(Reg && "register not available in calling convention");
1308	CCInfo.addLoc(V: CCValAssign::getReg(ValNo: i, ValVT: VT, RegNo: Reg, LocVT: VT, HTP: CCValAssign::Full));
1309	// Registers inside a particular argument are sorted in increasing order
1310	// (remember the array is reversed).
1311	RegIdx -= VT.getStoreSize();
1312	}
1313	}
1314	}
1315	}
1316
1317	/// Count the total number of bytes needed to pass or return these arguments.
1318	template <typename ArgT>
1319	static unsigned
1320	getTotalArgumentsSizeInBytes(const SmallVectorImpl<ArgT> &Args) {
1321	unsigned TotalBytes = `0`;
1322
1323	for (const ArgT &Arg : Args) {
1324	TotalBytes += Arg.VT.getStoreSize();
1325	}
1326	return TotalBytes;
1327	}
1328
1329	/// Analyze incoming and outgoing value of returning from a function.
1330	/// The algorithm is similar to analyzeArguments, but there can only be
1331	/// one value, possibly an aggregate, and it is limited to 8 bytes.
1332	template <typename ArgT>
1333	static void analyzeReturnValues(const SmallVectorImpl<ArgT> &Args,
1334	CCState &CCInfo, bool Tiny) {
1335	unsigned NumArgs = Args.size();
1336	unsigned TotalBytes = getTotalArgumentsSizeInBytes(Args);
1337	// CanLowerReturn() guarantees this assertion.
1338	if (Tiny)
1339	assert(TotalBytes <= `4` &&
1340	"return values greater than 4 bytes cannot be lowered on AVRTiny");
1341	else
1342	assert(TotalBytes <= `8` &&
1343	"return values greater than 8 bytes cannot be lowered on AVR");
1344
1345	// Choose the proper register list for argument passing according to the ABI.
1346	ArrayRef<MCPhysReg> RegList8;
1347	ArrayRef<MCPhysReg> RegList16;
1348	if (Tiny) {
1349	RegList8 = ArrayRef(RegList8Tiny, std::size(RegList8Tiny));
1350	RegList16 = ArrayRef(RegList16Tiny, std::size(RegList16Tiny));
1351	} else {
1352	RegList8 = ArrayRef(RegList8AVR, std::size(RegList8AVR));
1353	RegList16 = ArrayRef(RegList16AVR, std::size(RegList16AVR));
1354	}
1355
1356	// GCC-ABI says that the size is rounded up to the next even number,
1357	// but actually once it is more than 4 it will always round up to 8.
1358	if (TotalBytes > `4`) {
1359	TotalBytes = `8`;
1360	} else {
1361	TotalBytes = alignTo(Value: TotalBytes, Align: `2`);
1362	}
1363
1364	// The index of the first register to use.
1365	int RegIdx = TotalBytes - `1`;
1366	for (unsigned i = `0`; i != NumArgs; ++i) {
1367	MVT VT = Args[i].VT;
1368	unsigned Reg;
1369	if (VT == MVT::i8) {
1370	Reg = CCInfo.AllocateReg(Reg: RegList8 [RegIdx]);
1371	} else if (VT == MVT::i16) {
1372	Reg = CCInfo.AllocateReg(Reg: RegList16 [RegIdx]);
1373	} else {
1374	llvm_unreachable("calling convention can only manage i8 and i16 types");
1375	}
1376	assert(Reg && "register not available in calling convention");
1377	CCInfo.addLoc(V: CCValAssign::getReg(ValNo: i, ValVT: VT, RegNo: Reg, LocVT: VT, HTP: CCValAssign::Full));
1378	// Registers sort in increasing order
1379	RegIdx -= VT.getStoreSize();
1380	}
1381	}
1382
1383	SDValue AVRTargetLowering::LowerFormalArguments(
1384	SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1385	const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1386	SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1387	MachineFunction &MF = DAG.getMachineFunction();
1388	MachineFrameInfo &MFI = MF.getFrameInfo();
1389	auto DL = DAG.getDataLayout();
1390
1391	// Assign locations to all of the incoming arguments.
1392	SmallVector<CCValAssign, `16`> ArgLocs;
1393	CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1394	*DAG.getContext());
1395
1396	// Variadic functions do not need all the analysis below.
1397	if (isVarArg) {
1398	CCInfo.AnalyzeFormalArguments(Ins, ArgCC_AVR_Vararg);
1399	} else {
1400	analyzeArguments(nullptr, &MF.getFunction(), &DL, Ins, ArgLocs, CCInfo,
1401	Subtarget.hasTinyEncoding());
1402	}
1403
1404	SDValue ArgValue;
1405	for (CCValAssign &VA : ArgLocs) {
1406
1407	// Arguments stored on registers.
1408	if (VA.isRegLoc()) {
1409	EVT RegVT = VA.getLocVT();
1410	const TargetRegisterClass *RC;
1411	if (RegVT == MVT::i8) {
1412	RC = &AVR::GPR8RegClass;
1413	} else if (RegVT == MVT::i16) {
1414	RC = &AVR::DREGSRegClass;
1415	} else {
1416	llvm_unreachable("Unknown argument type!");
1417	}
1418
1419	Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1420	ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
1421
1422	// :NOTE: Clang should not promote any i8 into i16 but for safety the
1423	// following code will handle zexts or sexts generated by other
1424	// front ends. Otherwise:
1425	// If this is an 8 bit value, it is really passed promoted
1426	// to 16 bits. Insert an assert[sz]ext to capture this, then
1427	// truncate to the right size.
1428	switch (VA.getLocInfo()) {
1429	default:
1430	llvm_unreachable("Unknown loc info!");
1431	case CCValAssign::Full:
1432	break;
1433	case CCValAssign::BCvt:
1434	ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
1435	break;
1436	case CCValAssign::SExt:
1437	ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
1438	DAG.getValueType(VA.getValVT()));
1439	ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
1440	break;
1441	case CCValAssign::ZExt:
1442	ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
1443	DAG.getValueType(VA.getValVT()));
1444	ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
1445	break;
1446	}
1447
1448	InVals.push_back(ArgValue);
1449	} else {
1450	// Only arguments passed on the stack should make it here.
1451	assert(VA.isMemLoc());
1452
1453	EVT LocVT = VA.getLocVT();
1454
1455	// Create the frame index object for this incoming parameter.
1456	int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / `8`,
1457	VA.getLocMemOffset(), true);
1458
1459	// Create the SelectionDAG nodes corresponding to a load
1460	// from this parameter.
1461	SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DL));
1462	InVals.push_back(DAG.getLoad(LocVT, dl, Chain, FIN,
1463	MachinePointerInfo::getFixedStack(MF, FI)));
1464	}
1465	}
1466
1467	// If the function takes variable number of arguments, make a frame index for
1468	// the start of the first vararg value... for expansion of llvm.va_start.
1469	if (isVarArg) {
1470	unsigned StackSize = CCInfo.getStackSize();
1471	AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
1472
1473	AFI->setVarArgsFrameIndex(MFI.CreateFixedObject(Size: `2`, SPOffset: StackSize, IsImmutable: true));
1474	}
1475
1476	return Chain;
1477	}
1478
1479	//===----------------------------------------------------------------------===//
1480	// Call Calling Convention Implementation
1481	//===----------------------------------------------------------------------===//
1482
1483	SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1484	SmallVectorImpl<SDValue> &InVals) const {
1485	SelectionDAG &DAG = CLI.DAG;
1486	SDLoc &DL = CLI.DL;
1487	SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1488	SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1489	SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1490	SDValue Chain = CLI.Chain;
1491	SDValue Callee = CLI.Callee;
1492	bool &isTailCall = CLI.IsTailCall;
1493	CallingConv::ID CallConv = CLI.CallConv;
1494	bool isVarArg = CLI.IsVarArg;
1495
1496	MachineFunction &MF = DAG.getMachineFunction();
1497
1498	// AVR does not yet support tail call optimization.
1499	isTailCall = false;
1500
1501	// Analyze operands of the call, assigning locations to each operand.
1502	SmallVector<CCValAssign, `16`> ArgLocs;
1503	CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1504	*DAG.getContext());
1505
1506	// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1507	// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1508	// node so that legalize doesn't hack it.
1509	const Function F = nullptr*;
1510	if (const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1511	const GlobalValue *GV = G->getGlobal();
1512	if (isa<Function>(GV))
1513	F = cast<Function>(GV);
1514	Callee =
1515	DAG.getTargetGlobalAddress(GV, DL, VT: getPointerTy(DL: DAG.getDataLayout()));
1516	} else if (const ExternalSymbolSDNode *ES =
1517	dyn_cast<ExternalSymbolSDNode>(Callee)) {
1518	Callee = DAG.getTargetExternalSymbol(Sym: ES->getSymbol(),
1519	VT: getPointerTy(DL: DAG.getDataLayout()));
1520	}
1521
1522	// Variadic functions do not need all the analysis below.
1523	if (isVarArg) {
1524	CCInfo.AnalyzeCallOperands(Outs, ArgCC_AVR_Vararg);
1525	} else {
1526	analyzeArguments(&CLI, F, &DAG.getDataLayout(), Outs, ArgLocs, CCInfo,
1527	Subtarget.hasTinyEncoding());
1528	}
1529
1530	// Get a count of how many bytes are to be pushed on the stack.
1531	unsigned NumBytes = CCInfo.getStackSize();
1532
1533	Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: `0`, DL);
1534
1535	SmallVector<std::pair<unsigned, SDValue>, `8`> RegsToPass;
1536
1537	// First, walk the register assignments, inserting copies.
1538	unsigned AI, AE;
1539	bool HasStackArgs = false;
1540	for (AI = `0`, AE = ArgLocs.size(); AI != AE; ++AI) {
1541	CCValAssign &VA = ArgLocs[AI];
1542	EVT RegVT = VA.getLocVT();
1543	SDValue Arg = OutVals [AI];
1544
1545	// Promote the value if needed. With Clang this should not happen.
1546	switch (VA.getLocInfo()) {
1547	default:
1548	llvm_unreachable("Unknown loc info!");
1549	case CCValAssign::Full:
1550	break;
1551	case CCValAssign::SExt:
1552	Arg = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: RegVT, Operand: Arg);
1553	break;
1554	case CCValAssign::ZExt:
1555	Arg = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: RegVT, Operand: Arg);
1556	break;
1557	case CCValAssign::AExt:
1558	Arg = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: RegVT, Operand: Arg);
1559	break;
1560	case CCValAssign::BCvt:
1561	Arg = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: RegVT, Operand: Arg);
1562	break;
1563	}
1564
1565	// Stop when we encounter a stack argument, we need to process them
1566	// in reverse order in the loop below.
1567	if (VA.isMemLoc()) {
1568	HasStackArgs = true;
1569	break;
1570	}
1571
1572	// Arguments that can be passed on registers must be kept in the RegsToPass
1573	// vector.
1574	RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1575	}
1576
1577	// Second, stack arguments have to walked.
1578	// Previously this code created chained stores but those chained stores appear
1579	// to be unchained in the legalization phase. Therefore, do not attempt to
1580	// chain them here. In fact, chaining them here somehow causes the first and
1581	// second store to be reversed which is the exact opposite of the intended
1582	// effect.
1583	if (HasStackArgs) {
1584	SmallVector<SDValue, `8`> MemOpChains;
1585	for (; AI != AE; AI++) {
1586	CCValAssign &VA = ArgLocs[AI];
1587	SDValue Arg = OutVals [AI];
1588
1589	assert(VA.isMemLoc());
1590
1591	// SP points to one stack slot further so add one to adjust it.
1592	SDValue PtrOff = DAG.getNode(
1593	ISD::ADD, DL, getPointerTy(DAG.getDataLayout()),
1594	DAG.getRegister(AVR::SP, getPointerTy(DAG.getDataLayout())),
1595	DAG.getIntPtrConstant(VA.getLocMemOffset() + `1`, DL));
1596
1597	MemOpChains.push_back(
1598	Elt: DAG.getStore(Chain, dl: DL, Val: Arg, Ptr: PtrOff,
1599	PtrInfo: MachinePointerInfo::getStack(MF, Offset: VA.getLocMemOffset())));
1600	}
1601
1602	if (!MemOpChains.empty())
1603	Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
1604	}
1605
1606	// Build a sequence of copy-to-reg nodes chained together with token chain and
1607	// flag operands which copy the outgoing args into registers. The InGlue in
1608	// necessary since all emited instructions must be stuck together.
1609	SDValue InGlue;
1610	for (auto Reg : RegsToPass) {
1611	Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, InGlue);
1612	InGlue = Chain.getValue(`1`);
1613	}
1614
1615	// Returns a chain & a flag for retval copy to use.
1616	SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1617	SmallVector<SDValue, `8`> Ops;
1618	Ops.push_back(Elt: Chain);
1619	Ops.push_back(Elt: Callee);
1620
1621	// Add argument registers to the end of the list so that they are known live
1622	// into the call.
1623	for (auto Reg : RegsToPass) {
1624	Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
1625	}
1626
1627	// The zero register (usually R1) must be passed as an implicit register so
1628	// that this register is correctly zeroed in interrupts.
1629	Ops.push_back(DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8));
1630
1631	// Add a register mask operand representing the call-preserved registers.
1632	const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1633	const uint32_t *Mask =
1634	TRI->getCallPreservedMask(MF: DAG.getMachineFunction(), CallConv);
1635	assert(Mask && "Missing call preserved mask for calling convention");
1636	Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));
1637
1638	if (InGlue.getNode()) {
1639	Ops.push_back(Elt: InGlue);
1640	}
1641
1642	Chain = DAG.getNode(AVRISD::CALL, DL, NodeTys, Ops);
1643	InGlue = Chain.getValue(R: `1`);
1644
1645	// Create the CALLSEQ_END node.
1646	Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: `0`, Glue: InGlue, DL);
1647
1648	if (!Ins.empty()) {
1649	InGlue = Chain.getValue(R: `1`);
1650	}
1651
1652	// Handle result values, copying them out of physregs into vregs that we
1653	// return.
1654	return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl: DL, DAG,
1655	InVals);
1656	}
1657
1658	/// Lower the result values of a call into the
1659	/// appropriate copies out of appropriate physical registers.
1660	///
1661	SDValue AVRTargetLowering::LowerCallResult(
1662	SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
1663	const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1664	SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1665
1666	// Assign locations to each value returned by this call.
1667	SmallVector<CCValAssign, `16`> RVLocs;
1668	CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1669	*DAG.getContext());
1670
1671	// Handle runtime calling convs.
1672	if (CallConv == CallingConv::AVR_BUILTIN) {
1673	CCInfo.AnalyzeCallResult(Ins, RetCC_AVR_BUILTIN);
1674	} else {
1675	analyzeReturnValues(Ins, CCInfo, Subtarget.hasTinyEncoding());
1676	}
1677
1678	// Copy all of the result registers out of their specified physreg.
1679	for (CCValAssign const &RVLoc : RVLocs) {
1680	Chain = DAG.getCopyFromReg(Chain, dl, RVLoc.getLocReg(), RVLoc.getValVT(),
1681	InGlue)
1682	.getValue(`1`);
1683	InGlue = Chain.getValue(`2`);
1684	InVals.push_back(Chain.getValue(`0`));
1685	}
1686
1687	return Chain;
1688	}
1689
1690	//===----------------------------------------------------------------------===//
1691	// Return Value Calling Convention Implementation
1692	//===----------------------------------------------------------------------===//
1693
1694	bool AVRTargetLowering::CanLowerReturn(
1695	CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
1696	const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
1697	if (CallConv == CallingConv::AVR_BUILTIN) {
1698	SmallVector<CCValAssign, `16`> RVLocs;
1699	CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
1700	return CCInfo.CheckReturn(Outs, RetCC_AVR_BUILTIN);
1701	}
1702
1703	unsigned TotalBytes = getTotalArgumentsSizeInBytes(Outs);
1704	return TotalBytes <= (unsigned)(Subtarget.hasTinyEncoding() ? `4` : `8`);
1705	}
1706
1707	SDValue
1708	AVRTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
1709	bool isVarArg,
1710	const SmallVectorImpl<ISD::OutputArg> &Outs,
1711	const SmallVectorImpl<SDValue> &OutVals,
1712	const SDLoc &dl, SelectionDAG &DAG) const {
1713	// CCValAssign - represent the assignment of the return value to locations.
1714	SmallVector<CCValAssign, `16`> RVLocs;
1715
1716	// CCState - Info about the registers and stack slot.
1717	CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1718	*DAG.getContext());
1719
1720	MachineFunction &MF = DAG.getMachineFunction();
1721
1722	// Analyze return values.
1723	if (CallConv == CallingConv::AVR_BUILTIN) {
1724	CCInfo.AnalyzeReturn(Outs, RetCC_AVR_BUILTIN);
1725	} else {
1726	analyzeReturnValues(Outs, CCInfo, Subtarget.hasTinyEncoding());
1727	}
1728
1729	SDValue Glue;
1730	SmallVector<SDValue, `4`> RetOps(`1`, Chain);
1731	// Copy the result values into the output registers.
1732	for (unsigned i = `0`, e = RVLocs.size(); i != e; ++i) {
1733	CCValAssign &VA = RVLocs[i];
1734	assert(VA.isRegLoc() && "Can only return in registers!");
1735
1736	Chain = DAG.getCopyToReg(Chain, dl, Reg: VA.getLocReg(), N: OutVals [i], Glue);
1737
1738	// Guarantee that all emitted copies are stuck together with flags.
1739	Glue = Chain.getValue(R: `1`);
1740	RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
1741	}
1742
1743	// Don't emit the ret/reti instruction when the naked attribute is present in
1744	// the function being compiled.
1745	if (MF.getFunction().getAttributes().hasFnAttr(Attribute::Naked)) {
1746	return Chain;
1747	}
1748
1749	const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
1750
1751	if (!AFI->isInterruptOrSignalHandler()) {
1752	// The return instruction has an implicit zero register operand: it must
1753	// contain zero on return.
1754	// This is not needed in interrupts however, where the zero register is
1755	// handled specially (only pushed/popped when needed).
1756	RetOps.push_back(DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8));
1757	}
1758
1759	unsigned RetOpc =
1760	AFI->isInterruptOrSignalHandler() ? AVRISD::RETI_GLUE : AVRISD::RET_GLUE;
1761
1762	RetOps [`0`] = Chain; // Update chain.
1763
1764	if (Glue.getNode()) {
1765	RetOps.push_back(Elt: Glue);
1766	}
1767
1768	return DAG.getNode(RetOpc, dl, MVT::Other, RetOps);
1769	}
1770
1771	//===----------------------------------------------------------------------===//
1772	// Custom Inserters
1773	//===----------------------------------------------------------------------===//
1774
1775	MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
1776	MachineBasicBlock *BB,
1777	bool Tiny) const {
1778	unsigned Opc;
1779	const TargetRegisterClass *RC;
1780	bool HasRepeatedOperand = false;
1781	MachineFunction *F = BB->getParent();
1782	MachineRegisterInfo &RI = F->getRegInfo();
1783	const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
1784	DebugLoc dl = MI.getDebugLoc();
1785
1786	switch (MI.getOpcode()) {
1787	default:
1788	llvm_unreachable("Invalid shift opcode!");
1789	case AVR::Lsl8:
1790	Opc = AVR::ADDRdRr; // LSL is an alias of ADD Rd, Rd
1791	RC = &AVR::GPR8RegClass;
1792	HasRepeatedOperand = true;
1793	break;
1794	case AVR::Lsl16:
1795	Opc = AVR::LSLWRd;
1796	RC = &AVR::DREGSRegClass;
1797	break;
1798	case AVR::Asr8:
1799	Opc = AVR::ASRRd;
1800	RC = &AVR::GPR8RegClass;
1801	break;
1802	case AVR::Asr16:
1803	Opc = AVR::ASRWRd;
1804	RC = &AVR::DREGSRegClass;
1805	break;
1806	case AVR::Lsr8:
1807	Opc = AVR::LSRRd;
1808	RC = &AVR::GPR8RegClass;
1809	break;
1810	case AVR::Lsr16:
1811	Opc = AVR::LSRWRd;
1812	RC = &AVR::DREGSRegClass;
1813	break;
1814	case AVR::Rol8:
1815	Opc = Tiny ? AVR::ROLBRdR17 : AVR::ROLBRdR1;
1816	RC = &AVR::GPR8RegClass;
1817	break;
1818	case AVR::Rol16:
1819	Opc = AVR::ROLWRd;
1820	RC = &AVR::DREGSRegClass;
1821	break;
1822	case AVR::Ror8:
1823	Opc = AVR::RORBRd;
1824	RC = &AVR::GPR8RegClass;
1825	break;
1826	case AVR::Ror16:
1827	Opc = AVR::RORWRd;
1828	RC = &AVR::DREGSRegClass;
1829	break;
1830	}
1831
1832	const BasicBlock *LLVM_BB = BB->getBasicBlock();
1833
1834	MachineFunction::iterator I;
1835	for (I = BB->getIterator(); I != F->end() && &(*I) != BB; ++I)
1836	;
1837	if (I != F->end())
1838	++I;
1839
1840	// Create loop block.
1841	MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
1842	MachineBasicBlock *CheckBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
1843	MachineBasicBlock *RemBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
1844
1845	F->insert(MBBI: I, MBB: LoopBB);
1846	F->insert(MBBI: I, MBB: CheckBB);
1847	F->insert(MBBI: I, MBB: RemBB);
1848
1849	// Update machine-CFG edges by transferring all successors of the current
1850	// block to the block containing instructions after shift.
1851	RemBB->splice(RemBB->begin(), BB, std::next(MachineBasicBlock::iterator (MI)),
1852	BB->end());
1853	RemBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
1854
1855	// Add edges BB => LoopBB => CheckBB => RemBB, CheckBB => LoopBB.
1856	BB->addSuccessor(Succ: CheckBB);
1857	LoopBB->addSuccessor(Succ: CheckBB);
1858	CheckBB->addSuccessor(Succ: LoopBB);
1859	CheckBB->addSuccessor(Succ: RemBB);
1860
1861	Register ShiftAmtReg = RI.createVirtualRegister(&AVR::GPR8RegClass);
1862	Register ShiftAmtReg2 = RI.createVirtualRegister(&AVR::GPR8RegClass);
1863	Register ShiftReg = RI.createVirtualRegister(RegClass: RC);
1864	Register ShiftReg2 = RI.createVirtualRegister(RegClass: RC);
1865	Register ShiftAmtSrcReg = MI.getOperand(i: `2`).getReg();
1866	Register SrcReg = MI.getOperand(i: `1`).getReg();
1867	Register DstReg = MI.getOperand(i: `0`).getReg();
1868
1869	// BB:
1870	// rjmp CheckBB
1871	BuildMI(BB, dl, TII.get(AVR::RJMPk)).addMBB(CheckBB);
1872
1873	// LoopBB:
1874	// ShiftReg2 = shift ShiftReg
1875	auto ShiftMI = BuildMI(BB: LoopBB, MIMD: dl, MCID: TII.get(Opcode: Opc), DestReg: ShiftReg2).addReg(RegNo: ShiftReg);
1876	if (HasRepeatedOperand)
1877	ShiftMI.addReg(ShiftReg);
1878
1879	// CheckBB:
1880	// ShiftReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB]
1881	// ShiftAmt = phi [%N, BB], [%ShiftAmt2, LoopBB]
1882	// DestReg = phi [%SrcReg, BB], [%ShiftReg, LoopBB]
1883	// ShiftAmt2 = ShiftAmt - 1;
1884	// if (ShiftAmt2 >= 0) goto LoopBB;
1885	BuildMI(CheckBB, dl, TII.get(AVR::PHI), ShiftReg)
1886	.addReg(SrcReg)
1887	.addMBB(BB)
1888	.addReg(ShiftReg2)
1889	.addMBB(LoopBB);
1890	BuildMI(CheckBB, dl, TII.get(AVR::PHI), ShiftAmtReg)
1891	.addReg(ShiftAmtSrcReg)
1892	.addMBB(BB)
1893	.addReg(ShiftAmtReg2)
1894	.addMBB(LoopBB);
1895	BuildMI(CheckBB, dl, TII.get(AVR::PHI), DstReg)
1896	.addReg(SrcReg)
1897	.addMBB(BB)
1898	.addReg(ShiftReg2)
1899	.addMBB(LoopBB);
1900
1901	BuildMI(CheckBB, dl, TII.get(AVR::DECRd), ShiftAmtReg2).addReg(ShiftAmtReg);
1902	BuildMI(CheckBB, dl, TII.get(AVR::BRPLk)).addMBB(LoopBB);
1903
1904	MI.eraseFromParent(); // The pseudo instruction is gone now.
1905	return RemBB;
1906	}
1907
1908	// Do a multibyte AVR shift. Insert shift instructions and put the output
1909	// registers in the Regs array.
1910	// Because AVR does not have a normal shift instruction (only a single bit shift
1911	// instruction), we have to emulate this behavior with other instructions.
1912	// It first tries large steps (moving registers around) and then smaller steps
1913	// like single bit shifts.
1914	// Large shifts actually reduce the number of shifted registers, so the below
1915	// algorithms have to work independently of the number of registers that are
1916	// shifted.
1917	// For more information and background, see this blogpost:
1918	// https://aykevl.nl/2021/02/avr-bitshift
1919	static void insertMultibyteShift(MachineInstr &MI, MachineBasicBlock *BB,
1920	MutableArrayRef<std::pair<Register, int>> Regs,
1921	ISD::NodeType Opc, int64_t ShiftAmt) {
1922	const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
1923	const AVRSubtarget &STI = BB->getParent()->getSubtarget<AVRSubtarget>();
1924	MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
1925	const DebugLoc &dl = MI.getDebugLoc();
1926
1927	const bool ShiftLeft = Opc == ISD::SHL;
1928	const bool ArithmeticShift = Opc == ISD::SRA;
1929
1930	// Zero a register, for use in later operations.
1931	Register ZeroReg = MRI.createVirtualRegister(&AVR::GPR8RegClass);
1932	BuildMI(*BB, MI, dl, TII.get(AVR::COPY), ZeroReg)
1933	.addReg(STI.getZeroRegister());
1934
1935	// Do a shift modulo 6 or 7. This is a bit more complicated than most shifts
1936	// and is hard to compose with the rest, so these are special cased.
1937	// The basic idea is to shift one or two bits in the opposite direction and
1938	// then move registers around to get the correct end result.
1939	if (ShiftLeft && (ShiftAmt % `8`) >= `6`) {
1940	// Left shift modulo 6 or 7.
1941
1942	// Create a slice of the registers we're going to modify, to ease working
1943	// with them.
1944	size_t ShiftRegsOffset = ShiftAmt / `8`;
1945	size_t ShiftRegsSize = Regs.size() - ShiftRegsOffset;
1946	MutableArrayRef<std::pair<Register, int>> ShiftRegs =
1947	Regs.slice(ShiftRegsOffset, ShiftRegsSize);
1948
1949	// Shift one to the right, keeping the least significant bit as the carry
1950	// bit.
1951	insertMultibyteShift(MI, BB, ShiftRegs, ISD::SRL, `1`);
1952
1953	// Rotate the least significant bit from the carry bit into a new register
1954	// (that starts out zero).
1955	Register LowByte = MRI.createVirtualRegister(&AVR::GPR8RegClass);
1956	BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), LowByte).addReg(ZeroReg);
1957
1958	// Shift one more to the right if this is a modulo-6 shift.
1959	if (ShiftAmt % `8` == `6`) {
1960	insertMultibyteShift(MI, BB, ShiftRegs, ISD::SRL, `1`);
1961	Register NewLowByte = MRI.createVirtualRegister(&AVR::GPR8RegClass);
1962	BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), NewLowByte).addReg(LowByte);
1963	LowByte = NewLowByte;
1964	}
1965
1966	// Move all registers to the left, zeroing the bottom registers as needed.
1967	for (size_t I = `0`; I < Regs.size(); I++) {
1968	int ShiftRegsIdx = I + `1`;
1969	if (ShiftRegsIdx < (int)ShiftRegs.size()) {
1970	Regs[I] = ShiftRegs[ShiftRegsIdx];
1971	} else if (ShiftRegsIdx == (int)ShiftRegs.size()) {
1972	Regs[I] = std::pair(LowByte, `0`);
1973	} else {
1974	Regs[I] = std::pair(ZeroReg, `0`);
1975	}
1976	}
1977
1978	return;
1979	}
1980
1981	// Right shift modulo 6 or 7.
1982	if (!ShiftLeft && (ShiftAmt % `8`) >= `6`) {
1983	// Create a view on the registers we're going to modify, to ease working
1984	// with them.
1985	size_t ShiftRegsSize = Regs.size() - (ShiftAmt / `8`);
1986	MutableArrayRef<std::pair<Register, int>> ShiftRegs =
1987	Regs.slice(`0`, ShiftRegsSize);
1988
1989	// Shift one to the left.
1990	insertMultibyteShift(MI, BB, ShiftRegs, ISD::SHL, `1`);
1991
1992	// Sign or zero extend the most significant register into a new register.
1993	// The HighByte is the byte that still has one (or two) bits from the
1994	// original value. The ExtByte is purely a zero/sign extend byte (all bits
1995	// are either 0 or 1).
1996	Register HighByte = MRI.createVirtualRegister(&AVR::GPR8RegClass);
1997	Register ExtByte = `0`;
1998	if (ArithmeticShift) {
1999	// Sign-extend bit that was shifted out last.
2000	BuildMI(*BB, MI, dl, TII.get(AVR::SBCRdRr), HighByte)
2001	.addReg(HighByte, RegState::Undef)
2002	.addReg(HighByte, RegState::Undef);
2003	ExtByte = HighByte;
2004	// The highest bit of the original value is the same as the zero-extend
2005	// byte, so HighByte and ExtByte are the same.
2006	} else {
2007	// Use the zero register for zero extending.
2008	ExtByte = ZeroReg;
2009	// Rotate most significant bit into a new register (that starts out zero).
2010	BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), HighByte)
2011	.addReg(ExtByte)
2012	.addReg(ExtByte);
2013	}
2014
2015	// Shift one more to the left for modulo 6 shifts.
2016	if (ShiftAmt % `8` == `6`) {
2017	insertMultibyteShift(MI, BB, ShiftRegs, ISD::SHL, `1`);
2018	// Shift the topmost bit into the HighByte.
2019	Register NewExt = MRI.createVirtualRegister(&AVR::GPR8RegClass);
2020	BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), NewExt)
2021	.addReg(HighByte)
2022	.addReg(HighByte);
2023	HighByte = NewExt;
2024	}
2025
2026	// Move all to the right, while sign or zero extending.
2027	for (int I = Regs.size() - `1`; I >= `0`; I--) {
2028	int ShiftRegsIdx = I - (Regs.size() - ShiftRegs.size()) - `1`;
2029	if (ShiftRegsIdx >= `0`) {
2030	Regs[I] = ShiftRegs[ShiftRegsIdx];
2031	} else if (ShiftRegsIdx == -`1`) {
2032	Regs[I] = std::pair(HighByte, `0`);
2033	} else {
2034	Regs[I] = std::pair(ExtByte, `0`);
2035	}
2036	}
2037
2038	return;
2039	}
2040
2041	// For shift amounts of at least one register, simply rename the registers and
2042	// zero the bottom registers.
2043	while (ShiftLeft && ShiftAmt >= `8`) {
2044	// Move all registers one to the left.
2045	for (size_t I = `0`; I < Regs.size() - `1`; I++) {
2046	Regs[I] = Regs[I + `1`];
2047	}
2048
2049	// Zero the least significant register.
2050	Regs[Regs.size() - `1`] = std::pair(ZeroReg, `0`);
2051
2052	// Continue shifts with the leftover registers.
2053	Regs = Regs.drop_back(`1`);
2054
2055	ShiftAmt -= `8`;
2056	}
2057
2058	// And again, the same for right shifts.
2059	Register ShrExtendReg = `0`;
2060	if (!ShiftLeft && ShiftAmt >= `8`) {
2061	if (ArithmeticShift) {
2062	// Sign extend the most significant register into ShrExtendReg.
2063	ShrExtendReg = MRI.createVirtualRegister(&AVR::GPR8RegClass);
2064	Register Tmp = MRI.createVirtualRegister(&AVR::GPR8RegClass);
2065	BuildMI(*BB, MI, dl, TII.get(AVR::ADDRdRr), Tmp)
2066	.addReg(Regs[`0`].first, `0`, Regs[`0`].second)
2067	.addReg(Regs[`0`].first, `0`, Regs[`0`].second);
2068	BuildMI(*BB, MI, dl, TII.get(AVR::SBCRdRr), ShrExtendReg)
2069	.addReg(Tmp)
2070	.addReg(Tmp);
2071	} else {
2072	ShrExtendReg = ZeroReg;
2073	}
2074	for (; ShiftAmt >= `8`; ShiftAmt -= `8`) {
2075	// Move all registers one to the right.
2076	for (size_t I = Regs.size() - `1`; I != `0`; I--) {
2077	Regs[I] = Regs[I - `1`];
2078	}
2079
2080	// Zero or sign extend the most significant register.
2081	Regs[`0`] = std::pair(ShrExtendReg, `0`);
2082
2083	// Continue shifts with the leftover registers.
2084	Regs = Regs.drop_front(`1`);
2085	}
2086	}
2087
2088	// The bigger shifts are already handled above.
2089	assert((ShiftAmt < `8`) && "Unexpect shift amount");
2090
2091	// Shift by four bits, using a complicated swap/eor/andi/eor sequence.
2092	// It only works for logical shifts because the bits shifted in are all
2093	// zeroes.
2094	// To shift a single byte right, it produces code like this:
2095	// swap r0
2096	// andi r0, 0x0f
2097	// For a two-byte (16-bit) shift, it adds the following instructions to shift
2098	// the upper byte into the lower byte:
2099	// swap r1
2100	// eor r0, r1
2101	// andi r1, 0x0f
2102	// eor r0, r1
2103	// For bigger shifts, it repeats the above sequence. For example, for a 3-byte
2104	// (24-bit) shift it adds:
2105	// swap r2
2106	// eor r1, r2
2107	// andi r2, 0x0f
2108	// eor r1, r2
2109	if (!ArithmeticShift && ShiftAmt >= `4`) {
2110	Register Prev = `0`;
2111	for (size_t I = `0`; I < Regs.size(); I++) {
2112	size_t Idx = ShiftLeft ? I : Regs.size() - I - `1`;
2113	Register SwapReg = MRI.createVirtualRegister(&AVR::LD8RegClass);
2114	BuildMI(*BB, MI, dl, TII.get(AVR::SWAPRd), SwapReg)
2115	.addReg(Regs[Idx].first, `0`, Regs[Idx].second);
2116	if (I != `0`) {
2117	Register R = MRI.createVirtualRegister(&AVR::GPR8RegClass);
2118	BuildMI(*BB, MI, dl, TII.get(AVR::EORRdRr), R)
2119	.addReg(Prev)
2120	.addReg(SwapReg);
2121	Prev = R;
2122	}
2123	Register AndReg = MRI.createVirtualRegister(&AVR::LD8RegClass);
2124	BuildMI(*BB, MI, dl, TII.get(AVR::ANDIRdK), AndReg)
2125	.addReg(SwapReg)
2126	.addImm(ShiftLeft ? `0xf0` : `0x0f`);
2127	if (I != `0`) {
2128	Register R = MRI.createVirtualRegister(&AVR::GPR8RegClass);
2129	BuildMI(*BB, MI, dl, TII.get(AVR::EORRdRr), R)
2130	.addReg(Prev)
2131	.addReg(AndReg);
2132	size_t PrevIdx = ShiftLeft ? Idx - `1` : Idx + `1`;
2133	Regs[PrevIdx] = std::pair(R, `0`);
2134	}
2135	Prev = AndReg;
2136	Regs[Idx] = std::pair(AndReg, `0`);
2137	}
2138	ShiftAmt -= `4`;
2139	}
2140
2141	// Shift by one. This is the fallback that always works, and the shift
2142	// operation that is used for 1, 2, and 3 bit shifts.
2143	while (ShiftLeft && ShiftAmt) {
2144	// Shift one to the left.
2145	for (ssize_t I = Regs.size() - `1`; I >= `0`; I--) {
2146	Register Out = MRI.createVirtualRegister(&AVR::GPR8RegClass);
2147	Register In = Regs[I].first;
2148	Register InSubreg = Regs[I].second;
2149	if (I == (ssize_t)Regs.size() - `1`) { // first iteration
2150	BuildMI(*BB, MI, dl, TII.get(AVR::ADDRdRr), Out)
2151	.addReg(In, `0`, InSubreg)
2152	.addReg(In, `0`, InSubreg);
2153	} else {
2154	BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), Out)
2155	.addReg(In, `0`, InSubreg)
2156	.addReg(In, `0`, InSubreg);
2157	}
2158	Regs[I] = std::pair(Out, `0`);
2159	}
2160	ShiftAmt--;
2161	}
2162	while (!ShiftLeft && ShiftAmt) {
2163	// Shift one to the right.
2164	for (size_t I = `0`; I < Regs.size(); I++) {
2165	Register Out = MRI.createVirtualRegister(&AVR::GPR8RegClass);
2166	Register In = Regs[I].first;
2167	Register InSubreg = Regs[I].second;
2168	if (I == `0`) {
2169	unsigned Opc = ArithmeticShift ? AVR::ASRRd : AVR::LSRRd;
2170	BuildMI(BB&: *BB, I&: MI, MIMD: dl, MCID: TII.get(Opcode: Opc), DestReg: Out).addReg(RegNo: In, flags: `0`, SubReg: InSubreg);
2171	} else {
2172	BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), Out).addReg(In, `0`, InSubreg);
2173	}
2174	Regs[I] = std::pair(Out, `0`);
2175	}
2176	ShiftAmt--;
2177	}
2178
2179	if (ShiftAmt != `0`) {
2180	llvm_unreachable("don't know how to shift!"); // sanity check
2181	}
2182	}
2183
2184	// Do a wide (32-bit) shift.
2185	MachineBasicBlock *
2186	AVRTargetLowering::insertWideShift(MachineInstr &MI,
2187	MachineBasicBlock BB) const* {
2188	const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2189	const DebugLoc &dl = MI.getDebugLoc();
2190
2191	// How much to shift to the right (meaning: a negative number indicates a left
2192	// shift).
2193	int64_t ShiftAmt = MI.getOperand(i: `4`).getImm();
2194	ISD::NodeType Opc;
2195	switch (MI.getOpcode()) {
2196	case AVR::Lsl32:
2197	Opc = ISD::SHL;
2198	break;
2199	case AVR::Lsr32:
2200	Opc = ISD::SRL;
2201	break;
2202	case AVR::Asr32:
2203	Opc = ISD::SRA;
2204	break;
2205	}
2206
2207	// Read the input registers, with the most significant register at index 0.
2208	std::array<std::pair<Register, int>, `4`> Registers = {
2209	std::pair(MI.getOperand(`3`).getReg(), AVR::sub_hi),
2210	std::pair(MI.getOperand(`3`).getReg(), AVR::sub_lo),
2211	std::pair(MI.getOperand(`2`).getReg(), AVR::sub_hi),
2212	std::pair(MI.getOperand(`2`).getReg(), AVR::sub_lo),
2213	};
2214
2215	// Do the shift. The registers are modified in-place.
2216	insertMultibyteShift(MI, BB, Registers, Opc, ShiftAmt);
2217
2218	// Combine the 8-bit registers into 16-bit register pairs.
2219	// This done either from LSB to MSB or from MSB to LSB, depending on the
2220	// shift. It's an optimization so that the register allocator will use the
2221	// fewest movs possible (which order we use isn't a correctness issue, just an
2222	// optimization issue).
2223	// - lsl prefers starting from the most significant byte (2nd case).
2224	// - lshr prefers starting from the least significant byte (1st case).
2225	// - for ashr it depends on the number of shifted bytes.
2226	// Some shift operations still don't get the most optimal mov sequences even
2227	// with this distinction. TODO: figure out why and try to fix it (but we're
2228	// already equal to or faster than avr-gcc in all cases except ashr 8).
2229	if (Opc != ISD::SHL &&
2230	(Opc != ISD::SRA \|\| (ShiftAmt < `16` \|\| ShiftAmt >= `22`))) {
2231	// Use the resulting registers starting with the least significant byte.
2232	BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(`0`).getReg())
2233	.addReg(Registers[`3`].first, `0`, Registers[`3`].second)
2234	.addImm(AVR::sub_lo)
2235	.addReg(Registers[`2`].first, `0`, Registers[`2`].second)
2236	.addImm(AVR::sub_hi);
2237	BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(`1`).getReg())
2238	.addReg(Registers[`1`].first, `0`, Registers[`1`].second)
2239	.addImm(AVR::sub_lo)
2240	.addReg(Registers[`0`].first, `0`, Registers[`0`].second)
2241	.addImm(AVR::sub_hi);
2242	} else {
2243	// Use the resulting registers starting with the most significant byte.
2244	BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(`1`).getReg())
2245	.addReg(Registers[`0`].first, `0`, Registers[`0`].second)
2246	.addImm(AVR::sub_hi)
2247	.addReg(Registers[`1`].first, `0`, Registers[`1`].second)
2248	.addImm(AVR::sub_lo);
2249	BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(`0`).getReg())
2250	.addReg(Registers[`2`].first, `0`, Registers[`2`].second)
2251	.addImm(AVR::sub_hi)
2252	.addReg(Registers[`3`].first, `0`, Registers[`3`].second)
2253	.addImm(AVR::sub_lo);
2254	}
2255
2256	// Remove the pseudo instruction.
2257	MI.eraseFromParent();
2258	return BB;
2259	}
2260
2261	static bool isCopyMulResult(MachineBasicBlock::iterator const &I) {
2262	if (I->getOpcode() == AVR::COPY) {
2263	Register SrcReg = I ->getOperand(i: `1`).getReg();
2264	return (SrcReg == AVR::R0 \|\| SrcReg == AVR::R1);
2265	}
2266
2267	return false;
2268	}
2269
2270	// The mul instructions wreak havock on our zero_reg R1. We need to clear it
2271	// after the result has been evacuated. This is probably not the best way to do
2272	// it, but it works for now.
2273	MachineBasicBlock *AVRTargetLowering::insertMul(MachineInstr &MI,
2274	MachineBasicBlock BB) const* {
2275	const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2276	MachineBasicBlock::iterator I(MI);
2277	++I; // in any case insert after* the mul instruction*
2278	if (isCopyMulResult(I))
2279	++I;
2280	if (isCopyMulResult(I))
2281	++I;
2282	BuildMI(*BB, I, MI.getDebugLoc(), TII.get(AVR::EORRdRr), AVR::R1)
2283	.addReg(AVR::R1)
2284	.addReg(AVR::R1);
2285	return BB;
2286	}
2287
2288	// Insert a read from the zero register.
2289	MachineBasicBlock *
2290	AVRTargetLowering::insertCopyZero(MachineInstr &MI,
2291	MachineBasicBlock BB) const* {
2292	const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2293	MachineBasicBlock::iterator I(MI);
2294	BuildMI(*BB, I, MI.getDebugLoc(), TII.get(AVR::COPY))
2295	.add(MI.getOperand(`0`))
2296	.addReg(Subtarget.getZeroRegister());
2297	MI.eraseFromParent();
2298	return BB;
2299	}
2300
2301	// Lower atomicrmw operation to disable interrupts, do operation, and restore
2302	// interrupts. This works because all AVR microcontrollers are single core.
2303	MachineBasicBlock *AVRTargetLowering::insertAtomicArithmeticOp(
2304	MachineInstr &MI, MachineBasicBlock BB, unsigned* Opcode, int Width) const {
2305	MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
2306	const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2307	MachineBasicBlock::iterator I(MI);
2308	DebugLoc dl = MI.getDebugLoc();
2309
2310	// Example instruction sequence, for an atomic 8-bit add:
2311	// ldi r25, 5
2312	// in r0, SREG
2313	// cli
2314	// ld r24, X
2315	// add r25, r24
2316	// st X, r25
2317	// out SREG, r0
2318
2319	const TargetRegisterClass *RC =
2320	(Width == `8`) ? &AVR::GPR8RegClass : &AVR::DREGSRegClass;
2321	unsigned LoadOpcode = (Width == `8`) ? AVR::LDRdPtr : AVR::LDWRdPtr;
2322	unsigned StoreOpcode = (Width == `8`) ? AVR::STPtrRr : AVR::STWPtrRr;
2323
2324	// Disable interrupts.
2325	BuildMI(*BB, I, dl, TII.get(AVR::INRdA), Subtarget.getTmpRegister())
2326	.addImm(Subtarget.getIORegSREG());
2327	BuildMI(*BB, I, dl, TII.get(AVR::BCLRs)).addImm(`7`);
2328
2329	// Load the original value.
2330	BuildMI(BB&: *BB, I, MIMD: dl, MCID: TII.get(Opcode: LoadOpcode), DestReg: MI.getOperand(i: `0`).getReg())
2331	.add(MO: MI.getOperand(i: `1`));
2332
2333	// Do the arithmetic operation.
2334	Register Result = MRI.createVirtualRegister(RegClass: RC);
2335	BuildMI(BB&: *BB, I, MIMD: dl, MCID: TII.get(Opcode), DestReg: Result)
2336	.addReg(RegNo: MI.getOperand(i: `0`).getReg())
2337	.add(MO: MI.getOperand(i: `2`));
2338
2339	// Store the result.
2340	BuildMI(BB&: *BB, I, MIMD: dl, MCID: TII.get(Opcode: StoreOpcode))
2341	.add(MO: MI.getOperand(i: `1`))
2342	.addReg(RegNo: Result);
2343
2344	// Restore interrupts.
2345	BuildMI(*BB, I, dl, TII.get(AVR::OUTARr))
2346	.addImm(Subtarget.getIORegSREG())
2347	.addReg(Subtarget.getTmpRegister());
2348
2349	// Remove the pseudo instruction.
2350	MI.eraseFromParent();
2351	return BB;
2352	}
2353
2354	MachineBasicBlock *
2355	AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
2356	MachineBasicBlock MBB) const* {
2357	int Opc = MI.getOpcode();
2358	const AVRSubtarget &STI = MBB->getParent()->getSubtarget<AVRSubtarget>();
2359
2360	// Pseudo shift instructions with a non constant shift amount are expanded
2361	// into a loop.
2362	switch (Opc) {
2363	case AVR::Lsl8:
2364	case AVR::Lsl16:
2365	case AVR::Lsr8:
2366	case AVR::Lsr16:
2367	case AVR::Rol8:
2368	case AVR::Rol16:
2369	case AVR::Ror8:
2370	case AVR::Ror16:
2371	case AVR::Asr8:
2372	case AVR::Asr16:
2373	return insertShift(MI, BB: MBB, Tiny: STI.hasTinyEncoding());
2374	case AVR::Lsl32:
2375	case AVR::Lsr32:
2376	case AVR::Asr32:
2377	return insertWideShift(MI, BB: MBB);
2378	case AVR::MULRdRr:
2379	case AVR::MULSRdRr:
2380	return insertMul(MI, BB: MBB);
2381	case AVR::CopyZero:
2382	return insertCopyZero(MI, BB: MBB);
2383	case AVR::AtomicLoadAdd8:
2384	return insertAtomicArithmeticOp(MI, MBB, AVR::ADDRdRr, `8`);
2385	case AVR::AtomicLoadAdd16:
2386	return insertAtomicArithmeticOp(MI, MBB, AVR::ADDWRdRr, `16`);
2387	case AVR::AtomicLoadSub8:
2388	return insertAtomicArithmeticOp(MI, MBB, AVR::SUBRdRr, `8`);
2389	case AVR::AtomicLoadSub16:
2390	return insertAtomicArithmeticOp(MI, MBB, AVR::SUBWRdRr, `16`);
2391	case AVR::AtomicLoadAnd8:
2392	return insertAtomicArithmeticOp(MI, MBB, AVR::ANDRdRr, `8`);
2393	case AVR::AtomicLoadAnd16:
2394	return insertAtomicArithmeticOp(MI, MBB, AVR::ANDWRdRr, `16`);
2395	case AVR::AtomicLoadOr8:
2396	return insertAtomicArithmeticOp(MI, MBB, AVR::ORRdRr, `8`);
2397	case AVR::AtomicLoadOr16:
2398	return insertAtomicArithmeticOp(MI, MBB, AVR::ORWRdRr, `16`);
2399	case AVR::AtomicLoadXor8:
2400	return insertAtomicArithmeticOp(MI, MBB, AVR::EORRdRr, `8`);
2401	case AVR::AtomicLoadXor16:
2402	return insertAtomicArithmeticOp(MI, MBB, AVR::EORWRdRr, `16`);
2403	}
2404
2405	assert((Opc == AVR::Select16 \|\| Opc == AVR::Select8) &&
2406	"Unexpected instr type to insert");
2407
2408	const AVRInstrInfo &TII = (const AVRInstrInfo &)*MI.getParent()
2409	->getParent()
2410	->getSubtarget()
2411	.getInstrInfo();
2412	DebugLoc dl = MI.getDebugLoc();
2413
2414	// To "insert" a SELECT instruction, we insert the diamond
2415	// control-flow pattern. The incoming instruction knows the
2416	// destination vreg to set, the condition code register to branch
2417	// on, the true/false values to select between, and a branch opcode
2418	// to use.
2419
2420	MachineFunction *MF = MBB->getParent();
2421	const BasicBlock *LLVM_BB = MBB->getBasicBlock();
2422	MachineBasicBlock *FallThrough = MBB->getFallThrough();
2423
2424	// If the current basic block falls through to another basic block,
2425	// we must insert an unconditional branch to the fallthrough destination
2426	// if we are to insert basic blocks at the prior fallthrough point.
2427	if (FallThrough != nullptr) {
2428	BuildMI(MBB, dl, TII.get(AVR::RJMPk)).addMBB(FallThrough);
2429	}
2430
2431	MachineBasicBlock *trueMBB = MF->CreateMachineBasicBlock(BB: LLVM_BB);
2432	MachineBasicBlock *falseMBB = MF->CreateMachineBasicBlock(BB: LLVM_BB);
2433
2434	MachineFunction::iterator I;
2435	for (I = MF->begin(); I != MF->end() && &(*I) != MBB; ++I)
2436	;
2437	if (I != MF->end())
2438	++I;
2439	MF->insert(MBBI: I, MBB: trueMBB);
2440	MF->insert(MBBI: I, MBB: falseMBB);
2441
2442	// Set the call frame size on entry to the new basic blocks.
2443	unsigned CallFrameSize = TII.getCallFrameSizeAt(MI);
2444	trueMBB->setCallFrameSize(CallFrameSize);
2445	falseMBB->setCallFrameSize(CallFrameSize);
2446
2447	// Transfer remaining instructions and all successors of the current
2448	// block to the block which will contain the Phi node for the
2449	// select.
2450	trueMBB->splice(trueMBB->begin(), MBB,
2451	std::next(MachineBasicBlock::iterator (MI)), MBB->end());
2452	trueMBB->transferSuccessorsAndUpdatePHIs(FromMBB: MBB);
2453
2454	AVRCC::CondCodes CC = (AVRCC::CondCodes)MI.getOperand(i: `3`).getImm();
2455	BuildMI(BB: MBB, MIMD: dl, MCID: TII.getBrCond(CC)).addMBB(MBB: trueMBB);
2456	BuildMI(MBB, dl, TII.get(AVR::RJMPk)).addMBB(falseMBB);
2457	MBB->addSuccessor(Succ: falseMBB);
2458	MBB->addSuccessor(Succ: trueMBB);
2459
2460	// Unconditionally flow back to the true block
2461	BuildMI(falseMBB, dl, TII.get(AVR::RJMPk)).addMBB(trueMBB);
2462	falseMBB->addSuccessor(Succ: trueMBB);
2463
2464	// Set up the Phi node to determine where we came from
2465	BuildMI(*trueMBB, trueMBB->begin(), dl, TII.get(AVR::PHI),
2466	MI.getOperand(`0`).getReg())
2467	.addReg(MI.getOperand(`1`).getReg())
2468	.addMBB(MBB)
2469	.addReg(MI.getOperand(`2`).getReg())
2470	.addMBB(falseMBB);
2471
2472	MI.eraseFromParent(); // The pseudo instruction is gone now.
2473	return trueMBB;
2474	}
2475
2476	//===----------------------------------------------------------------------===//
2477	// Inline Asm Support
2478	//===----------------------------------------------------------------------===//
2479
2480	AVRTargetLowering::ConstraintType
2481	AVRTargetLowering::getConstraintType(StringRef Constraint) const {
2482	if (Constraint.size() == `1`) {
2483	// See http://www.nongnu.org/avr-libc/user-manual/inline_asm.html
2484	switch (Constraint [`0`]) {
2485	default:
2486	break;
2487	case `'a'`: // Simple upper registers
2488	case `'b'`: // Base pointer registers pairs
2489	case `'d'`: // Upper register
2490	case `'l'`: // Lower registers
2491	case `'e'`: // Pointer register pairs
2492	case `'q'`: // Stack pointer register
2493	case `'r'`: // Any register
2494	case `'w'`: // Special upper register pairs
2495	return C_RegisterClass;
2496	case `'t'`: // Temporary register
2497	case `'x'`:
2498	case `'X'`: // Pointer register pair X
2499	case `'y'`:
2500	case `'Y'`: // Pointer register pair Y
2501	case `'z'`:
2502	case `'Z'`: // Pointer register pair Z
2503	return C_Register;
2504	case `'Q'`: // A memory address based on Y or Z pointer with displacement.
2505	return C_Memory;
2506	case `'G'`: // Floating point constant
2507	case `'I'`: // 6-bit positive integer constant
2508	case `'J'`: // 6-bit negative integer constant
2509	case `'K'`: // Integer constant (Range: 2)
2510	case `'L'`: // Integer constant (Range: 0)
2511	case `'M'`: // 8-bit integer constant
2512	case `'N'`: // Integer constant (Range: -1)
2513	case `'O'`: // Integer constant (Range: 8, 16, 24)
2514	case `'P'`: // Integer constant (Range: 1)
2515	case `'R'`: // Integer constant (Range: -6 to 5)x
2516	return C_Immediate;
2517	}
2518	}
2519
2520	return TargetLowering::getConstraintType(Constraint);
2521	}
2522
2523	InlineAsm::ConstraintCode
2524	AVRTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
2525	// Not sure if this is actually the right thing to do, but we got to do
2526	// something* [agnat]*
2527	switch (ConstraintCode [`0`]) {
2528	case `'Q'`:
2529	return InlineAsm::ConstraintCode::Q;
2530	}
2531	return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
2532	}
2533
2534	AVRTargetLowering::ConstraintWeight
2535	AVRTargetLowering::getSingleConstraintMatchWeight(
2536	AsmOperandInfo &info, const char constraint) const* {
2537	ConstraintWeight weight = CW_Invalid;
2538	Value *CallOperandVal = info.CallOperandVal;
2539
2540	// If we don't have a value, we can't do a match,
2541	// but allow it at the lowest weight.
2542	// (this behaviour has been copied from the ARM backend)
2543	if (!CallOperandVal) {
2544	return CW_Default;
2545	}
2546
2547	// Look at the constraint type.
2548	switch (*constraint) {
2549	default:
2550	weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
2551	break;
2552	case `'d'`:
2553	case `'r'`:
2554	case `'l'`:
2555	weight = CW_Register;
2556	break;
2557	case `'a'`:
2558	case `'b'`:
2559	case `'e'`:
2560	case `'q'`:
2561	case `'t'`:
2562	case `'w'`:
2563	case `'x'`:
2564	case `'X'`:
2565	case `'y'`:
2566	case `'Y'`:
2567	case `'z'`:
2568	case `'Z'`:
2569	weight = CW_SpecificReg;
2570	break;
2571	case `'G'`:
2572	if (const ConstantFP *C = dyn_cast<ConstantFP>(CallOperandVal)) {
2573	if (C->isZero()) {
2574	weight = CW_Constant;
2575	}
2576	}
2577	break;
2578	case `'I'`:
2579	if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
2580	if (isUInt<`6`>(C->getZExtValue())) {
2581	weight = CW_Constant;
2582	}
2583	}
2584	break;
2585	case `'J'`:
2586	if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
2587	if ((C->getSExtValue() >= -`63`) && (C->getSExtValue() <= `0`)) {
2588	weight = CW_Constant;
2589	}
2590	}
2591	break;
2592	case `'K'`:
2593	if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
2594	if (C->getZExtValue() == `2`) {
2595	weight = CW_Constant;
2596	}
2597	}
2598	break;
2599	case `'L'`:
2600	if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
2601	if (C->getZExtValue() == `0`) {
2602	weight = CW_Constant;
2603	}
2604	}
2605	break;
2606	case `'M'`:
2607	if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
2608	if (isUInt<`8`>(C->getZExtValue())) {
2609	weight = CW_Constant;
2610	}
2611	}
2612	break;
2613	case `'N'`:
2614	if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
2615	if (C->getSExtValue() == -`1`) {
2616	weight = CW_Constant;
2617	}
2618	}
2619	break;
2620	case `'O'`:
2621	if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
2622	if ((C->getZExtValue() == `8`) \|\| (C->getZExtValue() == `16`) \|\|
2623	(C->getZExtValue() == `24`)) {
2624	weight = CW_Constant;
2625	}
2626	}
2627	break;
2628	case `'P'`:
2629	if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
2630	if (C->getZExtValue() == `1`) {
2631	weight = CW_Constant;
2632	}
2633	}
2634	break;
2635	case `'R'`:
2636	if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
2637	if ((C->getSExtValue() >= -`6`) && (C->getSExtValue() <= `5`)) {
2638	weight = CW_Constant;
2639	}
2640	}
2641	break;
2642	case `'Q'`:
2643	weight = CW_Memory;
2644	break;
2645	}
2646
2647	return weight;
2648	}
2649
2650	std::pair<unsigned, const TargetRegisterClass *>
2651	AVRTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
2652	StringRef Constraint,
2653	MVT VT) const {
2654	if (Constraint.size() == `1`) {
2655	switch (Constraint [`0`]) {
2656	case `'a'`: // Simple upper registers r16..r23.
2657	if (VT == MVT::i8)
2658	return std::make_pair(`0U`, &AVR::LD8loRegClass);
2659	else if (VT == MVT::i16)
2660	return std::make_pair(`0U`, &AVR::DREGSLD8loRegClass);
2661	break;
2662	case `'b'`: // Base pointer registers: y, z.
2663	if (VT == MVT::i8 \|\| VT == MVT::i16)
2664	return std::make_pair(`0U`, &AVR::PTRDISPREGSRegClass);
2665	break;
2666	case `'d'`: // Upper registers r16..r31.
2667	if (VT == MVT::i8)
2668	return std::make_pair(`0U`, &AVR::LD8RegClass);
2669	else if (VT == MVT::i16)
2670	return std::make_pair(`0U`, &AVR::DLDREGSRegClass);
2671	break;
2672	case `'l'`: // Lower registers r0..r15.
2673	if (VT == MVT::i8)
2674	return std::make_pair(`0U`, &AVR::GPR8loRegClass);
2675	else if (VT == MVT::i16)
2676	return std::make_pair(`0U`, &AVR::DREGSloRegClass);
2677	break;
2678	case `'e'`: // Pointer register pairs: x, y, z.
2679	if (VT == MVT::i8 \|\| VT == MVT::i16)
2680	return std::make_pair(`0U`, &AVR::PTRREGSRegClass);
2681	break;
2682	case `'q'`: // Stack pointer register: SPH:SPL.
2683	return std::make_pair(`0U`, &AVR::GPRSPRegClass);
2684	case `'r'`: // Any register: r0..r31.
2685	if (VT == MVT::i8)
2686	return std::make_pair(`0U`, &AVR::GPR8RegClass);
2687	else if (VT == MVT::i16)
2688	return std::make_pair(`0U`, &AVR::DREGSRegClass);
2689	break;
2690	case `'t'`: // Temporary register: r0.
2691	if (VT == MVT::i8)
2692	return std::make_pair(unsigned(Subtarget.getTmpRegister()),
2693	&AVR::GPR8RegClass);
2694	break;
2695	case `'w'`: // Special upper register pairs: r24, r26, r28, r30.
2696	if (VT == MVT::i8 \|\| VT == MVT::i16)
2697	return std::make_pair(`0U`, &AVR::IWREGSRegClass);
2698	break;
2699	case `'x'`: // Pointer register pair X: r27:r26.
2700	case `'X'`:
2701	if (VT == MVT::i8 \|\| VT == MVT::i16)
2702	return std::make_pair(unsigned(AVR::R27R26), &AVR::PTRREGSRegClass);
2703	break;
2704	case `'y'`: // Pointer register pair Y: r29:r28.
2705	case `'Y'`:
2706	if (VT == MVT::i8 \|\| VT == MVT::i16)
2707	return std::make_pair(unsigned(AVR::R29R28), &AVR::PTRREGSRegClass);
2708	break;
2709	case `'z'`: // Pointer register pair Z: r31:r30.
2710	case `'Z'`:
2711	if (VT == MVT::i8 \|\| VT == MVT::i16)
2712	return std::make_pair(unsigned(AVR::R31R30), &AVR::PTRREGSRegClass);
2713	break;
2714	default:
2715	break;
2716	}
2717	}
2718
2719	return TargetLowering::getRegForInlineAsmConstraint(
2720	Subtarget.getRegisterInfo(), Constraint, VT);
2721	}
2722
2723	void AVRTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
2724	StringRef Constraint,
2725	std::vector<SDValue> &Ops,
2726	SelectionDAG &DAG) const {
2727	SDValue Result;
2728	SDLoc DL(Op);
2729	EVT Ty = Op.getValueType();
2730
2731	// Currently only support length 1 constraints.
2732	if (Constraint.size() != `1`) {
2733	return;
2734	}
2735
2736	char ConstraintLetter = Constraint [`0`];
2737	switch (ConstraintLetter) {
2738	default:
2739	break;
2740	// Deal with integers first:
2741	case `'I'`:
2742	case `'J'`:
2743	case `'K'`:
2744	case `'L'`:
2745	case `'M'`:
2746	case `'N'`:
2747	case `'O'`:
2748	case `'P'`:
2749	case `'R'`: {
2750	const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
2751	if (!C) {
2752	return;
2753	}
2754
2755	int64_t CVal64 = C->getSExtValue();
2756	uint64_t CUVal64 = C->getZExtValue();
2757	switch (ConstraintLetter) {
2758	case `'I'`: // 0..63
2759	if (!isUInt<`6`>(CUVal64))
2760	return;
2761	Result = DAG.getTargetConstant(Val: CUVal64, DL, VT: Ty);
2762	break;
2763	case `'J'`: // -63..0
2764	if (CVal64 < -`63` \|\| CVal64 > `0`)
2765	return;
2766	Result = DAG.getTargetConstant(Val: CVal64, DL, VT: Ty);
2767	break;
2768	case `'K'`: // 2
2769	if (CUVal64 != `2`)
2770	return;
2771	Result = DAG.getTargetConstant(Val: CUVal64, DL, VT: Ty);
2772	break;
2773	case `'L'`: // 0
2774	if (CUVal64 != `0`)
2775	return;
2776	Result = DAG.getTargetConstant(Val: CUVal64, DL, VT: Ty);
2777	break;
2778	case `'M'`: // 0..255
2779	if (!isUInt<`8`>(CUVal64))
2780	return;
2781	// i8 type may be printed as a negative number,
2782	// e.g. 254 would be printed as -2,
2783	// so we force it to i16 at least.
2784	if (Ty.getSimpleVT() == MVT::i8) {
2785	Ty = MVT::i16;
2786	}
2787	Result = DAG.getTargetConstant(Val: CUVal64, DL, VT: Ty);
2788	break;
2789	case `'N'`: // -1
2790	if (CVal64 != -`1`)
2791	return;
2792	Result = DAG.getTargetConstant(Val: CVal64, DL, VT: Ty);
2793	break;
2794	case `'O'`: // 8, 16, 24
2795	if (CUVal64 != `8` && CUVal64 != `16` && CUVal64 != `24`)
2796	return;
2797	Result = DAG.getTargetConstant(Val: CUVal64, DL, VT: Ty);
2798	break;
2799	case `'P'`: // 1
2800	if (CUVal64 != `1`)
2801	return;
2802	Result = DAG.getTargetConstant(Val: CUVal64, DL, VT: Ty);
2803	break;
2804	case `'R'`: // -6..5
2805	if (CVal64 < -`6` \|\| CVal64 > `5`)
2806	return;
2807	Result = DAG.getTargetConstant(Val: CVal64, DL, VT: Ty);
2808	break;
2809	}
2810
2811	break;
2812	}
2813	case `'G'`:
2814	const ConstantFPSDNode *FC = dyn_cast<ConstantFPSDNode>(Op);
2815	if (!FC \|\| !FC->isZero())
2816	return;
2817	// Soften float to i8 0
2818	Result = DAG.getTargetConstant(`0`, DL, MVT::i8);
2819	break;
2820	}
2821
2822	if (Result.getNode()) {
2823	Ops.push_back(Result);
2824	return;
2825	}
2826
2827	return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops&: Ops, DAG);
2828	}
2829
2830	Register AVRTargetLowering::getRegisterByName(const char *RegName, LLT VT,
2831	const MachineFunction &MF) const {
2832	Register Reg;
2833
2834	if (VT == LLT::scalar(SizeInBits: `8`)) {
2835	Reg = StringSwitch<unsigned>(RegName)
2836	.Case("r0", AVR::R0)
2837	.Case("r1", AVR::R1)
2838	.Default(`0`);
2839	} else {
2840	Reg = StringSwitch<unsigned>(RegName)
2841	.Case("r0", AVR::R1R0)
2842	.Case("sp", AVR::SP)
2843	.Default(`0`);
2844	}
2845
2846	if (Reg)
2847	return Reg;
2848
2849	report_fatal_error(
2850	reason: Twine("Invalid register name \"" + StringRef (RegName) + "\"."));
2851	}
2852
2853	} // end of namespace llvm
2854

source code of llvm/lib/Target/AVR/AVRISelLowering.cpp