1/* Decompose multiword subregs.
2 Copyright (C) 2007-2017 Free Software Foundation, Inc.
3 Contributed by Richard Henderson <rth@redhat.com>
4 Ian Lance Taylor <iant@google.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "backend.h"
26#include "rtl.h"
27#include "tree.h"
28#include "cfghooks.h"
29#include "df.h"
30#include "memmodel.h"
31#include "tm_p.h"
32#include "expmed.h"
33#include "insn-config.h"
34#include "emit-rtl.h"
35#include "recog.h"
36#include "cfgrtl.h"
37#include "cfgbuild.h"
38#include "dce.h"
39#include "expr.h"
40#include "tree-pass.h"
41#include "lower-subreg.h"
42#include "rtl-iter.h"
43#include "target.h"
44
45
46/* Decompose multi-word pseudo-registers into individual
47 pseudo-registers when possible and profitable. This is possible
48 when all the uses of a multi-word register are via SUBREG, or are
49 copies of the register to another location. Breaking apart the
50 register permits more CSE and permits better register allocation.
51 This is profitable if the machine does not have move instructions
52 to do this.
53
54 This pass only splits moves with modes that are wider than
55 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
56 integer modes that are twice the width of word_mode. The latter
57 could be generalized if there was a need to do this, but the trend in
58 architectures is to not need this.
59
60 There are two useful preprocessor defines for use by maintainers:
61
62 #define LOG_COSTS 1
63
64 if you wish to see the actual cost estimates that are being used
65 for each mode wider than word mode and the cost estimates for zero
66 extension and the shifts. This can be useful when port maintainers
67 are tuning insn rtx costs.
68
69 #define FORCE_LOWERING 1
70
71 if you wish to test the pass with all the transformation forced on.
72 This can be useful for finding bugs in the transformations. */
73
74#define LOG_COSTS 0
75#define FORCE_LOWERING 0
76
77/* Bit N in this bitmap is set if regno N is used in a context in
78 which we can decompose it. */
79static bitmap decomposable_context;
80
81/* Bit N in this bitmap is set if regno N is used in a context in
82 which it can not be decomposed. */
83static bitmap non_decomposable_context;
84
/* Bit N in this bitmap is set if regno N is used in a subreg
   which changes the mode but not the size.  This typically happens
   when the register is accessed as a floating-point value; we want to
   avoid generating accesses to its subwords in integer modes.  */
89static bitmap subreg_context;
90
91/* Bit N in the bitmap in element M of this array is set if there is a
92 copy from reg M to reg N. */
93static vec<bitmap> reg_copy_graph;
94
95struct target_lower_subreg default_target_lower_subreg;
96#if SWITCHABLE_TARGET
97struct target_lower_subreg *this_target_lower_subreg
98 = &default_target_lower_subreg;
99#endif
100
101#define twice_word_mode \
102 this_target_lower_subreg->x_twice_word_mode
103#define choices \
104 this_target_lower_subreg->x_choices
105
106/* Return true if MODE is a mode we know how to lower. When returning true,
107 store its byte size in *BYTES and its word size in *WORDS. */
108
109static inline bool
110interesting_mode_p (machine_mode mode, unsigned int *bytes,
111 unsigned int *words)
112{
113 *bytes = GET_MODE_SIZE (mode);
114 *words = CEIL (*bytes, UNITS_PER_WORD);
115 return true;
116}
117
/* RTXes used while computing costs.  init_lower_subreg builds one set
   of these and the cost routines then rewrite their modes, codes and
   operands in place for each query.  */
struct cost_rtxes {
  /* Source and target registers.  */
  rtx source;
  rtx target;

  /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
  rtx zext;

  /* A shift of SOURCE.  shift_cost overwrites its code, mode and shift
     count before each cost query.  */
  rtx shift;

  /* A SET of TARGET.  compute_costs swaps its source operand.  */
  rtx set;
};
133
134/* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
135 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
136
137static int
138shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
139 machine_mode mode, int op1)
140{
141 PUT_CODE (rtxes->shift, code);
142 PUT_MODE (rtxes->shift, mode);
143 PUT_MODE (rtxes->source, mode);
144 XEXP (rtxes->shift, 1) = GEN_INT (op1);
145 return set_src_cost (rtxes->shift, mode, speed_p);
146}
147
148/* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
149 to true if it is profitable to split a double-word CODE shift
150 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
151 for speed or size profitability.
152
153 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
154 the cost of moving zero into a word-mode register. WORD_MOVE_COST
155 is the cost of moving between word registers. */
156
157static void
158compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
159 bool *splitting, enum rtx_code code,
160 int word_move_zero_cost, int word_move_cost)
161{
162 int wide_cost, narrow_cost, upper_cost, i;
163
164 for (i = 0; i < BITS_PER_WORD; i++)
165 {
166 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
167 i + BITS_PER_WORD);
168 if (i == 0)
169 narrow_cost = word_move_cost;
170 else
171 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
172
173 if (code != ASHIFTRT)
174 upper_cost = word_move_zero_cost;
175 else if (i == BITS_PER_WORD - 1)
176 upper_cost = word_move_cost;
177 else
178 upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
179 BITS_PER_WORD - 1);
180
181 if (LOG_COSTS)
182 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
183 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
184 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
185
186 if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
187 splitting[i] = true;
188 }
189}
190
191/* Compute what we should do when optimizing for speed or size; SPEED_P
192 selects which. Use RTXES for computing costs. */
193
194static void
195compute_costs (bool speed_p, struct cost_rtxes *rtxes)
196{
197 unsigned int i;
198 int word_move_zero_cost, word_move_cost;
199
200 PUT_MODE (rtxes->target, word_mode);
201 SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
202 word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);
203
204 SET_SRC (rtxes->set) = rtxes->source;
205 word_move_cost = set_rtx_cost (rtxes->set, speed_p);
206
207 if (LOG_COSTS)
208 fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
209 GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);
210
211 for (i = 0; i < MAX_MACHINE_MODE; i++)
212 {
213 machine_mode mode = (machine_mode) i;
214 unsigned int size, factor;
215 if (interesting_mode_p (mode, &size, &factor) && factor > 1)
216 {
217 unsigned int mode_move_cost;
218
219 PUT_MODE (rtxes->target, mode);
220 PUT_MODE (rtxes->source, mode);
221 mode_move_cost = set_rtx_cost (rtxes->set, speed_p);
222
223 if (LOG_COSTS)
224 fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
225 GET_MODE_NAME (mode), mode_move_cost,
226 word_move_cost, factor);
227
228 if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
229 {
230 choices[speed_p].move_modes_to_split[i] = true;
231 choices[speed_p].something_to_do = true;
232 }
233 }
234 }
235
236 /* For the moves and shifts, the only case that is checked is one
237 where the mode of the target is an integer mode twice the width
238 of the word_mode.
239
240 If it is not profitable to split a double word move then do not
241 even consider the shifts or the zero extension. */
242 if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
243 {
244 int zext_cost;
245
246 /* The only case here to check to see if moving the upper part with a
247 zero is cheaper than doing the zext itself. */
248 PUT_MODE (rtxes->source, word_mode);
249 zext_cost = set_src_cost (rtxes->zext, twice_word_mode, speed_p);
250
251 if (LOG_COSTS)
252 fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
253 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
254 zext_cost, word_move_cost, word_move_zero_cost);
255
256 if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
257 choices[speed_p].splitting_zext = true;
258
259 compute_splitting_shift (speed_p, rtxes,
260 choices[speed_p].splitting_ashift, ASHIFT,
261 word_move_zero_cost, word_move_cost);
262 compute_splitting_shift (speed_p, rtxes,
263 choices[speed_p].splitting_lshiftrt, LSHIFTRT,
264 word_move_zero_cost, word_move_cost);
265 compute_splitting_shift (speed_p, rtxes,
266 choices[speed_p].splitting_ashiftrt, ASHIFTRT,
267 word_move_zero_cost, word_move_cost);
268 }
269}
270
271/* Do one-per-target initialisation. This involves determining
272 which operations on the machine are profitable. If none are found,
273 then the pass just returns when called. */
274
275void
276init_lower_subreg (void)
277{
278 struct cost_rtxes rtxes;
279
280 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
281
282 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode).require ();
283
284 rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
285 rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
286 rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
287 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
288 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
289
290 if (LOG_COSTS)
291 fprintf (stderr, "\nSize costs\n==========\n\n");
292 compute_costs (false, &rtxes);
293
294 if (LOG_COSTS)
295 fprintf (stderr, "\nSpeed costs\n===========\n\n");
296 compute_costs (true, &rtxes);
297}
298
299static bool
300simple_move_operand (rtx x)
301{
302 if (GET_CODE (x) == SUBREG)
303 x = SUBREG_REG (x);
304
305 if (!OBJECT_P (x))
306 return false;
307
308 if (GET_CODE (x) == LABEL_REF
309 || GET_CODE (x) == SYMBOL_REF
310 || GET_CODE (x) == HIGH
311 || GET_CODE (x) == CONST)
312 return false;
313
314 if (MEM_P (x)
315 && (MEM_VOLATILE_P (x)
316 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
317 return false;
318
319 return true;
320}
321
322/* If INSN is a single set between two objects that we want to split,
323 return the single set. SPEED_P says whether we are optimizing
324 INSN for speed or size.
325
326 INSN should have been passed to recog and extract_insn before this
327 is called. */
328
329static rtx
330simple_move (rtx_insn *insn, bool speed_p)
331{
332 rtx x;
333 rtx set;
334 machine_mode mode;
335
336 if (recog_data.n_operands != 2)
337 return NULL_RTX;
338
339 set = single_set (insn);
340 if (!set)
341 return NULL_RTX;
342
343 x = SET_DEST (set);
344 if (x != recog_data.operand[0] && x != recog_data.operand[1])
345 return NULL_RTX;
346 if (!simple_move_operand (x))
347 return NULL_RTX;
348
349 x = SET_SRC (set);
350 if (x != recog_data.operand[0] && x != recog_data.operand[1])
351 return NULL_RTX;
352 /* For the src we can handle ASM_OPERANDS, and it is beneficial for
353 things like x86 rdtsc which returns a DImode value. */
354 if (GET_CODE (x) != ASM_OPERANDS
355 && !simple_move_operand (x))
356 return NULL_RTX;
357
358 /* We try to decompose in integer modes, to avoid generating
359 inefficient code copying between integer and floating point
360 registers. That means that we can't decompose if this is a
361 non-integer mode for which there is no integer mode of the same
362 size. */
363 mode = GET_MODE (SET_DEST (set));
364 if (!SCALAR_INT_MODE_P (mode)
365 && !int_mode_for_size (GET_MODE_BITSIZE (mode), 0).exists ())
366 return NULL_RTX;
367
368 /* Reject PARTIAL_INT modes. They are used for processor specific
369 purposes and it's probably best not to tamper with them. */
370 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
371 return NULL_RTX;
372
373 if (!choices[speed_p].move_modes_to_split[(int) mode])
374 return NULL_RTX;
375
376 return set;
377}
378
379/* If SET is a copy from one multi-word pseudo-register to another,
380 record that in reg_copy_graph. Return whether it is such a
381 copy. */
382
383static bool
384find_pseudo_copy (rtx set)
385{
386 rtx dest = SET_DEST (set);
387 rtx src = SET_SRC (set);
388 unsigned int rd, rs;
389 bitmap b;
390
391 if (!REG_P (dest) || !REG_P (src))
392 return false;
393
394 rd = REGNO (dest);
395 rs = REGNO (src);
396 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
397 return false;
398
399 b = reg_copy_graph[rs];
400 if (b == NULL)
401 {
402 b = BITMAP_ALLOC (NULL);
403 reg_copy_graph[rs] = b;
404 }
405
406 bitmap_set_bit (b, rd);
407
408 return true;
409}
410
411/* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
412 where they are copied to another register, add the register to
413 which they are copied to DECOMPOSABLE_CONTEXT. Use
414 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
415 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
416
417static void
418propagate_pseudo_copies (void)
419{
420 auto_bitmap queue, propagate;
421
422 bitmap_copy (queue, decomposable_context);
423 do
424 {
425 bitmap_iterator iter;
426 unsigned int i;
427
428 bitmap_clear (propagate);
429
430 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
431 {
432 bitmap b = reg_copy_graph[i];
433 if (b)
434 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
435 }
436
437 bitmap_and_compl (queue, propagate, decomposable_context);
438 bitmap_ior_into (decomposable_context, propagate);
439 }
440 while (!bitmap_empty_p (queue));
441}
442
/* A pointer to one of these values is passed to
   find_decomposable_subregs.  It describes the insn being scanned and
   controls how a plain reference to a multi-word pseudo-register is
   recorded.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  Registers seen
     are marked as non-decomposable.  */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose.  Registers seen are marked as
     decomposable if their mode is tieable with word_mode.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  Registers seen are not marked either
     way.  */
  SIMPLE_MOVE
};
455
/* Scan *LOC.  If we find a SUBREG in *LOC which we could use to
   decompose a pseudo-register, set a bit in DECOMPOSABLE_CONTEXT.  If
   we find an unadorned register, *PCMI gives the type of move being
   scanned, and we set a bit in DECOMPOSABLE_CONTEXT or
   NON_DECOMPOSABLE_CONTEXT as appropriate.  A same-size mode-changing
   SUBREG between modes that are not tieable also sets a bit in
   SUBREG_CONTEXT.  */

static void
find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
    {
      rtx x = *iter;
      if (GET_CODE (x) == SUBREG)
        {
          rtx inner = SUBREG_REG (x);
          unsigned int regno, outer_size, inner_size, outer_words, inner_words;

          /* A SUBREG of something other than a REG is not handled
             here; the walk simply continues into its operand.  */
          if (!REG_P (inner))
            continue;

          regno = REGNO (inner);
          if (HARD_REGISTER_NUM_P (regno))
            {
              /* Hard registers are never decomposed; don't visit the
                 inner REG either.  */
              iter.skip_subrtxes ();
              continue;
            }

          if (!interesting_mode_p (GET_MODE (x), &outer_size, &outer_words)
              || !interesting_mode_p (GET_MODE (inner), &inner_size,
                                      &inner_words))
            continue;

          /* We only try to decompose single word subregs of multi-word
             registers.  When we find one, we skip the sub-rtxes to
             avoid iterating over the inner register.

             ??? This doesn't allow, e.g., DImode subregs of TImode values
             on 32-bit targets.  We would need to record the way the
             pseudo-register was used, and only decompose if all the uses
             were the same number and size of pieces.  Hopefully this
             doesn't happen much.  */

          if (outer_words == 1 && inner_words > 1)
            {
              bitmap_set_bit (decomposable_context, regno);
              iter.skip_subrtxes ();
              continue;
            }

          /* If this is a cast from one mode to another, where the modes
             have the same size, and they are not tieable, then mark this
             register as non-decomposable.  If we decompose it we are
             likely to mess up whatever the backend is trying to do.  */
          if (outer_words > 1
              && outer_size == inner_size
              && !targetm.modes_tieable_p (GET_MODE (x), GET_MODE (inner)))
            {
              bitmap_set_bit (non_decomposable_context, regno);
              bitmap_set_bit (subreg_context, regno);
              iter.skip_subrtxes ();
              continue;
            }

          /* Any other SUBREG falls through: the walk goes on to visit
             the inner REG as a plain register reference.  */
        }
      else if (REG_P (x))
        {
          unsigned int regno, size, words;

          /* We will see an outer SUBREG before we see the inner REG, so
             when we see a plain REG here it means a direct reference to
             the register.

             If this is not a simple copy from one location to another,
             then we can not decompose this register.  If this is a simple
             copy we want to decompose, and the mode is right,
             then we mark the register as decomposable.
             Otherwise we don't say anything about this register --
             it could be decomposed, but whether that would be
             profitable depends upon how it is used elsewhere.

             We only set bits in the bitmap for multi-word
             pseudo-registers, since those are the only ones we care about
             and it keeps the size of the bitmaps down.  */

          regno = REGNO (x);
          if (!HARD_REGISTER_NUM_P (regno)
              && interesting_mode_p (GET_MODE (x), &size, &words)
              && words > 1)
            {
              switch (*pcmi)
                {
                case NOT_SIMPLE_MOVE:
                  bitmap_set_bit (non_decomposable_context, regno);
                  break;
                case DECOMPOSABLE_SIMPLE_MOVE:
                  if (targetm.modes_tieable_p (GET_MODE (x), word_mode))
                    bitmap_set_bit (decomposable_context, regno);
                  break;
                case SIMPLE_MOVE:
                  break;
                default:
                  gcc_unreachable ();
                }
            }
        }
      else if (MEM_P (x))
        {
          enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;

          /* Any registers used in a MEM do not participate in a
             SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
             on the address here, and skip the sub-rtxes so that this
             walk does not visit the address a second time.  */
          find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
          iter.skip_subrtxes ();
        }
    }
}
573
574/* Decompose REGNO into word-sized components. We smash the REG node
575 in place. This ensures that (1) something goes wrong quickly if we
576 fail to make some replacement, and (2) the debug information inside
577 the symbol table is automatically kept up to date. */
578
579static void
580decompose_register (unsigned int regno)
581{
582 rtx reg;
583 unsigned int size, words, i;
584 rtvec v;
585
586 reg = regno_reg_rtx[regno];
587
588 regno_reg_rtx[regno] = NULL_RTX;
589
590 if (!interesting_mode_p (GET_MODE (reg), &size, &words))
591 gcc_unreachable ();
592
593 v = rtvec_alloc (words);
594 for (i = 0; i < words; ++i)
595 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
596
597 PUT_CODE (reg, CONCATN);
598 XVEC (reg, 0) = v;
599
600 if (dump_file)
601 {
602 fprintf (dump_file, "; Splitting reg %u ->", regno);
603 for (i = 0; i < words; ++i)
604 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
605 fputc ('\n', dump_file);
606 }
607}
608
609/* Get a SUBREG of a CONCATN. */
610
611static rtx
612simplify_subreg_concatn (machine_mode outermode, rtx op,
613 unsigned int byte)
614{
615 unsigned int outer_size, outer_words, inner_size, inner_words;
616 machine_mode innermode, partmode;
617 rtx part;
618 unsigned int final_offset;
619
620 innermode = GET_MODE (op);
621 if (!interesting_mode_p (outermode, &outer_size, &outer_words)
622 || !interesting_mode_p (innermode, &inner_size, &inner_words))
623 gcc_unreachable ();
624
625 gcc_assert (GET_CODE (op) == CONCATN);
626 gcc_assert (byte % outer_size == 0);
627
628 gcc_assert (byte < inner_size);
629 if (outer_size > inner_size)
630 return NULL_RTX;
631
632 inner_size /= XVECLEN (op, 0);
633 part = XVECEXP (op, 0, byte / inner_size);
634 partmode = GET_MODE (part);
635
636 final_offset = byte % inner_size;
637 if (final_offset + outer_size > inner_size)
638 return NULL_RTX;
639
640 /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
641 regular CONST_VECTORs. They have vector or integer modes, depending
642 on the capabilities of the target. Cope with them. */
643 if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
644 partmode = GET_MODE_INNER (innermode);
645 else if (partmode == VOIDmode)
646 partmode = mode_for_size (inner_size * BITS_PER_UNIT,
647 GET_MODE_CLASS (innermode), 0).require ();
648
649 return simplify_gen_subreg (outermode, part, partmode, final_offset);
650}
651
652/* Wrapper around simplify_gen_subreg which handles CONCATN. */
653
654static rtx
655simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
656 machine_mode innermode, unsigned int byte)
657{
658 rtx ret;
659
660 /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
661 If OP is a SUBREG of a CONCATN, then it must be a simple mode
662 change with the same size and offset 0, or it must extract a
663 part. We shouldn't see anything else here. */
664 if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
665 {
666 rtx op2;
667
668 if ((GET_MODE_SIZE (GET_MODE (op))
669 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
670 && SUBREG_BYTE (op) == 0)
671 return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
672 GET_MODE (SUBREG_REG (op)), byte);
673
674 op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
675 SUBREG_BYTE (op));
676 if (op2 == NULL_RTX)
677 {
678 /* We don't handle paradoxical subregs here. */
679 gcc_assert (!paradoxical_subreg_p (outermode, GET_MODE (op)));
680 gcc_assert (!paradoxical_subreg_p (op));
681 op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
682 byte + SUBREG_BYTE (op));
683 gcc_assert (op2 != NULL_RTX);
684 return op2;
685 }
686
687 op = op2;
688 gcc_assert (op != NULL_RTX);
689 gcc_assert (innermode == GET_MODE (op));
690 }
691
692 if (GET_CODE (op) == CONCATN)
693 return simplify_subreg_concatn (outermode, op, byte);
694
695 ret = simplify_gen_subreg (outermode, op, innermode, byte);
696
697 /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
698 resolve_simple_move will ask for the high part of the paradoxical
699 subreg, which does not have a value. Just return a zero. */
700 if (ret == NULL_RTX
701 && paradoxical_subreg_p (op))
702 return CONST0_RTX (outermode);
703
704 gcc_assert (ret != NULL_RTX);
705 return ret;
706}
707
/* Return whether we should resolve X into the registers into which it
   was decomposed.  That is the case exactly when X is a CONCATN, which
   is what decompose_register turns a decomposed REG into.  */

static bool
resolve_reg_p (rtx x)
{
  return GET_CODE (x) == CONCATN;
}
716
717/* Return whether X is a SUBREG of a register which we need to
718 resolve. */
719
720static bool
721resolve_subreg_p (rtx x)
722{
723 if (GET_CODE (x) != SUBREG)
724 return false;
725 return resolve_reg_p (SUBREG_REG (x));
726}
727
728/* Look for SUBREGs in *LOC which need to be decomposed. */
729
730static bool
731resolve_subreg_use (rtx *loc, rtx insn)
732{
733 subrtx_ptr_iterator::array_type array;
734 FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
735 {
736 rtx *loc = *iter;
737 rtx x = *loc;
738 if (resolve_subreg_p (x))
739 {
740 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
741 SUBREG_BYTE (x));
742
743 /* It is possible for a note to contain a reference which we can
744 decompose. In this case, return 1 to the caller to indicate
745 that the note must be removed. */
746 if (!x)
747 {
748 gcc_assert (!insn);
749 return true;
750 }
751
752 validate_change (insn, loc, x, 1);
753 iter.skip_subrtxes ();
754 }
755 else if (resolve_reg_p (x))
756 /* Return 1 to the caller to indicate that we found a direct
757 reference to a register which is being decomposed. This can
758 happen inside notes, multiword shift or zero-extend
759 instructions. */
760 return true;
761 }
762
763 return false;
764}
765
766/* Resolve any decomposed registers which appear in register notes on
767 INSN. */
768
769static void
770resolve_reg_notes (rtx_insn *insn)
771{
772 rtx *pnote, note;
773
774 note = find_reg_equal_equiv_note (insn);
775 if (note)
776 {
777 int old_count = num_validated_changes ();
778 if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
779 remove_note (insn, note);
780 else
781 if (old_count != num_validated_changes ())
782 df_notes_rescan (insn);
783 }
784
785 pnote = &REG_NOTES (insn);
786 while (*pnote != NULL_RTX)
787 {
788 bool del = false;
789
790 note = *pnote;
791 switch (REG_NOTE_KIND (note))
792 {
793 case REG_DEAD:
794 case REG_UNUSED:
795 if (resolve_reg_p (XEXP (note, 0)))
796 del = true;
797 break;
798
799 default:
800 break;
801 }
802
803 if (del)
804 *pnote = XEXP (note, 1);
805 else
806 pnote = &XEXP (note, 1);
807 }
808}
809
810/* Return whether X can be decomposed into subwords. */
811
812static bool
813can_decompose_p (rtx x)
814{
815 if (REG_P (x))
816 {
817 unsigned int regno = REGNO (x);
818
819 if (HARD_REGISTER_NUM_P (regno))
820 {
821 unsigned int byte, num_bytes, num_words;
822
823 if (!interesting_mode_p (GET_MODE (x), &num_bytes, &num_words))
824 return false;
825 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
826 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
827 return false;
828 return true;
829 }
830 else
831 return !bitmap_bit_p (subreg_context, regno);
832 }
833
834 return true;
835}
836
/* Decompose the registers used in a simple move SET within INSN.  If
   we don't change anything, return INSN, otherwise return the start
   of the sequence of moves.  In the latter case the new moves are
   emitted before INSN and INSN itself is removed.

   The function calls itself on helper moves that it emits while its
   own sequence is still open.  */

static rtx_insn *
resolve_simple_move (rtx set, rtx_insn *insn)
{
  rtx src, dest, real_dest;
  rtx_insn *insns;
  machine_mode orig_mode, dest_mode;
  unsigned int orig_size, words;
  bool pushing;

  src = SET_SRC (set);
  dest = SET_DEST (set);
  orig_mode = GET_MODE (dest);

  if (!interesting_mode_p (orig_mode, &orig_size, &words))
    gcc_unreachable ();
  gcc_assert (words > 1);

  /* Everything emitted from here on is collected in a sequence, which
     is either discarded or inserted before INSN at the end.  */
  start_sequence ();

  /* We have to handle copying from a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  Rather than assume that we
     can take a word_mode SUBREG of the destination, we copy to a new
     register and then copy that to the destination.  */

  real_dest = NULL_RTX;

  if (GET_CODE (src) == SUBREG
      && resolve_reg_p (SUBREG_REG (src))
      && (SUBREG_BYTE (src) != 0
          || (GET_MODE_SIZE (orig_mode)
              != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
    {
      real_dest = dest;
      dest = gen_reg_rtx (orig_mode);
      if (REG_P (real_dest))
        REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  /* Similarly if we are copying to a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  */

  if (GET_CODE (dest) == SUBREG
      && resolve_reg_p (SUBREG_REG (dest))
      && (SUBREG_BYTE (dest) != 0
          || (GET_MODE_SIZE (orig_mode)
              != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
    {
      rtx reg, smove;
      rtx_insn *minsn;

      /* Copy SRC into a fresh register first, resolve that copy
         recursively, and use the fresh register as the source.  */
      reg = gen_reg_rtx (orig_mode);
      minsn = emit_move_insn (reg, src);
      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);
      resolve_simple_move (smove, minsn);
      src = reg;
    }

  /* If we didn't have any big SUBREGS of decomposed registers, and
     neither side of the move is a register we are decomposing, then
     we don't have to do anything here.  */

  if (src == SET_SRC (set)
      && dest == SET_DEST (set)
      && !resolve_reg_p (src)
      && !resolve_subreg_p (src)
      && !resolve_reg_p (dest)
      && !resolve_subreg_p (dest))
    {
      end_sequence ();
      return insn;
    }

  /* It's possible for the code to use a subreg of a decomposed
     register while forming an address.  We need to handle that before
     passing the address to emit_move_insn.  We pass NULL_RTX as the
     insn parameter to resolve_subreg_use because we can not validate
     the insn yet.  */
  if (MEM_P (src) || MEM_P (dest))
    {
      int acg;

      if (MEM_P (src))
        resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
      if (MEM_P (dest))
        resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
      /* Apply the changes queued by resolve_subreg_use.  */
      acg = apply_change_group ();
      gcc_assert (acg);
    }

  /* If SRC is a register which we can't decompose, or has side
     effects, we need to move via a temporary register.  An
     ASM_OPERANDS source is treated the same way.  */

  if (!can_decompose_p (src)
      || side_effects_p (src)
      || GET_CODE (src) == ASM_OPERANDS)
    {
      rtx reg;

      reg = gen_reg_rtx (orig_mode);

      if (AUTO_INC_DEC)
        {
          rtx_insn *move = emit_move_insn (reg, src);
          if (MEM_P (src))
            {
              /* Carry a REG_INC note on INSN over to the new move
                 that now reads the MEM.  */
              rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
              if (note)
                add_reg_note (move, REG_INC, XEXP (note, 0));
            }
        }
      else
        emit_move_insn (reg, src);

      src = reg;
    }

  /* If DEST is a register which we can't decompose, or has side
     effects, we need to first move to a temporary register.  We
     handle the common case of pushing an operand directly.  We also
     go through a temporary register if it holds a floating point
     value.  This gives us better code on systems which can't move
     data easily between integer and floating point registers.  */

  dest_mode = orig_mode;
  pushing = push_operand (dest, dest_mode);
  if (!can_decompose_p (dest)
      || (side_effects_p (dest) && !pushing)
      || (!SCALAR_INT_MODE_P (dest_mode)
          && !resolve_reg_p (dest)
          && !resolve_subreg_p (dest)))
    {
      if (real_dest == NULL_RTX)
        real_dest = dest;
      /* The temporary is always given an integer mode.  */
      if (!SCALAR_INT_MODE_P (dest_mode))
        dest_mode = int_mode_for_mode (dest_mode).require ();
      dest = gen_reg_rtx (dest_mode);
      if (REG_P (real_dest))
        REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  if (pushing)
    {
      unsigned int i, j, jinc;

      gcc_assert (orig_size % UNITS_PER_WORD == 0);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);

      /* Choose the order in which the words are pushed.  JINC is
         unsigned, so the -1 below wraps around; adding it to J still
         steps J down by one in modular arithmetic.  */
      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
        {
          j = 0;
          jinc = 1;
        }
      else
        {
          j = words - 1;
          jinc = -1;
        }

      for (i = 0; i < words; ++i, j += jinc)
        {
          rtx temp;

          temp = copy_rtx (XEXP (dest, 0));
          temp = adjust_automodify_address_nv (dest, word_mode, temp,
                                               j * UNITS_PER_WORD);
          emit_move_insn (temp,
                          simplify_gen_subreg_concatn (word_mode, src,
                                                       orig_mode,
                                                       j * UNITS_PER_WORD));
        }
    }
  else
    {
      unsigned int i;

      /* Clobber a pseudo-register destination before setting it one
         word at a time.  */
      if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
        emit_clobber (dest);

      for (i = 0; i < words; ++i)
        emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
                                                     dest_mode,
                                                     i * UNITS_PER_WORD),
                        simplify_gen_subreg_concatn (word_mode, src,
                                                     orig_mode,
                                                     i * UNITS_PER_WORD));
    }

  /* If we went through a temporary destination, copy it to the real
     destination now and resolve that copy in turn.  */
  if (real_dest != NULL_RTX)
    {
      rtx mdest, smove;
      rtx_insn *minsn;

      if (dest_mode == orig_mode)
        mdest = dest;
      else
        mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
      minsn = emit_move_insn (real_dest, mdest);

      if (AUTO_INC_DEC && MEM_P (real_dest)
          && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
        {
          rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
          if (note)
            add_reg_note (minsn, REG_INC, XEXP (note, 0));
        }

      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);

      resolve_simple_move (smove, minsn);
    }

  insns = get_insns ();
  end_sequence ();

  copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);

  emit_insn_before (insns, insn);

  /* If we get here via self-recursion, then INSN is not yet in the insns
     chain and delete_insn will fail.  We only want to remove INSN from the
     current sequence.  See PR56738.  */
  if (in_sequence_p ())
    remove_insn (insn);
  else
    delete_insn (insn);

  return insns;
}
1072
1073/* Change a CLOBBER of a decomposed register into a CLOBBER of the
1074 component registers. Return whether we changed something. */
1075
1076static bool
1077resolve_clobber (rtx pat, rtx_insn *insn)
1078{
1079 rtx reg;
1080 machine_mode orig_mode;
1081 unsigned int orig_size, words, i;
1082 int ret;
1083
1084 reg = XEXP (pat, 0);
1085 if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
1086 return false;
1087
1088 orig_mode = GET_MODE (reg);
1089 if (!interesting_mode_p (orig_mode, &orig_size, &words))
1090 gcc_unreachable ();
1091
1092 ret = validate_change (NULL_RTX, &XEXP (pat, 0),
1093 simplify_gen_subreg_concatn (word_mode, reg,
1094 orig_mode, 0),
1095 0);
1096 df_insn_rescan (insn);
1097 gcc_assert (ret != 0);
1098
1099 for (i = words - 1; i > 0; --i)
1100 {
1101 rtx x;
1102
1103 x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
1104 i * UNITS_PER_WORD);
1105 x = gen_rtx_CLOBBER (VOIDmode, x);
1106 emit_insn_after (x, insn);
1107 }
1108
1109 resolve_reg_notes (insn);
1110
1111 return true;
1112}
1113
1114/* A USE of a decomposed register is no longer meaningful. Return
1115 whether we changed something. */
1116
1117static bool
1118resolve_use (rtx pat, rtx_insn *insn)
1119{
1120 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1121 {
1122 delete_insn (insn);
1123 return true;
1124 }
1125
1126 resolve_reg_notes (insn);
1127
1128 return false;
1129}
1130
1131/* A VAR_LOCATION can be simplified. */
1132
1133static void
1134resolve_debug (rtx_insn *insn)
1135{
1136 subrtx_ptr_iterator::array_type array;
1137 FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
1138 {
1139 rtx *loc = *iter;
1140 rtx x = *loc;
1141 if (resolve_subreg_p (x))
1142 {
1143 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
1144 SUBREG_BYTE (x));
1145
1146 if (x)
1147 *loc = x;
1148 else
1149 x = copy_rtx (*loc);
1150 }
1151 if (resolve_reg_p (x))
1152 *loc = copy_rtx (x);
1153 }
1154
1155 df_insn_rescan (insn);
1156
1157 resolve_reg_notes (insn);
1158}
1159
1160/* Check if INSN is a decomposable multiword-shift or zero-extend and
1161 set the decomposable_context bitmap accordingly. SPEED_P is true
1162 if we are optimizing INSN for speed rather than size. Return true
1163 if INSN is decomposable. */
1164
1165static bool
1166find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
1167{
1168 rtx set;
1169 rtx op;
1170 rtx op_operand;
1171
1172 set = single_set (insn);
1173 if (!set)
1174 return false;
1175
1176 op = SET_SRC (set);
1177 if (GET_CODE (op) != ASHIFT
1178 && GET_CODE (op) != LSHIFTRT
1179 && GET_CODE (op) != ASHIFTRT
1180 && GET_CODE (op) != ZERO_EXTEND)
1181 return false;
1182
1183 op_operand = XEXP (op, 0);
1184 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1185 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1186 || HARD_REGISTER_NUM_P (REGNO (op_operand))
1187 || GET_MODE (op) != twice_word_mode)
1188 return false;
1189
1190 if (GET_CODE (op) == ZERO_EXTEND)
1191 {
1192 if (GET_MODE (op_operand) != word_mode
1193 || !choices[speed_p].splitting_zext)
1194 return false;
1195 }
1196 else /* left or right shift */
1197 {
1198 bool *splitting = (GET_CODE (op) == ASHIFT
1199 ? choices[speed_p].splitting_ashift
1200 : GET_CODE (op) == ASHIFTRT
1201 ? choices[speed_p].splitting_ashiftrt
1202 : choices[speed_p].splitting_lshiftrt);
1203 if (!CONST_INT_P (XEXP (op, 1))
1204 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1205 2 * BITS_PER_WORD - 1)
1206 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1207 return false;
1208
1209 bitmap_set_bit (decomposable_context, REGNO (op_operand));
1210 }
1211
1212 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1213
1214 return true;
1215}
1216
1217/* Decompose a more than word wide shift (in INSN) of a multiword
1218 pseudo or a multiword zero-extend of a wordmode pseudo into a move
1219 and 'set to zero' insn. Return a pointer to the new insn when a
1220 replacement was done. */
1221
1222static rtx_insn *
1223resolve_shift_zext (rtx_insn *insn)
1224{
1225 rtx set;
1226 rtx op;
1227 rtx op_operand;
1228 rtx_insn *insns;
1229 rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
1230 int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
1231 scalar_int_mode inner_mode;
1232
1233 set = single_set (insn);
1234 if (!set)
1235 return NULL;
1236
1237 op = SET_SRC (set);
1238 if (GET_CODE (op) != ASHIFT
1239 && GET_CODE (op) != LSHIFTRT
1240 && GET_CODE (op) != ASHIFTRT
1241 && GET_CODE (op) != ZERO_EXTEND)
1242 return NULL;
1243
1244 op_operand = XEXP (op, 0);
1245 if (!is_a <scalar_int_mode> (GET_MODE (op_operand), &inner_mode))
1246 return NULL;
1247
1248 /* We can tear this operation apart only if the regs were already
1249 torn apart. */
1250 if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
1251 return NULL;
1252
1253 /* src_reg_num is the number of the word mode register which we
1254 are operating on. For a left shift and a zero_extend on little
1255 endian machines this is register 0. */
1256 src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
1257 ? 1 : 0;
1258
1259 if (WORDS_BIG_ENDIAN && GET_MODE_SIZE (inner_mode) > UNITS_PER_WORD)
1260 src_reg_num = 1 - src_reg_num;
1261
1262 if (GET_CODE (op) == ZERO_EXTEND)
1263 dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
1264 else
1265 dest_reg_num = 1 - src_reg_num;
1266
1267 offset1 = UNITS_PER_WORD * dest_reg_num;
1268 offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
1269 src_offset = UNITS_PER_WORD * src_reg_num;
1270
1271 start_sequence ();
1272
1273 dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1274 GET_MODE (SET_DEST (set)),
1275 offset1);
1276 dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1277 GET_MODE (SET_DEST (set)),
1278 offset2);
1279 src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
1280 GET_MODE (op_operand),
1281 src_offset);
1282 if (GET_CODE (op) == ASHIFTRT
1283 && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
1284 upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
1285 BITS_PER_WORD - 1, NULL_RTX, 0);
1286
1287 if (GET_CODE (op) != ZERO_EXTEND)
1288 {
1289 int shift_count = INTVAL (XEXP (op, 1));
1290 if (shift_count > BITS_PER_WORD)
1291 src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
1292 LSHIFT_EXPR : RSHIFT_EXPR,
1293 word_mode, src_reg,
1294 shift_count - BITS_PER_WORD,
1295 dest_reg, GET_CODE (op) != ASHIFTRT);
1296 }
1297
1298 if (dest_reg != src_reg)
1299 emit_move_insn (dest_reg, src_reg);
1300 if (GET_CODE (op) != ASHIFTRT)
1301 emit_move_insn (dest_upper, CONST0_RTX (word_mode));
1302 else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
1303 emit_move_insn (dest_upper, copy_rtx (src_reg));
1304 else
1305 emit_move_insn (dest_upper, upper_src);
1306 insns = get_insns ();
1307
1308 end_sequence ();
1309
1310 emit_insn_before (insns, insn);
1311
1312 if (dump_file)
1313 {
1314 rtx_insn *in;
1315 fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
1316 for (in = insns; in != insn; in = NEXT_INSN (in))
1317 fprintf (dump_file, "%d ", INSN_UID (in));
1318 fprintf (dump_file, "\n");
1319 }
1320
1321 delete_insn (insn);
1322 return insns;
1323}
1324
1325/* Print to dump_file a description of what we're doing with shift code CODE.
1326 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1327
1328static void
1329dump_shift_choices (enum rtx_code code, bool *splitting)
1330{
1331 int i;
1332 const char *sep;
1333
1334 fprintf (dump_file,
1335 " Splitting mode %s for %s lowering with shift amounts = ",
1336 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1337 sep = "";
1338 for (i = 0; i < BITS_PER_WORD; i++)
1339 if (splitting[i])
1340 {
1341 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1342 sep = ",";
1343 }
1344 fprintf (dump_file, "\n");
1345}
1346
1347/* Print to dump_file a description of what we're doing when optimizing
1348 for speed or size; SPEED_P says which. DESCRIPTION is a description
1349 of the SPEED_P choice. */
1350
1351static void
1352dump_choices (bool speed_p, const char *description)
1353{
1354 unsigned int size, factor, i;
1355
1356 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1357
1358 for (i = 0; i < MAX_MACHINE_MODE; i++)
1359 if (interesting_mode_p ((machine_mode) i, &size, &factor)
1360 && factor > 1)
1361 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1362 choices[speed_p].move_modes_to_split[i]
1363 ? "Splitting"
1364 : "Skipping",
1365 GET_MODE_NAME ((machine_mode) i));
1366
1367 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1368 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1369 GET_MODE_NAME (twice_word_mode));
1370
1371 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1372 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1373 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1374 fprintf (dump_file, "\n");
1375}
1376
1377/* Look for registers which are always accessed via word-sized SUBREGs
1378 or -if DECOMPOSE_COPIES is true- via copies. Decompose these
1379 registers into several word-sized pseudo-registers. */
1380
1381static void
1382decompose_multiword_subregs (bool decompose_copies)
1383{
1384 unsigned int max;
1385 basic_block bb;
1386 bool speed_p;
1387
1388 if (dump_file)
1389 {
1390 dump_choices (false, "size");
1391 dump_choices (true, "speed");
1392 }
1393
1394 /* Check if this target even has any modes to consider lowering. */
1395 if (!choices[false].something_to_do && !choices[true].something_to_do)
1396 {
1397 if (dump_file)
1398 fprintf (dump_file, "Nothing to do!\n");
1399 return;
1400 }
1401
1402 max = max_reg_num ();
1403
1404 /* First see if there are any multi-word pseudo-registers. If there
1405 aren't, there is nothing we can do. This should speed up this
1406 pass in the normal case, since it should be faster than scanning
1407 all the insns. */
1408 {
1409 unsigned int i;
1410 bool useful_modes_seen = false;
1411
1412 for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
1413 if (regno_reg_rtx[i] != NULL)
1414 {
1415 machine_mode mode = GET_MODE (regno_reg_rtx[i]);
1416 if (choices[false].move_modes_to_split[(int) mode]
1417 || choices[true].move_modes_to_split[(int) mode])
1418 {
1419 useful_modes_seen = true;
1420 break;
1421 }
1422 }
1423
1424 if (!useful_modes_seen)
1425 {
1426 if (dump_file)
1427 fprintf (dump_file, "Nothing to lower in this function.\n");
1428 return;
1429 }
1430 }
1431
1432 if (df)
1433 {
1434 df_set_flags (DF_DEFER_INSN_RESCAN);
1435 run_word_dce ();
1436 }
1437
1438 /* FIXME: It may be possible to change this code to look for each
1439 multi-word pseudo-register and to find each insn which sets or
1440 uses that register. That should be faster than scanning all the
1441 insns. */
1442
1443 decomposable_context = BITMAP_ALLOC (NULL);
1444 non_decomposable_context = BITMAP_ALLOC (NULL);
1445 subreg_context = BITMAP_ALLOC (NULL);
1446
1447 reg_copy_graph.create (max);
1448 reg_copy_graph.safe_grow_cleared (max);
1449 memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);
1450
1451 speed_p = optimize_function_for_speed_p (cfun);
1452 FOR_EACH_BB_FN (bb, cfun)
1453 {
1454 rtx_insn *insn;
1455
1456 FOR_BB_INSNS (bb, insn)
1457 {
1458 rtx set;
1459 enum classify_move_insn cmi;
1460 int i, n;
1461
1462 if (!INSN_P (insn)
1463 || GET_CODE (PATTERN (insn)) == CLOBBER
1464 || GET_CODE (PATTERN (insn)) == USE)
1465 continue;
1466
1467 recog_memoized (insn);
1468
1469 if (find_decomposable_shift_zext (insn, speed_p))
1470 continue;
1471
1472 extract_insn (insn);
1473
1474 set = simple_move (insn, speed_p);
1475
1476 if (!set)
1477 cmi = NOT_SIMPLE_MOVE;
1478 else
1479 {
1480 /* We mark pseudo-to-pseudo copies as decomposable during the
1481 second pass only. The first pass is so early that there is
1482 good chance such moves will be optimized away completely by
1483 subsequent optimizations anyway.
1484
1485 However, we call find_pseudo_copy even during the first pass
1486 so as to properly set up the reg_copy_graph. */
1487 if (find_pseudo_copy (set))
1488 cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
1489 else
1490 cmi = SIMPLE_MOVE;
1491 }
1492
1493 n = recog_data.n_operands;
1494 for (i = 0; i < n; ++i)
1495 {
1496 find_decomposable_subregs (&recog_data.operand[i], &cmi);
1497
1498 /* We handle ASM_OPERANDS as a special case to support
1499 things like x86 rdtsc which returns a DImode value.
1500 We can decompose the output, which will certainly be
1501 operand 0, but not the inputs. */
1502
1503 if (cmi == SIMPLE_MOVE
1504 && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
1505 {
1506 gcc_assert (i == 0);
1507 cmi = NOT_SIMPLE_MOVE;
1508 }
1509 }
1510 }
1511 }
1512
1513 bitmap_and_compl_into (decomposable_context, non_decomposable_context);
1514 if (!bitmap_empty_p (decomposable_context))
1515 {
1516 unsigned int i;
1517 sbitmap_iterator sbi;
1518 bitmap_iterator iter;
1519 unsigned int regno;
1520
1521 propagate_pseudo_copies ();
1522
1523 auto_sbitmap sub_blocks (last_basic_block_for_fn (cfun));
1524 bitmap_clear (sub_blocks);
1525
1526 EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
1527 decompose_register (regno);
1528
1529 FOR_EACH_BB_FN (bb, cfun)
1530 {
1531 rtx_insn *insn;
1532
1533 FOR_BB_INSNS (bb, insn)
1534 {
1535 rtx pat;
1536
1537 if (!INSN_P (insn))
1538 continue;
1539
1540 pat = PATTERN (insn);
1541 if (GET_CODE (pat) == CLOBBER)
1542 resolve_clobber (pat, insn);
1543 else if (GET_CODE (pat) == USE)
1544 resolve_use (pat, insn);
1545 else if (DEBUG_INSN_P (insn))
1546 resolve_debug (insn);
1547 else
1548 {
1549 rtx set;
1550 int i;
1551
1552 recog_memoized (insn);
1553 extract_insn (insn);
1554
1555 set = simple_move (insn, speed_p);
1556 if (set)
1557 {
1558 rtx_insn *orig_insn = insn;
1559 bool cfi = control_flow_insn_p (insn);
1560
1561 /* We can end up splitting loads to multi-word pseudos
1562 into separate loads to machine word size pseudos.
1563 When this happens, we first had one load that can
1564 throw, and after resolve_simple_move we'll have a
1565 bunch of loads (at least two). All those loads may
1566 trap if we can have non-call exceptions, so they
1567 all will end the current basic block. We split the
1568 block after the outer loop over all insns, but we
1569 make sure here that we will be able to split the
1570 basic block and still produce the correct control
1571 flow graph for it. */
1572 gcc_assert (!cfi
1573 || (cfun->can_throw_non_call_exceptions
1574 && can_throw_internal (insn)));
1575
1576 insn = resolve_simple_move (set, insn);
1577 if (insn != orig_insn)
1578 {
1579 recog_memoized (insn);
1580 extract_insn (insn);
1581
1582 if (cfi)
1583 bitmap_set_bit (sub_blocks, bb->index);
1584 }
1585 }
1586 else
1587 {
1588 rtx_insn *decomposed_shift;
1589
1590 decomposed_shift = resolve_shift_zext (insn);
1591 if (decomposed_shift != NULL_RTX)
1592 {
1593 insn = decomposed_shift;
1594 recog_memoized (insn);
1595 extract_insn (insn);
1596 }
1597 }
1598
1599 for (i = recog_data.n_operands - 1; i >= 0; --i)
1600 resolve_subreg_use (recog_data.operand_loc[i], insn);
1601
1602 resolve_reg_notes (insn);
1603
1604 if (num_validated_changes () > 0)
1605 {
1606 for (i = recog_data.n_dups - 1; i >= 0; --i)
1607 {
1608 rtx *pl = recog_data.dup_loc[i];
1609 int dup_num = recog_data.dup_num[i];
1610 rtx *px = recog_data.operand_loc[dup_num];
1611
1612 validate_unshare_change (insn, pl, *px, 1);
1613 }
1614
1615 i = apply_change_group ();
1616 gcc_assert (i);
1617 }
1618 }
1619 }
1620 }
1621
1622 /* If we had insns to split that caused control flow insns in the middle
1623 of a basic block, split those blocks now. Note that we only handle
1624 the case where splitting a load has caused multiple possibly trapping
1625 loads to appear. */
1626 EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
1627 {
1628 rtx_insn *insn, *end;
1629 edge fallthru;
1630
1631 bb = BASIC_BLOCK_FOR_FN (cfun, i);
1632 insn = BB_HEAD (bb);
1633 end = BB_END (bb);
1634
1635 while (insn != end)
1636 {
1637 if (control_flow_insn_p (insn))
1638 {
1639 /* Split the block after insn. There will be a fallthru
1640 edge, which is OK so we keep it. We have to create the
1641 exception edges ourselves. */
1642 fallthru = split_block (bb, insn);
1643 rtl_make_eh_edge (NULL, bb, BB_END (bb));
1644 bb = fallthru->dest;
1645 insn = BB_HEAD (bb);
1646 }
1647 else
1648 insn = NEXT_INSN (insn);
1649 }
1650 }
1651 }
1652
1653 {
1654 unsigned int i;
1655 bitmap b;
1656
1657 FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
1658 if (b)
1659 BITMAP_FREE (b);
1660 }
1661
1662 reg_copy_graph.release ();
1663
1664 BITMAP_FREE (decomposable_context);
1665 BITMAP_FREE (non_decomposable_context);
1666 BITMAP_FREE (subreg_context);
1667}
1668
1669/* Implement first lower subreg pass. */
1670
1671namespace {
1672
1673const pass_data pass_data_lower_subreg =
1674{
1675 RTL_PASS, /* type */
1676 "subreg1", /* name */
1677 OPTGROUP_NONE, /* optinfo_flags */
1678 TV_LOWER_SUBREG, /* tv_id */
1679 0, /* properties_required */
1680 0, /* properties_provided */
1681 0, /* properties_destroyed */
1682 0, /* todo_flags_start */
1683 0, /* todo_flags_finish */
1684};
1685
1686class pass_lower_subreg : public rtl_opt_pass
1687{
1688public:
1689 pass_lower_subreg (gcc::context *ctxt)
1690 : rtl_opt_pass (pass_data_lower_subreg, ctxt)
1691 {}
1692
1693 /* opt_pass methods: */
1694 virtual bool gate (function *) { return flag_split_wide_types != 0; }
1695 virtual unsigned int execute (function *)
1696 {
1697 decompose_multiword_subregs (false);
1698 return 0;
1699 }
1700
1701}; // class pass_lower_subreg
1702
1703} // anon namespace
1704
1705rtl_opt_pass *
1706make_pass_lower_subreg (gcc::context *ctxt)
1707{
1708 return new pass_lower_subreg (ctxt);
1709}
1710
1711/* Implement second lower subreg pass. */
1712
1713namespace {
1714
1715const pass_data pass_data_lower_subreg2 =
1716{
1717 RTL_PASS, /* type */
1718 "subreg2", /* name */
1719 OPTGROUP_NONE, /* optinfo_flags */
1720 TV_LOWER_SUBREG, /* tv_id */
1721 0, /* properties_required */
1722 0, /* properties_provided */
1723 0, /* properties_destroyed */
1724 0, /* todo_flags_start */
1725 TODO_df_finish, /* todo_flags_finish */
1726};
1727
1728class pass_lower_subreg2 : public rtl_opt_pass
1729{
1730public:
1731 pass_lower_subreg2 (gcc::context *ctxt)
1732 : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
1733 {}
1734
1735 /* opt_pass methods: */
1736 virtual bool gate (function *) { return flag_split_wide_types != 0; }
1737 virtual unsigned int execute (function *)
1738 {
1739 decompose_multiword_subregs (true);
1740 return 0;
1741 }
1742
1743}; // class pass_lower_subreg2
1744
1745} // anon namespace
1746
1747rtl_opt_pass *
1748make_pass_lower_subreg2 (gcc::context *ctxt)
1749{
1750 return new pass_lower_subreg2 (ctxt);
1751}
1752