1/* Subroutines used for code generation on the DEC Alpha.
2 Copyright (C) 1992-2017 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 3, or (at your option)
10any later version.
11
12GCC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "backend.h"
26#include "target.h"
27#include "rtl.h"
28#include "tree.h"
29#include "stringpool.h"
30#include "attribs.h"
31#include "memmodel.h"
32#include "gimple.h"
33#include "df.h"
34#include "predict.h"
35#include "tm_p.h"
36#include "ssa.h"
37#include "expmed.h"
38#include "optabs.h"
39#include "regs.h"
40#include "emit-rtl.h"
41#include "recog.h"
42#include "diagnostic-core.h"
43#include "alias.h"
44#include "fold-const.h"
45#include "stor-layout.h"
46#include "calls.h"
47#include "varasm.h"
48#include "output.h"
49#include "insn-attr.h"
50#include "explow.h"
51#include "expr.h"
52#include "reload.h"
53#include "except.h"
54#include "common/common-target.h"
55#include "debug.h"
56#include "langhooks.h"
57#include "cfgrtl.h"
58#include "tree-pass.h"
59#include "context.h"
60#include "gimple-iterator.h"
61#include "gimplify.h"
62#include "tree-stdarg.h"
63#include "tm-constrs.h"
64#include "libfuncs.h"
65#include "params.h"
66#include "builtins.h"
67#include "rtl-iter.h"
68
69/* This file should be included last. */
70#include "target-def.h"
71
72/* Specify which cpu to schedule for. */
73enum processor_type alpha_tune;
74
75/* Which cpu we're generating code for. */
76enum processor_type alpha_cpu;
77
78static const char * const alpha_cpu_name[] =
79{
80 "ev4", "ev5", "ev6"
81};
82
83/* Specify how accurate floating-point traps need to be. */
84
85enum alpha_trap_precision alpha_tp;
86
87/* Specify the floating-point rounding mode. */
88
89enum alpha_fp_rounding_mode alpha_fprm;
90
91/* Specify which things cause traps. */
92
93enum alpha_fp_trap_mode alpha_fptm;
94
95/* Nonzero if inside of a function, because the Alpha asm can't
96 handle .files inside of functions. */
97
98static int inside_function = FALSE;
99
100/* The number of cycles of latency we should assume on memory reads. */
101
102static int alpha_memory_latency = 3;
103
104/* Whether the function needs the GP. */
105
106static int alpha_function_needs_gp;
107
108/* The assembler name of the current function. */
109
110static const char *alpha_fnname;
111
112/* The next explicit relocation sequence number. */
113extern GTY(()) int alpha_next_sequence_number;
114int alpha_next_sequence_number = 1;
115
116/* The literal and gpdisp sequence numbers for this insn, as printed
117 by %# and %* respectively. */
118extern GTY(()) int alpha_this_literal_sequence_number;
119extern GTY(()) int alpha_this_gpdisp_sequence_number;
120int alpha_this_literal_sequence_number;
121int alpha_this_gpdisp_sequence_number;
122
123/* Costs of various operations on the different architectures. */
124
125struct alpha_rtx_cost_data
126{
127 unsigned char fp_add;
128 unsigned char fp_mult;
129 unsigned char fp_div_sf;
130 unsigned char fp_div_df;
131 unsigned char int_mult_si;
132 unsigned char int_mult_di;
133 unsigned char int_shift;
134 unsigned char int_cmov;
135 unsigned short int_div;
136};
137
138static struct alpha_rtx_cost_data const alpha_rtx_cost_data[PROCESSOR_MAX] =
139{
140 { /* EV4 */
141 COSTS_N_INSNS (6), /* fp_add */
142 COSTS_N_INSNS (6), /* fp_mult */
143 COSTS_N_INSNS (34), /* fp_div_sf */
144 COSTS_N_INSNS (63), /* fp_div_df */
145 COSTS_N_INSNS (23), /* int_mult_si */
146 COSTS_N_INSNS (23), /* int_mult_di */
147 COSTS_N_INSNS (2), /* int_shift */
148 COSTS_N_INSNS (2), /* int_cmov */
149 COSTS_N_INSNS (97), /* int_div */
150 },
151 { /* EV5 */
152 COSTS_N_INSNS (4), /* fp_add */
153 COSTS_N_INSNS (4), /* fp_mult */
154 COSTS_N_INSNS (15), /* fp_div_sf */
155 COSTS_N_INSNS (22), /* fp_div_df */
156 COSTS_N_INSNS (8), /* int_mult_si */
157 COSTS_N_INSNS (12), /* int_mult_di */
158 COSTS_N_INSNS (1) + 1, /* int_shift */
159 COSTS_N_INSNS (1), /* int_cmov */
160 COSTS_N_INSNS (83), /* int_div */
161 },
162 { /* EV6 */
163 COSTS_N_INSNS (4), /* fp_add */
164 COSTS_N_INSNS (4), /* fp_mult */
165 COSTS_N_INSNS (12), /* fp_div_sf */
166 COSTS_N_INSNS (15), /* fp_div_df */
167 COSTS_N_INSNS (7), /* int_mult_si */
168 COSTS_N_INSNS (7), /* int_mult_di */
169 COSTS_N_INSNS (1), /* int_shift */
170 COSTS_N_INSNS (2), /* int_cmov */
171 COSTS_N_INSNS (86), /* int_div */
172 },
173};
174
175/* Similar but tuned for code size instead of execution latency. The
176 extra +N is fractional cost tuning based on latency. It's used to
177 encourage use of cheaper insns like shift, but only if there's just
178 one of them. */
179
180static struct alpha_rtx_cost_data const alpha_rtx_cost_size =
181{
182 COSTS_N_INSNS (1), /* fp_add */
183 COSTS_N_INSNS (1), /* fp_mult */
184 COSTS_N_INSNS (1), /* fp_div_sf */
185 COSTS_N_INSNS (1) + 1, /* fp_div_df */
186 COSTS_N_INSNS (1) + 1, /* int_mult_si */
187 COSTS_N_INSNS (1) + 2, /* int_mult_di */
188 COSTS_N_INSNS (1), /* int_shift */
189 COSTS_N_INSNS (1), /* int_cmov */
190 COSTS_N_INSNS (6), /* int_div */
191};
192
193/* Get the number of args of a function in one of two ways. */
194#if TARGET_ABI_OPEN_VMS
195#define NUM_ARGS crtl->args.info.num_args
196#else
197#define NUM_ARGS crtl->args.info
198#endif
199
200#define REG_PV 27
201#define REG_RA 26
202
203/* Declarations of static functions. */
204static struct machine_function *alpha_init_machine_status (void);
205static rtx alpha_emit_xfloating_compare (enum rtx_code *, rtx, rtx);
206static void alpha_handle_trap_shadows (void);
207static void alpha_align_insns (void);
208static void alpha_override_options_after_change (void);
209
210#if TARGET_ABI_OPEN_VMS
211static void alpha_write_linkage (FILE *, const char *);
212static bool vms_valid_pointer_mode (scalar_int_mode);
213#else
214#define vms_patch_builtins() gcc_unreachable()
215#endif
216
217static unsigned int
218rest_of_handle_trap_shadows (void)
219{
220 alpha_handle_trap_shadows ();
221 return 0;
222}
223
224namespace {
225
226const pass_data pass_data_handle_trap_shadows =
227{
228 RTL_PASS,
229 "trap_shadows", /* name */
230 OPTGROUP_NONE, /* optinfo_flags */
231 TV_NONE, /* tv_id */
232 0, /* properties_required */
233 0, /* properties_provided */
234 0, /* properties_destroyed */
235 0, /* todo_flags_start */
236 TODO_df_finish, /* todo_flags_finish */
237};
238
239class pass_handle_trap_shadows : public rtl_opt_pass
240{
241public:
242 pass_handle_trap_shadows(gcc::context *ctxt)
243 : rtl_opt_pass(pass_data_handle_trap_shadows, ctxt)
244 {}
245
246 /* opt_pass methods: */
247 virtual bool gate (function *)
248 {
249 return alpha_tp != ALPHA_TP_PROG || flag_exceptions;
250 }
251
252 virtual unsigned int execute (function *)
253 {
254 return rest_of_handle_trap_shadows ();
255 }
256
257}; // class pass_handle_trap_shadows
258
259} // anon namespace
260
261rtl_opt_pass *
262make_pass_handle_trap_shadows (gcc::context *ctxt)
263{
264 return new pass_handle_trap_shadows (ctxt);
265}
266
267static unsigned int
268rest_of_align_insns (void)
269{
270 alpha_align_insns ();
271 return 0;
272}
273
274namespace {
275
276const pass_data pass_data_align_insns =
277{
278 RTL_PASS,
279 "align_insns", /* name */
280 OPTGROUP_NONE, /* optinfo_flags */
281 TV_NONE, /* tv_id */
282 0, /* properties_required */
283 0, /* properties_provided */
284 0, /* properties_destroyed */
285 0, /* todo_flags_start */
286 TODO_df_finish, /* todo_flags_finish */
287};
288
289class pass_align_insns : public rtl_opt_pass
290{
291public:
292 pass_align_insns(gcc::context *ctxt)
293 : rtl_opt_pass(pass_data_align_insns, ctxt)
294 {}
295
296 /* opt_pass methods: */
297 virtual bool gate (function *)
298 {
299 /* Due to the number of extra trapb insns, don't bother fixing up
300 alignment when trap precision is instruction. Moreover, we can
301 only do our job when sched2 is run. */
302 return ((alpha_tune == PROCESSOR_EV4
303 || alpha_tune == PROCESSOR_EV5)
304 && optimize && !optimize_size
305 && alpha_tp != ALPHA_TP_INSN
306 && flag_schedule_insns_after_reload);
307 }
308
309 virtual unsigned int execute (function *)
310 {
311 return rest_of_align_insns ();
312 }
313
314}; // class pass_align_insns
315
316} // anon namespace
317
318rtl_opt_pass *
319make_pass_align_insns (gcc::context *ctxt)
320{
321 return new pass_align_insns (ctxt);
322}
323
324#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
325/* Implement TARGET_MANGLE_TYPE. */
326
327static const char *
328alpha_mangle_type (const_tree type)
329{
330 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
331 && TARGET_LONG_DOUBLE_128)
332 return "g";
333
334 /* For all other types, use normal C++ mangling. */
335 return NULL;
336}
337#endif
338
339/* Parse target option strings. */
340
341static void
342alpha_option_override (void)
343{
344 static const struct cpu_table {
345 const char *const name;
346 const enum processor_type processor;
347 const int flags;
348 const unsigned short line_size; /* in bytes */
349 const unsigned short l1_size; /* in kb. */
350 const unsigned short l2_size; /* in kb. */
351 } cpu_table[] = {
352 /* EV4/LCA45 had 8k L1 caches; EV45 had 16k L1 caches.
353 EV4/EV45 had 128k to 16M 32-byte direct Bcache. LCA45
354 had 64k to 8M 8-byte direct Bcache. */
355 { "ev4", PROCESSOR_EV4, 0, 32, 8, 8*1024 },
356 { "21064", PROCESSOR_EV4, 0, 32, 8, 8*1024 },
357 { "ev45", PROCESSOR_EV4, 0, 32, 16, 16*1024 },
358
359 /* EV5 or EV56 had 8k 32 byte L1, 96k 32 or 64 byte L2,
360 and 1M to 16M 64 byte L3 (not modeled).
361 PCA56 had 16k 64-byte cache; PCA57 had 32k Icache.
362 PCA56 had 8k 64-byte cache; PCA57 had 16k Dcache. */
363 { "ev5", PROCESSOR_EV5, 0, 32, 8, 96 },
364 { "21164", PROCESSOR_EV5, 0, 32, 8, 96 },
365 { "ev56", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
366 { "21164a", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
367 { "pca56", PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
368 { "21164PC",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
369 { "21164pc",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
370
371 /* EV6 had 64k 64 byte L1, 1M to 16M Bcache. */
372 { "ev6", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
373 { "21264", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
374 { "ev67", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
375 64, 64, 16*1024 },
376 { "21264a", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
377 64, 64, 16*1024 }
378 };
379
380 int const ct_size = ARRAY_SIZE (cpu_table);
381 int line_size = 0, l1_size = 0, l2_size = 0;
382 int i;
383
384#ifdef SUBTARGET_OVERRIDE_OPTIONS
385 SUBTARGET_OVERRIDE_OPTIONS;
386#endif
387
388 /* Default to full IEEE compliance mode for Go language. */
389 if (strcmp (lang_hooks.name, "GNU Go") == 0
390 && !(target_flags_explicit & MASK_IEEE))
391 target_flags |= MASK_IEEE;
392
393 alpha_fprm = ALPHA_FPRM_NORM;
394 alpha_tp = ALPHA_TP_PROG;
395 alpha_fptm = ALPHA_FPTM_N;
396
397 if (TARGET_IEEE)
398 {
399 alpha_tp = ALPHA_TP_INSN;
400 alpha_fptm = ALPHA_FPTM_SU;
401 }
402 if (TARGET_IEEE_WITH_INEXACT)
403 {
404 alpha_tp = ALPHA_TP_INSN;
405 alpha_fptm = ALPHA_FPTM_SUI;
406 }
407
408 if (alpha_tp_string)
409 {
410 if (! strcmp (alpha_tp_string, "p"))
411 alpha_tp = ALPHA_TP_PROG;
412 else if (! strcmp (alpha_tp_string, "f"))
413 alpha_tp = ALPHA_TP_FUNC;
414 else if (! strcmp (alpha_tp_string, "i"))
415 alpha_tp = ALPHA_TP_INSN;
416 else
417 error ("bad value %qs for -mtrap-precision switch", alpha_tp_string);
418 }
419
420 if (alpha_fprm_string)
421 {
422 if (! strcmp (alpha_fprm_string, "n"))
423 alpha_fprm = ALPHA_FPRM_NORM;
424 else if (! strcmp (alpha_fprm_string, "m"))
425 alpha_fprm = ALPHA_FPRM_MINF;
426 else if (! strcmp (alpha_fprm_string, "c"))
427 alpha_fprm = ALPHA_FPRM_CHOP;
428 else if (! strcmp (alpha_fprm_string,"d"))
429 alpha_fprm = ALPHA_FPRM_DYN;
430 else
431 error ("bad value %qs for -mfp-rounding-mode switch",
432 alpha_fprm_string);
433 }
434
435 if (alpha_fptm_string)
436 {
437 if (strcmp (alpha_fptm_string, "n") == 0)
438 alpha_fptm = ALPHA_FPTM_N;
439 else if (strcmp (alpha_fptm_string, "u") == 0)
440 alpha_fptm = ALPHA_FPTM_U;
441 else if (strcmp (alpha_fptm_string, "su") == 0)
442 alpha_fptm = ALPHA_FPTM_SU;
443 else if (strcmp (alpha_fptm_string, "sui") == 0)
444 alpha_fptm = ALPHA_FPTM_SUI;
445 else
446 error ("bad value %qs for -mfp-trap-mode switch", alpha_fptm_string);
447 }
448
449 if (alpha_cpu_string)
450 {
451 for (i = 0; i < ct_size; i++)
452 if (! strcmp (alpha_cpu_string, cpu_table [i].name))
453 {
454 alpha_tune = alpha_cpu = cpu_table[i].processor;
455 line_size = cpu_table[i].line_size;
456 l1_size = cpu_table[i].l1_size;
457 l2_size = cpu_table[i].l2_size;
458 target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX);
459 target_flags |= cpu_table[i].flags;
460 break;
461 }
462 if (i == ct_size)
463 error ("bad value %qs for -mcpu switch", alpha_cpu_string);
464 }
465
466 if (alpha_tune_string)
467 {
468 for (i = 0; i < ct_size; i++)
469 if (! strcmp (alpha_tune_string, cpu_table [i].name))
470 {
471 alpha_tune = cpu_table[i].processor;
472 line_size = cpu_table[i].line_size;
473 l1_size = cpu_table[i].l1_size;
474 l2_size = cpu_table[i].l2_size;
475 break;
476 }
477 if (i == ct_size)
478 error ("bad value %qs for -mtune switch", alpha_tune_string);
479 }
480
481 if (line_size)
482 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, line_size,
483 global_options.x_param_values,
484 global_options_set.x_param_values);
485 if (l1_size)
486 maybe_set_param_value (PARAM_L1_CACHE_SIZE, l1_size,
487 global_options.x_param_values,
488 global_options_set.x_param_values);
489 if (l2_size)
490 maybe_set_param_value (PARAM_L2_CACHE_SIZE, l2_size,
491 global_options.x_param_values,
492 global_options_set.x_param_values);
493
494 /* Do some sanity checks on the above options. */
495
496 if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI)
497 && alpha_tp != ALPHA_TP_INSN && alpha_cpu != PROCESSOR_EV6)
498 {
499 warning (0, "fp software completion requires -mtrap-precision=i");
500 alpha_tp = ALPHA_TP_INSN;
501 }
502
503 if (alpha_cpu == PROCESSOR_EV6)
504 {
505 /* Except for EV6 pass 1 (not released), we always have precise
506 arithmetic traps. Which means we can do software completion
507 without minding trap shadows. */
508 alpha_tp = ALPHA_TP_PROG;
509 }
510
511 if (TARGET_FLOAT_VAX)
512 {
513 if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN)
514 {
515 warning (0, "rounding mode not supported for VAX floats");
516 alpha_fprm = ALPHA_FPRM_NORM;
517 }
518 if (alpha_fptm == ALPHA_FPTM_SUI)
519 {
520 warning (0, "trap mode not supported for VAX floats");
521 alpha_fptm = ALPHA_FPTM_SU;
522 }
523 if (target_flags_explicit & MASK_LONG_DOUBLE_128)
524 warning (0, "128-bit long double not supported for VAX floats");
525 target_flags &= ~MASK_LONG_DOUBLE_128;
526 }
527
528 {
529 char *end;
530 int lat;
531
532 if (!alpha_mlat_string)
533 alpha_mlat_string = "L1";
534
535 if (ISDIGIT ((unsigned char)alpha_mlat_string[0])
536 && (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
537 ;
538 else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
539 && ISDIGIT ((unsigned char)alpha_mlat_string[1])
540 && alpha_mlat_string[2] == '\0')
541 {
542 static int const cache_latency[][4] =
543 {
544 { 3, 30, -1 }, /* ev4 -- Bcache is a guess */
545 { 2, 12, 38 }, /* ev5 -- Bcache from PC164 LMbench numbers */
546 { 3, 12, 30 }, /* ev6 -- Bcache from DS20 LMbench. */
547 };
548
549 lat = alpha_mlat_string[1] - '0';
550 if (lat <= 0 || lat > 3 || cache_latency[alpha_tune][lat-1] == -1)
551 {
552 warning (0, "L%d cache latency unknown for %s",
553 lat, alpha_cpu_name[alpha_tune]);
554 lat = 3;
555 }
556 else
557 lat = cache_latency[alpha_tune][lat-1];
558 }
559 else if (! strcmp (alpha_mlat_string, "main"))
560 {
561 /* Most current memories have about 370ns latency. This is
562 a reasonable guess for a fast cpu. */
563 lat = 150;
564 }
565 else
566 {
567 warning (0, "bad value %qs for -mmemory-latency", alpha_mlat_string);
568 lat = 3;
569 }
570
571 alpha_memory_latency = lat;
572 }
573
574 /* Default the definition of "small data" to 8 bytes. */
575 if (!global_options_set.x_g_switch_value)
576 g_switch_value = 8;
577
578 /* Infer TARGET_SMALL_DATA from -fpic/-fPIC. */
579 if (flag_pic == 1)
580 target_flags |= MASK_SMALL_DATA;
581 else if (flag_pic == 2)
582 target_flags &= ~MASK_SMALL_DATA;
583
584 alpha_override_options_after_change ();
585
586 /* Register variables and functions with the garbage collector. */
587
588 /* Set up function hooks. */
589 init_machine_status = alpha_init_machine_status;
590
591 /* Tell the compiler when we're using VAX floating point. */
592 if (TARGET_FLOAT_VAX)
593 {
594 REAL_MODE_FORMAT (SFmode) = &vax_f_format;
595 REAL_MODE_FORMAT (DFmode) = &vax_g_format;
596 REAL_MODE_FORMAT (TFmode) = NULL;
597 }
598
599#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
600 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
601 target_flags |= MASK_LONG_DOUBLE_128;
602#endif
603
604}
605
606/* Implement targetm.override_options_after_change. */
607
608static void
609alpha_override_options_after_change (void)
610{
611 /* Align labels and loops for optimal branching. */
612 /* ??? Kludge these by not doing anything if we don't optimize. */
613 if (optimize > 0)
614 {
615 if (align_loops <= 0)
616 align_loops = 16;
617 if (align_jumps <= 0)
618 align_jumps = 16;
619 }
620 if (align_functions <= 0)
621 align_functions = 16;
622}
623
/* Return 1 if VALUE is a mask in which each byte is either 0x00 or 0xff.  */
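/* For instance, 0xffff0000000000ff is such a mask, while 0xffff00000000007f
   is not, because its low byte is neither 0x00 nor 0xff.  */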
625
626int
627zap_mask (HOST_WIDE_INT value)
628{
629 int i;
630
631 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
632 i++, value >>= 8)
633 if ((value & 0xff) != 0 && (value & 0xff) != 0xff)
634 return 0;
635
636 return 1;
637}
638
639/* Return true if OP is valid for a particular TLS relocation.
640 We are already guaranteed that OP is a CONST. */
641
642int
643tls_symbolic_operand_1 (rtx op, int size, int unspec)
644{
645 op = XEXP (op, 0);
646
647 if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec)
648 return 0;
649 op = XVECEXP (op, 0, 0);
650
651 if (GET_CODE (op) != SYMBOL_REF)
652 return 0;
653
654 switch (SYMBOL_REF_TLS_MODEL (op))
655 {
656 case TLS_MODEL_LOCAL_DYNAMIC:
657 return unspec == UNSPEC_DTPREL && size == alpha_tls_size;
658 case TLS_MODEL_INITIAL_EXEC:
659 return unspec == UNSPEC_TPREL && size == 64;
660 case TLS_MODEL_LOCAL_EXEC:
661 return unspec == UNSPEC_TPREL && size == alpha_tls_size;
662 default:
663 gcc_unreachable ();
664 }
665}
666
667/* Used by aligned_memory_operand and unaligned_memory_operand to
668 resolve what reload is going to do with OP if it's a register. */
669
670rtx
671resolve_reload_operand (rtx op)
672{
673 if (reload_in_progress)
674 {
675 rtx tmp = op;
676 if (SUBREG_P (tmp))
677 tmp = SUBREG_REG (tmp);
678 if (REG_P (tmp)
679 && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
680 {
681 op = reg_equiv_memory_loc (REGNO (tmp));
682 if (op == 0)
683 return 0;
684 }
685 }
686 return op;
687}
688
/* The set of scalar modes supported differs from the default
   check-what-c-supports version in that sometimes TFmode is available
   even when long double indicates only DFmode.  */
692
693static bool
694alpha_scalar_mode_supported_p (scalar_mode mode)
695{
696 switch (mode)
697 {
698 case E_QImode:
699 case E_HImode:
700 case E_SImode:
701 case E_DImode:
702 case E_TImode: /* via optabs.c */
703 return true;
704
705 case E_SFmode:
706 case E_DFmode:
707 return true;
708
709 case E_TFmode:
710 return TARGET_HAS_XFLOATING_LIBS;
711
712 default:
713 return false;
714 }
715}
716
717/* Alpha implements a couple of integer vector mode operations when
718 TARGET_MAX is enabled. We do not check TARGET_MAX here, however,
719 which allows the vectorizer to operate on e.g. move instructions,
720 or when expand_vector_operations can do something useful. */
721
722static bool
723alpha_vector_mode_supported_p (machine_mode mode)
724{
725 return mode == V8QImode || mode == V4HImode || mode == V2SImode;
726}
727
728/* Return 1 if this function can directly return via $26. */
729
730int
731direct_return (void)
732{
733 return (TARGET_ABI_OSF
734 && reload_completed
735 && alpha_sa_size () == 0
736 && get_frame_size () == 0
737 && crtl->outgoing_args_size == 0
738 && crtl->args.pretend_args_size == 0);
739}
740
741/* Return the TLS model to use for SYMBOL. */
742
743static enum tls_model
744tls_symbolic_operand_type (rtx symbol)
745{
746 enum tls_model model;
747
748 if (GET_CODE (symbol) != SYMBOL_REF)
749 return TLS_MODEL_NONE;
750 model = SYMBOL_REF_TLS_MODEL (symbol);
751
752 /* Local-exec with a 64-bit size is the same code as initial-exec. */
753 if (model == TLS_MODEL_LOCAL_EXEC && alpha_tls_size == 64)
754 model = TLS_MODEL_INITIAL_EXEC;
755
756 return model;
757}
758
759/* Return true if the function DECL will share the same GP as any
760 function in the current unit of translation. */
761
762static bool
763decl_has_samegp (const_tree decl)
764{
765 /* Functions that are not local can be overridden, and thus may
766 not share the same gp. */
767 if (!(*targetm.binds_local_p) (decl))
768 return false;
769
770 /* If -msmall-data is in effect, assume that there is only one GP
771 for the module, and so any local symbol has this property. We
772 need explicit relocations to be able to enforce this for symbols
773 not defined in this unit of translation, however. */
774 if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA)
775 return true;
776
777 /* Functions that are not external are defined in this UoT. */
778 /* ??? Irritatingly, static functions not yet emitted are still
779 marked "external". Apply this to non-static functions only. */
780 return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl);
781}
782
783/* Return true if EXP should be placed in the small data section. */
784
785static bool
786alpha_in_small_data_p (const_tree exp)
787{
788 /* We want to merge strings, so we never consider them small data. */
789 if (TREE_CODE (exp) == STRING_CST)
790 return false;
791
792 /* Functions are never in the small data area. Duh. */
793 if (TREE_CODE (exp) == FUNCTION_DECL)
794 return false;
795
796 /* COMMON symbols are never small data. */
797 if (TREE_CODE (exp) == VAR_DECL && DECL_COMMON (exp))
798 return false;
799
800 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
801 {
802 const char *section = DECL_SECTION_NAME (exp);
803 if (strcmp (section, ".sdata") == 0
804 || strcmp (section, ".sbss") == 0)
805 return true;
806 }
807 else
808 {
809 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
810
811 /* If this is an incomplete type with size 0, then we can't put it
812 in sdata because it might be too big when completed. */
813 if (size > 0 && size <= g_switch_value)
814 return true;
815 }
816
817 return false;
818}
819
820#if TARGET_ABI_OPEN_VMS
821static bool
822vms_valid_pointer_mode (scalar_int_mode mode)
823{
824 return (mode == SImode || mode == DImode);
825}
826
827static bool
828alpha_linkage_symbol_p (const char *symname)
829{
830 int symlen = strlen (symname);
831
832 if (symlen > 4)
833 return strcmp (&symname [symlen - 4], "..lk") == 0;
834
835 return false;
836}
837
838#define LINKAGE_SYMBOL_REF_P(X) \
839 ((GET_CODE (X) == SYMBOL_REF \
840 && alpha_linkage_symbol_p (XSTR (X, 0))) \
841 || (GET_CODE (X) == CONST \
842 && GET_CODE (XEXP (X, 0)) == PLUS \
843 && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \
844 && alpha_linkage_symbol_p (XSTR (XEXP (XEXP (X, 0), 0), 0))))
845#endif
846
847/* legitimate_address_p recognizes an RTL expression that is a valid
848 memory address for an instruction. The MODE argument is the
849 machine mode for the MEM expression that wants to use this address.
850
851 For Alpha, we have either a constant address or the sum of a
852 register and a constant address, or just a register. For DImode,
   any of those forms can be surrounded with an AND that clears the
   low-order three bits; this is an "unaligned" access.  */
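/* For illustration: an ldq_u-style DImode address such as
   (and:DI (plus:DI (reg) (const_int 8)) (const_int -8)) is accepted here;
   the code below simply strips the outer AND and validates the rest.  */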
855
856static bool
857alpha_legitimate_address_p (machine_mode mode, rtx x, bool strict)
858{
859 /* If this is an ldq_u type address, discard the outer AND. */
860 if (mode == DImode
861 && GET_CODE (x) == AND
862 && CONST_INT_P (XEXP (x, 1))
863 && INTVAL (XEXP (x, 1)) == -8)
864 x = XEXP (x, 0);
865
866 /* Discard non-paradoxical subregs. */
867 if (SUBREG_P (x)
868 && (GET_MODE_SIZE (GET_MODE (x))
869 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
870 x = SUBREG_REG (x);
871
872 /* Unadorned general registers are valid. */
873 if (REG_P (x)
874 && (strict
875 ? STRICT_REG_OK_FOR_BASE_P (x)
876 : NONSTRICT_REG_OK_FOR_BASE_P (x)))
877 return true;
878
879 /* Constant addresses (i.e. +/- 32k) are valid. */
880 if (CONSTANT_ADDRESS_P (x))
881 return true;
882
883#if TARGET_ABI_OPEN_VMS
884 if (LINKAGE_SYMBOL_REF_P (x))
885 return true;
886#endif
887
888 /* Register plus a small constant offset is valid. */
889 if (GET_CODE (x) == PLUS)
890 {
891 rtx ofs = XEXP (x, 1);
892 x = XEXP (x, 0);
893
894 /* Discard non-paradoxical subregs. */
895 if (SUBREG_P (x)
896 && (GET_MODE_SIZE (GET_MODE (x))
897 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
898 x = SUBREG_REG (x);
899
900 if (REG_P (x))
901 {
902 if (! strict
903 && NONSTRICT_REG_OK_FP_BASE_P (x)
904 && CONST_INT_P (ofs))
905 return true;
906 if ((strict
907 ? STRICT_REG_OK_FOR_BASE_P (x)
908 : NONSTRICT_REG_OK_FOR_BASE_P (x))
909 && CONSTANT_ADDRESS_P (ofs))
910 return true;
911 }
912 }
913
914 /* If we're managing explicit relocations, LO_SUM is valid, as are small
915 data symbols. Avoid explicit relocations of modes larger than word
   mode, since e.g. $LC0+8($1) can fold around the +/- 32k offset.  */
917 else if (TARGET_EXPLICIT_RELOCS
918 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
919 {
920 if (small_symbolic_operand (x, Pmode))
921 return true;
922
923 if (GET_CODE (x) == LO_SUM)
924 {
925 rtx ofs = XEXP (x, 1);
926 x = XEXP (x, 0);
927
928 /* Discard non-paradoxical subregs. */
929 if (SUBREG_P (x)
930 && (GET_MODE_SIZE (GET_MODE (x))
931 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
932 x = SUBREG_REG (x);
933
934 /* Must have a valid base register. */
935 if (! (REG_P (x)
936 && (strict
937 ? STRICT_REG_OK_FOR_BASE_P (x)
938 : NONSTRICT_REG_OK_FOR_BASE_P (x))))
939 return false;
940
941 /* The symbol must be local. */
942 if (local_symbolic_operand (ofs, Pmode)
943 || dtp32_symbolic_operand (ofs, Pmode)
944 || tp32_symbolic_operand (ofs, Pmode))
945 return true;
946 }
947 }
948
949 return false;
950}
951
952/* Build the SYMBOL_REF for __tls_get_addr. */
953
954static GTY(()) rtx tls_get_addr_libfunc;
955
956static rtx
957get_tls_get_addr (void)
958{
959 if (!tls_get_addr_libfunc)
960 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
961 return tls_get_addr_libfunc;
962}
963
964/* Try machine-dependent ways of modifying an illegitimate address
965 to be legitimate. If we find one, return the new, valid address. */
966
967static rtx
968alpha_legitimize_address_1 (rtx x, rtx scratch, machine_mode mode)
969{
970 HOST_WIDE_INT addend;
971
972 /* If the address is (plus reg const_int) and the CONST_INT is not a
973 valid offset, compute the high part of the constant and add it to
974 the register. Then our address is (plus temp low-part-const). */
975 if (GET_CODE (x) == PLUS
976 && REG_P (XEXP (x, 0))
977 && CONST_INT_P (XEXP (x, 1))
978 && ! CONSTANT_ADDRESS_P (XEXP (x, 1)))
979 {
980 addend = INTVAL (XEXP (x, 1));
981 x = XEXP (x, 0);
982 goto split_addend;
983 }
984
985 /* If the address is (const (plus FOO const_int)), find the low-order
986 part of the CONST_INT. Then load FOO plus any high-order part of the
987 CONST_INT into a register. Our address is (plus reg low-part-const).
988 This is done to reduce the number of GOT entries. */
989 if (can_create_pseudo_p ()
990 && GET_CODE (x) == CONST
991 && GET_CODE (XEXP (x, 0)) == PLUS
992 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
993 {
994 addend = INTVAL (XEXP (XEXP (x, 0), 1));
995 x = force_reg (Pmode, XEXP (XEXP (x, 0), 0));
996 goto split_addend;
997 }
998
999 /* If we have a (plus reg const), emit the load as in (2), then add
1000 the two registers, and finally generate (plus reg low-part-const) as
1001 our address. */
1002 if (can_create_pseudo_p ()
1003 && GET_CODE (x) == PLUS
1004 && REG_P (XEXP (x, 0))
1005 && GET_CODE (XEXP (x, 1)) == CONST
1006 && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS
1007 && CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1)))
1008 {
1009 addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1));
1010 x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0),
1011 XEXP (XEXP (XEXP (x, 1), 0), 0),
1012 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1013 goto split_addend;
1014 }
1015
1016 /* If this is a local symbol, split the address into HIGH/LO_SUM parts.
   Avoid modes larger than word mode, since e.g. $LC0+8($1) can fold
   around the +/- 32k offset.  */
1019 if (TARGET_EXPLICIT_RELOCS
1020 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
1021 && symbolic_operand (x, Pmode))
1022 {
1023 rtx r0, r16, eqv, tga, tp, dest, seq;
1024 rtx_insn *insn;
1025
1026 switch (tls_symbolic_operand_type (x))
1027 {
1028 case TLS_MODEL_NONE:
1029 break;
1030
1031 case TLS_MODEL_GLOBAL_DYNAMIC:
1032 {
1033 start_sequence ();
1034
1035 r0 = gen_rtx_REG (Pmode, 0);
1036 r16 = gen_rtx_REG (Pmode, 16);
1037 tga = get_tls_get_addr ();
1038 dest = gen_reg_rtx (Pmode);
1039 seq = GEN_INT (alpha_next_sequence_number++);
1040
1041 emit_insn (gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq));
1042 rtx val = gen_call_value_osf_tlsgd (r0, tga, seq);
1043 insn = emit_call_insn (val);
1044 RTL_CONST_CALL_P (insn) = 1;
1045 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
1046
1047 insn = get_insns ();
1048 end_sequence ();
1049
1050 emit_libcall_block (insn, dest, r0, x);
1051 return dest;
1052 }
1053
1054 case TLS_MODEL_LOCAL_DYNAMIC:
1055 {
1056 start_sequence ();
1057
1058 r0 = gen_rtx_REG (Pmode, 0);
1059 r16 = gen_rtx_REG (Pmode, 16);
1060 tga = get_tls_get_addr ();
1061 scratch = gen_reg_rtx (Pmode);
1062 seq = GEN_INT (alpha_next_sequence_number++);
1063
1064 emit_insn (gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq));
1065 rtx val = gen_call_value_osf_tlsldm (r0, tga, seq);
1066 insn = emit_call_insn (val);
1067 RTL_CONST_CALL_P (insn) = 1;
1068 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
1069
1070 insn = get_insns ();
1071 end_sequence ();
1072
1073 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1074 UNSPEC_TLSLDM_CALL);
1075 emit_libcall_block (insn, scratch, r0, eqv);
1076
1077 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL);
1078 eqv = gen_rtx_CONST (Pmode, eqv);
1079
1080 if (alpha_tls_size == 64)
1081 {
1082 dest = gen_reg_rtx (Pmode);
1083 emit_insn (gen_rtx_SET (dest, eqv));
1084 emit_insn (gen_adddi3 (dest, dest, scratch));
1085 return dest;
1086 }
1087 if (alpha_tls_size == 32)
1088 {
1089 rtx temp = gen_rtx_HIGH (Pmode, eqv);
1090 temp = gen_rtx_PLUS (Pmode, scratch, temp);
1091 scratch = gen_reg_rtx (Pmode);
1092 emit_insn (gen_rtx_SET (scratch, temp));
1093 }
1094 return gen_rtx_LO_SUM (Pmode, scratch, eqv);
1095 }
1096
1097 case TLS_MODEL_INITIAL_EXEC:
1098 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
1099 eqv = gen_rtx_CONST (Pmode, eqv);
1100 tp = gen_reg_rtx (Pmode);
1101 scratch = gen_reg_rtx (Pmode);
1102 dest = gen_reg_rtx (Pmode);
1103
1104 emit_insn (gen_get_thread_pointerdi (tp));
1105 emit_insn (gen_rtx_SET (scratch, eqv));
1106 emit_insn (gen_adddi3 (dest, tp, scratch));
1107 return dest;
1108
1109 case TLS_MODEL_LOCAL_EXEC:
1110 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
1111 eqv = gen_rtx_CONST (Pmode, eqv);
1112 tp = gen_reg_rtx (Pmode);
1113
1114 emit_insn (gen_get_thread_pointerdi (tp));
1115 if (alpha_tls_size == 32)
1116 {
1117 rtx temp = gen_rtx_HIGH (Pmode, eqv);
1118 temp = gen_rtx_PLUS (Pmode, tp, temp);
1119 tp = gen_reg_rtx (Pmode);
1120 emit_insn (gen_rtx_SET (tp, temp));
1121 }
1122 return gen_rtx_LO_SUM (Pmode, tp, eqv);
1123
1124 default:
1125 gcc_unreachable ();
1126 }
1127
1128 if (local_symbolic_operand (x, Pmode))
1129 {
1130 if (small_symbolic_operand (x, Pmode))
1131 return x;
1132 else
1133 {
1134 if (can_create_pseudo_p ())
1135 scratch = gen_reg_rtx (Pmode);
1136 emit_insn (gen_rtx_SET (scratch, gen_rtx_HIGH (Pmode, x)));
1137 return gen_rtx_LO_SUM (Pmode, scratch, x);
1138 }
1139 }
1140 }
1141
1142 return NULL;
1143
1144 split_addend:
1145 {
1146 HOST_WIDE_INT low, high;
1147
1148 low = ((addend & 0xffff) ^ 0x8000) - 0x8000;
1149 addend -= low;
1150 high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000;
1151 addend -= high;
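    /* For example, an addend of 0x9000 splits as LOW = -0x7000 and
       HIGH = 0x10000, so the address is rebuilt as ((x + 0x10000) + -0x7000),
       each part fitting the range of the corresponding instruction.  */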
1152
1153 if (addend)
1154 x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend),
1155 (!can_create_pseudo_p () ? scratch : NULL_RTX),
1156 1, OPTAB_LIB_WIDEN);
1157 if (high)
1158 x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high),
1159 (!can_create_pseudo_p () ? scratch : NULL_RTX),
1160 1, OPTAB_LIB_WIDEN);
1161
1162 return plus_constant (Pmode, x, low);
1163 }
1164}
1165
1166
1167/* Try machine-dependent ways of modifying an illegitimate address
1168 to be legitimate. Return X or the new, valid address. */
1169
1170static rtx
1171alpha_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
1172 machine_mode mode)
1173{
1174 rtx new_x = alpha_legitimize_address_1 (x, NULL_RTX, mode);
1175 return new_x ? new_x : x;
1176}
1177
1178/* Return true if ADDR has an effect that depends on the machine mode it
1179 is used for. On the Alpha this is true only for the unaligned modes.
1180 We can simplify the test since we know that the address must be valid. */
1181
1182static bool
1183alpha_mode_dependent_address_p (const_rtx addr,
1184 addr_space_t as ATTRIBUTE_UNUSED)
1185{
1186 return GET_CODE (addr) == AND;
1187}
1188
1189/* Primarily this is required for TLS symbols, but given that our move
1190 patterns *ought* to be able to handle any symbol at any time, we
1191 should never be spilling symbolic operands to the constant pool, ever. */
1192
1193static bool
1194alpha_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1195{
1196 enum rtx_code code = GET_CODE (x);
1197 return code == SYMBOL_REF || code == LABEL_REF || code == CONST;
1198}
1199
1200/* We do not allow indirect calls to be optimized into sibling calls, nor
1201 can we allow a call to a function with a different GP to be optimized
1202 into a sibcall. */
1203
1204static bool
1205alpha_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1206{
1207 /* Can't do indirect tail calls, since we don't know if the target
1208 uses the same GP. */
1209 if (!decl)
1210 return false;
1211
1212 /* Otherwise, we can make a tail call if the target function shares
1213 the same GP. */
1214 return decl_has_samegp (decl);
1215}
1216
1217bool
1218some_small_symbolic_operand_int (rtx x)
1219{
1220 subrtx_var_iterator::array_type array;
1221 FOR_EACH_SUBRTX_VAR (iter, array, x, ALL)
1222 {
1223 rtx x = *iter;
1224 /* Don't re-split. */
1225 if (GET_CODE (x) == LO_SUM)
1226 iter.skip_subrtxes ();
1227 else if (small_symbolic_operand (x, Pmode))
1228 return true;
1229 }
1230 return false;
1231}
1232
1233rtx
1234split_small_symbolic_operand (rtx x)
1235{
1236 x = copy_insn (x);
1237 subrtx_ptr_iterator::array_type array;
1238 FOR_EACH_SUBRTX_PTR (iter, array, &x, ALL)
1239 {
1240 rtx *ptr = *iter;
1241 rtx x = *ptr;
1242 /* Don't re-split. */
1243 if (GET_CODE (x) == LO_SUM)
1244 iter.skip_subrtxes ();
1245 else if (small_symbolic_operand (x, Pmode))
1246 {
1247 *ptr = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x);
1248 iter.skip_subrtxes ();
1249 }
1250 }
1251 return x;
1252}
1253
1254/* Indicate that INSN cannot be duplicated. This is true for any insn
1255 that we've marked with gpdisp relocs, since those have to stay in
1256 1-1 correspondence with one another.
1257
1258 Technically we could copy them if we could set up a mapping from one
1259 sequence number to another, across the set of insns to be duplicated.
1260 This seems overly complicated and error-prone since interblock motion
1261 from sched-ebb could move one of the pair of insns to a different block.
1262
1263 Also cannot allow jsr insns to be duplicated. If they throw exceptions,
1264 then they'll be in a different block from their ldgp. Which could lead
1265 the bb reorder code to think that it would be ok to copy just the block
1266 containing the call and branch to the block containing the ldgp. */
1267
1268static bool
1269alpha_cannot_copy_insn_p (rtx_insn *insn)
1270{
1271 if (!reload_completed || !TARGET_EXPLICIT_RELOCS)
1272 return false;
1273 if (recog_memoized (insn) >= 0)
1274 return get_attr_cannot_copy (insn);
1275 else
1276 return false;
1277}
1278
1279
1280/* Try a machine-dependent way of reloading an illegitimate address
1281 operand. If we find one, push the reload and return the new rtx. */
1282
1283rtx
1284alpha_legitimize_reload_address (rtx x,
1285 machine_mode mode ATTRIBUTE_UNUSED,
1286 int opnum, int type,
1287 int ind_levels ATTRIBUTE_UNUSED)
1288{
1289 /* We must recognize output that we have already generated ourselves. */
1290 if (GET_CODE (x) == PLUS
1291 && GET_CODE (XEXP (x, 0)) == PLUS
1292 && REG_P (XEXP (XEXP (x, 0), 0))
1293 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
1294 && CONST_INT_P (XEXP (x, 1)))
1295 {
1296 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1297 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1298 opnum, (enum reload_type) type);
1299 return x;
1300 }
1301
1302 /* We wish to handle large displacements off a base register by
1303 splitting the addend across an ldah and the mem insn. This
   cuts the number of extra insns needed from 3 to 1.  */
1305 if (GET_CODE (x) == PLUS
1306 && REG_P (XEXP (x, 0))
1307 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
1308 && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0)))
1309 && CONST_INT_P (XEXP (x, 1)))
1310 {
1311 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
1312 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
1313 HOST_WIDE_INT high
1314 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
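      /* For example, val = 0x12348000 splits as low = -0x8000 and
         high = 0x12350000; the ldah materializes the high part in a base reg
         while the mem keeps the -0x8000 displacement.  */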
1315
1316 /* Check for 32-bit overflow. */
1317 if (high + low != val)
1318 return NULL_RTX;
1319
1320 /* Reload the high part into a base reg; leave the low part
1321 in the mem directly. */
1322 x = gen_rtx_PLUS (GET_MODE (x),
1323 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
1324 GEN_INT (high)),
1325 GEN_INT (low));
1326
1327 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1328 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1329 opnum, (enum reload_type) type);
1330 return x;
1331 }
1332
1333 return NULL_RTX;
1334}
1335
1336/* Return the cost of moving between registers of various classes. Moving
1337 between FLOAT_REGS and anything else except float regs is expensive.
1338 In fact, we make it quite expensive because we really don't want to
1339 do these moves unless it is clearly worth it. Optimizations may
1340 reduce the impact of not being able to allocate a pseudo to a
1341 hard register. */
1342
1343static int
1344alpha_register_move_cost (machine_mode /*mode*/,
1345 reg_class_t from, reg_class_t to)
1346{
1347 if ((from == FLOAT_REGS) == (to == FLOAT_REGS))
1348 return 2;
1349
1350 if (TARGET_FIX)
1351 return (from == FLOAT_REGS) ? 6 : 8;
1352
1353 return 4 + 2 * alpha_memory_latency;
1354}
1355
1356/* Return the cost of moving data of MODE from a register to
1357 or from memory. On the Alpha, bump this up a bit. */
1358
1359static int
1360alpha_memory_move_cost (machine_mode /*mode*/, reg_class_t /*regclass*/,
1361 bool /*in*/)
1362{
1363 return 2 * alpha_memory_latency;
1364}
1365
1366/* Compute a (partial) cost for rtx X. Return true if the complete
1367 cost has been computed, and false if subexpressions should be
1368 scanned. In either case, *TOTAL contains the cost result. */
1369
1370static bool
1371alpha_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno, int *total,
1372 bool speed)
1373{
1374 int code = GET_CODE (x);
1375 bool float_mode_p = FLOAT_MODE_P (mode);
1376 const struct alpha_rtx_cost_data *cost_data;
1377
1378 if (!speed)
1379 cost_data = &alpha_rtx_cost_size;
1380 else
1381 cost_data = &alpha_rtx_cost_data[alpha_tune];
1382
1383 switch (code)
1384 {
1385 case CONST_INT:
1386 /* If this is an 8-bit constant, return zero since it can be used
1387 nearly anywhere with no cost. If it is a valid operand for an
1388 ADD or AND, likewise return 0 if we know it will be used in that
1389 context. Otherwise, return 2 since it might be used there later.
1390 All other constants take at least two insns. */
1391 if (INTVAL (x) >= 0 && INTVAL (x) < 256)
1392 {
1393 *total = 0;
1394 return true;
1395 }
1396 /* FALLTHRU */
1397
1398 case CONST_DOUBLE:
1399 case CONST_WIDE_INT:
1400 if (x == CONST0_RTX (mode))
1401 *total = 0;
1402 else if ((outer_code == PLUS && add_operand (x, VOIDmode))
1403 || (outer_code == AND && and_operand (x, VOIDmode)))
1404 *total = 0;
1405 else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode))
1406 *total = 2;
1407 else
1408 *total = COSTS_N_INSNS (2);
1409 return true;
1410
1411 case CONST:
1412 case SYMBOL_REF:
1413 case LABEL_REF:
1414 if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode))
1415 *total = COSTS_N_INSNS (outer_code != MEM);
1416 else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode))
1417 *total = COSTS_N_INSNS (1 + (outer_code != MEM));
1418 else if (tls_symbolic_operand_type (x))
1419 /* Estimate of cost for call_pal rduniq. */
1420 /* ??? How many insns do we emit here? More than one... */
1421 *total = COSTS_N_INSNS (15);
1422 else
1423 /* Otherwise we do a load from the GOT. */
1424 *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1425 return true;
1426
1427 case HIGH:
1428 /* This is effectively an add_operand. */
1429 *total = 2;
1430 return true;
1431
1432 case PLUS:
1433 case MINUS:
1434 if (float_mode_p)
1435 *total = cost_data->fp_add;
1436 else if (GET_CODE (XEXP (x, 0)) == MULT
1437 && const48_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
1438 {
1439 *total = (rtx_cost (XEXP (XEXP (x, 0), 0), mode,
1440 (enum rtx_code) outer_code, opno, speed)
1441 + rtx_cost (XEXP (x, 1), mode,
1442 (enum rtx_code) outer_code, opno, speed)
1443 + COSTS_N_INSNS (1));
1444 return true;
1445 }
1446 return false;
1447
1448 case MULT:
1449 if (float_mode_p)
1450 *total = cost_data->fp_mult;
1451 else if (mode == DImode)
1452 *total = cost_data->int_mult_di;
1453 else
1454 *total = cost_data->int_mult_si;
1455 return false;
1456
1457 case ASHIFT:
1458 if (CONST_INT_P (XEXP (x, 1))
1459 && INTVAL (XEXP (x, 1)) <= 3)
1460 {
1461 *total = COSTS_N_INSNS (1);
1462 return false;
1463 }
1464 /* FALLTHRU */
1465
1466 case ASHIFTRT:
1467 case LSHIFTRT:
1468 *total = cost_data->int_shift;
1469 return false;
1470
1471 case IF_THEN_ELSE:
1472 if (float_mode_p)
1473 *total = cost_data->fp_add;
1474 else
1475 *total = cost_data->int_cmov;
1476 return false;
1477
1478 case DIV:
1479 case UDIV:
1480 case MOD:
1481 case UMOD:
1482 if (!float_mode_p)
1483 *total = cost_data->int_div;
1484 else if (mode == SFmode)
1485 *total = cost_data->fp_div_sf;
1486 else
1487 *total = cost_data->fp_div_df;
1488 return false;
1489
1490 case MEM:
1491 *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1492 return true;
1493
1494 case NEG:
1495 if (! float_mode_p)
1496 {
1497 *total = COSTS_N_INSNS (1);
1498 return false;
1499 }
1500 /* FALLTHRU */
1501
1502 case ABS:
1503 if (! float_mode_p)
1504 {
1505 *total = COSTS_N_INSNS (1) + cost_data->int_cmov;
1506 return false;
1507 }
1508 /* FALLTHRU */
1509
1510 case FLOAT:
1511 case UNSIGNED_FLOAT:
1512 case FIX:
1513 case UNSIGNED_FIX:
1514 case FLOAT_TRUNCATE:
1515 *total = cost_data->fp_add;
1516 return false;
1517
1518 case FLOAT_EXTEND:
1519 if (MEM_P (XEXP (x, 0)))
1520 *total = 0;
1521 else
1522 *total = cost_data->fp_add;
1523 return false;
1524
1525 default:
1526 return false;
1527 }
1528}
1529
/* REF is an alignable memory location.  Place an aligned SImode
   reference into *PALIGNED_MEM and the number of bits to shift into
   *PBITNUM.  */
1534
1535void
1536get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum)
1537{
1538 rtx base;
1539 HOST_WIDE_INT disp, offset;
1540
1541 gcc_assert (MEM_P (ref));
1542
1543 if (reload_in_progress)
1544 {
1545 base = find_replacement (&XEXP (ref, 0));
1546 gcc_assert (memory_address_p (GET_MODE (ref), base));
1547 }
1548 else
1549 base = XEXP (ref, 0);
1550
1551 if (GET_CODE (base) == PLUS)
1552 disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1553 else
1554 disp = 0;
1555
1556 /* Find the byte offset within an aligned word. If the memory itself is
1557 claimed to be aligned, believe it. Otherwise, aligned_memory_operand
1558 will have examined the base register and determined it is aligned, and
1559 thus displacements from it are naturally alignable. */
1560 if (MEM_ALIGN (ref) >= 32)
1561 offset = 0;
1562 else
1563 offset = disp & 3;
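  /* For instance, a HImode reference at base+6 with unknown alignment gives
     offset 2, so we access the aligned SImode word at base+4 and return a
     bit offset of 16.  */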
1564
1565 /* The location should not cross aligned word boundary. */
1566 gcc_assert (offset + GET_MODE_SIZE (GET_MODE (ref))
1567 <= GET_MODE_SIZE (SImode));
1568
1569 /* Access the entire aligned word. */
1570 *paligned_mem = widen_memory_access (ref, SImode, -offset);
1571
1572 /* Convert the byte offset within the word to a bit offset. */
1573 offset *= BITS_PER_UNIT;
1574 *pbitnum = GEN_INT (offset);
1575}
1576
/* Similar, but just get the address.  Handle the two reload cases.  */
1579
1580rtx
1581get_unaligned_address (rtx ref)
1582{
1583 rtx base;
1584 HOST_WIDE_INT offset = 0;
1585
1586 gcc_assert (MEM_P (ref));
1587
1588 if (reload_in_progress)
1589 {
1590 base = find_replacement (&XEXP (ref, 0));
1591 gcc_assert (memory_address_p (GET_MODE (ref), base));
1592 }
1593 else
1594 base = XEXP (ref, 0);
1595
1596 if (GET_CODE (base) == PLUS)
1597 offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1598
1599 return plus_constant (Pmode, base, offset);
1600}
1601
1602/* Compute a value X, such that X & 7 == (ADDR + OFS) & 7.
1603 X is always returned in a register. */
1604
1605rtx
1606get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs)
1607{
1608 if (GET_CODE (addr) == PLUS)
1609 {
1610 ofs += INTVAL (XEXP (addr, 1));
1611 addr = XEXP (addr, 0);
1612 }
1613
1614 return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7),
1615 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1616}
1617
1618/* On the Alpha, all (non-symbolic) constants except zero go into
1619 a floating-point register via memory. Note that we cannot
1620 return anything that is not a subset of RCLASS, and that some
1621 symbolic constants cannot be dropped to memory. */
1622
1623enum reg_class
1624alpha_preferred_reload_class(rtx x, enum reg_class rclass)
1625{
1626 /* Zero is present in any register class. */
1627 if (x == CONST0_RTX (GET_MODE (x)))
1628 return rclass;
1629
1630 /* These sorts of constants we can easily drop to memory. */
1631 if (CONST_SCALAR_INT_P (x)
1632 || CONST_DOUBLE_P (x)
1633 || GET_CODE (x) == CONST_VECTOR)
1634 {
1635 if (rclass == FLOAT_REGS)
1636 return NO_REGS;
1637 if (rclass == ALL_REGS)
1638 return GENERAL_REGS;
1639 return rclass;
1640 }
1641
1642 /* All other kinds of constants should not (and in the case of HIGH
1643 cannot) be dropped to memory -- instead we use a GENERAL_REGS
1644 secondary reload. */
1645 if (CONSTANT_P (x))
1646 return (rclass == ALL_REGS ? GENERAL_REGS : rclass);
1647
1648 return rclass;
1649}
1650
1651/* Inform reload about cases where moving X with a mode MODE to a register in
1652 RCLASS requires an extra scratch or immediate register. Return the class
1653 needed for the immediate register. */
1654
1655static reg_class_t
1656alpha_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
1657 machine_mode mode, secondary_reload_info *sri)
1658{
1659 enum reg_class rclass = (enum reg_class) rclass_i;
1660
1661 /* Loading and storing HImode or QImode values to and from memory
1662 usually requires a scratch register. */
1663 if (!TARGET_BWX && (mode == QImode || mode == HImode || mode == CQImode))
1664 {
1665 if (any_memory_operand (x, mode))
1666 {
1667 if (in_p)
1668 {
1669 if (!aligned_memory_operand (x, mode))
1670 sri->icode = direct_optab_handler (reload_in_optab, mode);
1671 }
1672 else
1673 sri->icode = direct_optab_handler (reload_out_optab, mode);
1674 return NO_REGS;
1675 }
1676 }
1677
1678 /* We also cannot do integral arithmetic into FP regs, as might result
1679 from register elimination into a DImode fp register. */
1680 if (rclass == FLOAT_REGS)
1681 {
1682 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
1683 return GENERAL_REGS;
1684 if (in_p && INTEGRAL_MODE_P (mode)
1685 && !MEM_P (x) && !REG_P (x) && !CONST_INT_P (x))
1686 return GENERAL_REGS;
1687 }
1688
1689 return NO_REGS;
1690}
1691
1692/* Implement TARGET_SECONDARY_MEMORY_NEEDED.
1693
1694 If we are copying between general and FP registers, we need a memory
1695 location unless the FIX extension is available. */
1696
1697static bool
1698alpha_secondary_memory_needed (machine_mode, reg_class_t class1,
1699 reg_class_t class2)
1700{
1701 return (!TARGET_FIX
1702 && ((class1 == FLOAT_REGS && class2 != FLOAT_REGS)
1703 || (class2 == FLOAT_REGS && class1 != FLOAT_REGS)));
1704}
1705
1706/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. If MODE is
1707 floating-point, use it. Otherwise, widen to a word like the default.
1708 This is needed because we always store integers in FP registers in
1709 quadword format. This whole area is very tricky! */
1710
1711static machine_mode
1712alpha_secondary_memory_needed_mode (machine_mode mode)
1713{
1714 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1715 return mode;
1716 if (GET_MODE_SIZE (mode) >= 4)
1717 return mode;
1718 return mode_for_size (BITS_PER_WORD, GET_MODE_CLASS (mode), 0).require ();
1719}
1720
1721/* Given SEQ, which is an INSN list, look for any MEMs in either
1722 a SET_DEST or a SET_SRC and copy the in-struct, unchanging, and
1723 volatile flags from REF into each of the MEMs found. If REF is not
1724 a MEM, don't do anything. */
1725
1726void
1727alpha_set_memflags (rtx seq, rtx ref)
1728{
1729 rtx_insn *insn;
1730
1731 if (!MEM_P (ref))
1732 return;
1733
1734 /* This is only called from alpha.md, after having had something
1735 generated from one of the insn patterns. So if everything is
1736 zero, the pattern is already up-to-date. */
1737 if (!MEM_VOLATILE_P (ref)
1738 && !MEM_NOTRAP_P (ref)
1739 && !MEM_READONLY_P (ref))
1740 return;
1741
1742 subrtx_var_iterator::array_type array;
1743 for (insn = as_a <rtx_insn *> (seq); insn; insn = NEXT_INSN (insn))
1744 if (INSN_P (insn))
1745 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), NONCONST)
1746 {
1747 rtx x = *iter;
1748 if (MEM_P (x))
1749 {
1750 MEM_VOLATILE_P (x) = MEM_VOLATILE_P (ref);
1751 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (ref);
1752 MEM_READONLY_P (x) = MEM_READONLY_P (ref);
1753 /* Sadly, we cannot use alias sets because the extra
1754 aliasing produced by the AND interferes. Given that
1755 two-byte quantities are the only thing we would be
1756 able to differentiate anyway, there does not seem to
1757 be any point in convoluting the early out of the
1758 alias check. */
1759 iter.skip_subrtxes ();
1760 }
1761 }
1762 else
1763 gcc_unreachable ();
1764}
1765
1766static rtx alpha_emit_set_const (rtx, machine_mode, HOST_WIDE_INT,
1767 int, bool);
1768
1769/* Internal routine for alpha_emit_set_const to check for N or below insns.
1770 If NO_OUTPUT is true, then we only check to see if N insns are possible,
1771 and return pc_rtx if successful. */
1772
1773static rtx
1774alpha_emit_set_const_1 (rtx target, machine_mode mode,
1775 HOST_WIDE_INT c, int n, bool no_output)
1776{
1777 HOST_WIDE_INT new_const;
1778 int i, bits;
1779 /* Use a pseudo if highly optimizing and still generating RTL. */
1780 rtx subtarget
1781 = (flag_expensive_optimizations && can_create_pseudo_p () ? 0 : target);
1782 rtx temp, insn;
1783
1784 /* If this is a sign-extended 32-bit constant, we can do this in at most
1785 three insns, so do it if we have enough insns left. */
1786
1787 if (c >> 31 == -1 || c >> 31 == 0)
1788 {
1789 HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000;
1790 HOST_WIDE_INT tmp1 = c - low;
1791 HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000;
1792 HOST_WIDE_INT extra = 0;
1793
1794 /* If HIGH will be interpreted as negative but the constant is
1795 positive, we must adjust it to do two ldha insns. */
1796
1797 if ((high & 0x8000) != 0 && c >= 0)
1798 {
1799 extra = 0x4000;
1800 tmp1 -= 0x40000000;
1801 high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000);
1802 }
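      /* Worked example: for c = 0x7fff8000, LOW is -0x8000 and the raw HIGH
         would be -0x8000, so EXTRA becomes 0x4000 and HIGH is recomputed as
         0x4000; then c = (0x4000 << 16) + (0x4000 << 16) + LOW.  */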
1803
1804 if (c == low || (low == 0 && extra == 0))
1805 {
1806 /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode)
1807 but that meant that we can't handle INT_MIN on 32-bit machines
1808 (like NT/Alpha), because we recurse indefinitely through
1809 emit_move_insn to gen_movdi. So instead, since we know exactly
1810 what we want, create it explicitly. */
1811
1812 if (no_output)
1813 return pc_rtx;
1814 if (target == NULL)
1815 target = gen_reg_rtx (mode);
1816 emit_insn (gen_rtx_SET (target, GEN_INT (c)));
1817 return target;
1818 }
1819 else if (n >= 2 + (extra != 0))
1820 {
1821 if (no_output)
1822 return pc_rtx;
1823 if (!can_create_pseudo_p ())
1824 {
1825 emit_insn (gen_rtx_SET (target, GEN_INT (high << 16)));
1826 temp = target;
1827 }
1828 else
1829 temp = copy_to_suggested_reg (GEN_INT (high << 16),
1830 subtarget, mode);
1831
1832 /* As of 2002-02-23, addsi3 is only available when not optimizing.
1833 This means that if we go through expand_binop, we'll try to
1834 generate extensions, etc, which will require new pseudos, which
1835 will fail during some split phases. The SImode add patterns
1836 still exist, but are not named. So build the insns by hand. */
1837
1838 if (extra != 0)
1839 {
1840 if (! subtarget)
1841 subtarget = gen_reg_rtx (mode);
1842 insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16));
1843 insn = gen_rtx_SET (subtarget, insn);
1844 emit_insn (insn);
1845 temp = subtarget;
1846 }
1847
1848 if (target == NULL)
1849 target = gen_reg_rtx (mode);
1850 insn = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1851 insn = gen_rtx_SET (target, insn);
1852 emit_insn (insn);
1853 return target;
1854 }
1855 }
1856
1857 /* If we couldn't do it that way, try some other methods. But if we have
1858 no instructions left, don't bother. Likewise, if this is SImode and
1859 we can't make pseudos, we can't do anything since the expand_binop
1860 and expand_unop calls will widen and try to make pseudos. */
1861
1862 if (n == 1 || (mode == SImode && !can_create_pseudo_p ()))
1863 return 0;
1864
1865 /* Next, see if we can load a related constant and then shift and possibly
1866 negate it to get the constant we want. Try this once each increasing
1867 numbers of insns. */
1868
1869 for (i = 1; i < n; i++)
1870 {
      /* First, see if, minus some low bits, we have an easy load of the
         high bits.  */
1873
1874 new_const = ((c & 0xffff) ^ 0x8000) - 0x8000;
1875 if (new_const != 0)
1876 {
1877 temp = alpha_emit_set_const (subtarget, mode, c - new_const, i, no_output);
1878 if (temp)
1879 {
1880 if (no_output)
1881 return temp;
1882 return expand_binop (mode, add_optab, temp, GEN_INT (new_const),
1883 target, 0, OPTAB_WIDEN);
1884 }
1885 }
1886
1887 /* Next try complementing. */
1888 temp = alpha_emit_set_const (subtarget, mode, ~c, i, no_output);
1889 if (temp)
1890 {
1891 if (no_output)
1892 return temp;
1893 return expand_unop (mode, one_cmpl_optab, temp, target, 0);
1894 }
1895
1896 /* Next try to form a constant and do a left shift. We can do this
1897 if some low-order bits are zero; the exact_log2 call below tells
1898 us that information. The bits we are shifting out could be any
1899 value, but here we'll just try the 0- and sign-extended forms of
1900 the constant. To try to increase the chance of having the same
1901 constant in more than one insn, start at the highest number of
1902 bits to shift, but try all possibilities in case a ZAPNOT will
1903 be useful. */
1904
1905 bits = exact_log2 (c & -c);
1906 if (bits > 0)
1907 for (; bits > 0; bits--)
1908 {
1909 new_const = c >> bits;
1910 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1911 if (!temp && c < 0)
1912 {
1913 new_const = (unsigned HOST_WIDE_INT)c >> bits;
1914 temp = alpha_emit_set_const (subtarget, mode, new_const,
1915 i, no_output);
1916 }
1917 if (temp)
1918 {
1919 if (no_output)
1920 return temp;
1921 return expand_binop (mode, ashl_optab, temp, GEN_INT (bits),
1922 target, 0, OPTAB_WIDEN);
1923 }
1924 }
1925
1926 /* Now try high-order zero bits. Here we try the shifted-in bits as
1927 all zero and all ones. Be careful to avoid shifting outside the
1928 mode and to avoid shifting outside the host wide int size. */
1929
1930 bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1931 - floor_log2 (c) - 1);
1932 if (bits > 0)
1933 for (; bits > 0; bits--)
1934 {
1935 new_const = c << bits;
1936 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1937 if (!temp)
1938 {
1939 new_const = (c << bits) | ((HOST_WIDE_INT_1U << bits) - 1);
1940 temp = alpha_emit_set_const (subtarget, mode, new_const,
1941 i, no_output);
1942 }
1943 if (temp)
1944 {
1945 if (no_output)
1946 return temp;
1947 return expand_binop (mode, lshr_optab, temp, GEN_INT (bits),
1948 target, 1, OPTAB_WIDEN);
1949 }
1950 }
1951
1952 /* Now try high-order 1 bits. We get that with a sign-extension.
1953 But one bit isn't enough here. Be careful to avoid shifting outside
1954 the mode and to avoid shifting outside the host wide int size. */
1955
1956 bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1957 - floor_log2 (~ c) - 2);
1958 if (bits > 0)
1959 for (; bits > 0; bits--)
1960 {
1961 new_const = c << bits;
1962 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1963 if (!temp)
1964 {
1965 new_const = (c << bits) | ((HOST_WIDE_INT_1U << bits) - 1);
1966 temp = alpha_emit_set_const (subtarget, mode, new_const,
1967 i, no_output);
1968 }
1969 if (temp)
1970 {
1971 if (no_output)
1972 return temp;
1973 return expand_binop (mode, ashr_optab, temp, GEN_INT (bits),
1974 target, 0, OPTAB_WIDEN);
1975 }
1976 }
1977 }
1978
1979  /* Finally, see if we can load a value into the target that is the same as
1980     the constant except that all bytes that are 0 are changed to be 0xff.
1981     If we can, then we can do a ZAPNOT to obtain the desired constant.  */
1982
1983 new_const = c;
1984 for (i = 0; i < 64; i += 8)
1985 if ((new_const & ((HOST_WIDE_INT) 0xff << i)) == 0)
1986 new_const |= (HOST_WIDE_INT) 0xff << i;
1987
1988 /* We are only called for SImode and DImode. If this is SImode, ensure that
1989 we are sign extended to a full word. */
1990
1991 if (mode == SImode)
1992 new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000;
1993
1994 if (new_const != c)
1995 {
1996 temp = alpha_emit_set_const (subtarget, mode, new_const, n - 1, no_output);
1997 if (temp)
1998 {
1999 if (no_output)
2000 return temp;
2001 return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new_const),
2002 target, 0, OPTAB_WIDEN);
2003 }
2004 }
2005
2006 return 0;
2007}
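
/* Illustrative sketch (not part of the port): the byte-fill fallback above
   relies on the identity  filled & (c | ~filled) == c,  where FILLED is C
   with every all-zero byte replaced by 0xff.  If FILLED can be built in
   fewer insns, one extra AND (a zapnot-style mask) recovers C.  A minimal
   standalone check, using plain uint64_t rather than HOST_WIDE_INT:

     #include <assert.h>
     #include <stdint.h>

     static uint64_t
     byte_fill (uint64_t c)
     {
       for (int i = 0; i < 64; i += 8)
         if ((c & (UINT64_C (0xff) << i)) == 0)
           c |= UINT64_C (0xff) << i;        // fill each all-zero byte
       return c;
     }

     int
     main (void)
     {
       uint64_t c = UINT64_C (0x12340000abcd0000);
       uint64_t filled = byte_fill (c);
       assert ((filled & (c | ~filled)) == c);
       return 0;
     }
*/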
2008
2009/* Try to output insns to set TARGET equal to the constant C if it can be
2010   done in N insns or fewer.  Do all computations in MODE.  Returns the place
2011   where the output has been placed if it can be done and the insns have been
2012   emitted.  If it would take more than N insns, zero is returned and no
2013   insns are emitted.  */
2014
2015static rtx
2016alpha_emit_set_const (rtx target, machine_mode mode,
2017 HOST_WIDE_INT c, int n, bool no_output)
2018{
2019 machine_mode orig_mode = mode;
2020 rtx orig_target = target;
2021 rtx result = 0;
2022 int i;
2023
2024  /* If we can't make any pseudos, TARGET is an SImode hard register, and we
2025     can't load this constant in one insn, then do the work in DImode.  */
2026 if (!can_create_pseudo_p () && mode == SImode
2027 && REG_P (target) && REGNO (target) < FIRST_PSEUDO_REGISTER)
2028 {
2029 result = alpha_emit_set_const_1 (target, mode, c, 1, no_output);
2030 if (result)
2031 return result;
2032
2033 target = no_output ? NULL : gen_lowpart (DImode, target);
2034 mode = DImode;
2035 }
2036 else if (mode == V8QImode || mode == V4HImode || mode == V2SImode)
2037 {
2038 target = no_output ? NULL : gen_lowpart (DImode, target);
2039 mode = DImode;
2040 }
2041
2042 /* Try 1 insn, then 2, then up to N. */
2043 for (i = 1; i <= n; i++)
2044 {
2045 result = alpha_emit_set_const_1 (target, mode, c, i, no_output);
2046 if (result)
2047 {
2048 rtx_insn *insn;
2049 rtx set;
2050
2051 if (no_output)
2052 return result;
2053
2054 insn = get_last_insn ();
2055 set = single_set (insn);
2056 if (! CONSTANT_P (SET_SRC (set)))
2057 set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c));
2058 break;
2059 }
2060 }
2061
2062 /* Allow for the case where we changed the mode of TARGET. */
2063 if (result)
2064 {
2065 if (result == target)
2066 result = orig_target;
2067 else if (mode != orig_mode)
2068 result = gen_lowpart (orig_mode, result);
2069 }
2070
2071 return result;
2072}
2073
2074/* Having failed to find a 3-insn sequence in alpha_emit_set_const,
2075   fall back to a straightforward decomposition.  We do this to avoid
2076   the exponential run times encountered when looking for longer sequences
2077   with alpha_emit_set_const.  */
2078
2079static rtx
2080alpha_emit_set_long_const (rtx target, HOST_WIDE_INT c1)
2081{
2082 HOST_WIDE_INT d1, d2, d3, d4;
2083
2084 /* Decompose the entire word */
2085
2086 d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2087 c1 -= d1;
2088 d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2089 c1 = (c1 - d2) >> 32;
2090 d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2091 c1 -= d3;
2092 d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2093 gcc_assert (c1 == d4);
2094
2095 /* Construct the high word */
2096 if (d4)
2097 {
2098 emit_move_insn (target, GEN_INT (d4));
2099 if (d3)
2100 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3)));
2101 }
2102 else
2103 emit_move_insn (target, GEN_INT (d3));
2104
2105 /* Shift it into place */
2106 emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32)));
2107
2108 /* Add in the low bits. */
2109 if (d2)
2110 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2)));
2111 if (d1)
2112 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1)));
2113
2114 return target;
2115}
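
/* Illustrative sketch (not part of the port): the decomposition above
   splits C into four sign-extended pieces and rebuilds it as
   ((d4 + d3) << 32) + d2 + d1, matching the ldah/lda, sll 32, ldah/lda
   sequence emitted.  A standalone check in plain C99 (the value is kept
   positive so the reassembling shift stays within defined behavior):

     #include <assert.h>
     #include <stdint.h>

     static int64_t sext16 (int64_t x) { return ((x & 0xffff) ^ 0x8000) - 0x8000; }
     static int64_t sext32 (int64_t x)
     { return ((x & 0xffffffff) ^ 0x80000000) - 0x80000000; }

     int
     main (void)
     {
       int64_t c = INT64_C (0x7edcba9876543210), c1 = c;
       int64_t d1 = sext16 (c1);  c1 -= d1;
       int64_t d2 = sext32 (c1);  c1 = (c1 - d2) >> 32;
       int64_t d3 = sext16 (c1);  c1 -= d3;
       int64_t d4 = sext32 (c1);
       assert (c1 == d4);
       assert (((d4 + d3) << 32) + d2 + d1 == c);
       return 0;
     }
*/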
2116
2117/* Given an integral CONST_INT or CONST_VECTOR, return the low 64 bits. */
2118
2119static HOST_WIDE_INT
2120alpha_extract_integer (rtx x)
2121{
2122 if (GET_CODE (x) == CONST_VECTOR)
2123 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
2124
2125 gcc_assert (CONST_INT_P (x));
2126
2127 return INTVAL (x);
2128}
2129
2130/* Implement TARGET_LEGITIMATE_CONSTANT_P. This is all constants for which
2131 we are willing to load the value into a register via a move pattern.
2132 Normally this is all symbolic constants, integral constants that
2133 take three or fewer instructions, and floating-point zero. */
2134
2135bool
2136alpha_legitimate_constant_p (machine_mode mode, rtx x)
2137{
2138 HOST_WIDE_INT i0;
2139
2140 switch (GET_CODE (x))
2141 {
2142 case LABEL_REF:
2143 case HIGH:
2144 return true;
2145
2146 case CONST:
2147 if (GET_CODE (XEXP (x, 0)) == PLUS
2148 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2149 x = XEXP (XEXP (x, 0), 0);
2150 else
2151 return true;
2152
2153 if (GET_CODE (x) != SYMBOL_REF)
2154 return true;
2155 /* FALLTHRU */
2156
2157 case SYMBOL_REF:
2158 /* TLS symbols are never valid. */
2159 return SYMBOL_REF_TLS_MODEL (x) == 0;
2160
2161 case CONST_WIDE_INT:
2162 if (TARGET_BUILD_CONSTANTS)
2163 return true;
2164 if (x == CONST0_RTX (mode))
2165 return true;
2166 mode = DImode;
2167 gcc_assert (CONST_WIDE_INT_NUNITS (x) == 2);
2168 i0 = CONST_WIDE_INT_ELT (x, 1);
2169 if (alpha_emit_set_const_1 (NULL_RTX, mode, i0, 3, true) == NULL)
2170 return false;
2171 i0 = CONST_WIDE_INT_ELT (x, 0);
2172 goto do_integer;
2173
2174 case CONST_DOUBLE:
2175 if (x == CONST0_RTX (mode))
2176 return true;
2177 return false;
2178
2179 case CONST_VECTOR:
2180 if (x == CONST0_RTX (mode))
2181 return true;
2182 if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
2183 return false;
2184 if (GET_MODE_SIZE (mode) != 8)
2185 return false;
2186 /* FALLTHRU */
2187
2188 case CONST_INT:
2189 if (TARGET_BUILD_CONSTANTS)
2190 return true;
2191 i0 = alpha_extract_integer (x);
2192 do_integer:
2193 return alpha_emit_set_const_1 (NULL_RTX, mode, i0, 3, true) != NULL;
2194
2195 default:
2196 return false;
2197 }
2198}
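
/* For example, with the default -mno-build-constants, 0x12345678 is a
   legitimate constant (an ldah/lda pair, two insns), while a value such
   as 0x1234567890abcdef typically needs the full alpha_emit_set_long_const
   decomposition, is rejected here, and ends up in the constant pool
   instead.  The particular values are illustrative only.  */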
2199
2200/* Operand 1 is known to be a constant, and should require more than one
2201 instruction to load. Emit that multi-part load. */
2202
2203bool
2204alpha_split_const_mov (machine_mode mode, rtx *operands)
2205{
2206 HOST_WIDE_INT i0;
2207 rtx temp = NULL_RTX;
2208
2209 i0 = alpha_extract_integer (operands[1]);
2210
2211 temp = alpha_emit_set_const (operands[0], mode, i0, 3, false);
2212
2213 if (!temp && TARGET_BUILD_CONSTANTS)
2214 temp = alpha_emit_set_long_const (operands[0], i0);
2215
2216 if (temp)
2217 {
2218 if (!rtx_equal_p (operands[0], temp))
2219 emit_move_insn (operands[0], temp);
2220 return true;
2221 }
2222
2223 return false;
2224}
2225
2226/* Expand a move instruction; return true if all work is done.
2227 We don't handle non-bwx subword loads here. */
2228
2229bool
2230alpha_expand_mov (machine_mode mode, rtx *operands)
2231{
2232 rtx tmp;
2233
2234 /* If the output is not a register, the input must be. */
2235 if (MEM_P (operands[0])
2236 && ! reg_or_0_operand (operands[1], mode))
2237 operands[1] = force_reg (mode, operands[1]);
2238
2239 /* Allow legitimize_address to perform some simplifications. */
2240 if (mode == Pmode && symbolic_operand (operands[1], mode))
2241 {
2242 tmp = alpha_legitimize_address_1 (operands[1], operands[0], mode);
2243 if (tmp)
2244 {
2245 if (tmp == operands[0])
2246 return true;
2247 operands[1] = tmp;
2248 return false;
2249 }
2250 }
2251
2252 /* Early out for non-constants and valid constants. */
2253 if (! CONSTANT_P (operands[1]) || input_operand (operands[1], mode))
2254 return false;
2255
2256 /* Split large integers. */
2257 if (CONST_INT_P (operands[1])
2258 || GET_CODE (operands[1]) == CONST_VECTOR)
2259 {
2260 if (alpha_split_const_mov (mode, operands))
2261 return true;
2262 }
2263
2264 /* Otherwise we've nothing left but to drop the thing to memory. */
2265 tmp = force_const_mem (mode, operands[1]);
2266
2267 if (tmp == NULL_RTX)
2268 return false;
2269
2270 if (reload_in_progress)
2271 {
2272 emit_move_insn (operands[0], XEXP (tmp, 0));
2273 operands[1] = replace_equiv_address (tmp, operands[0]);
2274 }
2275 else
2276 operands[1] = validize_mem (tmp);
2277 return false;
2278}
2279
2280/* Expand a non-bwx QImode or HImode move instruction;
2281 return true if all work is done. */
2282
2283bool
2284alpha_expand_mov_nobwx (machine_mode mode, rtx *operands)
2285{
2286 rtx seq;
2287
2288 /* If the output is not a register, the input must be. */
2289 if (MEM_P (operands[0]))
2290 operands[1] = force_reg (mode, operands[1]);
2291
2292 /* Handle four memory cases, unaligned and aligned for either the input
2293 or the output. The only case where we can be called during reload is
2294 for aligned loads; all other cases require temporaries. */
2295
2296 if (any_memory_operand (operands[1], mode))
2297 {
2298 if (aligned_memory_operand (operands[1], mode))
2299 {
2300 if (reload_in_progress)
2301 {
2302 if (mode == QImode)
2303 seq = gen_reload_inqi_aligned (operands[0], operands[1]);
2304 else
2305 seq = gen_reload_inhi_aligned (operands[0], operands[1]);
2306 emit_insn (seq);
2307 }
2308 else
2309 {
2310 rtx aligned_mem, bitnum;
2311 rtx scratch = gen_reg_rtx (SImode);
2312 rtx subtarget;
2313 bool copyout;
2314
2315 get_aligned_mem (operands[1], &aligned_mem, &bitnum);
2316
2317 subtarget = operands[0];
2318 if (REG_P (subtarget))
2319 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2320 else
2321 subtarget = gen_reg_rtx (DImode), copyout = true;
2322
2323 if (mode == QImode)
2324 seq = gen_aligned_loadqi (subtarget, aligned_mem,
2325 bitnum, scratch);
2326 else
2327 seq = gen_aligned_loadhi (subtarget, aligned_mem,
2328 bitnum, scratch);
2329 emit_insn (seq);
2330
2331 if (copyout)
2332 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2333 }
2334 }
2335 else
2336 {
2337 /* Don't pass these as parameters since that makes the generated
2338 code depend on parameter evaluation order which will cause
2339 bootstrap failures. */
2340
2341 rtx temp1, temp2, subtarget, ua;
2342 bool copyout;
2343
2344 temp1 = gen_reg_rtx (DImode);
2345 temp2 = gen_reg_rtx (DImode);
2346
2347 subtarget = operands[0];
2348 if (REG_P (subtarget))
2349 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2350 else
2351 subtarget = gen_reg_rtx (DImode), copyout = true;
2352
2353 ua = get_unaligned_address (operands[1]);
2354 if (mode == QImode)
2355 seq = gen_unaligned_loadqi (subtarget, ua, temp1, temp2);
2356 else
2357 seq = gen_unaligned_loadhi (subtarget, ua, temp1, temp2);
2358
2359 alpha_set_memflags (seq, operands[1]);
2360 emit_insn (seq);
2361
2362 if (copyout)
2363 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2364 }
2365 return true;
2366 }
2367
2368 if (any_memory_operand (operands[0], mode))
2369 {
2370 if (aligned_memory_operand (operands[0], mode))
2371 {
2372 rtx aligned_mem, bitnum;
2373 rtx temp1 = gen_reg_rtx (SImode);
2374 rtx temp2 = gen_reg_rtx (SImode);
2375
2376 get_aligned_mem (operands[0], &aligned_mem, &bitnum);
2377
2378 emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum,
2379 temp1, temp2));
2380 }
2381 else
2382 {
2383 rtx temp1 = gen_reg_rtx (DImode);
2384 rtx temp2 = gen_reg_rtx (DImode);
2385 rtx temp3 = gen_reg_rtx (DImode);
2386 rtx ua = get_unaligned_address (operands[0]);
2387
2388 if (mode == QImode)
2389 seq = gen_unaligned_storeqi (ua, operands[1], temp1, temp2, temp3);
2390 else
2391 seq = gen_unaligned_storehi (ua, operands[1], temp1, temp2, temp3);
2392
2393 alpha_set_memflags (seq, operands[0]);
2394 emit_insn (seq);
2395 }
2396 return true;
2397 }
2398
2399 return false;
2400}
2401
2402/* Implement the movmisalign patterns.  One of the operands is a memory
2403   that is not naturally aligned.  Emit instructions to move it.  */
2404
2405void
2406alpha_expand_movmisalign (machine_mode mode, rtx *operands)
2407{
2408  /* Honor misaligned loads, since that is what we promised to do.  */
2409 if (MEM_P (operands[1]))
2410 {
2411 rtx tmp;
2412
2413 if (register_operand (operands[0], mode))
2414 tmp = operands[0];
2415 else
2416 tmp = gen_reg_rtx (mode);
2417
2418 alpha_expand_unaligned_load (tmp, operands[1], 8, 0, 0);
2419 if (tmp != operands[0])
2420 emit_move_insn (operands[0], tmp);
2421 }
2422 else if (MEM_P (operands[0]))
2423 {
2424 if (!reg_or_0_operand (operands[1], mode))
2425 operands[1] = force_reg (mode, operands[1]);
2426 alpha_expand_unaligned_store (operands[0], operands[1], 8, 0);
2427 }
2428 else
2429 gcc_unreachable ();
2430}
2431
2432/* Generate an unsigned DImode to FP conversion. This is the same code
2433 optabs would emit if we didn't have TFmode patterns.
2434
2435 For SFmode, this is the only construction I've found that can pass
2436 gcc.c-torture/execute/ieee/rbug.c. No scenario that uses DFmode
2437 intermediates will work, because you'll get intermediate rounding
2438 that ruins the end result. Some of this could be fixed by turning
2439 on round-to-positive-infinity, but that requires diddling the fpsr,
2440 which kills performance. I tried turning this around and converting
2441 to a negative number, so that I could turn on /m, but either I did
2442   it wrong or there's something else, because I wound up with the exact
2443 same single-bit error. There is a branch-less form of this same code:
2444
2445 srl $16,1,$1
2446 and $16,1,$2
2447 cmplt $16,0,$3
2448 or $1,$2,$2
2449 cmovge $16,$16,$2
2450 itoft $3,$f10
2451 itoft $2,$f11
2452 cvtqs $f11,$f11
2453 adds $f11,$f11,$f0
2454 fcmoveq $f10,$f11,$f0
2455
2456 I'm not using it because it's the same number of instructions as
2457 this branch-full form, and it has more serialized long latency
2458 instructions on the critical path.
2459
2460 For DFmode, we can avoid rounding errors by breaking up the word
2461 into two pieces, converting them separately, and adding them back:
2462
2463 LC0: .long 0,0x5f800000
2464
2465 itoft $16,$f11
2466 lda $2,LC0
2467 cmplt $16,0,$1
2468 cpyse $f11,$f31,$f10
2469 cpyse $f31,$f11,$f11
2470 s4addq $1,$2,$1
2471 lds $f12,0($1)
2472 cvtqt $f10,$f10
2473 cvtqt $f11,$f11
2474 addt $f12,$f10,$f0
2475 addt $f0,$f11,$f0
2476
2477 This doesn't seem to be a clear-cut win over the optabs form.
2478 It probably all depends on the distribution of numbers being
2479 converted -- in the optabs form, all but high-bit-set has a
2480 much lower minimum execution time. */
2481
2482void
2483alpha_emit_floatuns (rtx operands[2])
2484{
2485 rtx neglab, donelab, i0, i1, f0, in, out;
2486 machine_mode mode;
2487
2488 out = operands[0];
2489 in = force_reg (DImode, operands[1]);
2490 mode = GET_MODE (out);
2491 neglab = gen_label_rtx ();
2492 donelab = gen_label_rtx ();
2493 i0 = gen_reg_rtx (DImode);
2494 i1 = gen_reg_rtx (DImode);
2495 f0 = gen_reg_rtx (mode);
2496
2497 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
2498
2499 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
2500 emit_jump_insn (gen_jump (donelab));
2501 emit_barrier ();
2502
2503 emit_label (neglab);
2504
2505 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
2506 emit_insn (gen_anddi3 (i1, in, const1_rtx));
2507 emit_insn (gen_iordi3 (i0, i0, i1));
2508 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
2509 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
2510
2511 emit_label (donelab);
2512}
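
/* Illustrative sketch (not part of the port): the expansion above is the
   standard "halve with the low bit folded back in, convert, then double"
   trick for unsigned-to-float when only a signed conversion exists.  In
   standalone C the negative (high-bit-set) path looks like this; keeping
   the discarded low bit ORed in is what prevents a double-rounding error:

     #include <stdint.h>

     static double
     u64_to_double (uint64_t x)
     {
       if ((int64_t) x >= 0)
         return (double) (int64_t) x;       // high bit clear: convert directly
       uint64_t half = (x >> 1) | (x & 1);  // the lshr/and/ior trio above
       double d = (double) (int64_t) half;  // now fits in a signed convert
       return d + d;                        // the final add of f0 to itself
     }
*/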
2513
2514/* Generate the comparison for a conditional branch. */
2515
2516void
2517alpha_emit_conditional_branch (rtx operands[], machine_mode cmp_mode)
2518{
2519 enum rtx_code cmp_code, branch_code;
2520 machine_mode branch_mode = VOIDmode;
2521 enum rtx_code code = GET_CODE (operands[0]);
2522 rtx op0 = operands[1], op1 = operands[2];
2523 rtx tem;
2524
2525 if (cmp_mode == TFmode)
2526 {
2527 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2528 op1 = const0_rtx;
2529 cmp_mode = DImode;
2530 }
2531
2532 /* The general case: fold the comparison code to the types of compares
2533 that we have, choosing the branch as necessary. */
2534 switch (code)
2535 {
2536 case EQ: case LE: case LT: case LEU: case LTU:
2537 case UNORDERED:
2538 /* We have these compares. */
2539 cmp_code = code, branch_code = NE;
2540 break;
2541
2542 case NE:
2543 case ORDERED:
2544 /* These must be reversed. */
2545 cmp_code = reverse_condition (code), branch_code = EQ;
2546 break;
2547
2548 case GE: case GT: case GEU: case GTU:
2549 /* For FP, we swap them, for INT, we reverse them. */
2550 if (cmp_mode == DFmode)
2551 {
2552 cmp_code = swap_condition (code);
2553 branch_code = NE;
2554 std::swap (op0, op1);
2555 }
2556 else
2557 {
2558 cmp_code = reverse_condition (code);
2559 branch_code = EQ;
2560 }
2561 break;
2562
2563 default:
2564 gcc_unreachable ();
2565 }
2566
2567 if (cmp_mode == DFmode)
2568 {
2569 if (flag_unsafe_math_optimizations && cmp_code != UNORDERED)
2570 {
2571 /* When we are not as concerned about non-finite values, and we
2572 are comparing against zero, we can branch directly. */
2573 if (op1 == CONST0_RTX (DFmode))
2574 cmp_code = UNKNOWN, branch_code = code;
2575 else if (op0 == CONST0_RTX (DFmode))
2576 {
2577 /* Undo the swap we probably did just above. */
2578 std::swap (op0, op1);
2579 branch_code = swap_condition (cmp_code);
2580 cmp_code = UNKNOWN;
2581 }
2582 }
2583 else
2584 {
2585 /* ??? We mark the branch mode to be CCmode to prevent the
2586 compare and branch from being combined, since the compare
2587 insn follows IEEE rules that the branch does not. */
2588 branch_mode = CCmode;
2589 }
2590 }
2591 else
2592 {
2593 /* The following optimizations are only for signed compares. */
2594 if (code != LEU && code != LTU && code != GEU && code != GTU)
2595 {
2596 /* Whee. Compare and branch against 0 directly. */
2597 if (op1 == const0_rtx)
2598 cmp_code = UNKNOWN, branch_code = code;
2599
2600	  /* If the constant doesn't fit into an immediate, but can
2601 be generated by lda/ldah, we adjust the argument and
2602 compare against zero, so we can use beq/bne directly. */
2603 /* ??? Don't do this when comparing against symbols, otherwise
2604 we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will
2605 be declared false out of hand (at least for non-weak). */
2606 else if (CONST_INT_P (op1)
2607 && (code == EQ || code == NE)
2608 && !(symbolic_operand (op0, VOIDmode)
2609 || (REG_P (op0) && REG_POINTER (op0))))
2610 {
2611 rtx n_op1 = GEN_INT (-INTVAL (op1));
2612
2613 if (! satisfies_constraint_I (op1)
2614 && (satisfies_constraint_K (n_op1)
2615 || satisfies_constraint_L (n_op1)))
2616 cmp_code = PLUS, branch_code = code, op1 = n_op1;
2617 }
2618 }
2619
2620 if (!reg_or_0_operand (op0, DImode))
2621 op0 = force_reg (DImode, op0);
2622 if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode))
2623 op1 = force_reg (DImode, op1);
2624 }
2625
2626 /* Emit an initial compare instruction, if necessary. */
2627 tem = op0;
2628 if (cmp_code != UNKNOWN)
2629 {
2630 tem = gen_reg_rtx (cmp_mode);
2631 emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1));
2632 }
2633
2634 /* Emit the branch instruction. */
2635 tem = gen_rtx_SET (pc_rtx,
2636 gen_rtx_IF_THEN_ELSE (VOIDmode,
2637 gen_rtx_fmt_ee (branch_code,
2638 branch_mode, tem,
2639 CONST0_RTX (cmp_mode)),
2640 gen_rtx_LABEL_REF (VOIDmode,
2641 operands[3]),
2642 pc_rtx));
2643 emit_jump_insn (tem);
2644}
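
/* For instance, a comparison such as  x == 0x4000  cannot use the 8-bit
   compare immediate (constraint I), but -0x4000 is a valid lda immediate
   (constraint K), so the code above rewrites it as  (x + -0x4000) == 0
   and branches with beq on the sum.  The identity  x == c  <=>  x - c == 0
   is what makes this safe, and only for EQ/NE, since the sum may wrap.
   The value 0x4000 is just an example.  */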
2645
2646/* Certain simplifications can be done to make invalid setcc operations
2647   valid.  Return true if we emitted the setcc, or false if we can't.  */
2648
2649bool
2650alpha_emit_setcc (rtx operands[], machine_mode cmp_mode)
2651{
2652 enum rtx_code cmp_code;
2653 enum rtx_code code = GET_CODE (operands[1]);
2654 rtx op0 = operands[2], op1 = operands[3];
2655 rtx tmp;
2656
2657 if (cmp_mode == TFmode)
2658 {
2659 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2660 op1 = const0_rtx;
2661 cmp_mode = DImode;
2662 }
2663
2664 if (cmp_mode == DFmode && !TARGET_FIX)
2665 return 0;
2666
2667 /* The general case: fold the comparison code to the types of compares
2668 that we have, choosing the branch as necessary. */
2669
2670 cmp_code = UNKNOWN;
2671 switch (code)
2672 {
2673 case EQ: case LE: case LT: case LEU: case LTU:
2674 case UNORDERED:
2675 /* We have these compares. */
2676 if (cmp_mode == DFmode)
2677 cmp_code = code, code = NE;
2678 break;
2679
2680 case NE:
2681 if (cmp_mode == DImode && op1 == const0_rtx)
2682 break;
2683 /* FALLTHRU */
2684
2685 case ORDERED:
2686 cmp_code = reverse_condition (code);
2687 code = EQ;
2688 break;
2689
2690 case GE: case GT: case GEU: case GTU:
2691 /* These normally need swapping, but for integer zero we have
2692 special patterns that recognize swapped operands. */
2693 if (cmp_mode == DImode && op1 == const0_rtx)
2694 break;
2695 code = swap_condition (code);
2696 if (cmp_mode == DFmode)
2697 cmp_code = code, code = NE;
2698 std::swap (op0, op1);
2699 break;
2700
2701 default:
2702 gcc_unreachable ();
2703 }
2704
2705 if (cmp_mode == DImode)
2706 {
2707 if (!register_operand (op0, DImode))
2708 op0 = force_reg (DImode, op0);
2709 if (!reg_or_8bit_operand (op1, DImode))
2710 op1 = force_reg (DImode, op1);
2711 }
2712
2713 /* Emit an initial compare instruction, if necessary. */
2714 if (cmp_code != UNKNOWN)
2715 {
2716 tmp = gen_reg_rtx (cmp_mode);
2717 emit_insn (gen_rtx_SET (tmp, gen_rtx_fmt_ee (cmp_code, cmp_mode,
2718 op0, op1)));
2719
2720 op0 = cmp_mode != DImode ? gen_lowpart (DImode, tmp) : tmp;
2721 op1 = const0_rtx;
2722 }
2723
2724 /* Emit the setcc instruction. */
2725 emit_insn (gen_rtx_SET (operands[0], gen_rtx_fmt_ee (code, DImode,
2726 op0, op1)));
2727 return true;
2728}
2729
2730
2731/* Rewrite a comparison against zero CMP of the form
2732 (CODE (cc0) (const_int 0)) so it can be written validly in
2733 a conditional move (if_then_else CMP ...).
2734 If both of the operands that set cc0 are nonzero we must emit
2735 an insn to perform the compare (it can't be done within
2736 the conditional move). */
2737
2738rtx
2739alpha_emit_conditional_move (rtx cmp, machine_mode mode)
2740{
2741 enum rtx_code code = GET_CODE (cmp);
2742 enum rtx_code cmov_code = NE;
2743 rtx op0 = XEXP (cmp, 0);
2744 rtx op1 = XEXP (cmp, 1);
2745 machine_mode cmp_mode
2746 = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0));
2747 machine_mode cmov_mode = VOIDmode;
2748 int local_fast_math = flag_unsafe_math_optimizations;
2749 rtx tem;
2750
2751 if (cmp_mode == TFmode)
2752 {
2753 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2754 op1 = const0_rtx;
2755 cmp_mode = DImode;
2756 }
2757
2758 gcc_assert (cmp_mode == DFmode || cmp_mode == DImode);
2759
2760 if (FLOAT_MODE_P (cmp_mode) != FLOAT_MODE_P (mode))
2761 {
2762 enum rtx_code cmp_code;
2763
2764 if (! TARGET_FIX)
2765 return 0;
2766
2767 /* If we have fp<->int register move instructions, do a cmov by
2768 performing the comparison in fp registers, and move the
2769 zero/nonzero value to integer registers, where we can then
2770 use a normal cmov, or vice-versa. */
2771
2772 switch (code)
2773 {
2774 case EQ: case LE: case LT: case LEU: case LTU:
2775 case UNORDERED:
2776 /* We have these compares. */
2777 cmp_code = code, code = NE;
2778 break;
2779
2780 case NE:
2781 case ORDERED:
2782 /* These must be reversed. */
2783 cmp_code = reverse_condition (code), code = EQ;
2784 break;
2785
2786 case GE: case GT: case GEU: case GTU:
2787 /* These normally need swapping, but for integer zero we have
2788 special patterns that recognize swapped operands. */
2789 if (cmp_mode == DImode && op1 == const0_rtx)
2790 cmp_code = code, code = NE;
2791 else
2792 {
2793 cmp_code = swap_condition (code);
2794 code = NE;
2795 std::swap (op0, op1);
2796 }
2797 break;
2798
2799 default:
2800 gcc_unreachable ();
2801 }
2802
2803 if (cmp_mode == DImode)
2804 {
2805 if (!reg_or_0_operand (op0, DImode))
2806 op0 = force_reg (DImode, op0);
2807 if (!reg_or_8bit_operand (op1, DImode))
2808 op1 = force_reg (DImode, op1);
2809 }
2810
2811 tem = gen_reg_rtx (cmp_mode);
2812 emit_insn (gen_rtx_SET (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode,
2813 op0, op1)));
2814
2815 cmp_mode = cmp_mode == DImode ? E_DFmode : E_DImode;
2816 op0 = gen_lowpart (cmp_mode, tem);
2817 op1 = CONST0_RTX (cmp_mode);
2818 cmp = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2819 local_fast_math = 1;
2820 }
2821
2822 if (cmp_mode == DImode)
2823 {
2824 if (!reg_or_0_operand (op0, DImode))
2825 op0 = force_reg (DImode, op0);
2826 if (!reg_or_8bit_operand (op1, DImode))
2827 op1 = force_reg (DImode, op1);
2828 }
2829
2830 /* We may be able to use a conditional move directly.
2831 This avoids emitting spurious compares. */
2832 if (signed_comparison_operator (cmp, VOIDmode)
2833 && (cmp_mode == DImode || local_fast_math)
2834 && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode)))
2835 return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2836
2837 /* We can't put the comparison inside the conditional move;
2838 emit a compare instruction and put that inside the
2839 conditional move. Make sure we emit only comparisons we have;
2840 swap or reverse as necessary. */
2841
2842 if (!can_create_pseudo_p ())
2843 return NULL_RTX;
2844
2845 switch (code)
2846 {
2847 case EQ: case LE: case LT: case LEU: case LTU:
2848 case UNORDERED:
2849 /* We have these compares: */
2850 break;
2851
2852 case NE:
2853 case ORDERED:
2854 /* These must be reversed. */
2855 code = reverse_condition (code);
2856 cmov_code = EQ;
2857 break;
2858
2859 case GE: case GT: case GEU: case GTU:
2860 /* These normally need swapping, but for integer zero we have
2861 special patterns that recognize swapped operands. */
2862 if (cmp_mode == DImode && op1 == const0_rtx)
2863 break;
2864 code = swap_condition (code);
2865 std::swap (op0, op1);
2866 break;
2867
2868 default:
2869 gcc_unreachable ();
2870 }
2871
2872 if (cmp_mode == DImode)
2873 {
2874 if (!reg_or_0_operand (op0, DImode))
2875 op0 = force_reg (DImode, op0);
2876 if (!reg_or_8bit_operand (op1, DImode))
2877 op1 = force_reg (DImode, op1);
2878 }
2879
2880 /* ??? We mark the branch mode to be CCmode to prevent the compare
2881 and cmov from being combined, since the compare insn follows IEEE
2882 rules that the cmov does not. */
2883 if (cmp_mode == DFmode && !local_fast_math)
2884 cmov_mode = CCmode;
2885
2886 tem = gen_reg_rtx (cmp_mode);
2887 emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_mode, op0, op1));
2888 return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_mode));
2889}
2890
2891/* Simplify a conditional move of two constants into a setcc with
2892 arithmetic. This is done with a splitter since combine would
2893 just undo the work if done during code generation. It also catches
2894 cases we wouldn't have before cse. */
2895
2896int
2897alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond,
2898 rtx t_rtx, rtx f_rtx)
2899{
2900 HOST_WIDE_INT t, f, diff;
2901 machine_mode mode;
2902 rtx target, subtarget, tmp;
2903
2904 mode = GET_MODE (dest);
2905 t = INTVAL (t_rtx);
2906 f = INTVAL (f_rtx);
2907 diff = t - f;
2908
2909 if (((code == NE || code == EQ) && diff < 0)
2910 || (code == GE || code == GT))
2911 {
2912 code = reverse_condition (code);
2913 std::swap (t, f);
2914 diff = -diff;
2915 }
2916
2917 subtarget = target = dest;
2918 if (mode != DImode)
2919 {
2920 target = gen_lowpart (DImode, dest);
2921 if (can_create_pseudo_p ())
2922 subtarget = gen_reg_rtx (DImode);
2923 else
2924 subtarget = target;
2925 }
2926 /* Below, we must be careful to use copy_rtx on target and subtarget
2927 in intermediate insns, as they may be a subreg rtx, which may not
2928 be shared. */
2929
2930 if (f == 0 && exact_log2 (diff) > 0
2931 /* On EV6, we've got enough shifters to make non-arithmetic shifts
2932 viable over a longer latency cmove. On EV5, the E0 slot is a
2933 scarce resource, and on EV4 shift has the same latency as a cmove. */
2934 && (diff <= 8 || alpha_tune == PROCESSOR_EV6))
2935 {
2936 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2937 emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp));
2938
2939 tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget),
2940 GEN_INT (exact_log2 (t)));
2941 emit_insn (gen_rtx_SET (target, tmp));
2942 }
2943 else if (f == 0 && t == -1)
2944 {
2945 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2946 emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp));
2947
2948 emit_insn (gen_negdi2 (target, copy_rtx (subtarget)));
2949 }
2950 else if (diff == 1 || diff == 4 || diff == 8)
2951 {
2952 rtx add_op;
2953
2954 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2955 emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp));
2956
2957 if (diff == 1)
2958 emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f)));
2959 else
2960 {
2961 add_op = GEN_INT (f);
2962 if (sext_add_operand (add_op, mode))
2963 {
2964 tmp = gen_rtx_MULT (DImode, copy_rtx (subtarget),
2965 GEN_INT (diff));
2966 tmp = gen_rtx_PLUS (DImode, tmp, add_op);
2967 emit_insn (gen_rtx_SET (target, tmp));
2968 }
2969 else
2970 return 0;
2971 }
2972 }
2973 else
2974 return 0;
2975
2976 return 1;
2977}
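
/* Illustrative sketch (not part of the port): with  b = (cond ? 1 : 0),
   the three shapes split above correspond to these scalar identities,
   which a standalone program can check directly:

     #include <assert.h>
     #include <stdint.h>

     int
     main (void)
     {
       for (int64_t b = 0; b <= 1; b++)
         {
           // f == 0, t a power of two:  cmov becomes setcc + shift.
           assert ((b ? 16 : 0) == b << 4);
           // f == 0, t == -1:  cmov becomes setcc + negate.
           assert ((b ? -1 : 0) == -b);
           // t - f in {1,4,8}:  cmov becomes setcc + scaled add (s4addq here).
           int64_t t = 23, f = 19;
           assert ((b ? t : f) == b * 4 + f);
         }
       return 0;
     }
*/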
2978
2979/* Look up the X_floating library function name for the
2980   given operation.  */
2981
2982struct GTY(()) xfloating_op
2983{
2984 const enum rtx_code code;
2985 const char *const GTY((skip)) osf_func;
2986 const char *const GTY((skip)) vms_func;
2987 rtx libcall;
2988};
2989
2990static GTY(()) struct xfloating_op xfloating_ops[] =
2991{
2992 { PLUS, "_OtsAddX", "OTS$ADD_X", 0 },
2993 { MINUS, "_OtsSubX", "OTS$SUB_X", 0 },
2994 { MULT, "_OtsMulX", "OTS$MUL_X", 0 },
2995 { DIV, "_OtsDivX", "OTS$DIV_X", 0 },
2996 { EQ, "_OtsEqlX", "OTS$EQL_X", 0 },
2997 { NE, "_OtsNeqX", "OTS$NEQ_X", 0 },
2998 { LT, "_OtsLssX", "OTS$LSS_X", 0 },
2999 { LE, "_OtsLeqX", "OTS$LEQ_X", 0 },
3000 { GT, "_OtsGtrX", "OTS$GTR_X", 0 },
3001 { GE, "_OtsGeqX", "OTS$GEQ_X", 0 },
3002 { FIX, "_OtsCvtXQ", "OTS$CVTXQ", 0 },
3003 { FLOAT, "_OtsCvtQX", "OTS$CVTQX", 0 },
3004 { UNSIGNED_FLOAT, "_OtsCvtQUX", "OTS$CVTQUX", 0 },
3005 { FLOAT_EXTEND, "_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0 },
3006 { FLOAT_TRUNCATE, "_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0 }
3007};
3008
3009static GTY(()) struct xfloating_op vax_cvt_ops[] =
3010{
3011 { FLOAT_EXTEND, "_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0 },
3012 { FLOAT_TRUNCATE, "_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0 }
3013};
3014
3015static rtx
3016alpha_lookup_xfloating_lib_func (enum rtx_code code)
3017{
3018 struct xfloating_op *ops = xfloating_ops;
3019 long n = ARRAY_SIZE (xfloating_ops);
3020 long i;
3021
3022 gcc_assert (TARGET_HAS_XFLOATING_LIBS);
3023
3024 /* How irritating. Nothing to key off for the main table. */
3025 if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE))
3026 {
3027 ops = vax_cvt_ops;
3028 n = ARRAY_SIZE (vax_cvt_ops);
3029 }
3030
3031 for (i = 0; i < n; ++i, ++ops)
3032 if (ops->code == code)
3033 {
3034 rtx func = ops->libcall;
3035 if (!func)
3036 {
3037 func = init_one_libfunc (TARGET_ABI_OPEN_VMS
3038 ? ops->vms_func : ops->osf_func);
3039 ops->libcall = func;
3040 }
3041 return func;
3042 }
3043
3044 gcc_unreachable ();
3045}
3046
3047/* Most X_floating operations take the rounding mode as an argument.
3048 Compute that here. */
3049
3050static int
3051alpha_compute_xfloating_mode_arg (enum rtx_code code,
3052 enum alpha_fp_rounding_mode round)
3053{
3054 int mode;
3055
3056 switch (round)
3057 {
3058 case ALPHA_FPRM_NORM:
3059 mode = 2;
3060 break;
3061 case ALPHA_FPRM_MINF:
3062 mode = 1;
3063 break;
3064 case ALPHA_FPRM_CHOP:
3065 mode = 0;
3066 break;
3067 case ALPHA_FPRM_DYN:
3068 mode = 4;
3069 break;
3070 default:
3071 gcc_unreachable ();
3072
3073 /* XXX For reference, round to +inf is mode = 3. */
3074 }
3075
3076 if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N)
3077 mode |= 0x10000;
3078
3079 return mode;
3080}
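
/* For example, ALPHA_FPRM_NORM (round to nearest) yields 2, and a
   FLOAT_TRUNCATE under -mfp-trap-mode=n (ALPHA_FPTM_N) additionally sets
   the 0x10000 flag, giving 0x10002.  */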
3081
3082/* Emit an X_floating library function call.
3083
3084 Note that these functions do not follow normal calling conventions:
3085 TFmode arguments are passed in two integer registers (as opposed to
3086 indirect); TFmode return values appear in R16+R17.
3087
3088 FUNC is the function to call.
3089 TARGET is where the output belongs.
3090 OPERANDS are the inputs.
3091 NOPERANDS is the count of inputs.
3092 EQUIV is the expression equivalent for the function.
3093*/
3094
3095static void
3096alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[],
3097 int noperands, rtx equiv)
3098{
3099 rtx usage = NULL_RTX, reg;
3100 int regno = 16, i;
3101
3102 start_sequence ();
3103
3104 for (i = 0; i < noperands; ++i)
3105 {
3106 switch (GET_MODE (operands[i]))
3107 {
3108 case E_TFmode:
3109 reg = gen_rtx_REG (TFmode, regno);
3110 regno += 2;
3111 break;
3112
3113 case E_DFmode:
3114 reg = gen_rtx_REG (DFmode, regno + 32);
3115 regno += 1;
3116 break;
3117
3118 case E_VOIDmode:
3119 gcc_assert (CONST_INT_P (operands[i]));
3120 /* FALLTHRU */
3121 case E_DImode:
3122 reg = gen_rtx_REG (DImode, regno);
3123 regno += 1;
3124 break;
3125
3126 default:
3127 gcc_unreachable ();
3128 }
3129
3130 emit_move_insn (reg, operands[i]);
3131 use_reg (&usage, reg);
3132 }
3133
3134 switch (GET_MODE (target))
3135 {
3136 case E_TFmode:
3137 reg = gen_rtx_REG (TFmode, 16);
3138 break;
3139 case E_DFmode:
3140 reg = gen_rtx_REG (DFmode, 32);
3141 break;
3142 case E_DImode:
3143 reg = gen_rtx_REG (DImode, 0);
3144 break;
3145 default:
3146 gcc_unreachable ();
3147 }
3148
3149 rtx mem = gen_rtx_MEM (QImode, func);
3150 rtx_insn *tmp = emit_call_insn (gen_call_value (reg, mem, const0_rtx,
3151 const0_rtx, const0_rtx));
3152 CALL_INSN_FUNCTION_USAGE (tmp) = usage;
3153 RTL_CONST_CALL_P (tmp) = 1;
3154
3155 tmp = get_insns ();
3156 end_sequence ();
3157
3158 emit_libcall_block (tmp, target, reg, equiv);
3159}
3160
3161/* Emit an X_floating library function call for arithmetic (+,-,*,/). */
3162
3163void
3164alpha_emit_xfloating_arith (enum rtx_code code, rtx operands[])
3165{
3166 rtx func;
3167 int mode;
3168 rtx out_operands[3];
3169
3170 func = alpha_lookup_xfloating_lib_func (code);
3171 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3172
3173 out_operands[0] = operands[1];
3174 out_operands[1] = operands[2];
3175 out_operands[2] = GEN_INT (mode);
3176 alpha_emit_xfloating_libcall (func, operands[0], out_operands, 3,
3177 gen_rtx_fmt_ee (code, TFmode, operands[1],
3178 operands[2]));
3179}
3180
3181/* Emit an X_floating library function call for a comparison. */
3182
3183static rtx
3184alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1)
3185{
3186 enum rtx_code cmp_code, res_code;
3187 rtx func, out, operands[2], note;
3188
3189 /* X_floating library comparison functions return
3190 -1 unordered
3191 0 false
3192 1 true
3193     Convert the comparison into one against the raw return value.  */
3194
3195 cmp_code = *pcode;
3196 switch (cmp_code)
3197 {
3198 case UNORDERED:
3199 cmp_code = EQ;
3200 res_code = LT;
3201 break;
3202 case ORDERED:
3203 cmp_code = EQ;
3204 res_code = GE;
3205 break;
3206 case NE:
3207 res_code = NE;
3208 break;
3209 case EQ:
3210 case LT:
3211 case GT:
3212 case LE:
3213 case GE:
3214 res_code = GT;
3215 break;
3216 default:
3217 gcc_unreachable ();
3218 }
3219 *pcode = res_code;
3220
3221 func = alpha_lookup_xfloating_lib_func (cmp_code);
3222
3223 operands[0] = op0;
3224 operands[1] = op1;
3225 out = gen_reg_rtx (DImode);
3226
3227 /* What's actually returned is -1,0,1, not a proper boolean value. */
3228 note = gen_rtx_fmt_ee (cmp_code, VOIDmode, op0, op1);
3229 note = gen_rtx_UNSPEC (DImode, gen_rtvec (1, note), UNSPEC_XFLT_COMPARE);
3230 alpha_emit_xfloating_libcall (func, out, operands, 2, note);
3231
3232 return out;
3233}
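
/* For example, UNORDERED (a, b) is implemented by calling the EQ routine
   (_OtsEqlX / OTS$EQL_X) and testing the raw result with  result < 0,
   since the library returns -1 for unordered; ORDERED uses the same call
   with  result >= 0; and LT/LE/GT/GE call their own routine and test
   result > 0.  */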
3234
3235/* Emit an X_floating library function call for a conversion. */
3236
3237void
3238alpha_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[])
3239{
3240 int noperands = 1, mode;
3241 rtx out_operands[2];
3242 rtx func;
3243 enum rtx_code code = orig_code;
3244
3245 if (code == UNSIGNED_FIX)
3246 code = FIX;
3247
3248 func = alpha_lookup_xfloating_lib_func (code);
3249
3250 out_operands[0] = operands[1];
3251
3252 switch (code)
3253 {
3254 case FIX:
3255 mode = alpha_compute_xfloating_mode_arg (code, ALPHA_FPRM_CHOP);
3256 out_operands[1] = GEN_INT (mode);
3257 noperands = 2;
3258 break;
3259 case FLOAT_TRUNCATE:
3260 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3261 out_operands[1] = GEN_INT (mode);
3262 noperands = 2;
3263 break;
3264 default:
3265 break;
3266 }
3267
3268 alpha_emit_xfloating_libcall (func, operands[0], out_operands, noperands,
3269 gen_rtx_fmt_e (orig_code,
3270 GET_MODE (operands[0]),
3271 operands[1]));
3272}
3273
3274/* Split a TImode or TFmode move from OP[1] to OP[0] into a pair of
3275 DImode moves from OP[2,3] to OP[0,1]. If FIXUP_OVERLAP is true,
3276 guarantee that the sequence
3277 set (OP[0] OP[2])
3278 set (OP[1] OP[3])
3279 is valid. Naturally, output operand ordering is little-endian.
3280 This is used by *movtf_internal and *movti_internal. */
3281
3282void
3283alpha_split_tmode_pair (rtx operands[4], machine_mode mode,
3284 bool fixup_overlap)
3285{
3286 switch (GET_CODE (operands[1]))
3287 {
3288 case REG:
3289 operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
3290 operands[2] = gen_rtx_REG (DImode, REGNO (operands[1]));
3291 break;
3292
3293 case MEM:
3294 operands[3] = adjust_address (operands[1], DImode, 8);
3295 operands[2] = adjust_address (operands[1], DImode, 0);
3296 break;
3297
3298 CASE_CONST_SCALAR_INT:
3299 case CONST_DOUBLE:
3300 gcc_assert (operands[1] == CONST0_RTX (mode));
3301 operands[2] = operands[3] = const0_rtx;
3302 break;
3303
3304 default:
3305 gcc_unreachable ();
3306 }
3307
3308 switch (GET_CODE (operands[0]))
3309 {
3310 case REG:
3311 operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
3312 operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
3313 break;
3314
3315 case MEM:
3316 operands[1] = adjust_address (operands[0], DImode, 8);
3317 operands[0] = adjust_address (operands[0], DImode, 0);
3318 break;
3319
3320 default:
3321 gcc_unreachable ();
3322 }
3323
3324 if (fixup_overlap && reg_overlap_mentioned_p (operands[0], operands[3]))
3325 {
3326 std::swap (operands[0], operands[1]);
3327 std::swap (operands[2], operands[3]);
3328 }
3329}
3330
3331/* Implement negtf2 or abstf2. Op0 is destination, op1 is source,
3332 op2 is a register containing the sign bit, operation is the
3333 logical operation to be performed. */
3334
3335void
3336alpha_split_tfmode_frobsign (rtx operands[3], rtx (*operation) (rtx, rtx, rtx))
3337{
3338 rtx high_bit = operands[2];
3339 rtx scratch;
3340 int move;
3341
3342 alpha_split_tmode_pair (operands, TFmode, false);
3343
3344 /* Detect three flavors of operand overlap. */
3345 move = 1;
3346 if (rtx_equal_p (operands[0], operands[2]))
3347 move = 0;
3348 else if (rtx_equal_p (operands[1], operands[2]))
3349 {
3350 if (rtx_equal_p (operands[0], high_bit))
3351 move = 2;
3352 else
3353 move = -1;
3354 }
3355
3356 if (move < 0)
3357 emit_move_insn (operands[0], operands[2]);
3358
3359 /* ??? If the destination overlaps both source tf and high_bit, then
3360 assume source tf is dead in its entirety and use the other half
3361 for a scratch register. Otherwise "scratch" is just the proper
3362 destination register. */
3363 scratch = operands[move < 2 ? 1 : 3];
3364
3365 emit_insn ((*operation) (scratch, high_bit, operands[3]));
3366
3367 if (move > 0)
3368 {
3369 emit_move_insn (operands[0], operands[2]);
3370 if (move > 1)
3371 emit_move_insn (operands[1], scratch);
3372 }
3373}
3374
3375/* Use ext[wlq][lh] as the Architecture Handbook describes for extracting
3376 unaligned data:
3377
3378 unsigned: signed:
3379 word: ldq_u r1,X(r11) ldq_u r1,X(r11)
3380 ldq_u r2,X+1(r11) ldq_u r2,X+1(r11)
3381 lda r3,X(r11) lda r3,X+2(r11)
3382 extwl r1,r3,r1 extql r1,r3,r1
3383 extwh r2,r3,r2 extqh r2,r3,r2
3384 or r1,r2,r1 or r1,r2,r1
3385 sra r1,48,r1
3386
3387 long: ldq_u r1,X(r11) ldq_u r1,X(r11)
3388 ldq_u r2,X+3(r11) ldq_u r2,X+3(r11)
3389 lda r3,X(r11) lda r3,X(r11)
3390 extll r1,r3,r1 extll r1,r3,r1
3391 extlh r2,r3,r2 extlh r2,r3,r2
3392 or r1,r2,r1 addl r1,r2,r1
3393
3394 quad: ldq_u r1,X(r11)
3395 ldq_u r2,X+7(r11)
3396 lda r3,X(r11)
3397 extql r1,r3,r1
3398 extqh r2,r3,r2
3399 or r1,r2,r1
3400*/
3401
3402void
3403alpha_expand_unaligned_load (rtx tgt, rtx mem, HOST_WIDE_INT size,
3404 HOST_WIDE_INT ofs, int sign)
3405{
3406 rtx meml, memh, addr, extl, exth, tmp, mema;
3407 machine_mode mode;
3408
3409 if (TARGET_BWX && size == 2)
3410 {
3411 meml = adjust_address (mem, QImode, ofs);
3412 memh = adjust_address (mem, QImode, ofs+1);
3413 extl = gen_reg_rtx (DImode);
3414 exth = gen_reg_rtx (DImode);
3415 emit_insn (gen_zero_extendqidi2 (extl, meml));
3416 emit_insn (gen_zero_extendqidi2 (exth, memh));
3417 exth = expand_simple_binop (DImode, ASHIFT, exth, GEN_INT (8),
3418 NULL, 1, OPTAB_LIB_WIDEN);
3419 addr = expand_simple_binop (DImode, IOR, extl, exth,
3420 NULL, 1, OPTAB_LIB_WIDEN);
3421
3422 if (sign && GET_MODE (tgt) != HImode)
3423 {
3424 addr = gen_lowpart (HImode, addr);
3425 emit_insn (gen_extend_insn (tgt, addr, GET_MODE (tgt), HImode, 0));
3426 }
3427 else
3428 {
3429 if (GET_MODE (tgt) != DImode)
3430 addr = gen_lowpart (GET_MODE (tgt), addr);
3431 emit_move_insn (tgt, addr);
3432 }
3433 return;
3434 }
3435
3436 meml = gen_reg_rtx (DImode);
3437 memh = gen_reg_rtx (DImode);
3438 addr = gen_reg_rtx (DImode);
3439 extl = gen_reg_rtx (DImode);
3440 exth = gen_reg_rtx (DImode);
3441
3442 mema = XEXP (mem, 0);
3443 if (GET_CODE (mema) == LO_SUM)
3444 mema = force_reg (Pmode, mema);
3445
3446 /* AND addresses cannot be in any alias set, since they may implicitly
3447 alias surrounding code. Ideally we'd have some alias set that
3448 covered all types except those with alignment 8 or higher. */
3449
3450 tmp = change_address (mem, DImode,
3451 gen_rtx_AND (DImode,
3452 plus_constant (DImode, mema, ofs),
3453 GEN_INT (-8)));
3454 set_mem_alias_set (tmp, 0);
3455 emit_move_insn (meml, tmp);
3456
3457 tmp = change_address (mem, DImode,
3458 gen_rtx_AND (DImode,
3459 plus_constant (DImode, mema,
3460 ofs + size - 1),
3461 GEN_INT (-8)));
3462 set_mem_alias_set (tmp, 0);
3463 emit_move_insn (memh, tmp);
3464
3465 if (sign && size == 2)
3466 {
3467 emit_move_insn (addr, plus_constant (Pmode, mema, ofs+2));
3468
3469 emit_insn (gen_extql (extl, meml, addr));
3470 emit_insn (gen_extqh (exth, memh, addr));
3471
3472 /* We must use tgt here for the target. Alpha-vms port fails if we use
3473 addr for the target, because addr is marked as a pointer and combine
3474 knows that pointers are always sign-extended 32-bit values. */
3475 addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN);
3476 addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (48),
3477 addr, 1, OPTAB_WIDEN);
3478 }
3479 else
3480 {
3481 emit_move_insn (addr, plus_constant (Pmode, mema, ofs));
3482 emit_insn (gen_extxl (extl, meml, GEN_INT (size*8), addr));
3483 switch ((int) size)
3484 {
3485 case 2:
3486 emit_insn (gen_extwh (exth, memh, addr));
3487 mode = HImode;
3488 break;
3489 case 4:
3490 emit_insn (gen_extlh (exth, memh, addr));
3491 mode = SImode;
3492 break;
3493 case 8:
3494 emit_insn (gen_extqh (exth, memh, addr));
3495 mode = DImode;
3496 break;
3497 default:
3498 gcc_unreachable ();
3499 }
3500
3501 addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl),
3502 gen_lowpart (mode, exth), gen_lowpart (mode, tgt),
3503 sign, OPTAB_WIDEN);
3504 }
3505
3506 if (addr != tgt)
3507 emit_move_insn (tgt, gen_lowpart (GET_MODE (tgt), addr));
3508}
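
/* Illustrative sketch (not part of the port): the ldq_u/extql/extqh/or
   pattern above amounts to "read the two aligned quadwords covering the
   datum, shift each half into place, and OR them together".  A rough
   little-endian C rendering for the quadword case -- illustrative only,
   since plain C code has the out-of-bounds concerns that the real ldq_u
   instruction does not:

     #include <stdint.h>
     #include <string.h>

     static uint64_t
     load_u64_unaligned (const unsigned char *p)
     {
       uintptr_t a = (uintptr_t) p;
       uint64_t lo, hi;
       memcpy (&lo, (const void *) (a & ~(uintptr_t) 7), 8);       // ldq_u X
       memcpy (&hi, (const void *) ((a + 7) & ~(uintptr_t) 7), 8); // ldq_u X+7
       unsigned sh = (a & 7) * 8;
       if (sh == 0)
         return lo;               // aligned: the high half contributes nothing
       return (lo >> sh) | (hi << (64 - sh));    // extql, extqh, or
     }
*/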
3509
3510/* Similarly, use ins and msk instructions to perform unaligned stores. */
3511
3512void
3513alpha_expand_unaligned_store (rtx dst, rtx src,
3514 HOST_WIDE_INT size, HOST_WIDE_INT ofs)
3515{
3516 rtx dstl, dsth, addr, insl, insh, meml, memh, dsta;
3517
3518 if (TARGET_BWX && size == 2)
3519 {
3520 if (src != const0_rtx)
3521 {
3522 dstl = gen_lowpart (QImode, src);
3523 dsth = expand_simple_binop (DImode, LSHIFTRT, src, GEN_INT (8),
3524 NULL, 1, OPTAB_LIB_WIDEN);
3525 dsth = gen_lowpart (QImode, dsth);
3526 }
3527 else
3528 dstl = dsth = const0_rtx;
3529
3530 meml = adjust_address (dst, QImode, ofs);
3531 memh = adjust_address (dst, QImode, ofs+1);
3532
3533 emit_move_insn (meml, dstl);
3534 emit_move_insn (memh, dsth);
3535 return;
3536 }
3537
3538 dstl = gen_reg_rtx (DImode);
3539 dsth = gen_reg_rtx (DImode);
3540 insl = gen_reg_rtx (DImode);
3541 insh = gen_reg_rtx (DImode);
3542
3543 dsta = XEXP (dst, 0);
3544 if (GET_CODE (dsta) == LO_SUM)
3545 dsta = force_reg (Pmode, dsta);
3546
3547 /* AND addresses cannot be in any alias set, since they may implicitly
3548 alias surrounding code. Ideally we'd have some alias set that
3549 covered all types except those with alignment 8 or higher. */
3550
3551 meml = change_address (dst, DImode,
3552 gen_rtx_AND (DImode,
3553 plus_constant (DImode, dsta, ofs),
3554 GEN_INT (-8)));
3555 set_mem_alias_set (meml, 0);
3556
3557 memh = change_address (dst, DImode,
3558 gen_rtx_AND (DImode,
3559 plus_constant (DImode, dsta,
3560 ofs + size - 1),
3561 GEN_INT (-8)));
3562 set_mem_alias_set (memh, 0);
3563
3564 emit_move_insn (dsth, memh);
3565 emit_move_insn (dstl, meml);
3566
3567 addr = copy_addr_to_reg (plus_constant (Pmode, dsta, ofs));
3568
3569 if (src != CONST0_RTX (GET_MODE (src)))
3570 {
3571 emit_insn (gen_insxh (insh, gen_lowpart (DImode, src),
3572 GEN_INT (size*8), addr));
3573
3574 switch ((int) size)
3575 {
3576 case 2:
3577 emit_insn (gen_inswl (insl, gen_lowpart (HImode, src), addr));
3578 break;
3579 case 4:
3580 emit_insn (gen_insll (insl, gen_lowpart (SImode, src), addr));
3581 break;
3582 case 8:
3583 emit_insn (gen_insql (insl, gen_lowpart (DImode, src), addr));
3584 break;
3585 default:
3586 gcc_unreachable ();
3587 }
3588 }
3589
3590 emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size*8), addr));
3591
3592 switch ((int) size)
3593 {
3594 case 2:
3595 emit_insn (gen_mskwl (dstl, dstl, addr));
3596 break;
3597 case 4:
3598 emit_insn (gen_mskll (dstl, dstl, addr));
3599 break;
3600 case 8:
3601 emit_insn (gen_mskql (dstl, dstl, addr));
3602 break;
3603 default:
3604 gcc_unreachable ();
3605 }
3606
3607 if (src != CONST0_RTX (GET_MODE (src)))
3608 {
3609 dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN);
3610 dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN);
3611 }
3612
3613 /* Must store high before low for degenerate case of aligned. */
3614 emit_move_insn (memh, dsth);
3615 emit_move_insn (meml, dstl);
3616}
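
/* Illustrative sketch (not part of the port): the ins/msk/or store above
   is a two-quadword read-modify-write.  For a 2-byte store it amounts to
   the following little-endian C, with the same caveats as the load sketch:

     #include <stdint.h>
     #include <string.h>

     static void
     store_u16_unaligned (unsigned char *p, uint16_t v)
     {
       uintptr_t a = (uintptr_t) p;
       void *pl = (void *) (a & ~(uintptr_t) 7);        // quad with the low end
       void *ph = (void *) ((a + 1) & ~(uintptr_t) 7);  // quad with the high end
       uint64_t lo, hi;
       memcpy (&lo, pl, 8);                                   // ldq_u
       memcpy (&hi, ph, 8);                                   // ldq_u
       unsigned sh = (a & 7) * 8;
       uint64_t ml = (uint64_t) 0xffff << sh;                 // bytes in low quad
       uint64_t mh = sh ? (uint64_t) 0xffff >> (64 - sh) : 0; // bytes in high quad
       lo = (lo & ~ml) | ((uint64_t) v << sh);                // mskwl + inswl + or
       hi = (hi & ~mh) | (sh ? (uint64_t) v >> (64 - sh) : 0);// mskwh + inswh + or
       memcpy (ph, &hi, 8);   // store high before low, so the degenerate
       memcpy (pl, &lo, 8);   // aligned case (pl == ph) comes out right
     }
*/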
3617
3618/* The block move code tries to maximize speed by separating loads and
3619   stores at the expense of register pressure: we load all of the data
3620   before we store it back out.  There are two secondary effects worth
3621   mentioning: this speeds copying to/from aligned and unaligned
3622   buffers, and it makes the code significantly easier to write.  */
3623
3624#define MAX_MOVE_WORDS 8
3625
3626/* Load an integral number of consecutive unaligned quadwords. */
3627
3628static void
3629alpha_expand_unaligned_load_words (rtx *out_regs, rtx smem,
3630 HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3631{
3632 rtx const im8 = GEN_INT (-8);
3633 rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS+1];
3634 rtx sreg, areg, tmp, smema;
3635 HOST_WIDE_INT i;
3636
3637 smema = XEXP (smem, 0);
3638 if (GET_CODE (smema) == LO_SUM)
3639 smema = force_reg (Pmode, smema);
3640
3641 /* Generate all the tmp registers we need. */
3642 for (i = 0; i < words; ++i)
3643 {
3644 data_regs[i] = out_regs[i];
3645 ext_tmps[i] = gen_reg_rtx (DImode);
3646 }
3647 data_regs[words] = gen_reg_rtx (DImode);
3648
3649 if (ofs != 0)
3650 smem = adjust_address (smem, GET_MODE (smem), ofs);
3651
3652 /* Load up all of the source data. */
3653 for (i = 0; i < words; ++i)
3654 {
3655 tmp = change_address (smem, DImode,
3656 gen_rtx_AND (DImode,
3657 plus_constant (DImode, smema, 8*i),
3658 im8));
3659 set_mem_alias_set (tmp, 0);
3660 emit_move_insn (data_regs[i], tmp);
3661 }
3662
3663 tmp = change_address (smem, DImode,
3664 gen_rtx_AND (DImode,
3665 plus_constant (DImode, smema,
3666 8*words - 1),
3667 im8));
3668 set_mem_alias_set (tmp, 0);
3669 emit_move_insn (data_regs[words], tmp);
3670
3671 /* Extract the half-word fragments. Unfortunately DEC decided to make
3672 extxh with offset zero a noop instead of zeroing the register, so
3673 we must take care of that edge condition ourselves with cmov. */
3674
3675 sreg = copy_addr_to_reg (smema);
3676 areg = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL,
3677 1, OPTAB_WIDEN);
3678 for (i = 0; i < words; ++i)
3679 {
3680 emit_insn (gen_extql (data_regs[i], data_regs[i], sreg));
3681 emit_insn (gen_extqh (ext_tmps[i], data_regs[i+1], sreg));
3682 emit_insn (gen_rtx_SET (ext_tmps[i],
3683 gen_rtx_IF_THEN_ELSE (DImode,
3684 gen_rtx_EQ (DImode, areg,
3685 const0_rtx),
3686 const0_rtx, ext_tmps[i])));
3687 }
3688
3689 /* Merge the half-words into whole words. */
3690 for (i = 0; i < words; ++i)
3691 {
3692 out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i],
3693 ext_tmps[i], data_regs[i], 1, OPTAB_WIDEN);
3694 }
3695}
3696
3697/* Store an integral number of consecutive unaligned quadwords. DATA_REGS
3698 may be NULL to store zeros. */
3699
3700static void
3701alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem,
3702 HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3703{
3704 rtx const im8 = GEN_INT (-8);
3705 rtx ins_tmps[MAX_MOVE_WORDS];
3706 rtx st_tmp_1, st_tmp_2, dreg;
3707 rtx st_addr_1, st_addr_2, dmema;
3708 HOST_WIDE_INT i;
3709
3710 dmema = XEXP (dmem, 0);
3711 if (GET_CODE (dmema) == LO_SUM)
3712 dmema = force_reg (Pmode, dmema);
3713
3714 /* Generate all the tmp registers we need. */
3715 if (data_regs != NULL)
3716 for (i = 0; i < words; ++i)
3717 ins_tmps[i] = gen_reg_rtx(DImode);
3718 st_tmp_1 = gen_reg_rtx(DImode);
3719 st_tmp_2 = gen_reg_rtx(DImode);
3720
3721 if (ofs != 0)
3722 dmem = adjust_address (dmem, GET_MODE (dmem), ofs);
3723
3724 st_addr_2 = change_address (dmem, DImode,
3725 gen_rtx_AND (DImode,
3726 plus_constant (DImode, dmema,
3727 words*8 - 1),
3728 im8));
3729 set_mem_alias_set (st_addr_2, 0);
3730
3731 st_addr_1 = change_address (dmem, DImode,
3732 gen_rtx_AND (DImode, dmema, im8));
3733 set_mem_alias_set (st_addr_1, 0);
3734
3735 /* Load up the destination end bits. */
3736 emit_move_insn (st_tmp_2, st_addr_2);
3737 emit_move_insn (st_tmp_1, st_addr_1);
3738
3739 /* Shift the input data into place. */
3740 dreg = copy_addr_to_reg (dmema);
3741 if (data_regs != NULL)
3742 {
3743 for (i = words-1; i >= 0; --i)
3744 {
3745 emit_insn (gen_insqh (ins_tmps[i], data_regs[i], dreg));
3746 emit_insn (gen_insql (data_regs[i], data_regs[i], dreg));
3747 }
3748 for (i = words-1; i > 0; --i)
3749 {
3750 ins_tmps[i-1] = expand_binop (DImode, ior_optab, data_regs[i],
3751 ins_tmps[i-1], ins_tmps[i-1], 1,
3752 OPTAB_WIDEN);
3753 }
3754 }
3755
3756 /* Split and merge the ends with the destination data. */
3757 emit_insn (gen_mskqh (st_tmp_2, st_tmp_2, dreg));
3758 emit_insn (gen_mskql (st_tmp_1, st_tmp_1, dreg));
3759
3760 if (data_regs != NULL)
3761 {
3762 st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words-1],
3763 st_tmp_2, 1, OPTAB_WIDEN);
3764 st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0],
3765 st_tmp_1, 1, OPTAB_WIDEN);
3766 }
3767
3768 /* Store it all. */
3769 emit_move_insn (st_addr_2, st_tmp_2);
3770 for (i = words-1; i > 0; --i)
3771 {
3772 rtx tmp = change_address (dmem, DImode,
3773 gen_rtx_AND (DImode,
3774 plus_constant (DImode,
3775 dmema, i*8),
3776 im8));
3777 set_mem_alias_set (tmp, 0);
3778 emit_move_insn (tmp, data_regs ? ins_tmps[i-1] : const0_rtx);
3779 }
3780 emit_move_insn (st_addr_1, st_tmp_1);
3781}
3782
3783
3784/* Expand string/block move operations.
3785
3786 operands[0] is the pointer to the destination.
3787 operands[1] is the pointer to the source.
3788 operands[2] is the number of bytes to move.
3789 operands[3] is the alignment. */
3790
3791int
3792alpha_expand_block_move (rtx operands[])
3793{
3794 rtx bytes_rtx = operands[2];
3795 rtx align_rtx = operands[3];
3796 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
3797 HOST_WIDE_INT bytes = orig_bytes;
3798 HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT;
3799 HOST_WIDE_INT dst_align = src_align;
3800 rtx orig_src = operands[1];
3801 rtx orig_dst = operands[0];
3802 rtx data_regs[2 * MAX_MOVE_WORDS + 16];
3803 rtx tmp;
3804 unsigned int i, words, ofs, nregs = 0;
3805
3806 if (orig_bytes <= 0)
3807 return 1;
3808 else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
3809 return 0;
3810
3811 /* Look for additional alignment information from recorded register info. */
3812
3813 tmp = XEXP (orig_src, 0);
3814 if (REG_P (tmp))
3815 src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3816 else if (GET_CODE (tmp) == PLUS
3817 && REG_P (XEXP (tmp, 0))
3818 && CONST_INT_P (XEXP (tmp, 1)))
3819 {
3820 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3821 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3822
3823 if (a > src_align)
3824 {
3825 if (a >= 64 && c % 8 == 0)
3826 src_align = 64;
3827 else if (a >= 32 && c % 4 == 0)
3828 src_align = 32;
3829 else if (a >= 16 && c % 2 == 0)
3830 src_align = 16;
3831 }
3832 }
3833
3834 tmp = XEXP (orig_dst, 0);
3835 if (REG_P (tmp))
3836 dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3837 else if (GET_CODE (tmp) == PLUS
3838 && REG_P (XEXP (tmp, 0))
3839 && CONST_INT_P (XEXP (tmp, 1)))
3840 {
3841 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3842 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3843
3844 if (a > dst_align)
3845 {
3846 if (a >= 64 && c % 8 == 0)
3847 dst_align = 64;
3848 else if (a >= 32 && c % 4 == 0)
3849 dst_align = 32;
3850 else if (a >= 16 && c % 2 == 0)
3851 dst_align = 16;
3852 }
3853 }
3854
3855 ofs = 0;
3856 if (src_align >= 64 && bytes >= 8)
3857 {
3858 words = bytes / 8;
3859
3860 for (i = 0; i < words; ++i)
3861 data_regs[nregs + i] = gen_reg_rtx (DImode);
3862
3863 for (i = 0; i < words; ++i)
3864 emit_move_insn (data_regs[nregs + i],
3865 adjust_address (orig_src, DImode, ofs + i * 8));
3866
3867 nregs += words;
3868 bytes -= words * 8;
3869 ofs += words * 8;
3870 }
3871
3872 if (src_align >= 32 && bytes >= 4)
3873 {
3874 words = bytes / 4;
3875
3876 for (i = 0; i < words; ++i)
3877 data_regs[nregs + i] = gen_reg_rtx (SImode);
3878
3879 for (i = 0; i < words; ++i)
3880 emit_move_insn (data_regs[nregs + i],
3881 adjust_address (orig_src, SImode, ofs + i * 4));
3882
3883 nregs += words;
3884 bytes -= words * 4;
3885 ofs += words * 4;
3886 }
3887
3888 if (bytes >= 8)
3889 {
3890 words = bytes / 8;
3891
3892 for (i = 0; i < words+1; ++i)
3893 data_regs[nregs + i] = gen_reg_rtx (DImode);
3894
3895 alpha_expand_unaligned_load_words (data_regs + nregs, orig_src,
3896 words, ofs);
3897
3898 nregs += words;
3899 bytes -= words * 8;
3900 ofs += words * 8;
3901 }
3902
3903 if (! TARGET_BWX && bytes >= 4)
3904 {
3905 data_regs[nregs++] = tmp = gen_reg_rtx (SImode);
3906 alpha_expand_unaligned_load (tmp, orig_src, 4, ofs, 0);
3907 bytes -= 4;
3908 ofs += 4;
3909 }
3910
3911 if (bytes >= 2)
3912 {
3913 if (src_align >= 16)
3914 {
3915 do {
3916 data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3917 emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs));
3918 bytes -= 2;
3919 ofs += 2;
3920 } while (bytes >= 2);
3921 }
3922 else if (! TARGET_BWX)
3923 {
3924 data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3925 alpha_expand_unaligned_load (tmp, orig_src, 2, ofs, 0);
3926 bytes -= 2;
3927 ofs += 2;
3928 }
3929 }
3930
3931 while (bytes > 0)
3932 {
3933 data_regs[nregs++] = tmp = gen_reg_rtx (QImode);
3934 emit_move_insn (tmp, adjust_address (orig_src, QImode, ofs));
3935 bytes -= 1;
3936 ofs += 1;
3937 }
3938
3939 gcc_assert (nregs <= ARRAY_SIZE (data_regs));
3940
3941 /* Now save it back out again. */
3942
3943 i = 0, ofs = 0;
3944
3945 /* Write out the data in whatever chunks reading the source allowed. */
3946 if (dst_align >= 64)
3947 {
3948 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3949 {
3950 emit_move_insn (adjust_address (orig_dst, DImode, ofs),
3951 data_regs[i]);
3952 ofs += 8;
3953 i++;
3954 }
3955 }
3956
3957 if (dst_align >= 32)
3958 {
3959 /* If the source has remaining DImode regs, write them out in
3960 two pieces. */
3961 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3962 {
3963 tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32),
3964 NULL_RTX, 1, OPTAB_WIDEN);
3965
3966 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3967 gen_lowpart (SImode, data_regs[i]));
3968 emit_move_insn (adjust_address (orig_dst, SImode, ofs + 4),
3969 gen_lowpart (SImode, tmp));
3970 ofs += 8;
3971 i++;
3972 }
3973
3974 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3975 {
3976 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3977 data_regs[i]);
3978 ofs += 4;
3979 i++;
3980 }
3981 }
3982
3983 if (i < nregs && GET_MODE (data_regs[i]) == DImode)
3984 {
3985 /* Write out a remaining block of words using unaligned methods. */
3986
3987 for (words = 1; i + words < nregs; words++)
3988 if (GET_MODE (data_regs[i + words]) != DImode)
3989 break;
3990
3991 if (words == 1)
3992 alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs);
3993 else
3994 alpha_expand_unaligned_store_words (data_regs + i, orig_dst,
3995 words, ofs);
3996
3997 i += words;
3998 ofs += words * 8;
3999 }
4000
4001 /* Due to the above, this won't be aligned. */
4002 /* ??? If we have more than one of these, consider constructing full
4003 words in registers and using alpha_expand_unaligned_store_words. */
4004 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
4005 {
4006 alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs);
4007 ofs += 4;
4008 i++;
4009 }
4010
4011 if (dst_align >= 16)
4012 while (i < nregs && GET_MODE (data_regs[i]) == HImode)
4013 {
4014 emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]);
4015 i++;
4016 ofs += 2;
4017 }
4018 else
4019 while (i < nregs && GET_MODE (data_regs[i]) == HImode)
4020 {
4021 alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs);
4022 i++;
4023 ofs += 2;
4024 }
4025
4026 /* The remainder must be byte copies. */
4027 while (i < nregs)
4028 {
4029 gcc_assert (GET_MODE (data_regs[i]) == QImode);
4030 emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]);
4031 i++;
4032 ofs += 1;
4033 }
4034
4035 return 1;
4036}
4037
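/* Expand string/block clear operations.

   operands[0] is the pointer to the destination.
   operands[1] is the number of bytes to clear.
   operands[3] is the alignment.

   As with the block move above, a zero return value asks the caller
   to fall back to a library call.  */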
4038int
4039alpha_expand_block_clear (rtx operands[])
4040{
4041 rtx bytes_rtx = operands[1];
4042 rtx align_rtx = operands[3];
4043 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
4044 HOST_WIDE_INT bytes = orig_bytes;
4045 HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT;
4046 HOST_WIDE_INT alignofs = 0;
4047 rtx orig_dst = operands[0];
4048 rtx tmp;
4049 int i, words, ofs = 0;
4050
4051 if (orig_bytes <= 0)
4052 return 1;
4053 if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
4054 return 0;
4055
4056 /* Look for stricter alignment. */
4057 tmp = XEXP (orig_dst, 0);
4058 if (REG_P (tmp))
4059 align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp)));
4060 else if (GET_CODE (tmp) == PLUS
4061 && REG_P (XEXP (tmp, 0))
4062 && CONST_INT_P (XEXP (tmp, 1)))
4063 {
4064 HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
4065 int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
4066
4067 if (a > align)
4068 {
4069 if (a >= 64)
4070 align = a, alignofs = 8 - c % 8;
4071 else if (a >= 32)
4072 align = a, alignofs = 4 - c % 4;
4073 else if (a >= 16)
4074 align = a, alignofs = 2 - c % 2;
4075 }
4076 }
4077
4078 /* Handle an unaligned prefix first. */
4079
4080 if (alignofs > 0)
4081 {
4082 /* Given that alignofs is bounded by align, the only time BWX could
4083 generate three stores is for a 7 byte fill. Prefer two individual
4084 stores over a load/mask/store sequence. */
4085 if ((!TARGET_BWX || alignofs == 7)
4086 && align >= 32
4087 && !(alignofs == 4 && bytes >= 4))
4088 {
4089 machine_mode mode = (align >= 64 ? DImode : SImode);
4090 int inv_alignofs = (align >= 64 ? 8 : 4) - alignofs;
4091 rtx mem, tmp;
4092 HOST_WIDE_INT mask;
4093
4094 mem = adjust_address (orig_dst, mode, ofs - inv_alignofs);
4095 set_mem_alias_set (mem, 0);
4096
4097 mask = ~(HOST_WIDE_INT_M1U << (inv_alignofs * 8));
4098 if (bytes < alignofs)
4099 {
4100 mask |= HOST_WIDE_INT_M1U << ((inv_alignofs + bytes) * 8);
4101 ofs += bytes;
4102 bytes = 0;
4103 }
4104 else
4105 {
4106 bytes -= alignofs;
4107 ofs += alignofs;
4108 }
4109 alignofs = 0;
4110
4111 tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask),
4112 NULL_RTX, 1, OPTAB_WIDEN);
4113
4114 emit_move_insn (mem, tmp);
4115 }
4116
4117 if (TARGET_BWX && (alignofs & 1) && bytes >= 1)
4118 {
4119 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
4120 bytes -= 1;
4121 ofs += 1;
4122 alignofs -= 1;
4123 }
4124 if (TARGET_BWX && align >= 16 && (alignofs & 3) == 2 && bytes >= 2)
4125 {
4126 emit_move_insn (adjust_address (orig_dst, HImode, ofs), const0_rtx);
4127 bytes -= 2;
4128 ofs += 2;
4129 alignofs -= 2;
4130 }
4131 if (alignofs == 4 && bytes >= 4)
4132 {
4133 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
4134 bytes -= 4;
4135 ofs += 4;
4136 alignofs = 0;
4137 }
4138
4139 /* If we've not used the extra lead alignment information by now,
4140 we won't be able to. Downgrade align to match what's left over. */
4141 if (alignofs > 0)
4142 {
4143 alignofs = alignofs & -alignofs;
4144 align = MIN (align, alignofs * BITS_PER_UNIT);
4145 }
4146 }
4147
4148 /* Handle a block of contiguous long-words. */
4149
4150 if (align >= 64 && bytes >= 8)
4151 {
4152 words = bytes / 8;
4153
4154 for (i = 0; i < words; ++i)
4155 emit_move_insn (adjust_address (orig_dst, DImode, ofs + i * 8),
4156 const0_rtx);
4157
4158 bytes -= words * 8;
4159 ofs += words * 8;
4160 }
4161
4162 /* If the block is large and appropriately aligned, emit a single
4163 aligned stl followed by a sequence of stq_u insns. */
4164
4165 if (align >= 32 && bytes > 16)
4166 {
4167 rtx orig_dsta;
4168
4169 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
4170 bytes -= 4;
4171 ofs += 4;
4172
4173 orig_dsta = XEXP (orig_dst, 0);
4174 if (GET_CODE (orig_dsta) == LO_SUM)
4175 orig_dsta = force_reg (Pmode, orig_dsta);
4176
4177 words = bytes / 8;
4178 for (i = 0; i < words; ++i)
4179 {
4180 rtx mem
4181 = change_address (orig_dst, DImode,
4182 gen_rtx_AND (DImode,
4183 plus_constant (DImode, orig_dsta,
4184 ofs + i*8),
4185 GEN_INT (-8)));
4186 set_mem_alias_set (mem, 0);
4187 emit_move_insn (mem, const0_rtx);
4188 }
4189
4190 /* Depending on the alignment, the first stq_u may have overlapped
4191 with the initial stl, which means that the last stq_u didn't
4192 write as much as it would appear. Leave those questionable bytes
4193 unaccounted for. */
4194 bytes -= words * 8 - 4;
4195 ofs += words * 8 - 4;
4196 }
4197
4198 /* Handle a smaller block of aligned words. */
4199
4200 if ((align >= 64 && bytes == 4)
4201 || (align == 32 && bytes >= 4))
4202 {
4203 words = bytes / 4;
4204
4205 for (i = 0; i < words; ++i)
4206 emit_move_insn (adjust_address (orig_dst, SImode, ofs + i * 4),
4207 const0_rtx);
4208
4209 bytes -= words * 4;
4210 ofs += words * 4;
4211 }
4212
4213 /* An unaligned block uses stq_u stores for as many whole quadwords as possible. */
4214
4215 if (bytes >= 8)
4216 {
4217 words = bytes / 8;
4218
4219 alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs);
4220
4221 bytes -= words * 8;
4222 ofs += words * 8;
4223 }
4224
4225 /* Next clean up any trailing pieces. */
4226
4227 /* Count the number of bits set in BYTES for which aligned stores could
4228 be emitted. */
4229 words = 0;
4230 for (i = (TARGET_BWX ? 1 : 4); i * BITS_PER_UNIT <= align ; i <<= 1)
4231 if (bytes & i)
4232 words += 1;
4233
4234 /* If we have appropriate alignment (and it wouldn't take too many
4235 instructions otherwise), mask out the bytes we need to clear. */
4236 if (TARGET_BWX ? words > 2 : bytes > 0)
4237 {
4238 if (align >= 64)
4239 {
4240 rtx mem, tmp;
4241 HOST_WIDE_INT mask;
4242
4243 mem = adjust_address (orig_dst, DImode, ofs);
4244 set_mem_alias_set (mem, 0);
4245
4246 mask = HOST_WIDE_INT_M1U << (bytes * 8);
4247
4248 tmp = expand_binop (DImode, and_optab, mem, GEN_INT (mask),
4249 NULL_RTX, 1, OPTAB_WIDEN);
4250
4251 emit_move_insn (mem, tmp);
4252 return 1;
4253 }
4254 else if (align >= 32 && bytes < 4)
4255 {
4256 rtx mem, tmp;
4257 HOST_WIDE_INT mask;
4258
4259 mem = adjust_address (orig_dst, SImode, ofs);
4260 set_mem_alias_set (mem, 0);
4261
4262 mask = HOST_WIDE_INT_M1U << (bytes * 8);
4263
4264 tmp = expand_binop (SImode, and_optab, mem, GEN_INT (mask),
4265 NULL_RTX, 1, OPTAB_WIDEN);
4266
4267 emit_move_insn (mem, tmp);
4268 return 1;
4269 }
4270 }
4271
4272 if (!TARGET_BWX && bytes >= 4)
4273 {
4274 alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs);
4275 bytes -= 4;
4276 ofs += 4;
4277 }
4278
4279 if (bytes >= 2)
4280 {
4281 if (align >= 16)
4282 {
4283 do {
4284 emit_move_insn (adjust_address (orig_dst, HImode, ofs),
4285 const0_rtx);
4286 bytes -= 2;
4287 ofs += 2;
4288 } while (bytes >= 2);
4289 }
4290 else if (! TARGET_BWX)
4291 {
4292 alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs);
4293 bytes -= 2;
4294 ofs += 2;
4295 }
4296 }
4297
4298 while (bytes > 0)
4299 {
4300 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
4301 bytes -= 1;
4302 ofs += 1;
4303 }
4304
4305 return 1;
4306}
4307
4308/* Returns a mask so that zap(x, value) == x & mask. */
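/* For illustration: value == 0x0f asks for the low four bytes to be
   zapped, so the loop below builds the mask 0xffffffff00000000.  */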
4309
4310rtx
4311alpha_expand_zap_mask (HOST_WIDE_INT value)
4312{
4313 rtx result;
4314 int i;
4315 HOST_WIDE_INT mask = 0;
4316
4317 for (i = 7; i >= 0; --i)
4318 {
4319 mask <<= 8;
4320 if (!((value >> i) & 1))
4321 mask |= 0xff;
4322 }
4323
4324 result = gen_int_mode (mask, DImode);
4325 return result;
4326}
4327
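/* Helper for expanding the two-input vector built-ins: view OP0, OP1
   and OP2 in vector MODE (replacing integer zeros with MODE's zero
   constant) and emit GEN on them.  */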
4328void
4329alpha_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx),
4330 machine_mode mode,
4331 rtx op0, rtx op1, rtx op2)
4332{
4333 op0 = gen_lowpart (mode, op0);
4334
4335 if (op1 == const0_rtx)
4336 op1 = CONST0_RTX (mode);
4337 else
4338 op1 = gen_lowpart (mode, op1);
4339
4340 if (op2 == const0_rtx)
4341 op2 = CONST0_RTX (mode);
4342 else
4343 op2 = gen_lowpart (mode, op2);
4344
4345 emit_insn ((*gen) (op0, op1, op2));
4346}
4347
4348/* A subroutine of the atomic operation splitters. Jump to LABEL if
4349 COND is true. Mark the jump as unlikely to be taken. */
4350
4351static void
4352emit_unlikely_jump (rtx cond, rtx label)
4353{
4354 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
4355 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
4356 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
4357}
4358
4359/* A subroutine of the atomic operation splitters. Emit a load-locked
4360 instruction in MODE. */
4361
4362static void
4363emit_load_locked (machine_mode mode, rtx reg, rtx mem)
4364{
4365 rtx (*fn) (rtx, rtx) = NULL;
4366 if (mode == SImode)
4367 fn = gen_load_locked_si;
4368 else if (mode == DImode)
4369 fn = gen_load_locked_di;
4370 emit_insn (fn (reg, mem));
4371}
4372
4373/* A subroutine of the atomic operation splitters. Emit a store-conditional
4374 instruction in MODE. */
4375
4376static void
4377emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
4378{
4379 rtx (*fn) (rtx, rtx, rtx) = NULL;
4380 if (mode == SImode)
4381 fn = gen_store_conditional_si;
4382 else if (mode == DImode)
4383 fn = gen_store_conditional_di;
4384 emit_insn (fn (res, mem, val));
4385}
4386
4387/* Subroutines of the atomic operation splitters. Emit barriers
4388 as needed for the memory MODEL. */
4389
4390static void
4391alpha_pre_atomic_barrier (enum memmodel model)
4392{
4393 if (need_atomic_barrier_p (model, true))
4394 emit_insn (gen_memory_barrier ());
4395}
4396
4397static void
4398alpha_post_atomic_barrier (enum memmodel model)
4399{
4400 if (need_atomic_barrier_p (model, false))
4401 emit_insn (gen_memory_barrier ());
4402}
4403
4404/* A subroutine of the atomic operation splitters. Emit an insxl
4405 instruction in MODE. */
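/* The ins{b,w,l,q}l instructions shift OP1 into position within the
   enclosing aligned quadword according to the low three bits of the
   address OP2, ready to be ORed into that quadword.  */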
4406
4407static rtx
4408emit_insxl (machine_mode mode, rtx op1, rtx op2)
4409{
4410 rtx ret = gen_reg_rtx (DImode);
4411 rtx (*fn) (rtx, rtx, rtx);
4412
4413 switch (mode)
4414 {
4415 case E_QImode:
4416 fn = gen_insbl;
4417 break;
4418 case E_HImode:
4419 fn = gen_inswl;
4420 break;
4421 case E_SImode:
4422 fn = gen_insll;
4423 break;
4424 case E_DImode:
4425 fn = gen_insql;
4426 break;
4427 default:
4428 gcc_unreachable ();
4429 }
4430
4431 op1 = force_reg (mode, op1);
4432 emit_insn (fn (ret, op1, op2));
4433
4434 return ret;
4435}
4436
4437/* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
4438 to perform. MEM is the memory on which to operate. VAL is the second
4439 operand of the binary operator. BEFORE and AFTER are optional locations to
4440 return the value of MEM either before of after the operation. SCRATCH is
4441 a scratch register. */
4442
4443void
4444alpha_split_atomic_op (enum rtx_code code, rtx mem, rtx val, rtx before,
4445 rtx after, rtx scratch, enum memmodel model)
4446{
4447 machine_mode mode = GET_MODE (mem);
4448 rtx label, x, cond = gen_rtx_REG (DImode, REGNO (scratch));
4449
4450 alpha_pre_atomic_barrier (model);
4451
4452 label = gen_label_rtx ();
4453 emit_label (label);
4454 label = gen_rtx_LABEL_REF (DImode, label);
4455
4456 if (before == NULL)
4457 before = scratch;
4458 emit_load_locked (mode, before, mem);
4459
4460 if (code == NOT)
4461 {
4462 x = gen_rtx_AND (mode, before, val);
4463 emit_insn (gen_rtx_SET (val, x));
4464
4465 x = gen_rtx_NOT (mode, val);
4466 }
4467 else
4468 x = gen_rtx_fmt_ee (code, mode, before, val);
4469 if (after)
4470 emit_insn (gen_rtx_SET (after, copy_rtx (x)));
4471 emit_insn (gen_rtx_SET (scratch, x));
4472
4473 emit_store_conditional (mode, cond, mem, scratch);
4474
4475 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4476 emit_unlikely_jump (x, label);
4477
4478 alpha_post_atomic_barrier (model);
4479}
4480
4481/* Expand a compare and swap operation. */
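/* The operand layout matches GCC's atomic_compare_and_swap optab:
   0 = success flag, 1 = output for the loaded value, 2 = memory,
   3 = expected value, 4 = desired value, 5 = is_weak, 6 = success
   memory model, 7 = failure memory model, as read back below.  */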
4482
4483void
4484alpha_split_compare_and_swap (rtx operands[])
4485{
4486 rtx cond, retval, mem, oldval, newval;
4487 bool is_weak;
4488 enum memmodel mod_s, mod_f;
4489 machine_mode mode;
4490 rtx label1, label2, x;
4491
4492 cond = operands[0];
4493 retval = operands[1];
4494 mem = operands[2];
4495 oldval = operands[3];
4496 newval = operands[4];
4497 is_weak = (operands[5] != const0_rtx);
4498 mod_s = memmodel_from_int (INTVAL (operands[6]));
4499 mod_f = memmodel_from_int (INTVAL (operands[7]));
4500 mode = GET_MODE (mem);
4501
4502 alpha_pre_atomic_barrier (mod_s);
4503
4504 label1 = NULL_RTX;
4505 if (!is_weak)
4506 {
4507 label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4508 emit_label (XEXP (label1, 0));
4509 }
4510 label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4511
4512 emit_load_locked (mode, retval, mem);
4513
4514 x = gen_lowpart (DImode, retval);
4515 if (oldval == const0_rtx)
4516 {
4517 emit_move_insn (cond, const0_rtx);
4518 x = gen_rtx_NE (DImode, x, const0_rtx);
4519 }
4520 else
4521 {
4522 x = gen_rtx_EQ (DImode, x, oldval);
4523 emit_insn (gen_rtx_SET (cond, x));
4524 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4525 }
4526 emit_unlikely_jump (x, label2);
4527
4528 emit_move_insn (cond, newval);
4529 emit_store_conditional (mode, cond, mem, gen_lowpart (mode, cond));
4530
4531 if (!is_weak)
4532 {
4533 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4534 emit_unlikely_jump (x, label1);
4535 }
4536
4537 if (!is_mm_relaxed (mod_f))
4538 emit_label (XEXP (label2, 0));
4539
4540 alpha_post_atomic_barrier (mod_s);
4541
4542 if (is_mm_relaxed (mod_f))
4543 emit_label (XEXP (label2, 0));
4544}
4545
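/* Expand a compare-and-swap on a QImode or HImode quantity.  The
   operation is performed on the aligned DImode word containing the
   location: the address is rounded down with an AND, the new value
   is shifted into place with insxl, and the remaining work is done
   by the atomic_compare_and_swap{qi,hi}_1 pattern split below.  */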
4546void
4547alpha_expand_compare_and_swap_12 (rtx operands[])
4548{
4549 rtx cond, dst, mem, oldval, newval, is_weak, mod_s, mod_f;
4550 machine_mode mode;
4551 rtx addr, align, wdst;
4552 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
4553
4554 cond = operands[0];
4555 dst = operands[1];
4556 mem = operands[2];
4557 oldval = operands[3];
4558 newval = operands[4];
4559 is_weak = operands[5];
4560 mod_s = operands[6];
4561 mod_f = operands[7];
4562 mode = GET_MODE (mem);
4563
4564 /* We forced the address into a register via mem_noofs_operand. */
4565 addr = XEXP (mem, 0);
4566 gcc_assert (register_operand (addr, DImode));
4567
4568 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4569 NULL_RTX, 1, OPTAB_DIRECT);
4570
4571 oldval = convert_modes (DImode, mode, oldval, 1);
4572
4573 if (newval != const0_rtx)
4574 newval = emit_insxl (mode, newval, addr);
4575
4576 wdst = gen_reg_rtx (DImode);
4577 if (mode == QImode)
4578 gen = gen_atomic_compare_and_swapqi_1;
4579 else
4580 gen = gen_atomic_compare_and_swaphi_1;
4581 emit_insn (gen (cond, wdst, mem, oldval, newval, align,
4582 is_weak, mod_s, mod_f));
4583
4584 emit_move_insn (dst, gen_lowpart (mode, wdst));
4585}
4586
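/* Split the atomic_compare_and_swap{qi,hi}_1 pattern into a
   load-locked/store-conditional loop on the containing quadword,
   extracting the narrow value with extxl and merging the replacement
   value back in with mskxl and an OR.  */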
4587void
4588alpha_split_compare_and_swap_12 (rtx operands[])
4589{
4590 rtx cond, dest, orig_mem, oldval, newval, align, scratch;
4591 machine_mode mode;
4592 bool is_weak;
4593 enum memmodel mod_s, mod_f;
4594 rtx label1, label2, mem, addr, width, mask, x;
4595
4596 cond = operands[0];
4597 dest = operands[1];
4598 orig_mem = operands[2];
4599 oldval = operands[3];
4600 newval = operands[4];
4601 align = operands[5];
4602 is_weak = (operands[6] != const0_rtx);
4603 mod_s = memmodel_from_int (INTVAL (operands[7]));
4604 mod_f = memmodel_from_int (INTVAL (operands[8]));
4605 scratch = operands[9];
4606 mode = GET_MODE (orig_mem);
4607 addr = XEXP (orig_mem, 0);
4608
4609 mem = gen_rtx_MEM (DImode, align);
4610 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
4611 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
4612 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
4613
4614 alpha_pre_atomic_barrier (mod_s);
4615
4616 label1 = NULL_RTX;
4617 if (!is_weak)
4618 {
4619 label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4620 emit_label (XEXP (label1, 0));
4621 }
4622 label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4623
4624 emit_load_locked (DImode, scratch, mem);
4625
4626 width = GEN_INT (GET_MODE_BITSIZE (mode));
4627 mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4628 emit_insn (gen_extxl (dest, scratch, width, addr));
4629
4630 if (oldval == const0_rtx)
4631 {
4632 emit_move_insn (cond, const0_rtx);
4633 x = gen_rtx_NE (DImode, dest, const0_rtx);
4634 }
4635 else
4636 {
4637 x = gen_rtx_EQ (DImode, dest, oldval);
4638 emit_insn (gen_rtx_SET (cond, x));
4639 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4640 }
4641 emit_unlikely_jump (x, label2);
4642
4643 emit_insn (gen_mskxl (cond, scratch, mask, addr));
4644
4645 if (newval != const0_rtx)
4646 emit_insn (gen_iordi3 (cond, cond, newval));
4647
4648 emit_store_conditional (DImode, cond, mem, cond);
4649
4650 if (!is_weak)
4651 {
4652 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4653 emit_unlikely_jump (x, label1);
4654 }
4655
4656 if (!is_mm_relaxed (mod_f))
4657 emit_label (XEXP (label2, 0));
4658
4659 alpha_post_atomic_barrier (mod_s);
4660
4661 if (is_mm_relaxed (mod_f))
4662 emit_label (XEXP (label2, 0));
4663}
4664
4665/* Expand an atomic exchange operation. */
4666
4667void
4668alpha_split_atomic_exchange (rtx operands[])
4669{
4670 rtx retval, mem, val, scratch;
4671 enum memmodel model;
4672 machine_mode mode;
4673 rtx label, x, cond;
4674
4675 retval = operands[0];
4676 mem = operands[1];
4677 val = operands[2];
4678 model = (enum memmodel) INTVAL (operands[3]);
4679 scratch = operands[4];
4680 mode = GET_MODE (mem);
4681 cond = gen_lowpart (DImode, scratch);
4682
4683 alpha_pre_atomic_barrier (model);
4684
4685 label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4686 emit_label (XEXP (label, 0));
4687
4688 emit_load_locked (mode, retval, mem);
4689 emit_move_insn (scratch, val);
4690 emit_store_conditional (mode, cond, mem, scratch);
4691
4692 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4693 emit_unlikely_jump (x, label);
4694
4695 alpha_post_atomic_barrier (model);
4696}
4697
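/* Expand an atomic exchange on a QImode or HImode quantity, using
   the same aligned-quadword technique as the compare-and-swap
   expansion above.  */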
4698void
4699alpha_expand_atomic_exchange_12 (rtx operands[])
4700{
4701 rtx dst, mem, val, model;
4702 machine_mode mode;
4703 rtx addr, align, wdst;
4704 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
4705
4706 dst = operands[0];
4707 mem = operands[1];
4708 val = operands[2];
4709 model = operands[3];
4710 mode = GET_MODE (mem);
4711
4712 /* We forced the address into a register via mem_noofs_operand. */
4713 addr = XEXP (mem, 0);
4714 gcc_assert (register_operand (addr, DImode));
4715
4716 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4717 NULL_RTX, 1, OPTAB_DIRECT);
4718
4719 /* Insert val into the correct byte location within the word. */
4720 if (val != const0_rtx)
4721 val = emit_insxl (mode, val, addr);
4722
4723 wdst = gen_reg_rtx (DImode);
4724 if (mode == QImode)
4725 gen = gen_atomic_exchangeqi_1;
4726 else
4727 gen = gen_atomic_exchangehi_1;
4728 emit_insn (gen (wdst, mem, val, align, model));
4729
4730 emit_move_insn (dst, gen_lowpart (mode, wdst));
4731}
4732
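/* Split the atomic_exchange{qi,hi}_1 pattern: inside a
   load-locked/store-conditional loop, extract the old narrow value
   with extxl, clear its byte lanes with mskxl, and OR in the new
   value.  */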
4733void
4734alpha_split_atomic_exchange_12 (rtx operands[])
4735{
4736 rtx dest, orig_mem, addr, val, align, scratch;
4737 rtx label, mem, width, mask, x;
4738 machine_mode mode;
4739 enum memmodel model;
4740
4741 dest = operands[0];
4742 orig_mem = operands[1];
4743 val = operands[2];
4744 align = operands[3];
4745 model = (enum memmodel) INTVAL (operands[4]);
4746 scratch = operands[5];
4747 mode = GET_MODE (orig_mem);
4748 addr = XEXP (orig_mem, 0);
4749
4750 mem = gen_rtx_MEM (DImode, align);
4751 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
4752 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
4753 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
4754
4755 alpha_pre_atomic_barrier (model);
4756
4757 label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4758 emit_label (XEXP (label, 0));
4759
4760 emit_load_locked (DImode, scratch, mem);
4761
4762 width = GEN_INT (GET_MODE_BITSIZE (mode));
4763 mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4764 emit_insn (gen_extxl (dest, scratch, width, addr));
4765 emit_insn (gen_mskxl (scratch, scratch, mask, addr));
4766 if (val != const0_rtx)
4767 emit_insn (gen_iordi3 (scratch, scratch, val));
4768
4769 emit_store_conditional (DImode, scratch, mem, scratch);
4770
4771 x = gen_rtx_EQ (DImode, scratch, const0_rtx);
4772 emit_unlikely_jump (x, label);
4773
4774 alpha_post_atomic_barrier (model);
4775}
4776
4777/* Adjust the cost of a scheduling dependency. Return the new cost of
4778 a dependency of type DEP_TYPE of INSN on DEP_INSN. COST is the current cost. */
4779
4780static int
4781alpha_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
4782 unsigned int)
4783{
4784 enum attr_type dep_insn_type;
4785
4786 /* If the dependence is an anti-dependence, there is no cost. For an
4787 output dependence, there is sometimes a cost, but it doesn't seem
4788 worth handling those few cases. */
4789 if (dep_type != 0)
4790 return cost;
4791
4792 /* If we can't recognize the insns, we can't really do anything. */
4793 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
4794 return cost;
4795
4796 dep_insn_type = get_attr_type (dep_insn);
4797
4798 /* Bring in the user-defined memory latency. */
4799 if (dep_insn_type == TYPE_ILD
4800 || dep_insn_type == TYPE_FLD
4801 || dep_insn_type == TYPE_LDSYM)
4802 cost += alpha_memory_latency-1;
4803
4804 /* Everything else handled in DFA bypasses now. */
4805
4806 return cost;
4807}
4808
4809/* The number of instructions that can be issued per cycle. */
4810
4811static int
4812alpha_issue_rate (void)
4813{
4814 return (alpha_tune == PROCESSOR_EV4 ? 2 : 4);
4815}
4816
4817/* How many alternative schedules to try. This should be as wide as the
4818 scheduling freedom in the DFA, but no wider. Making this value too
4819 large results extra work for the scheduler.
4820
4821 For EV4, loads can be issued to either IB0 or IB1, thus we have 2
4822 alternative schedules. For EV5, we can choose between E0/E1 and
4823 FA/FM. For EV6, an arithmetic insn can be issued to U0/U1/L0/L1. */
4824
4825static int
4826