1/* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2017 Free Software Foundation, Inc.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 3, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "backend.h"
26#include "target.h"
27#include "rtl.h"
28#include "tree.h"
29#include "predict.h"
30#include "memmodel.h"
31#include "tm_p.h"
32#include "expmed.h"
33#include "optabs.h"
34#include "regs.h"
35#include "emit-rtl.h"
36#include "diagnostic-core.h"
37#include "fold-const.h"
38#include "stor-layout.h"
39#include "dojump.h"
40#include "explow.h"
41#include "expr.h"
42#include "langhooks.h"
43#include "tree-vector-builder.h"
44
/* Per-target cached cost tables (see expmed.h); with SWITCHABLE_TARGET
   the active set is reached through this_target_expmed.  */
struct target_expmed default_target_expmed;
#if SWITCHABLE_TARGET
struct target_expmed *this_target_expmed = &default_target_expmed;
#endif
49
/* Forward declarations for the file-local worker routines used by the
   bit-field store/extract and division expanders defined below.  */
static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   rtx, scalar_int_mode, bool);
static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
				     unsigned HOST_WIDE_INT,
				     unsigned HOST_WIDE_INT,
				     rtx, scalar_int_mode, bool);
static void store_split_bit_field (rtx, opt_scalar_int_mode,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   rtx, scalar_int_mode, bool);
static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
				    unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
				      unsigned HOST_WIDE_INT,
				      unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
				    unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT, int, bool);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
79
80/* Return a constant integer mask value of mode MODE with BITSIZE ones
81 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
82 The mask is truncated if necessary to the width of mode MODE. The
83 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
84
85static inline rtx
86mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
87{
88 return immed_wide_int_const
89 (wi::shifted_mask (bitpos, bitsize, complement,
90 GET_MODE_PRECISION (mode)), mode);
91}
92
/* Test whether a value is zero or a power of two.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
96
/* Scratch RTL expressions built once by init_expmed and then re-costed
   for every mode (init_expmed_one_mode retargets them with PUT_MODE
   before each set_src_cost call).  */
struct init_expmed_rtl
{
  rtx reg;		/* A raw pseudo register operand.  */
  rtx plus;		/* (plus reg reg)  */
  rtx neg;		/* (neg reg)  */
  rtx mult;		/* (mult reg reg)  */
  rtx sdiv;		/* (div reg reg)  */
  rtx udiv;		/* (udiv reg reg)  */
  rtx sdiv_32;		/* (div reg 32) — signed division by a power of 2.  */
  rtx smod_32;		/* (mod reg 32) — signed modulus by a power of 2.  */
  rtx wide_mult;	/* (mult zext zext) in a wider mode.  */
  rtx wide_lshr;	/* (lshiftrt wide_mult reg) in a wider mode.  */
  rtx wide_trunc;	/* (truncate wide_lshr) — highpart multiply.  */
  rtx shift;		/* (ashift reg reg); operand 1 replaced per count.  */
  rtx shift_mult;	/* (mult reg 2^m) — shift expressed as multiply.  */
  rtx shift_add;	/* (plus shift_mult reg)  */
  rtx shift_sub0;	/* (minus shift_mult reg)  */
  rtx shift_sub1;	/* (minus reg shift_mult)  */
  rtx zext;		/* (zero_extend reg)  */
  rtx trunc;		/* (truncate reg)  */

  rtx pow2[MAX_BITS_PER_WORD];	/* pow2[m] is the constant 1 << m.  */
  rtx cint[MAX_BITS_PER_WORD];	/* cint[m] is the constant m.  */
};
121
122static void
123init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
124 scalar_int_mode from_mode, bool speed)
125{
126 int to_size, from_size;
127 rtx which;
128
129 to_size = GET_MODE_PRECISION (to_mode);
130 from_size = GET_MODE_PRECISION (from_mode);
131
132 /* Most partial integers have a precision less than the "full"
133 integer it requires for storage. In case one doesn't, for
134 comparison purposes here, reduce the bit size by one in that
135 case. */
136 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
137 && pow2p_hwi (to_size))
138 to_size --;
139 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
140 && pow2p_hwi (from_size))
141 from_size --;
142
143 /* Assume cost of zero-extend and sign-extend is the same. */
144 which = (to_size < from_size ? all->trunc : all->zext);
145
146 PUT_MODE (all->reg, from_mode);
147 set_convert_cost (to_mode, from_mode, speed,
148 set_src_cost (which, to_mode, speed));
149}
150
/* Fill in the cached cost tables for MODE under the speed/size setting
   SPEED, by retargeting the scratch expressions in ALL to MODE and
   measuring their RTL cost.  */

static void
init_expmed_one_mode (struct init_expmed_rtl *all,
		      machine_mode mode, int speed)
{
  int m, n, mode_bitsize;
  machine_mode mode_from;

  mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

  /* Point every scratch expression at MODE before costing it.  */
  PUT_MODE (all->reg, mode);
  PUT_MODE (all->plus, mode);
  PUT_MODE (all->neg, mode);
  PUT_MODE (all->mult, mode);
  PUT_MODE (all->sdiv, mode);
  PUT_MODE (all->udiv, mode);
  PUT_MODE (all->sdiv_32, mode);
  PUT_MODE (all->smod_32, mode);
  PUT_MODE (all->wide_trunc, mode);
  PUT_MODE (all->shift, mode);
  PUT_MODE (all->shift_mult, mode);
  PUT_MODE (all->shift_add, mode);
  PUT_MODE (all->shift_sub0, mode);
  PUT_MODE (all->shift_sub1, mode);
  PUT_MODE (all->zext, mode);
  PUT_MODE (all->trunc, mode);

  set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
  set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
  set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
  set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
  set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));

  /* A power-of-2 division/modulus is "cheap" when it costs no more than
     a small number of additions; these thresholds gate the special
     expansions in expand_sdiv_pow2 / expand_smod_pow2.  */
  set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
				     <= 2 * add_cost (speed, mode)));
  set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
				     <= 4 * add_cost (speed, mode)));

  /* A shift by zero is free; shift-and-add by zero is just the add.  */
  set_shift_cost (speed, mode, 0, 0);
  {
    int cost = add_cost (speed, mode);
    set_shiftadd_cost (speed, mode, 0, cost);
    set_shiftsub0_cost (speed, mode, 0, cost);
    set_shiftsub1_cost (speed, mode, 0, cost);
  }

  /* Cost shifts by each count M, both as a plain shift and in the
     combined shift-add / shift-sub forms (the latter expressed as a
     multiply by 2^M, which is how combine canonicalizes them).  */
  n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
  for (m = 1; m < n; m++)
    {
      XEXP (all->shift, 1) = all->cint[m];
      XEXP (all->shift_mult, 1) = all->pow2[m];

      set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
      set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
						       speed));
      set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
							speed));
      set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
							speed));
    }

  scalar_int_mode int_mode_to;
  if (is_a <scalar_int_mode> (mode, &int_mode_to))
    {
      /* Record conversion costs from every integer mode into MODE.  */
      for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
	   mode_from = (machine_mode)(mode_from + 1))
	init_expmed_one_conv (all, int_mode_to,
			      as_a <scalar_int_mode> (mode_from), speed);

      /* Cost the widening multiply and highpart multiply, which operate
	 in the next wider integer mode.  */
      scalar_int_mode wider_mode;
      if (GET_MODE_CLASS (int_mode_to) == MODE_INT
	  && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
	{
	  PUT_MODE (all->zext, wider_mode);
	  PUT_MODE (all->wide_mult, wider_mode);
	  PUT_MODE (all->wide_lshr, wider_mode);
	  XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);

	  set_mul_widen_cost (speed, wider_mode,
			      set_src_cost (all->wide_mult, wider_mode, speed));
	  set_mul_highpart_cost (speed, int_mode_to,
				 set_src_cost (all->wide_trunc,
					       int_mode_to, speed));
	}
    }
}
236
/* Initialize the cached operation-cost tables: per-mode add/neg/mul/div
   and shift costs, conversion costs, and the cheapness thresholds for
   power-of-2 signed division/modulus, for both the size-optimized
   (SPEED == 0) and speed-optimized (SPEED == 1) settings.  */

void
init_expmed (void)
{
  struct init_expmed_rtl all;
  machine_mode mode = QImode;
  int m, speed;

  memset (&all, 0, sizeof all);
  /* Pre-build the shift counts M and the matching 2^M constants used
     when costing shifts and shift-as-multiply forms.  */
  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
      all.cint[m] = GEN_INT (m);
    }

  /* Avoid using hard regs in ways which may be unsupported.  */
  all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
  all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
  all.neg = gen_rtx_NEG (mode, all.reg);
  all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
  all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
  all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
  all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
  all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
  all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
  all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
  all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
  all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
  all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
  all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
  all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
  all.trunc = gen_rtx_TRUNCATE (mode, all.reg);

  for (speed = 0; speed < 2; speed++)
    {
      /* Make the costing functions see the hotness matching SPEED;
	 default_rtl_profile below restores the normal profile.  */
      crtl->maybe_hot_insn_p = speed;
      set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));

      for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
	   mode = (machine_mode)(mode + 1))
	init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_PARTIAL_INT != VOIDmode)
	for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
	     mode = (machine_mode)(mode + 1))
	  init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_VECTOR_INT != VOIDmode)
	for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
	     mode = (machine_mode)(mode + 1))
	  init_expmed_one_mode (&all, mode, speed);
    }

  /* Clear the algorithm hash table on reinitialization, or mark it
     in use the first time through.  */
  if (alg_hash_used_p ())
    {
      struct alg_hash_entry *p = alg_hash_entry_ptr (0);
      memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
    }
  else
    set_alg_hash_used_p (true);
  default_rtl_profile ();

  /* Free the scratch expressions, outermost first so each is dead
     before its operands are freed.  */
  ggc_free (all.trunc);
  ggc_free (all.shift_sub1);
  ggc_free (all.shift_sub0);
  ggc_free (all.shift_add);
  ggc_free (all.shift_mult);
  ggc_free (all.shift);
  ggc_free (all.wide_trunc);
  ggc_free (all.wide_lshr);
  ggc_free (all.wide_mult);
  ggc_free (all.zext);
  ggc_free (all.smod_32);
  ggc_free (all.sdiv_32);
  ggc_free (all.udiv);
  ggc_free (all.sdiv);
  ggc_free (all.mult);
  ggc_free (all.neg);
  ggc_free (all.plus);
  ggc_free (all.reg);
}
319
320/* Return an rtx representing minus the value of X.
321 MODE is the intended mode of the result,
322 useful if X is a CONST_INT. */
323
324rtx
325negate_rtx (machine_mode mode, rtx x)
326{
327 rtx result = simplify_unary_operation (NEG, mode, x, mode);
328
329 if (result == 0)
330 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
331
332 return result;
333}
334
/* Whether reverse storage order is supported on the target:
   -1 = not yet determined, 0 = unsupported, 1 = supported.  */
static int reverse_storage_order_supported = -1;

/* Check whether reverse storage order is supported on the target.  */

static void
check_reverse_storage_order_support (void)
{
  /* flip_storage_order byte-swaps whole scalars, which only reverses
     the storage order correctly when bytes and words are ordered the
     same way; otherwise diagnose the construct as unsupported.  */
  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    {
      reverse_storage_order_supported = 0;
      sorry ("reverse scalar storage order");
    }
  else
    reverse_storage_order_supported = 1;
}
351
/* Whether reverse FP storage order is supported on the target:
   -1 = not yet determined, 0 = unsupported, 1 = supported.  */
static int reverse_float_storage_order_supported = -1;

/* Check whether reverse FP storage order is supported on the target.  */

static void
check_reverse_float_storage_order_support (void)
{
  /* As above, but floating-point words may have their own ordering;
     a byte swap is only correct when it matches the word order.  */
  if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    {
      reverse_float_storage_order_supported = 0;
      sorry ("reverse floating-point scalar storage order");
    }
  else
    reverse_float_storage_order_supported = 1;
}
368
/* Return an rtx representing value of X with reverse storage order.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
flip_storage_order (machine_mode mode, rtx x)
{
  scalar_int_mode int_mode;
  rtx result;

  /* A single byte has no storage order to reverse.  */
  if (mode == QImode)
    return x;

  /* Flip each part of a complex value independently.  */
  if (COMPLEX_MODE_P (mode))
    {
      rtx real = read_complex_part (x, false);
      rtx imag = read_complex_part (x, true);

      real = flip_storage_order (GET_MODE_INNER (mode), real);
      imag = flip_storage_order (GET_MODE_INNER (mode), imag);

      return gen_rtx_CONCAT (mode, real, imag);
    }

  /* Lazily determine (and diagnose) target support on first use.  */
  if (__builtin_expect (reverse_storage_order_supported < 0, 0))
    check_reverse_storage_order_support ();

  if (!is_a <scalar_int_mode> (mode, &int_mode))
    {
      if (FLOAT_MODE_P (mode)
	  && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
	check_reverse_float_storage_order_support ();

      /* Non-integer scalars are flipped via an integer mode of the
	 same precision, when one exists.  */
      if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
	{
	  sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
	  return x;
	}
      x = gen_lowpart (int_mode, x);
    }

  /* The flip itself is a byte swap; fold it at compile time when
     possible, otherwise emit a bswap sequence.  */
  result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
  if (result == 0)
    result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);

  if (int_mode != mode)
    result = gen_lowpart (mode, result);

  return result;
}
419
420/* If MODE is set, adjust bitfield memory MEM so that it points to the
421 first unit of mode MODE that contains a bitfield of size BITSIZE at
422 bit position BITNUM. If MODE is not set, return a BLKmode reference
423 to every byte in the bitfield. Set *NEW_BITNUM to the bit position
424 of the field within the new memory. */
425
426static rtx
427narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
428 unsigned HOST_WIDE_INT bitsize,
429 unsigned HOST_WIDE_INT bitnum,
430 unsigned HOST_WIDE_INT *new_bitnum)
431{
432 scalar_int_mode imode;
433 if (mode.exists (&imode))
434 {
435 unsigned int unit = GET_MODE_BITSIZE (imode);
436 *new_bitnum = bitnum % unit;
437 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
438 return adjust_bitfield_address (mem, imode, offset);
439 }
440 else
441 {
442 *new_bitnum = bitnum % BITS_PER_UNIT;
443 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
444 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
445 / BITS_PER_UNIT);
446 return adjust_bitfield_address_size (mem, BLKmode, offset, size);
447 }
448}
449
/* The caller wants to perform insertion or extraction PATTERN on a
   bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
   BITREGION_START and BITREGION_END are as for store_bit_field
   and FIELDMODE is the natural mode of the field.

   Search for a mode that is compatible with the memory access
   restrictions and (where applicable) with a register insertion or
   extraction.  Return the new memory on success, storing the adjusted
   bit position in *NEW_BITNUM.  Return null otherwise.  */

static rtx
adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
			      rtx op0, HOST_WIDE_INT bitsize,
			      HOST_WIDE_INT bitnum,
			      unsigned HOST_WIDE_INT bitregion_start,
			      unsigned HOST_WIDE_INT bitregion_end,
			      machine_mode fieldmode,
			      unsigned HOST_WIDE_INT *new_bitnum)
{
  /* The iterator yields the scalar integer modes usable for this access,
     narrowest first, honoring alignment, volatility and the region.  */
  bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
				bitregion_end, MEM_ALIGN (op0),
				MEM_VOLATILE_P (op0));
  scalar_int_mode best_mode;
  if (iter.next_mode (&best_mode))
    {
      /* We can use a memory in BEST_MODE.  See whether this is true for
	 any wider modes.  All other things being equal, we prefer to
	 use the widest mode possible because it tends to expose more
	 CSE opportunities.  */
      if (!iter.prefer_smaller_modes ())
	{
	  /* Limit the search to the mode required by the corresponding
	     register insertion or extraction instruction, if any.  */
	  scalar_int_mode limit_mode = word_mode;
	  extraction_insn insn;
	  if (get_best_reg_extraction_insn (&insn, pattern,
					    GET_MODE_BITSIZE (best_mode),
					    fieldmode))
	    limit_mode = insn.field_mode;

	  scalar_int_mode wider_mode;
	  while (iter.next_mode (&wider_mode)
		 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
	    best_mode = wider_mode;
	}
      return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
				   new_bitnum);
    }
  return NULL_RTX;
}
500
501/* Return true if a bitfield of size BITSIZE at bit number BITNUM within
502 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
503 offset is then BITNUM / BITS_PER_UNIT. */
504
505static bool
506lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
507 unsigned HOST_WIDE_INT bitsize,
508 machine_mode struct_mode)
509{
510 unsigned HOST_WIDE_INT regsize = REGMODE_NATURAL_SIZE (struct_mode);
511 if (BYTES_BIG_ENDIAN)
512 return (bitnum % BITS_PER_UNIT == 0
513 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
514 || (bitnum + bitsize) % (regsize * BITS_PER_UNIT) == 0));
515 else
516 return bitnum % (regsize * BITS_PER_UNIT) == 0;
517}
518
519/* Return true if -fstrict-volatile-bitfields applies to an access of OP0
520 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
521 Return false if the access would touch memory outside the range
522 BITREGION_START to BITREGION_END for conformance to the C++ memory
523 model. */
524
525static bool
526strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
527 unsigned HOST_WIDE_INT bitnum,
528 scalar_int_mode fieldmode,
529 unsigned HOST_WIDE_INT bitregion_start,
530 unsigned HOST_WIDE_INT bitregion_end)
531{
532 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
533
534 /* -fstrict-volatile-bitfields must be enabled and we must have a
535 volatile MEM. */
536 if (!MEM_P (op0)
537 || !MEM_VOLATILE_P (op0)
538 || flag_strict_volatile_bitfields <= 0)
539 return false;
540
541 /* The bit size must not be larger than the field mode, and
542 the field mode must not be larger than a word. */
543 if (bitsize > modesize || modesize > BITS_PER_WORD)
544 return false;
545
546 /* Check for cases of unaligned fields that must be split. */
547 if (bitnum % modesize + bitsize > modesize)
548 return false;
549
550 /* The memory must be sufficiently aligned for a MODESIZE access.
551 This condition guarantees, that the memory access will not
552 touch anything after the end of the structure. */
553 if (MEM_ALIGN (op0) < modesize)
554 return false;
555
556 /* Check for cases where the C++ memory model applies. */
557 if (bitregion_end != 0
558 && (bitnum - bitnum % modesize < bitregion_start
559 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
560 return false;
561
562 return true;
563}
564
565/* Return true if OP is a memory and if a bitfield of size BITSIZE at
566 bit number BITNUM can be treated as a simple value of mode MODE. */
567
568static bool
569simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
570 unsigned HOST_WIDE_INT bitnum, machine_mode mode)
571{
572 return (MEM_P (op0)
573 && bitnum % BITS_PER_UNIT == 0
574 && bitsize == GET_MODE_BITSIZE (mode)
575 && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
576 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
577 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
578}
579
/* Try to use instruction INSV to store VALUE into a field of OP0.
   If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
   BLKmode MEM.  VALUE_MODE is the mode of VALUE.  BITSIZE and BITNUM
   are as for store_bit_field.  Return true if the insertion was
   emitted; on failure any partially-emitted insns are deleted.  */

static bool
store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
			    opt_scalar_int_mode op0_mode,
			    unsigned HOST_WIDE_INT bitsize,
			    unsigned HOST_WIDE_INT bitnum,
			    rtx value, scalar_int_mode value_mode)
{
  struct expand_operand ops[4];
  rtx value1;
  rtx xop0 = op0;
  rtx_insn *last = get_last_insn ();
  bool copy_back = false;

  scalar_int_mode op_mode = insv->field_mode;
  unsigned int unit = GET_MODE_BITSIZE (op_mode);
  /* The insn can only insert a non-empty field that fits in OP_MODE.  */
  if (bitsize == 0 || bitsize > unit)
    return false;

  if (MEM_P (xop0))
    /* Get a reference to the first byte of the field.  */
    xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
				 &bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in OP_MODE.  */
      if (BYTES_BIG_ENDIAN)
	bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());

      /* If xop0 is a register, we need it in OP_MODE
	 to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
	/* We can't just change the mode, because this might clobber op0,
	   and we will need the original value of op0 if insv fails.  */
	xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
	xop0 = gen_lowpart_SUBREG (op_mode, xop0);
    }

  /* If the destination is a paradoxical subreg such that we need a
     truncate to the inner mode, perform the insertion on a temporary and
     truncate the result to the original destination.  Note that we can't
     just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
     X) 0)) is (reg:N X).  */
  if (GET_CODE (xop0) == SUBREG
      && REG_P (SUBREG_REG (xop0))
      && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
					 op_mode))
    {
      rtx tem = gen_reg_rtx (op_mode);
      emit_move_insn (tem, xop0);
      xop0 = tem;
      copy_back = true;
    }

  /* Similar overflow checks are done at the start of store_bit_field_1,
     but those only catch a field lying completely outside the register.
     The field may instead lie partially inside the register, in which
     case BITSIZE must be reduced to the overlapping part.  Without this
     fix, pr48335-2.c would be miscompiled on big-endian targets that
     have a bit-insert instruction, such as arm and aarch64.  */
  if (bitsize + bitnum > unit && bitnum < unit)
    {
      warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
	       "destination object, data truncated into %wu-bit",
	       bitsize, unit - bitnum);
      bitsize = unit - bitnum;
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are inserting into.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
  value1 = value;
  if (value_mode != op_mode)
    {
      if (GET_MODE_BITSIZE (value_mode) >= bitsize)
	{
	  rtx tmp;
	  /* Optimization: Don't bother really extending VALUE
	     if it has all the bits we will actually use.  However,
	     if we must narrow it, be sure we do it correctly.  */

	  if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
	    {
	      tmp = simplify_subreg (op_mode, value1, value_mode, 0);
	      if (! tmp)
		tmp = simplify_gen_subreg (op_mode,
					   force_reg (value_mode, value1),
					   value_mode, 0);
	    }
	  else
	    {
	      tmp = gen_lowpart_if_possible (op_mode, value1);
	      if (! tmp)
		tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
	    }
	  value1 = tmp;
	}
      else if (CONST_INT_P (value))
	value1 = gen_int_mode (INTVAL (value), op_mode);
      else
	/* Parse phase is supposed to make VALUE's data type
	   match that of the component reference, which is a type
	   at least as wide as the field; so VALUE should have
	   a mode that corresponds to that type.  */
	gcc_assert (CONSTANT_P (value));
    }

  /* Operands: destination, field size, field position, value.  */
  create_fixed_operand (&ops[0], xop0);
  create_integer_operand (&ops[1], bitsize);
  create_integer_operand (&ops[2], bitnum);
  create_input_operand (&ops[3], value1, op_mode);
  if (maybe_expand_insn (insv->icode, 4, ops))
    {
      /* If we inserted into a temporary, truncate it back into the
	 real destination.  */
      if (copy_back)
	convert_move (op0, xop0, true);
      return true;
    }
  /* The insn did not match; undo anything emitted while trying.  */
  delete_insns_since (last);
  return false;
}
712
713/* A subroutine of store_bit_field, with the same arguments. Return true
714 if the operation could be implemented.
715
716 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
717 no other way of implementing the operation. If FALLBACK_P is false,
718 return false instead. */
719
720static bool
721store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
722 unsigned HOST_WIDE_INT bitnum,
723 unsigned HOST_WIDE_INT bitregion_start,
724 unsigned HOST_WIDE_INT bitregion_end,
725 machine_mode fieldmode,
726 rtx value, bool reverse, bool fallback_p)
727{
728 rtx op0 = str_rtx;
729 rtx orig_value;
730
731 while (GET_CODE (op0) == SUBREG)
732 {
733 bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
734 op0 = SUBREG_REG (op0);
735 }
736
737 /* No action is needed if the target is a register and if the field
738 lies completely outside that register. This can occur if the source
739 code contains an out-of-bounds access to a small array. */
740 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
741 return true;
742
743 /* Use vec_set patterns for inserting parts of vectors whenever
744 available. */
745 machine_mode outermode = GET_MODE (op0);
746 scalar_mode innermode = GET_MODE_INNER (outermode);
747 if (VECTOR_MODE_P (outermode)
748 && !MEM_P (op0)
749 && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
750 && fieldmode == innermode
751 && bitsize == GET_MODE_BITSIZE (innermode)
752 && !(bitnum % GET_MODE_BITSIZE (innermode)))
753 {
754 struct expand_operand ops[3];
755 enum insn_code icode = optab_handler (vec_set_optab, outermode);
756 int pos = bitnum / GET_MODE_BITSIZE (innermode);
757
758 create_fixed_operand (&ops[0], op0);
759 create_input_operand (&ops[1], value, innermode);
760 create_integer_operand (&ops[2], pos);
761 if (maybe_expand_insn (icode, 3, ops))
762 return true;
763 }
764
765 /* If the target is a register, overwriting the entire object, or storing
766 a full-word or multi-word field can be done with just a SUBREG. */
767 if (!MEM_P (op0)
768 && bitsize == GET_MODE_BITSIZE (fieldmode)
769 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
770 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
771 {
772 /* Use the subreg machinery either to narrow OP0 to the required
773 words or to cope with mode punning between equal-sized modes.
774 In the latter case, use subreg on the rhs side, not lhs. */
775 rtx sub;
776
777 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
778 {
779 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
780 if (sub)
781 {
782 if (reverse)
783 sub = flip_storage_order (GET_MODE (op0), sub);
784 emit_move_insn (op0, sub);
785 return true;
786 }
787 }
788 else
789 {
790 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
791 bitnum / BITS_PER_UNIT);
792 if (sub)
793 {
794 if (reverse)
795 value = flip_storage_order (fieldmode, value);
796 emit_move_insn (sub, value);
797 return true;
798 }
799 }
800 }
801
802 /* If the target is memory, storing any naturally aligned field can be
803 done with a simple store. For targets that support fast unaligned
804 memory, any naturally sized, unit aligned field can be done directly. */
805 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
806 {
807 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
808 if (reverse)
809 value = flip_storage_order (fieldmode, value);
810 emit_move_insn (op0, value);
811 return true;
812 }
813
814 /* Make sure we are playing with integral modes. Pun with subregs
815 if we aren't. This must come after the entire register case above,
816 since that case is valid for any mode. The following cases are only
817 valid for integral modes. */
818 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
819 scalar_int_mode imode;
820 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
821 {
822 if (MEM_P (op0))
823 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
824 0, MEM_SIZE (op0));
825 else
826 op0 = gen_lowpart (op0_mode.require (), op0);
827 }
828
829 /* Storing an lsb-aligned field in a register
830 can be done with a movstrict instruction. */
831
832 if (!MEM_P (op0)
833 && !reverse
834 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
835 && bitsize == GET_MODE_BITSIZE (fieldmode)
836 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
837 {
838 struct expand_operand ops[2];
839 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
840 rtx arg0 = op0;
841 unsigned HOST_WIDE_INT subreg_off;
842
843 if (GET_CODE (arg0) == SUBREG)
844 {
845 /* Else we've got some float mode source being extracted into
846 a different float mode destination -- this combination of
847 subregs results in Severe Tire Damage. */
848 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
849 || GET_MODE_CLASS (fieldmode) == MODE_INT
850 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
851 arg0 = SUBREG_REG (arg0);
852 }
853
854 subreg_off = bitnum / BITS_PER_UNIT;
855 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
856 {
857 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
858
859 create_fixed_operand (&ops[0], arg0);
860 /* Shrink the source operand to FIELDMODE. */
861 create_convert_operand_to (&ops[1], value, fieldmode, false);
862 if (maybe_expand_insn (icode, 2, ops))
863 return true;
864 }
865 }
866
867 /* Handle fields bigger than a word. */
868
869 if (bitsize > BITS_PER_WORD)
870 {
871 /* Here we transfer the words of the field
872 in the order least significant first.
873 This is because the most significant word is the one which may
874 be less than full.
875 However, only do that if the value is not BLKmode. */
876
877 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
878 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
879 unsigned int i;
880 rtx_insn *last;
881
882 /* This is the mode we must force value to, so that there will be enough
883 subwords to extract. Note that fieldmode will often (always?) be
884 VOIDmode, because that is what store_field uses to indicate that this
885 is a bit field, but passing VOIDmode to operand_subword_force
886 is not allowed. */
887 fieldmode = GET_MODE (value);
888 if (fieldmode == VOIDmode)
889 fieldmode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
890
891 last = get_last_insn ();
892 for (i = 0; i < nwords; i++)
893 {
894 /* If I is 0, use the low-order word in both field and target;
895 if I is 1, use the next to lowest word; and so on. */
896 unsigned int wordnum = (backwards
897 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
898 - i - 1
899 : i);
900 unsigned int bit_offset = (backwards ^ reverse
901 ? MAX ((int) bitsize - ((int) i + 1)
902 * BITS_PER_WORD,
903 0)
904 : (int) i * BITS_PER_WORD);
905 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
906 unsigned HOST_WIDE_INT new_bitsize =
907 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
908
909 /* If the remaining chunk doesn't have full wordsize we have
910 to make sure that for big-endian machines the higher order
911 bits are used. */
912 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
913 value_word = simplify_expand_binop (word_mode, lshr_optab,
914 value_word,
915 GEN_INT (BITS_PER_WORD
916 - new_bitsize),
917 NULL_RTX, true,
918 OPTAB_LIB_WIDEN);
919
920 if (!store_bit_field_1 (op0, new_bitsize,
921 bitnum + bit_offset,
922 bitregion_start, bitregion_end,
923 word_mode,
924 value_word, reverse, fallback_p))
925 {
926 delete_insns_since (last);
927 return false;
928 }
929 }
930 return true;
931 }
932
933 /* If VALUE has a floating-point or complex mode, access it as an
934 integer of the corresponding size. This can occur on a machine
935 with 64 bit registers that uses SFmode for float. It can also
936 occur for unaligned float or complex fields. */
937 orig_value = value;
938 scalar_int_mode value_mode;
939 if (GET_MODE (value) == VOIDmode)
940 /* By this point we've dealt with values that are bigger than a word,
941 so word_mode is a conservatively correct choice. */
942 value_mode = word_mode;
943 else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
944 {
945 value_mode = int_mode_for_mode (GET_MODE (value)).require ();
946 value = gen_reg_rtx (value_mode);
947 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
948 }
949
950 /* If OP0 is a multi-word register, narrow it to the affected word.
951 If the region spans two words, defer to store_split_bit_field.
952 Don't do this if op0 is a single hard register wider than word
953 such as a float or vector register. */
954 if (!MEM_P (op0)
955 && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
956 && (!REG_P (op0)
957 || !HARD_REGISTER_P (op0)
958 || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
959 {
960 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
961 {
962 if (!fallback_p)
963 return false;
964
965 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
966 bitregion_start, bitregion_end,
967 value, value_mode, reverse);
968 return true;
969 }
970 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
971 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
972 gcc_assert (op0);
973 op0_mode = word_mode;
974 bitnum %= BITS_PER_WORD;
975 }
976
977 /* From here on we can assume that the field to be stored in fits
978 within a word. If the destination is a register, it too fits
979 in a word. */
980
981 extraction_insn insv;
982 if (!MEM_P (op0)
983 && !reverse
984 && get_best_reg_extraction_insn (&insv, EP_insv,
985 GET_MODE_BITSIZE (op0_mode.require ()),
986 fieldmode)
987 && store_bit_field_using_insv (&insv, op0, op0_mode,
988 bitsize, bitnum, value, value_mode))
989 return true;
990
991 /* If OP0 is a memory, try copying it to a register and seeing if a
992 cheap register alternative is available. */
993 if (MEM_P (op0) && !reverse)
994 {
995 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
996 fieldmode)
997 && store_bit_field_using_insv (&insv, op0, op0_mode,
998 bitsize, bitnum, value, value_mode))
999 return true;
1000
1001 rtx_insn *last = get_last_insn ();
1002
1003 /* Try loading part of OP0 into a register, inserting the bitfield
1004 into that, and then copying the result back to OP0. */
1005 unsigned HOST_WIDE_INT bitpos;
1006 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1007 bitregion_start, bitregion_end,
1008 fieldmode, &bitpos);
1009 if (xop0)
1010 {
1011 rtx tempreg = copy_to_reg (xop0);
1012 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1013 bitregion_start, bitregion_end,
1014 fieldmode, orig_value, reverse, false))
1015 {
1016 emit_move_insn (xop0, tempreg);
1017 return true;
1018 }
1019 delete_insns_since (last);
1020 }
1021 }
1022
1023 if (!fallback_p)
1024 return false;
1025
1026 store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1027 bitregion_end, value, value_mode, reverse);
1028 return true;
1029}
1030
1031/* Generate code to store value from rtx VALUE
1032 into a bit-field within structure STR_RTX
1033 containing BITSIZE bits starting at bit BITNUM.
1034
1035 BITREGION_START is bitpos of the first bitfield in this region.
1036 BITREGION_END is the bitpos of the ending bitfield in this region.
1037 These two fields are 0, if the C++ memory model does not apply,
1038 or we are not interested in keeping track of bitfield regions.
1039
1040 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1041
1042 If REVERSE is true, the store is to be done in reverse order. */
1043
1044void
1045store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1046 unsigned HOST_WIDE_INT bitnum,
1047 unsigned HOST_WIDE_INT bitregion_start,
1048 unsigned HOST_WIDE_INT bitregion_end,
1049 machine_mode fieldmode,
1050 rtx value, bool reverse)
1051{
1052 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1053 scalar_int_mode int_mode;
1054 if (is_a <scalar_int_mode> (fieldmode, &int_mode)
1055 && strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, int_mode,
1056 bitregion_start, bitregion_end))
1057 {
1058 /* Storing of a full word can be done with a simple store.
1059 We know here that the field can be accessed with one single
1060 instruction. For targets that support unaligned memory,
1061 an unaligned access may be necessary. */
1062 if (bitsize == GET_MODE_BITSIZE (int_mode))
1063 {
1064 str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1065 bitnum / BITS_PER_UNIT);
1066 if (reverse)
1067 value = flip_storage_order (int_mode, value);
1068 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1069 emit_move_insn (str_rtx, value);
1070 }
1071 else
1072 {
1073 rtx temp;
1074
1075 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, bitsize, bitnum,
1076 &bitnum);
1077 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (int_mode));
1078 temp = copy_to_reg (str_rtx);
1079 if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1080 int_mode, value, reverse, true))
1081 gcc_unreachable ();
1082
1083 emit_move_insn (str_rtx, temp);
1084 }
1085
1086 return;
1087 }
1088
1089 /* Under the C++0x memory model, we must not touch bits outside the
1090 bit region. Adjust the address to start at the beginning of the
1091 bit region. */
1092 if (MEM_P (str_rtx) && bitregion_start > 0)
1093 {
1094 scalar_int_mode best_mode;
1095 machine_mode addr_mode = VOIDmode;
1096 HOST_WIDE_INT offset, size;
1097
1098 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1099
1100 offset = bitregion_start / BITS_PER_UNIT;
1101 bitnum -= bitregion_start;
1102 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1103 bitregion_end -= bitregion_start;
1104 bitregion_start = 0;
1105 if (get_best_mode (bitsize, bitnum,
1106 bitregion_start, bitregion_end,
1107 MEM_ALIGN (str_rtx), INT_MAX,
1108 MEM_VOLATILE_P (str_rtx), &best_mode))
1109 addr_mode = best_mode;
1110 str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1111 offset, size);
1112 }
1113
1114 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1115 bitregion_start, bitregion_end,
1116 fieldmode, value, reverse, true))
1117 gcc_unreachable ();
1118}
1119
1120/* Use shifts and boolean operations to store VALUE into a bit field of
1121 width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined,
1122 it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is
1123 the mode of VALUE.
1124
1125 If REVERSE is true, the store is to be done in reverse order. */
1126
1127static void
1128store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1129 unsigned HOST_WIDE_INT bitsize,
1130 unsigned HOST_WIDE_INT bitnum,
1131 unsigned HOST_WIDE_INT bitregion_start,
1132 unsigned HOST_WIDE_INT bitregion_end,
1133 rtx value, scalar_int_mode value_mode, bool reverse)
1134{
1135 /* There is a case not handled here:
1136 a structure with a known alignment of just a halfword
1137 and a field split across two aligned halfwords within the structure.
1138 Or likewise a structure with a known alignment of just a byte
1139 and a field split across two bytes.
1140 Such cases are not supposed to be able to occur. */
1141
1142 scalar_int_mode best_mode;
1143 if (MEM_P (op0))
1144 {
1145 unsigned int max_bitsize = BITS_PER_WORD;
1146 scalar_int_mode imode;
1147 if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1148 max_bitsize = GET_MODE_BITSIZE (imode);
1149
1150 if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1151 MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1152 &best_mode))
1153 {
1154 /* The only way this should occur is if the field spans word
1155 boundaries. */
1156 store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1157 bitregion_start, bitregion_end,
1158 value, value_mode, reverse);
1159 return;
1160 }
1161
1162 op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1163 }
1164 else
1165 best_mode = op0_mode.require ();
1166
1167 store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1168 value, value_mode, reverse);
1169}
1170
/* Helper function for store_fixed_bit_field, stores
   the bit field always using MODE, which is the mode of OP0.  The other
   arguments are as for store_fixed_bit_field.

   Implements the store as a read-modify-write sequence:
   clear the field bits in OP0 with an AND mask, then IOR in the
   suitably shifted VALUE.  Either step is skipped when a constant
   VALUE makes it redundant.  */

static void
store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
			 unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitnum,
			 rtx value, scalar_int_mode value_mode, bool reverse)
{
  rtx temp;
  /* Set when VALUE is a constant whose field-sized truncation is all
     zeros (resp. all ones); lets us omit the IOR (resp. the AND).  */
  int all_zero = 0;
  int all_one = 0;

  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
     for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */

  if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
    /* BITNUM is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;

  /* Now BITNUM is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (CONST_INT_P (value))
    {
      unsigned HOST_WIDE_INT v = UINTVAL (value);

      /* Truncate the constant to the field width; stray high bits
	 would otherwise be smeared into OP0 by the shift below.  */
      if (bitsize < HOST_BITS_PER_WIDE_INT)
	v &= (HOST_WIDE_INT_1U << bitsize) - 1;

      if (v == 0)
	all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
		&& v == (HOST_WIDE_INT_1U << bitsize) - 1)
	       || (bitsize == HOST_BITS_PER_WIDE_INT
		   && v == HOST_WIDE_INT_M1U))
	all_one = 1;

      value = lshift_value (mode, v, bitnum);
    }
  else
    {
      /* We must mask VALUE unless it is already exactly BITSIZE bits
	 wide, or unless the field reaches the msb of MODE, in which
	 case the left shift below discards the excess bits anyway.  */
      int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
		      && bitnum + bitsize != GET_MODE_BITSIZE (mode));

      if (value_mode != mode)
	value = convert_to_mode (mode, value, 1);

      if (must_and)
	value = expand_binop (mode, and_optab, value,
			      mask_rtx (mode, 0, bitsize, 0),
			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitnum > 0)
	value = expand_shift (LSHIFT_EXPR, mode, value,
			      bitnum, NULL_RTX, 1);
    }

  if (reverse)
    value = flip_storage_order (mode, value);

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      /* AND with the complement of the field mask to zero the field.  */
      rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
      if (reverse)
	mask = flip_storage_order (mode, mask);
      temp = expand_binop (mode, and_optab, temp, mask,
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  if (op0 != temp)
    {
      /* Commit the combined result back into OP0.  */
      op0 = copy_rtx (op0);
      emit_move_insn (op0, temp);
    }
}
1269
/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store, which has mode VALUE_MODE.
   If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
   a BLKmode MEM.

   If REVERSE is true, the store is to be done in reverse order.

   This does not yet handle fields wider than BITS_PER_WORD.  */

static void
store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
		       unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitpos,
		       unsigned HOST_WIDE_INT bitregion_start,
		       unsigned HOST_WIDE_INT bitregion_end,
		       rtx value, scalar_int_mode value_mode, bool reverse)
{
  /* UNIT is the widest chunk (in bits) we may access at once;
     BITSDONE counts how many bits of the field are already stored.  */
  unsigned int unit, total_bits, bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If OP0 is a memory with a mode, then UNIT must not be larger than
     OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
     again, and we will mutually recurse forever.  */
  if (MEM_P (op0) && op0_mode.exists ())
    unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && !CONST_INT_P (value))
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
	value = word;
      else
	value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
      value_mode = word_mode;
    }

  /* Width of the source; used to locate pieces when counting from
     the most significant end.  */
  total_bits = GET_MODE_BITSIZE (value_mode);

  while (bitsdone < bitsize)
    {
      /* THISSIZE is the size of the current piece, THISPOS its bit
	 position within its chunk, OFFSET the chunk index in UNITs.  */
      unsigned HOST_WIDE_INT thissize;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;
      rtx part;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* When region of bytes we can touch is restricted, decrease
	 UNIT close to the end of the region as needed.  If op0 is a REG
	 or SUBREG of REG, don't do this, as there can't be data races
	 on a register and we can expand shorter code in some cases.  */
      if (bitregion_end
	  && unit > BITS_PER_UNIT
	  && bitpos + bitsdone - thispos + unit > bitregion_end + 1
	  && !REG_P (op0)
	  && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
	{
	  unit = unit / 2;
	  continue;
	}

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 store_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
	{
	  /* Fetch successively less significant portions.  */
	  if (CONST_INT_P (value))
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> (bitsize - bitsdone - thissize))
			    & ((HOST_WIDE_INT_1 << thissize) - 1));
          /* Likewise, but the source is little-endian.  */
	  else if (reverse)
	    part = extract_fixed_bit_field (word_mode, value, value_mode,
					    thissize,
					    bitsize - bitsdone - thissize,
					    NULL_RTX, 1, false);
	  else
	    /* The args are chosen so that the last part includes the
	       lsb.  Give extract_bit_field the value it needs (with
	       endianness compensation) to fetch the piece we want.  */
	    part = extract_fixed_bit_field (word_mode, value, value_mode,
					    thissize,
					    total_bits - bitsize + bitsdone,
					    NULL_RTX, 1, false);
	}
      else
	{
	  /* Fetch successively more significant portions.  */
	  if (CONST_INT_P (value))
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> bitsdone)
			    & ((HOST_WIDE_INT_1 << thissize) - 1));
	  /* Likewise, but the source is big-endian.  */
	  else if (reverse)
	    part = extract_fixed_bit_field (word_mode, value, value_mode,
					    thissize,
					    total_bits - bitsdone - thissize,
					    NULL_RTX, 1, false);
	  else
	    part = extract_fixed_bit_field (word_mode, value, value_mode,
					    thissize, bitsdone, NULL_RTX,
					    1, false);
	}

      /* If OP0 is a register, then handle OFFSET here.  */
      rtx op0_piece = op0;
      opt_scalar_int_mode op0_piece_mode = op0_mode;
      if (SUBREG_P (op0) || REG_P (op0))
	{
	  scalar_int_mode imode;
	  /* A sub-word register has no word beyond the first; any
	     nonzero OFFSET is an out-of-bounds access (see below).  */
	  if (op0_mode.exists (&imode)
	      && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
	    {
	      if (offset)
		op0_piece = const0_rtx;
	    }
	  else
	    {
	      op0_piece = operand_subword_force (op0,
						 offset * unit / BITS_PER_WORD,
						 GET_MODE (op0));
	      op0_piece_mode = word_mode;
	    }
	  /* Keep only the remaining sub-word part of OFFSET.  */
	  offset &= BITS_PER_WORD / unit - 1;
	}

      /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
	 it is just an out-of-bounds access.  Ignore it.  */
      if (op0_piece != const0_rtx)
	store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
			       offset * unit + thispos, bitregion_start,
			       bitregion_end, part, word_mode, reverse);
      bitsdone += thissize;
    }
}
1424
1425/* A subroutine of extract_bit_field_1 that converts return value X
1426 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1427 to extract_bit_field. */
1428
1429static rtx
1430convert_extracted_bit_field (rtx x, machine_mode mode,
1431 machine_mode tmode, bool unsignedp)
1432{
1433 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1434 return x;
1435
1436 /* If the x mode is not a scalar integral, first convert to the
1437 integer mode of that size and then access it as a floating-point
1438 value via a SUBREG. */
1439 if (!SCALAR_INT_MODE_P (tmode))
1440 {
1441 scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1442 x = convert_to_mode (int_mode, x, unsignedp);
1443 x = force_reg (int_mode, x);
1444 return gen_lowpart (tmode, x);
1445 }
1446
1447 return convert_to_mode (tmode, x, unsignedp);
1448}
1449
/* Try to use an ext(z)v pattern to extract a field from OP0.
   Return the extracted value on success, otherwise return null.
   EXTV describes the extraction instruction to use.  If OP0_MODE
   is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
   The other arguments are as for extract_bit_field.  */

static rtx
extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
			      opt_scalar_int_mode op0_mode,
			      unsigned HOST_WIDE_INT bitsize,
			      unsigned HOST_WIDE_INT bitnum,
			      int unsignedp, rtx target,
			      machine_mode mode, machine_mode tmode)
{
  struct expand_operand ops[4];
  /* SPEC_TARGET remembers the caller-supplied target so we can return
     it unconverted if the pattern wrote straight into it;
     SPEC_TARGET_SUBREG is the ext_mode lowpart of it, if we made one.  */
  rtx spec_target = target;
  rtx spec_target_subreg = 0;
  scalar_int_mode ext_mode = extv->field_mode;
  unsigned unit = GET_MODE_BITSIZE (ext_mode);

  /* The field must fit in the unit the pattern extracts from.  */
  if (bitsize == 0 || unit < bitsize)
    return NULL_RTX;

  if (MEM_P (op0))
    /* Get a reference to the first byte of the field.  */
    op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
				&bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in EXT_MODE.  */
      if (BYTES_BIG_ENDIAN)
	bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());

      /* If op0 is a register, we need it in EXT_MODE to make it
	 acceptable to the format of ext(z)v.  */
      if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
	return NULL_RTX;
      if (REG_P (op0) && op0_mode.require () != ext_mode)
	op0 = gen_lowpart_SUBREG (ext_mode, op0);
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are extracting from.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  if (target == 0)
    target = spec_target = gen_reg_rtx (tmode);

  if (GET_MODE (target) != ext_mode)
    {
      /* Don't use LHS paradoxical subreg if explicit truncation is needed
	 between the mode of the extraction (word_mode) and the target
	 mode.  Instead, create a temporary and use convert_move to set
	 the target.  */
      if (REG_P (target)
	  && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
	{
	  target = gen_lowpart (ext_mode, target);
	  if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
	    spec_target_subreg = target;
	}
      else
	target = gen_reg_rtx (ext_mode);
    }

  /* Operands of ext(z)v: 0 = result, 1 = structure being extracted
     from, 2 = field width, 3 = starting bit position.  */
  create_output_operand (&ops[0], target, ext_mode);
  create_fixed_operand (&ops[1], op0);
  create_integer_operand (&ops[2], bitsize);
  create_integer_operand (&ops[3], bitnum);
  if (maybe_expand_insn (extv->icode, 4, ops))
    {
      /* The result may already be in the caller's target (directly or
	 via the lowpart subreg we created); otherwise convert it.  */
      target = ops[0].value;
      if (target == spec_target)
	return target;
      if (target == spec_target_subreg)
	return spec_target;
      return convert_extracted_bit_field (target, mode, tmode, unsignedp);
    }
  return NULL_RTX;
}
1534
1535/* A subroutine of extract_bit_field, with the same arguments.
1536 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1537 if we can find no other means of implementing the operation.
1538 if FALLBACK_P is false, return NULL instead. */
1539
1540static rtx
1541extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1542 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1543 machine_mode mode, machine_mode tmode,
1544 bool reverse, bool fallback_p, rtx *alt_rtl)
1545{
1546 rtx op0 = str_rtx;
1547 machine_mode mode1;
1548
1549 if (tmode == VOIDmode)
1550 tmode = mode;
1551
1552 while (GET_CODE (op0) == SUBREG)
1553 {
1554 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1555 op0 = SUBREG_REG (op0);
1556 }
1557
1558 /* If we have an out-of-bounds access to a register, just return an
1559 uninitialized register of the required mode. This can occur if the
1560 source code contains an out-of-bounds access to a small array. */
1561 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1562 return gen_reg_rtx (tmode);
1563
1564 if (REG_P (op0)
1565 && mode == GET_MODE (op0)
1566 && bitnum == 0
1567 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1568 {
1569 if (reverse)
1570 op0 = flip_storage_order (mode, op0);
1571 /* We're trying to extract a full register from itself. */
1572 return op0;
1573 }
1574
1575 /* First try to check for vector from vector extractions. */
1576 if (VECTOR_MODE_P (GET_MODE (op0))
1577 && !MEM_P (op0)
1578 && VECTOR_MODE_P (tmode)
1579 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (tmode))
1580 {
1581 machine_mode new_mode = GET_MODE (op0);
1582 if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1583 {
1584 scalar_mode inner_mode = GET_MODE_INNER (tmode);
1585 unsigned int nunits = (GET_MODE_BITSIZE (GET_MODE (op0))
1586 / GET_MODE_UNIT_BITSIZE (tmode));
1587 if (!mode_for_vector (inner_mode, nunits).exists (&new_mode)
1588 || !VECTOR_MODE_P (new_mode)
1589 || GET_MODE_SIZE (new_mode) != GET_MODE_SIZE (GET_MODE (op0))
1590 || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)
1591 || !targetm.vector_mode_supported_p (new_mode))
1592 new_mode = VOIDmode;
1593 }
1594 if (new_mode != VOIDmode
1595 && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1596 != CODE_FOR_nothing)
1597 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (tmode)
1598 == bitnum / GET_MODE_BITSIZE (tmode)))
1599 {
1600 struct expand_operand ops[3];
1601 machine_mode outermode = new_mode;
1602 machine_mode innermode = tmode;
1603 enum insn_code icode
1604 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1605 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1606
1607 if (new_mode != GET_MODE (op0))
1608 op0 = gen_lowpart (new_mode, op0);
1609 create_output_operand (&ops[0], target, innermode);
1610 ops[0].target = 1;
1611 create_input_operand (&ops[1], op0, outermode);
1612 create_integer_operand (&ops[2], pos);
1613 if (maybe_expand_insn (icode, 3, ops))
1614 {
1615 if (alt_rtl && ops[0].target)
1616 *alt_rtl = target;
1617 target = ops[0].value;
1618 if (GET_MODE (target) != mode)
1619 return gen_lowpart (tmode, target);
1620 return target;
1621 }
1622 }
1623 }
1624
1625 /* See if we can get a better vector mode before extracting. */
1626 if (VECTOR_MODE_P (GET_MODE (op0))
1627 && !MEM_P (op0)
1628 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1629 {
1630 machine_mode new_mode;
1631
1632 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1633 new_mode = MIN_MODE_VECTOR_FLOAT;
1634 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1635 new_mode = MIN_MODE_VECTOR_FRACT;
1636 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1637 new_mode = MIN_MODE_VECTOR_UFRACT;
1638 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1639 new_mode = MIN_MODE_VECTOR_ACCUM;
1640 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1641 new_mode = MIN_MODE_VECTOR_UACCUM;
1642 else
1643 new_mode = MIN_MODE_VECTOR_INT;
1644
1645 FOR_EACH_MODE_FROM (new_mode, new_mode)
1646 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1647 && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode)
1648 && targetm.vector_mode_supported_p (new_mode))
1649 break;
1650 if (new_mode != VOIDmode)
1651 op0 = gen_lowpart (new_mode, op0);
1652 }
1653
1654 /* Use vec_extract patterns for extracting parts of vectors whenever
1655 available. */
1656 machine_mode outermode = GET_MODE (op0);
1657 scalar_mode innermode = GET_MODE_INNER (outermode);
1658 if (VECTOR_MODE_P (outermode)
1659 && !MEM_P (op0)
1660 && (convert_optab_handler (vec_extract_optab, outermode, innermode)
1661 != CODE_FOR_nothing)
1662 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (innermode)
1663 == bitnum / GET_MODE_BITSIZE (innermode)))
1664 {
1665 struct expand_operand ops[3];
1666 enum insn_code icode
1667 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1668 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1669
1670 create_output_operand (&ops[0], target, innermode);
1671 ops[0].target = 1;
1672 create_input_operand (&ops[1], op0, outermode);
1673 create_integer_operand (&ops[2], pos);
1674 if (maybe_expand_insn (icode, 3, ops))
1675 {
1676 if (alt_rtl && ops[0].target)
1677 *alt_rtl = target;
1678 target = ops[0].value;
1679 if (GET_MODE (target) != mode)
1680 return gen_lowpart (tmode, target);
1681 return target;
1682 }
1683 }
1684
1685 /* Make sure we are playing with integral modes. Pun with subregs
1686 if we aren't. */
1687 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1688 scalar_int_mode imode;
1689 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1690 {
1691 if (MEM_P (op0))
1692 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1693 0, MEM_SIZE (op0));
1694 else if (op0_mode.exists (&imode))
1695 {
1696 op0 = gen_lowpart (imode, op0);
1697
1698 /* If we got a SUBREG, force it into a register since we
1699 aren't going to be able to do another SUBREG on it. */
1700 if (GET_CODE (op0) == SUBREG)
1701 op0 = force_reg (imode, op0);
1702 }
1703 else
1704 {
1705 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1706 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1707 emit_move_insn (mem, op0);
1708 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1709 }
1710 }
1711
1712 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1713 If that's wrong, the solution is to test for it and set TARGET to 0
1714 if needed. */
1715
1716 /* Get the mode of the field to use for atomic access or subreg
1717 conversion. */
1718 if (!SCALAR_INT_MODE_P (tmode)
1719 || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1720 mode1 = mode;
1721 gcc_assert (mode1 != BLKmode);
1722
1723 /* Extraction of a full MODE1 value can be done with a subreg as long
1724 as the least significant bit of the value is the least significant
1725 bit of either OP0 or a word of OP0. */
1726 if (!MEM_P (op0)
1727 && !reverse
1728 && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
1729 && bitsize == GET_MODE_BITSIZE (mode1)
1730 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, op0_mode.require ()))
1731 {
1732 rtx sub = simplify_gen_subreg (mode1, op0, op0_mode.require (),
1733 bitnum / BITS_PER_UNIT);
1734 if (sub)
1735 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1736 }
1737
1738 /* Extraction of a full MODE1 value can be done with a load as long as
1739 the field is on a byte boundary and is sufficiently aligned. */
1740 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1741 {
1742 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1743 if (reverse)
1744 op0 = flip_storage_order (mode1, op0);
1745 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1746 }
1747
1748 /* Handle fields bigger than a word. */
1749
1750 if (bitsize > BITS_PER_WORD)
1751 {
1752 /* Here we transfer the words of the field
1753 in the order least significant first.
1754 This is because the most significant word is the one which may
1755 be less than full. */
1756
1757 const bool backwards = WORDS_BIG_ENDIAN;
1758 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1759 unsigned int i;
1760 rtx_insn *last;
1761
1762 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1763 target = gen_reg_rtx (mode);
1764
1765 /* In case we're about to clobber a base register or something
1766 (see gcc.c-torture/execute/20040625-1.c). */
1767 if (reg_mentioned_p (target, str_rtx))
1768 target = gen_reg_rtx (mode);
1769
1770 /* Indicate for flow that the entire target reg is being set. */
1771 emit_clobber (target);
1772
1773 last = get_last_insn ();
1774 for (i = 0; i < nwords; i++)
1775 {
1776 /* If I is 0, use the low-order word in both field and target;
1777 if I is 1, use the next to lowest word; and so on. */
1778 /* Word number in TARGET to use. */
1779 unsigned int wordnum
1780 = (backwards
1781 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1782 : i);
1783 /* Offset from start of field in OP0. */
1784 unsigned int bit_offset = (backwards ^ reverse
1785 ? MAX ((int) bitsize - ((int) i + 1)
1786 * BITS_PER_WORD,
1787 0)
1788 : (int) i * BITS_PER_WORD);
1789 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1790 rtx result_part
1791 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1792 bitsize - i * BITS_PER_WORD),
1793 bitnum + bit_offset, 1, target_part,
1794 mode, word_mode, reverse, fallback_p, NULL);
1795
1796 gcc_assert (target_part);
1797 if (!result_part)
1798 {
1799 delete_insns_since (last);
1800 return NULL;
1801 }
1802
1803 if (result_part != target_part)
1804 emit_move_insn (target_part, result_part);
1805 }
1806
1807 if (unsignedp)
1808 {
1809 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1810 need to be zero'd out. */
1811 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1812 {
1813 unsigned int i, total_words;
1814
1815 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1816 for (i = nwords; i < total_words; i++)
1817 emit_move_insn
1818 (operand_subword (target,
1819 backwards ? total_words - i - 1 : i,
1820 1, VOIDmode),
1821 const0_rtx);
1822 }
1823 return target;
1824 }
1825
1826 /* Signed bit field: sign-extend with two arithmetic shifts. */
1827 target = expand_shift (LSHIFT_EXPR, mode, target,
1828 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1829 return expand_shift (RSHIFT_EXPR, mode, target,
1830 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1831 }
1832
1833 /* If OP0 is a multi-word register, narrow it to the affected word.
1834 If the region spans two words, defer to extract_split_bit_field. */
1835 if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1836 {
1837 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1838 {
1839 if (!fallback_p)
1840 return NULL_RTX;
1841 target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1842 unsignedp, reverse);
1843 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1844 }
1845 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1846 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1847 op0_mode = word_mode;
1848 bitnum %= BITS_PER_WORD;
1849 }
1850
1851 /* From here on we know the desired field is smaller than a word.
1852 If OP0 is a register, it too fits within a word. */
1853 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1854 extraction_insn extv;
1855 if (!MEM_P (op0)
1856 && !reverse
1857 /* ??? We could limit the structure size to the part of OP0 that
1858 contains the field, with appropriate checks for endianness
1859 and TARGET_TRULY_NOOP_TRUNCATION. */
1860 && get_best_reg_extraction_insn (&extv, pattern,
1861 GET_MODE_BITSIZE (op0_mode.require ()),
1862 tmode))
1863 {
1864 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1865 bitsize, bitnum,
1866 unsignedp, target, mode,
1867 tmode);
1868 if (result)
1869 return result;
1870 }
1871
1872 /* If OP0 is a memory, try copying it to a register and seeing if a
1873 cheap register alternative is available. */
1874 if (MEM_P (op0) & !reverse)
1875 {
1876 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1877 tmode))
1878 {
1879 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1880 bitsize, bitnum,
1881 unsignedp, target, mode,
1882 tmode);
1883 if (result)
1884 return result;
1885 }
1886
1887 rtx_insn *last = get_last_insn ();
1888
1889 /* Try loading part of OP0 into a register and extracting the
1890 bitfield from that. */
1891 unsigned HOST_WIDE_INT bitpos;
1892 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1893 0, 0, tmode, &bitpos);
1894 if (xop0)
1895 {
1896 xop0 = copy_to_reg (xop0);
1897 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1898 unsignedp, target,
1899 mode, tmode, reverse, false, NULL);
1900 if (result)
1901 return result;
1902 delete_insns_since (last);
1903 }
1904 }
1905
1906 if (!fallback_p)
1907 return NULL;
1908
1909 /* Find a correspondingly-sized integer field, so we can apply
1910 shifts and masks to it. */
1911 scalar_int_mode int_mode;
1912 if (!int_mode_for_mode (tmode).exists (&int_mode))
1913 /* If this fails, we should probably push op0 out to memory and then
1914 do a load. */
1915 int_mode = int_mode_for_mode (mode).require ();
1916
1917 target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
1918 bitnum, target, unsignedp, reverse);
1919
1920 /* Complex values must be reversed piecewise, so we need to undo the global
1921 reversal, convert to the complex mode and reverse again. */
1922 if (reverse && COMPLEX_MODE_P (tmode))
1923 {
1924 target = flip_storage_order (int_mode, target);
1925 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1926 target = flip_storage_order (tmode, target);
1927 }
1928 else
1929 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1930
1931 return target;
1932}
1933
1934/* Generate code to extract a byte-field from STR_RTX
1935 containing BITSIZE bits, starting at BITNUM,
1936 and put it in TARGET if possible (if TARGET is nonzero).
1937 Regardless of TARGET, we return the rtx for where the value is placed.
1938
1939 STR_RTX is the structure containing the byte (a REG or MEM).
1940 UNSIGNEDP is nonzero if this is an unsigned bit field.
1941 MODE is the natural mode of the field value once extracted.
1942 TMODE is the mode the caller would like the value to have;
1943 but the value may be returned with type MODE instead.
1944
1945 If REVERSE is true, the extraction is to be done in reverse order.
1946
1947 If a TARGET is specified and we can store in it at no extra cost,
1948 we do so, and return TARGET.
1949 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1950 if they are equally easy. */
1951
1952rtx
1953extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1954 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1955 machine_mode mode, machine_mode tmode, bool reverse,
1956 rtx *alt_rtl)
1957{
1958 machine_mode mode1;
1959
1960 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1961 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1962 mode1 = GET_MODE (str_rtx);
1963 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1964 mode1 = GET_MODE (target);
1965 else
1966 mode1 = tmode;
1967
1968 scalar_int_mode int_mode;
1969 if (is_a <scalar_int_mode> (mode1, &int_mode)
1970 && strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, int_mode, 0, 0))
1971 {
1972 /* Extraction of a full INT_MODE value can be done with a simple load.
1973 We know here that the field can be accessed with one single
1974 instruction. For targets that support unaligned memory,
1975 an unaligned access may be necessary. */
1976 if (bitsize == GET_MODE_BITSIZE (int_mode))
1977 {
1978 rtx result = adjust_bitfield_address (str_rtx, int_mode,
1979 bitnum / BITS_PER_UNIT);
1980 if (reverse)
1981 result = flip_storage_order (int_mode, result);
1982 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1983 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1984 }
1985
1986 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, bitsize, bitnum,
1987 &bitnum);
1988 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (int_mode));
1989 str_rtx = copy_to_reg (str_rtx);
1990 }
1991
1992 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1993 target, mode, tmode, reverse, true, alt_rtl);
1994}
1995
1996/* Use shifts and boolean operations to extract a field of BITSIZE bits
1997 from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0,
1998 otherwise OP0 is a BLKmode MEM.
1999
2000 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2001 If REVERSE is true, the extraction is to be done in reverse order.
2002
2003 If TARGET is nonzero, attempts to store the value there
2004 and return TARGET, but this is not guaranteed.
2005 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
2006
2007static rtx
2008extract_fixed_bit_field (machine_mode tmode, rtx op0,
2009 opt_scalar_int_mode op0_mode,
2010 unsigned HOST_WIDE_INT bitsize,
2011 unsigned HOST_WIDE_INT bitnum, rtx target,
2012 int unsignedp, bool reverse)
2013{
2014 scalar_int_mode mode;
2015 if (MEM_P (op0))
2016 {
2017 if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2018 BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2019 /* The only way this should occur is if the field spans word
2020 boundaries. */
2021 return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2022 unsignedp, reverse);
2023
2024 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2025 }
2026 else
2027 mode = op0_mode.require ();
2028
2029 return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2030 target, unsignedp, reverse);
2031}
2032
2033/* Helper function for extract_fixed_bit_field, extracts
2034 the bit field always using MODE, which is the mode of OP0.
2035 The other arguments are as for extract_fixed_bit_field. */
2036
static rtx
extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
			   unsigned HOST_WIDE_INT bitsize,
			   unsigned HOST_WIDE_INT bitnum, rtx target,
			   int unsignedp, bool reverse)
{
  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
     for invalid input, such as extract equivalent of f5 from
     gcc.dg/pr48335-2.c.  */

  if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
    /* BITNUM is the distance between our msb and that of OP0.
       Convert it to the distance from the lsb.  */
    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;

  /* Now BITNUM is always the distance between the field's lsb and that of OP0.
     We have reduced the big-endian case to the little-endian case.  */
  if (reverse)
    op0 = flip_storage_order (mode, op0);

  /* Unsigned field: shift the field down to the lsb, then mask off any
     bits above it.  */
  if (unsignedp)
    {
      if (bitnum)
	{
	  /* If the field does not already start at the lsb,
	     shift it so it does.  */
	  /* Maybe propagate the target for the shift.  */
	  rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
	  if (tmode != mode)
	    subtarget = 0;
	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
	}
      /* Convert the value to the desired mode.  TMODE must also be a
	 scalar integer for this conversion to make sense, since we
	 shouldn't reinterpret the bits.  */
      scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
      if (mode != new_mode)
	op0 = convert_to_mode (new_mode, op0, 1);

      /* Unless the msb of the field used to be the msb when we shifted,
	 mask out the upper bits.  */

      if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
	return expand_binop (new_mode, and_optab, op0,
			     mask_rtx (new_mode, 0, bitsize, 0),
			     target, 1, OPTAB_LIB_WIDEN);
      return op0;
    }

  /* To extract a signed bit-field, first shift its msb to the msb of the word,
     then arithmetic-shift its lsb to the lsb of the word.  */
  op0 = force_reg (mode, op0);

  /* Find the narrowest integer mode that contains the field.  */

  opt_scalar_int_mode mode_iter;
  FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
    if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
      break;

  mode = mode_iter.require ();
  op0 = convert_to_mode (mode, op0, 0);

  /* TARGET is only usable directly if it has the mode we are going to
     operate in.  */
  if (mode != tmode)
    target = 0;

  if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
    {
      /* Left-shift so the field's msb becomes the msb of the word.  */
      int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
      /* Maybe propagate the target for the shift.  */
      rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
      op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
    }

  /* Arithmetic right shift brings the lsb back to bit 0 and replicates
     the sign bit into the upper bits.  */
  return expand_shift (RSHIFT_EXPR, mode, op0,
		       GET_MODE_BITSIZE (mode) - bitsize, target, 0);
}
2114
2115/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2116 VALUE << BITPOS. */
2117
static rtx
lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
	      int bitpos)
{
  /* Do the shift in wide-int arithmetic so no bits are lost even when
     BITPOS pushes VALUE beyond HOST_WIDE_INT, then wrap the result as a
     constant of MODE.  */
  return immed_wide_int_const (wi::lshift (value, bitpos), mode);
}
2124
2125/* Extract a bit field that is split across two words
2126 and return an RTX for the result.
2127
2128 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2129 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2130 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2131 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2132 a BLKmode MEM.
2133
2134 If REVERSE is true, the extraction is to be done in reverse order. */
2135
static rtx
extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
			 unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitpos, int unsignedp,
			 bool reverse)
{
  unsigned int unit;
  unsigned int bitsdone = 0;	/* Number of field bits extracted so far.  */
  rtx result = NULL_RTX;	/* Pieces OR'd together, in word_mode.  */
  int first = 1;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* Extract at most UNIT bits per iteration until the whole field has
     been read.  */
  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 extract_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      /* If OP0 is a register, then handle OFFSET here.  */
      rtx op0_piece = op0;
      opt_scalar_int_mode op0_piece_mode = op0_mode;
      if (SUBREG_P (op0) || REG_P (op0))
	{
	  op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
	  op0_piece_mode = word_mode;
	  offset = 0;
	}

      /* Extract the parts in bit-counting order,
	 whose meaning is determined by BYTES_BIG_ENDIAN.
	 OFFSET is in UNITs, and UNIT is in bits.  */
      part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
				      thissize, offset * unit + thispos,
				      0, 1, reverse);
      bitsdone += thissize;

      /* Shift this part into place for the result.  */
      if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
	{
	  if (bitsize != bitsdone)
	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
				 bitsize - bitsdone, 0, 1);
	}
      else
	{
	  if (bitsdone != thissize)
	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
				 bitsdone - thissize, 0, 1);
	}

      if (first)
	result = part;
      else
	/* Combine the parts with bitwise or.  This works
	   because we extracted each part as an unsigned bit field.  */
	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
			       OPTAB_LIB_WIDEN);

      first = 0;
    }

  /* Unsigned bit field: we are done.  */
  if (unsignedp)
    return result;
  /* Signed bit field: sign-extend with two arithmetic shifts.  */
  result = expand_shift (LSHIFT_EXPR, word_mode, result,
			 BITS_PER_WORD - bitsize, NULL_RTX, 0);
  return expand_shift (RSHIFT_EXPR, word_mode, result,
		       BITS_PER_WORD - bitsize, NULL_RTX, 0);
}
2222
2223/* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2224 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2225 MODE, fill the upper bits with zeros. Fail if the layout of either
2226 mode is unknown (as for CC modes) or if the extraction would involve
2227 unprofitable mode punning. Return the value on success, otherwise
2228 return null.
2229
2230 This is different from gen_lowpart* in these respects:
2231
2232 - the returned value must always be considered an rvalue
2233
2234 - when MODE is wider than SRC_MODE, the extraction involves
2235 a zero extension
2236
2237 - when MODE is smaller than SRC_MODE, the extraction involves
2238 a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2239
2240 In other words, this routine performs a computation, whereas the
2241 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2242 operations. */
2243
2244rtx
2245extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2246{
2247 scalar_int_mode int_mode, src_int_mode;
2248
2249 if (mode == src_mode)
2250 return src;
2251
2252 if (CONSTANT_P (src))
2253 {
2254 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2255 fails, it will happily create (subreg (symbol_ref)) or similar
2256 invalid SUBREGs. */
2257 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2258 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2259 if (ret)
2260 return ret;
2261
2262 if (GET_MODE (src) == VOIDmode
2263 || !validate_subreg (mode, src_mode, src, byte))
2264 return NULL_RTX;
2265
2266 src = force_reg (GET_MODE (src), src);
2267 return gen_rtx_SUBREG (mode, src, byte);
2268 }
2269
2270 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2271 return NULL_RTX;
2272
2273 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2274 && targetm.modes_tieable_p (mode, src_mode))
2275 {
2276 rtx x = gen_lowpart_common (mode, src);
2277 if (x)
2278 return x;
2279 }
2280
2281 if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2282 || !int_mode_for_mode (mode).exists (&int_mode))
2283 return NULL_RTX;
2284
2285 if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2286 return NULL_RTX;
2287 if (!targetm.modes_tieable_p (int_mode, mode))
2288 return NULL_RTX;
2289
2290 src = gen_lowpart (src_int_mode, src);
2291 src = convert_modes (int_mode, src_int_mode, src, true);
2292 src = gen_lowpart (mode, src);
2293 return src;
2294}
2295
2296/* Add INC into TARGET. */
2297
2298void
2299expand_inc (rtx target, rtx inc)
2300{
2301 rtx value = expand_binop (GET_MODE (target), add_optab,
2302 target, inc,
2303 target, 0, OPTAB_LIB_WIDEN);
2304 if (value != target)
2305 emit_move_insn (target, value);
2306}
2307
2308/* Subtract DEC from TARGET. */
2309
2310void
2311expand_dec (rtx target, rtx dec)
2312{
2313 rtx value = expand_binop (GET_MODE (target), sub_optab,
2314 target, dec,
2315 target, 0, OPTAB_LIB_WIDEN);
2316 if (value != target)
2317 emit_move_insn (target, value);
2318}
2319
2320/* Output a shift instruction for expression code CODE,
2321 with SHIFTED being the rtx for the value to shift,
2322 and AMOUNT the rtx for the amount to shift by.
2323 Store the result in the rtx TARGET, if that is convenient.
2324 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2325 Return the rtx for where the value is.
2326 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2327 in which case 0 is returned. */
2328
static rtx
expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
		rtx amount, rtx target, int unsignedp, bool may_fail = false)
{
  rtx op1, temp = 0;
  int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
  int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
  optab lshift_optab = ashl_optab;
  optab rshift_arith_optab = ashr_optab;
  optab rshift_uns_optab = lshr_optab;
  optab lrotate_optab = rotl_optab;
  optab rrotate_optab = rotr_optab;
  machine_mode op1_mode;
  scalar_mode scalar_mode = GET_MODE_INNER (mode);
  int attempt;
  bool speed = optimize_insn_for_speed_p ();

  op1 = amount;
  op1_mode = GET_MODE (op1);

  /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
     shift amount is a vector, use the vector/vector shift patterns.  */
  if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
    {
      lshift_optab = vashl_optab;
      rshift_arith_optab = vashr_optab;
      rshift_uns_optab = vlshr_optab;
      lrotate_optab = vrotl_optab;
      rrotate_optab = vrotr_optab;
    }

  /* Previously detected shift-counts computed by NEGATE_EXPR
     and shifted in the other direction; but that does not work
     on all machines.  */

  if (SHIFT_COUNT_TRUNCATED)
    {
      if (CONST_INT_P (op1)
	  && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
	      (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
	op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
		       % GET_MODE_BITSIZE (scalar_mode));
      else if (GET_CODE (op1) == SUBREG
	       && subreg_lowpart_p (op1)
	       && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
	       && SCALAR_INT_MODE_P (GET_MODE (op1)))
	op1 = SUBREG_REG (op1);
    }

  /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
     prefer left rotation, if op1 is from bitsize / 2 + 1 to
     bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
     amount instead.  */
  if (rotate
      && CONST_INT_P (op1)
      && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
		   GET_MODE_BITSIZE (scalar_mode) - 1))
    {
      op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
      left = !left;
      code = left ? LROTATE_EXPR : RROTATE_EXPR;
    }

  /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
     Note that this is not the case for bigger values.  For instance a rotation
     of 0x01020304 by 16 bits gives 0x03040102 which is different from
     0x04030201 (bswapsi).  */
  if (rotate
      && CONST_INT_P (op1)
      && INTVAL (op1) == BITS_PER_UNIT
      && GET_MODE_SIZE (scalar_mode) == 2
      && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
    return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
			unsignedp);

  /* A shift of zero bits is a no-op.  */
  if (op1 == const0_rtx)
    return shifted;

  /* Check whether it's cheaper to implement a left shift by a constant
     bit count by a sequence of additions.  */
  if (code == LSHIFT_EXPR
      && CONST_INT_P (op1)
      && INTVAL (op1) > 0
      && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
      && INTVAL (op1) < MAX_BITS_PER_WORD
      && (shift_cost (speed, mode, INTVAL (op1))
	  > INTVAL (op1) * add_cost (speed, mode))
      && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
    {
      int i;
      /* Each addition doubles the value, i.e. shifts left by one bit.  */
      for (i = 0; i < INTVAL (op1); i++)
	{
	  temp = force_reg (mode, shifted);
	  shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
				  unsignedp, OPTAB_LIB_WIDEN);
	}
      return shifted;
    }

  /* Try the shift with progressively more permissive optab methods:
     direct insn first, then widening, then a library call.  */
  for (attempt = 0; temp == 0 && attempt < 3; attempt++)
    {
      enum optab_methods methods;

      if (attempt == 0)
	methods = OPTAB_DIRECT;
      else if (attempt == 1)
	methods = OPTAB_WIDEN;
      else
	methods = OPTAB_LIB_WIDEN;

      if (rotate)
	{
	  /* Widening does not work for rotation.  */
	  if (methods == OPTAB_WIDEN)
	    continue;
	  else if (methods == OPTAB_LIB_WIDEN)
	    {
	      /* If we have been unable to open-code this by a rotation,
		 do it as the IOR of two shifts.  I.e., to rotate A
		 by N bits, compute
		 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
		 where C is the bitsize of A.

		 It is theoretically possible that the target machine might
		 not be able to perform either shift and hence we would
		 be making two libcalls rather than just the one for the
		 shift (similarly if IOR could not be done).  We will allow
		 this extremely unlikely lossage to avoid complicating the
		 code below.  */

	      rtx subtarget = target == shifted ? 0 : target;
	      rtx new_amount, other_amount;
	      rtx temp1;

	      new_amount = op1;
	      if (op1 == const0_rtx)
		return shifted;
	      else if (CONST_INT_P (op1))
		other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
					- INTVAL (op1));
	      else
		{
		  /* Variable amount: compute (-N) & (C - 1) at run time.  */
		  other_amount
		    = simplify_gen_unary (NEG, GET_MODE (op1),
					  op1, GET_MODE (op1));
		  HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
		  other_amount
		    = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
					   gen_int_mode (mask, GET_MODE (op1)));
		}

	      shifted = force_reg (mode, shifted);

	      temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
				     mode, shifted, new_amount, 0, 1);
	      temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
				      mode, shifted, other_amount,
				      subtarget, 1);
	      return expand_binop (mode, ior_optab, temp, temp1, target,
				   unsignedp, methods);
	    }

	  temp = expand_binop (mode,
			       left ? lrotate_optab : rrotate_optab,
			       shifted, op1, target, unsignedp, methods);
	}
      else if (unsignedp)
	temp = expand_binop (mode,
			     left ? lshift_optab : rshift_uns_optab,
			     shifted, op1, target, unsignedp, methods);

      /* Do arithmetic shifts.
	 Also, if we are going to widen the operand, we can just as well
	 use an arithmetic right-shift instead of a logical one.  */
      if (temp == 0 && ! rotate
	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
	{
	  enum optab_methods methods1 = methods;

	  /* If trying to widen a log shift to an arithmetic shift,
	     don't accept an arithmetic shift of the same size.  */
	  if (unsignedp)
	    methods1 = OPTAB_MUST_WIDEN;

	  /* Arithmetic shift */

	  temp = expand_binop (mode,
			       left ? lshift_optab : rshift_arith_optab,
			       shifted, op1, target, unsignedp, methods1);
	}

      /* We used to try extzv here for logical right shifts, but that was
	 only useful for one machine, the VAX, and caused poor code
	 generation there for lshrdi3, so the code was deleted and a
	 define_expand for lshrsi3 was added to vax.md.  */
    }

  gcc_assert (temp != NULL_RTX || may_fail);
  return temp;
}
2529
2530/* Output a shift instruction for expression code CODE,
2531 with SHIFTED being the rtx for the value to shift,
2532 and AMOUNT the amount to shift by.
2533 Store the result in the rtx TARGET, if that is convenient.
2534 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2535 Return the rtx for where the value is. */
2536
2537rtx
2538expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2539 int amount, rtx target, int unsignedp)
2540{
2541 return expand_shift_1 (code, mode,
2542 shifted, GEN_INT (amount), target, unsignedp);
2543}
2544
2545/* Likewise, but return 0 if that cannot be done. */
2546
2547static rtx
2548maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2549 int amount, rtx target, int unsignedp)
2550{
2551 return expand_shift_1 (code, mode,
2552 shifted, GEN_INT (amount), target, unsignedp, true);
2553}
2554
2555/* Output a shift instruction for expression code CODE,
2556 with SHIFTED being the rtx for the value to shift,
2557 and AMOUNT the tree for the amount to shift by.
2558 Store the result in the rtx TARGET, if that is convenient.
2559 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2560 Return the rtx for where the value is. */
2561
2562rtx
2563expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2564 tree amount, rtx target, int unsignedp)
2565{
2566 return expand_shift_1 (code, mode,
2567 shifted, expand_normal (amount), target, unsignedp);
2568}
2569
2570
2571static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2572 const struct mult_cost *, machine_mode mode);
2573static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2574 const struct algorithm *, enum mult_variant);
2575static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2576static rtx extract_high_half (scalar_int_mode, rtx);
2577static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2578static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2579 int, int);
2580/* Compute and return the best algorithm for multiplying by T.
2581 The algorithm must cost less than cost_limit
   If retval.cost >= COST_LIMIT, no algorithm was found and all
   other fields of the returned struct are undefined.
2584 MODE is the machine mode of the multiplication. */
2585
2586static void
2587synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2588 const struct mult_cost *cost_limit, machine_mode mode)
2589{
2590 int m;
2591 struct algorithm *alg_in, *best_alg;
2592 struct mult_cost best_cost;
2593 struct mult_cost new_limit;
2594 int op_cost, op_latency;
2595 unsigned HOST_WIDE_INT orig_t = t;
2596 unsigned HOST_WIDE_INT q;
2597 int maxm, hash_index;
2598 bool cache_hit = false;
2599 enum alg_code cache_alg = alg_zero;
2600 bool speed = optimize_insn_for_speed_p ();
2601 scalar_int_mode imode;
2602 struct alg_hash_entry *entry_ptr;
2603
2604 /* Indicate that no algorithm is yet found. If no algorithm
2605 is found, this value will be returned and indicate failure. */
2606 alg_out->cost.cost = cost_limit->cost + 1;
2607 alg_out->cost.latency = cost_limit->latency + 1;
2608
2609 if (cost_limit->cost < 0
2610 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2611 return;
2612
2613 /* Be prepared for vector modes. */
2614 imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2615
2616 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2617
2618 /* Restrict the bits of "t" to the multiplication's mode. */
2619 t &= GET_MODE_MASK (imode);
2620
2621 /* t == 1 can be done in zero cost. */
2622 if (t == 1)
2623 {
2624 alg_out->ops = 1;
2625 alg_out->cost.cost = 0;
2626 alg_out->cost.latency = 0;
2627 alg_out->op[0] = alg_m;
2628 return;
2629 }
2630
2631 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2632 fail now. */
2633 if (t == 0)
2634 {
2635 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2636 return;
2637 else
2638 {
2639 alg_out->ops = 1;
2640 alg_out->cost.cost = zero_cost (speed);
2641 alg_out->cost.latency = zero_cost (speed);
2642 alg_out->op[0] = alg_zero;
2643 return;
2644 }
2645 }
2646
2647 /* We'll be needing a couple extra algorithm structures now. */
2648
2649 alg_in = XALLOCA (struct algorithm);
2650 best_alg = XALLOCA (struct algorithm);
2651 best_cost = *cost_limit;
2652
2653 /* Compute the hash index. */
2654 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2655
2656 /* See if we already know what to do for T. */
2657 entry_ptr = alg_hash_entry_ptr (hash_index);
2658 if (entry_ptr->t == t
2659 && entry_ptr->mode == mode
2660 && entry_ptr->speed == speed
2661 && entry_ptr->alg != alg_unknown)
2662 {
2663 cache_alg = entry_ptr->alg;
2664
2665 if (cache_alg == alg_impossible)
2666 {
2667 /* The cache tells us that it's impossible to synthesize
2668 multiplication by T within entry_ptr->cost. */
2669 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2670 /* COST_LIMIT is at least as restrictive as the one
2671 recorded in the hash table, in which case we have no
2672 hope of synthesizing a multiplication. Just
2673 return. */
2674 return;
2675
2676 /* If we get here, COST_LIMIT is less restrictive than the
2677 one recorded in the hash table, so we may be able to
2678 synthesize a multiplication. Proceed as if we didn't
2679 have the cache entry. */
2680 }
2681 else
2682 {
2683 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2684 /* The cached algorithm shows that this multiplication
2685 requires more cost than COST_LIMIT. Just return. This
2686 way, we don't clobber this cache entry with
2687 alg_impossible but retain useful information. */
2688 return;
2689
2690 cache_hit = true;
2691
2692 switch (cache_alg)
2693 {
2694 case alg_shift:
2695 goto do_alg_shift;
2696
2697 case alg_add_t_m2:
2698 case alg_sub_t_m2:
2699 goto do_alg_addsub_t_m2;
2700
2701 case alg_add_factor:
2702 case alg_sub_factor:
2703 goto do_alg_addsub_factor;
2704
2705 case alg_add_t2_m:
2706 goto do_alg_add_t2_m;
2707
2708 case alg_sub_t2_m:
2709 goto do_alg_sub_t2_m;
2710
2711 default:
2712 gcc_unreachable ();
2713 }
2714 }
2715 }
2716
2717 /* If we have a group of zero bits at the low-order part of T, try
2718 multiplying by the remaining bits and then doing a shift. */
2719
2720 if ((t & 1) == 0)
2721 {
2722 do_alg_shift:
2723 m = ctz_or_zero (t); /* m = number of low zero bits */
2724 if (m < maxm)
2725 {
2726 q = t >> m;
2727 /* The function expand_shift will choose between a shift and
2728 a sequence of additions, so the observed cost is given as
2729 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2730 op_cost = m * add_cost (speed, mode);
2731 if (shift_cost (speed, mode, m) < op_cost)
2732 op_cost = shift_cost (speed, mode, m);
2733 new_limit.cost = best_cost.cost - op_cost;
2734 new_limit.latency = best_cost.latency - op_cost;
2735 synth_mult (alg_in, q, &new_limit, mode);
2736
2737 alg_in->cost.cost += op_cost;
2738 alg_in->cost.latency += op_cost;
2739 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2740 {
2741 best_cost = alg_in->cost;
2742 std::swap (alg_in, best_alg);
2743 best_alg->log[best_alg->ops] = m;
2744 best_alg->op[best_alg->ops] = alg_shift;
2745 }
2746
2747 /* See if treating ORIG_T as a signed number yields a better
2748 sequence. Try this sequence only for a negative ORIG_T
2749 as it would be useless for a non-negative ORIG_T. */
2750 if ((HOST_WIDE_INT) orig_t < 0)
2751 {
2752 /* Shift ORIG_T as follows because a right shift of a
2753 negative-valued signed type is implementation
2754 defined. */
2755 q = ~(~orig_t >> m);
2756 /* The function expand_shift will choose between a shift
2757 and a sequence of additions, so the observed cost is
2758 given as MIN (m * add_cost(speed, mode),
2759 shift_cost(speed, mode, m)). */
2760 op_cost = m * add_cost (speed, mode);
2761 if (shift_cost (speed, mode, m) < op_cost)
2762 op_cost = shift_cost (speed, mode, m);
2763 new_limit.cost = best_cost.cost - op_cost;
2764 new_limit.latency = best_cost.latency - op_cost;
2765 synth_mult (alg_in, q, &new_limit, mode);
2766
2767 alg_in->cost.cost += op_cost;
2768 alg_in->cost.latency += op_cost;
2769 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2770 {
2771 best_cost = alg_in->cost;
2772 std::swap (alg_in, best_alg);
2773 best_alg->log[best_alg->ops] = m;
2774 best_alg->op[best_alg->ops] = alg_shift;
2775 }
2776 }
2777 }
2778 if (cache_hit)
2779 goto done;
2780 }
2781
2782 /* If we have an odd number, add or subtract one. */
2783 if ((t & 1) != 0)
2784 {
2785 unsigned HOST_WIDE_INT w;
2786
2787 do_alg_addsub_t_m2:
2788 for (w = 1; (w & t) != 0; w <<= 1)
2789 ;
2790 /* If T was -1, then W will be zero after the loop. This is another
2791 case where T ends with ...111. Handling this with (T + 1) and
2792 subtract 1 produces slightly better code and results in algorithm
2793 selection much faster than treating it like the ...0111 case
2794 below. */
2795 if (w == 0
2796 || (w > 2
2797 /* Reject the case where t is 3.
2798 Thus we prefer addition in that case. */
2799 && t != 3))
2800 {
2801 /* T ends with ...111. Multiply by (T + 1) and subtract T. */
2802
2803 op_cost = add_cost (speed, mode);
2804 new_limit.cost = best_cost.cost - op_cost;
2805 new_limit.latency = best_cost.latency - op_cost;
2806 synth_mult (alg_in, t + 1, &new_limit, mode);
2807
2808 alg_in->cost.cost += op_cost;
2809 alg_in->cost.latency += op_cost;
2810 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2811 {
2812 best_cost = alg_in->cost;
2813 std::swap (alg_in, best_alg);
2814 best_alg->log[best_alg->ops] = 0;
2815 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2816 }
2817 }
2818 else
2819 {
2820 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
2821
2822 op_cost = add_cost (speed, mode);
2823 new_limit.cost = best_cost.cost - op_cost;
2824 new_limit.latency = best_cost.latency - op_cost;
2825 synth_mult (alg_in, t - 1, &new_limit, mode);
2826
2827 alg_in->cost.cost += op_cost;
2828 alg_in->cost.latency += op_cost;
2829 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2830 {
2831 best_cost = alg_in->cost;
2832 std::swap (alg_in, best_alg);
2833 best_alg->log[best_alg->ops] = 0;
2834 best_alg->op[best_alg->ops] = alg_add_t_m2;
2835 }
2836 }
2837
2838 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2839 quickly with a - a * n for some appropriate constant n. */
2840 m = exact_log2 (-orig_t + 1);
2841 if (m >= 0 && m < maxm)
2842 {
2843 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2844 /* If the target has a cheap shift-and-subtract insn use
2845 that in preference to a shift insn followed by a sub insn.
2846 Assume that the shift-and-sub is "atomic" with a latency
	     equal to its cost, otherwise assume that on superscalar
2848 hardware the shift may be executed concurrently with the
2849 earlier steps in the algorithm. */
2850 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2851 {
2852 op_cost = shiftsub1_cost (speed, mode, m);
2853 op_latency = op_cost;
2854 }
2855 else
2856 op_latency = add_cost (speed, mode);
2857
2858 new_limit.cost = best_cost.cost - op_cost;
2859 new_limit.latency = best_cost.latency - op_latency;
2860 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2861 &new_limit, mode);
2862
2863 alg_in->cost.cost += op_cost;
2864 alg_in->cost.latency += op_latency;
2865 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2866 {
2867 best_cost = alg_in->cost;
2868 std::swap (alg_in, best_alg);
2869 best_alg->log[best_alg->ops] = m;
2870 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2871 }
2872 }
2873
2874 if (cache_hit)
2875 goto done;
2876 }
2877
2878 /* Look for factors of t of the form
2879 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2880 If we find such a factor, we can multiply by t using an algorithm that
2881 multiplies by q, shift the result by m and add/subtract it to itself.
2882
2883 We search for large factors first and loop down, even if large factors
2884 are less probable than small; if we find a large factor we will find a
2885 good sequence quickly, and therefore be able to prune (by decreasing
2886 COST_LIMIT) the search. */
2887
2888 do_alg_addsub_factor:
2889 for (m = floor_log2 (t - 1); m >= 2; m--)
2890 {
2891 unsigned HOST_WIDE_INT d;
2892
2893 d = (HOST_WIDE_INT_1U << m) + 1;
2894 if (t % d == 0 && t > d && m < maxm
2895 && (!cache_hit || cache_alg == alg_add_factor))
2896 {
2897 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2898 if (shiftadd_cost (speed, mode, m) <= op_cost)
2899 op_cost = shiftadd_cost (speed, mode, m);
2900
2901 op_latency = op_cost;
2902
2903
2904 new_limit.cost = best_cost.cost - op_cost;
2905 new_limit.latency = best_cost.latency - op_latency;
2906 synth_mult (alg_in, t / d, &new_limit, mode);
2907
2908 alg_in->cost.cost += op_cost;
2909 alg_in->cost.latency += op_latency;
2910 if (alg_in->cost.latency < op_cost)
2911 alg_in->cost.latency = op_cost;
2912 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2913 {
2914 best_cost = alg_in->cost;
2915 std::swap (alg_in, best_alg);
2916 best_alg->log[best_alg->ops] = m;
2917 best_alg->op[best_alg->ops] = alg_add_factor;
2918 }
2919 /* Other factors will have been taken care of in the recursion. */
2920 break;
2921 }
2922
2923 d = (HOST_WIDE_INT_1U << m) - 1;
2924 if (t % d == 0 && t > d && m < maxm
2925 && (!cache_hit || cache_alg == alg_sub_factor))
2926 {
2927 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2928 if (shiftsub0_cost (speed, mode, m) <= op_cost)
2929 op_cost = shiftsub0_cost (speed, mode, m);
2930
2931 op_latency = op_cost;
2932
2933 new_limit.cost = best_cost.cost - op_cost;
2934 new_limit.latency = best_cost.latency - op_latency;
2935 synth_mult (alg_in, t / d, &new_limit, mode);
2936
2937 alg_in->cost.cost += op_cost;
2938 alg_in->cost.latency += op_latency;
2939 if (alg_in->cost.latency < op_cost)
2940 alg_in->cost.latency = op_cost;
2941 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2942 {
2943 best_cost = alg_in->cost;
2944 std::swap (alg_in, best_alg);
2945 best_alg->log[best_alg->ops] = m;
2946 best_alg->op[best_alg->ops] = alg_sub_factor;
2947 }
2948 break;
2949 }
2950 }
2951 if (cache_hit)
2952 goto done;
2953
2954 /* Try shift-and-add (load effective address) instructions,
2955 i.e. do a*3, a*5, a*9. */
2956 if ((t & 1) != 0)
2957 {
2958 do_alg_add_t2_m:
2959 q = t - 1;
2960 m = ctz_hwi (q);
2961 if (q && m < maxm)
2962 {
2963 op_cost = shiftadd_cost (speed, mode, m);
2964 new_limit.cost = best_cost.cost - op_cost;
2965 new_limit.latency = best_cost.latency - op_cost;
2966 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2967
2968 alg_in->cost.cost += op_cost;
2969 alg_in->cost.latency += op_cost;
2970 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2971 {
2972 best_cost = alg_in->cost;
2973 std::swap (alg_in, best_alg);
2974 best_alg->log[best_alg->ops] = m;
2975 best_alg->op[best_alg->ops] = alg_add_t2_m;
2976 }
2977 }
2978 if (cache_hit)
2979 goto done;
2980
2981 do_alg_sub_t2_m:
2982 q = t + 1;
2983 m = ctz_hwi (q);
2984 if (q && m < maxm)
2985 {
2986 op_cost = shiftsub0_cost (speed, mode, m);
2987 new_limit.cost = best_cost.cost - op_cost;
2988 new_limit.latency = best_cost.latency - op_cost;
2989 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2990
2991 alg_in->cost.cost += op_cost;
2992 alg_in->cost.latency += op_cost;
2993 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2994 {
2995 best_cost = alg_in->cost;
2996 std::swap (alg_in, best_alg);
2997 best_alg->log[best_alg->ops] = m;
2998 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2999 }
3000 }
3001 if (cache_hit)
3002 goto done;
3003 }
3004
3005 done:
3006 /* If best_cost has not decreased, we have not found any algorithm. */
3007 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3008 {
3009 /* We failed to find an algorithm. Record alg_impossible for
3010 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3011 we are asked to find an algorithm for T within the same or
3012 lower COST_LIMIT, we can immediately return to the
3013 caller. */
3014 entry_ptr->t = t;
3015 entry_ptr->mode = mode;
3016 entry_ptr->speed = speed;
3017 entry_ptr->alg = alg_impossible;
3018 entry_ptr->cost = *cost_limit;
3019 return;
3020 }
3021
3022 /* Cache the result. */
3023 if (!cache_hit)
3024 {
3025 entry_ptr->t = t;
3026 entry_ptr->mode = mode;
3027 entry_ptr->speed = speed;
3028 entry_ptr->alg = best_alg->op[best_alg->ops];
3029 entry_ptr->cost.cost = best_cost.cost;
3030 entry_ptr->cost.latency = best_cost.latency;
3031 }
3032
3033 /* If we are getting a too long sequence for `struct algorithm'
3034 to record, make this search fail. */
3035 if (best_alg->ops == MAX_BITS_PER_WORD)
3036 return;
3037
3038 /* Copy the algorithm from temporary space to the space at alg_out.
3039 We avoid using structure assignment because the majority of
3040 best_alg is normally undefined, and this is a critical function. */
3041 alg_out->ops = best_alg->ops + 1;
3042 alg_out->cost = best_cost;
3043 memcpy (alg_out->op, best_alg->op,
3044 alg_out->ops * sizeof *alg_out->op);
3045 memcpy (alg_out->log, best_alg->log,
3046 alg_out->ops * sizeof *alg_out->log);
3047}
3048
3049/* Find the cheapest way of multiplying a value of mode MODE by VAL.
3050 Try three variations:
3051
3052 - a shift/add sequence based on VAL itself
3053 - a shift/add sequence based on -VAL, followed by a negation
3054 - a shift/add sequence based on VAL - 1, followed by an addition.
3055
3056 Return true if the cheapest of these cost less than MULT_COST,
3057 describing the algorithm in *ALG and final fixup in *VARIANT. */
3058
3059bool
3060choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3061 struct algorithm *alg, enum mult_variant *variant,
3062 int mult_cost)
3063{
3064 struct algorithm alg2;
3065 struct mult_cost limit;
3066 int op_cost;
3067 bool speed = optimize_insn_for_speed_p ();
3068
3069 /* Fail quickly for impossible bounds. */
3070 if (mult_cost < 0)
3071 return false;
3072
3073 /* Ensure that mult_cost provides a reasonable upper bound.
3074 Any constant multiplication can be performed with less
3075 than 2 * bits additions. */
3076 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3077 if (mult_cost > op_cost)
3078 mult_cost = op_cost;
3079
3080 *variant = basic_variant;
3081 limit.cost = mult_cost;
3082 limit.latency = mult_cost;
3083 synth_mult (alg, val, &limit, mode);
3084
3085 /* This works only if the inverted value actually fits in an
3086 `unsigned int' */
3087 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3088 {
3089 op_cost = neg_cost (speed, mode);
3090 if (MULT_COST_LESS (&alg->cost, mult_cost))
3091 {
3092 limit.cost = alg->cost.cost - op_cost;
3093 limit.latency = alg->cost.latency - op_cost;
3094 }
3095 else
3096 {
3097 limit.cost = mult_cost - op_cost;
3098 limit.latency = mult_cost - op_cost;
3099 }
3100
3101 synth_mult (&alg2, -val, &limit, mode);
3102 alg2.cost.cost += op_cost;
3103 alg2.cost.latency += op_cost;
3104 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3105 *alg = alg2, *variant = negate_variant;
3106 }
3107
3108 /* This proves very useful for division-by-constant. */
3109 op_cost = add_cost (speed, mode);
3110 if (MULT_COST_LESS (&alg->cost, mult_cost))
3111 {
3112 limit.cost = alg->cost.cost - op_cost;
3113 limit.latency = alg->cost.latency - op_cost;
3114 }
3115 else
3116 {
3117 limit.cost = mult_cost - op_cost;
3118 limit.latency = mult_cost - op_cost;
3119 }
3120
3121 synth_mult (&alg2, val - 1, &limit, mode);
3122 alg2.cost.cost += op_cost;
3123 alg2.cost.latency += op_cost;
3124 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3125 *alg = alg2, *variant = add_variant;
3126
3127 return MULT_COST_LESS (&alg->cost, mult_cost);
3128}
3129
/* A subroutine of expand_mult, used for constant multiplications.
   Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
   convenient.  Use the shift/add sequence described by ALG and apply
   the final fixup specified by VARIANT (no fixup, a negation of the
   result, or one final addition of OP0).  Return the rtx holding the
   product.  */

static rtx
expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
		   rtx target, const struct algorithm *alg,
		   enum mult_variant variant)
{
  /* The multiplier synthesized so far; checked against VAL at the end.  */
  unsigned HOST_WIDE_INT val_so_far;
  rtx_insn *insn;
  rtx accum, tem;
  int opno;
  machine_mode nmode;

  /* Avoid referencing memory over and over and invalid sharing
     on SUBREGs.  */
  op0 = force_reg (mode, op0);

  /* ACCUM starts out either as OP0 or as a zero, depending on
     the first operation.  */

  if (alg->op[0] == alg_zero)
    {
      accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
      val_so_far = 0;
    }
  else if (alg->op[0] == alg_m)
    {
      accum = copy_to_mode_reg (mode, op0);
      val_so_far = 1;
    }
  else
    gcc_unreachable ();

  /* Emit one shift, add or subtract per remaining algorithm step,
     accumulating the partial product in ACCUM.  */
  for (opno = 1; opno < alg->ops; opno++)
    {
      int log = alg->log[opno];
      /* When not optimizing, reuse ACCUM as the subtarget to avoid
	 creating extra pseudos.  */
      rtx shift_subtarget = optimize ? 0 : accum;
      /* Direct the last step's result into TARGET when no further
	 fixup (add_variant) will follow.  */
      rtx add_target
	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
	   && !optimize)
	  ? target : 0;
      rtx accum_target = optimize ? 0 : accum;
      rtx accum_inner;

      switch (alg->op[opno])
	{
	case alg_shift:
	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
	  /* REG_EQUAL note will be attached to the following insn.  */
	  emit_move_insn (accum, tem);
	  val_so_far <<= log;
	  break;

	case alg_add_t_m2:
	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far += HOST_WIDE_INT_1U << log;
	  break;

	case alg_sub_t_m2:
	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far -= HOST_WIDE_INT_1U << log;
	  break;

	case alg_add_t2_m:
	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
				log, shift_subtarget, 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
				 add_target ? add_target : accum_target);
	  val_so_far = (val_so_far << log) + 1;
	  break;

	case alg_sub_t2_m:
	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
				log, shift_subtarget, 0);
	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
				 add_target ? add_target : accum_target);
	  val_so_far = (val_so_far << log) - 1;
	  break;

	case alg_add_factor:
	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far += val_so_far << log;
	  break;

	case alg_sub_factor:
	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
				 (add_target
				  ? add_target : (optimize ? 0 : tem)));
	  val_so_far = (val_so_far << log) - val_so_far;
	  break;

	default:
	  gcc_unreachable ();
	}

      if (SCALAR_INT_MODE_P (mode))
	{
	  /* Write a REG_EQUAL note on the last insn so that we can cse
	     multiplication sequences.  Note that if ACCUM is a SUBREG,
	     we've set the inner register and must properly indicate that.  */
	  tem = op0, nmode = mode;
	  accum_inner = accum;
	  if (GET_CODE (accum) == SUBREG)
	    {
	      accum_inner = SUBREG_REG (accum);
	      nmode = GET_MODE (accum_inner);
	      tem = gen_lowpart (nmode, op0);
	    }

	  insn = get_last_insn ();
	  set_dst_reg_note (insn, REG_EQUAL,
			    gen_rtx_MULT (nmode, tem,
					  gen_int_mode (val_so_far, nmode)),
			    accum_inner);
	}
    }

  /* Apply the final fixup that choose_mult_variant selected.  */
  if (variant == negate_variant)
    {
      val_so_far = -val_so_far;
      accum = expand_unop (mode, neg_optab, accum, target, 0);
    }
  else if (variant == add_variant)
    {
      val_so_far = val_so_far + 1;
      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
    }

  /* Compare only the bits of val and val_so_far that are significant
     in the result mode, to avoid sign-/zero-extension confusion.  */
  nmode = GET_MODE_INNER (mode);
  val &= GET_MODE_MASK (nmode);
  val_so_far &= GET_MODE_MASK (nmode);
  gcc_assert (val == (HOST_WIDE_INT) val_so_far);

  return accum;
}
3277
/* Perform a multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).
   UNSIGNEDP is nonzero for an unsigned multiply; NO_LIBCALL forbids
   falling back to a library call (the result may then be NULL_RTX).

   We check specially for a constant integer as OP1.
   If you want this check for OP0 as well, then before calling
   you should swap the two operands if OP0 would be constant.  */

rtx
expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
	     int unsignedp, bool no_libcall)
{
  enum mult_variant variant;
  struct algorithm algorithm;
  rtx scalar_op1;
  int max_cost;
  bool speed = optimize_insn_for_speed_p ();
  /* -ftrapv applies only to signed scalar integer multiplication.  */
  bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;

  if (CONSTANT_P (op0))
    std::swap (op0, op1);

  /* For vectors, there are several simplifications that can be made if
     all elements of the vector constant are identical.  */
  scalar_op1 = unwrap_const_vec_duplicate (op1);

  if (INTEGRAL_MODE_P (mode))
    {
      rtx fake_reg;
      HOST_WIDE_INT coeff;
      bool is_neg;
      int mode_bitsize;

      /* Trivial coefficients: 0, 1 and -1 need no multiply at all.  */
      if (op1 == CONST0_RTX (mode))
	return op1;
      if (op1 == CONST1_RTX (mode))
	return op0;
      if (op1 == CONSTM1_RTX (mode))
	return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
			    op0, target, 0);

      /* A synthesized shift/add sequence would hide the overflows that
	 -ftrapv is supposed to detect, so fall through to the real
	 (trapping) multiply.  */
      if (do_trapv)
	goto skip_synth;

      /* If mode is integer vector mode, check if the backend supports
	 vector lshift (by scalar or vector) at all.  If not, we can't use
	 synthesized multiply.  */
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
	  && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
	goto skip_synth;

      /* These are the operations that are potentially turned into
	 a sequence of shifts and additions.  */
      mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

      /* synth_mult does an `unsigned int' multiply.  As long as the mode is
	 less than or equal in size to `unsigned int' this doesn't matter.
	 If the mode is larger than `unsigned int', then synth_mult works
	 only if the constant value exactly fits in an `unsigned int' without
	 any truncation.  This means that multiplying by negative values does
	 not work; results are off by 2^32 on a 32 bit machine.  */
      if (CONST_INT_P (scalar_op1))
	{
	  coeff = INTVAL (scalar_op1);
	  is_neg = coeff < 0;
	}
#if TARGET_SUPPORTS_WIDE_INT
      else if (CONST_WIDE_INT_P (scalar_op1))
#else
      else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
#endif
	{
	  int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
	  /* Perfect power of 2 (other than 1, which is handled above).  */
	  if (shift > 0)
	    return expand_shift (LSHIFT_EXPR, mode, op0,
				 shift, target, unsignedp);
	  else
	    goto skip_synth;
	}
      else
	goto skip_synth;

      /* We used to test optimize here, on the grounds that it's better to
	 produce a smaller program when -O is not used.  But this causes
	 such a terrible slowdown sometimes that it seems better to always
	 use synth_mult.  */

      /* Special case powers of two.  */
      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
	  && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
	return expand_shift (LSHIFT_EXPR, mode, op0,
			     floor_log2 (coeff), target, unsignedp);

      /* A scratch register standing in for OP0 in the cost queries.  */
      fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);

      /* Attempt to handle multiplication of DImode values by negative
	 coefficients, by performing the multiplication by a positive
	 multiplier and then inverting the result.  */
      if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
	{
	  /* It's safe to use -coeff even for INT_MIN, as the
	     result is interpreted as an unsigned coefficient.
	     Exclude cost of op0 from max_cost to match the cost
	     calculation of the synth_mult.  */
	  coeff = -(unsigned HOST_WIDE_INT) coeff;
	  max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
				    mode, speed)
		      - neg_cost (speed, mode));
	  if (max_cost <= 0)
	    goto skip_synth;

	  /* Special case powers of two.  */
	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
	    {
	      rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
				       floor_log2 (coeff), target, unsignedp);
	      return expand_unop (mode, neg_optab, temp, target, 0);
	    }

	  if (choose_mult_variant (mode, coeff, &algorithm, &variant,
				   max_cost))
	    {
	      rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
					    &algorithm, variant);
	      return expand_unop (mode, neg_optab, temp, target, 0);
	    }
	  goto skip_synth;
	}

      /* Exclude cost of op0 from max_cost to match the cost
	 calculation of the synth_mult.  */
      max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
      if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
	return expand_mult_const (mode, op0, coeff, target,
				  &algorithm, variant);
    }
 skip_synth:

  /* Expand x*2.0 as x+x.  */
  if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
      && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
    {
      op0 = force_reg (GET_MODE (op0), op0);
      return expand_binop (mode, add_optab, op0, op0,
			   target, unsignedp,
			   no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
    }

  /* This used to use umul_optab if unsigned, but for non-widening multiply
     there is no difference between signed and unsigned.  */
  op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
		      op0, op1, target, unsignedp,
		      no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
  gcc_assert (op0 || no_libcall);
  return op0;
}
3436
3437/* Return a cost estimate for multiplying a register by the given
3438 COEFFicient in the given MODE and SPEED. */
3439
3440int
3441mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3442{
3443 int max_cost;
3444 struct algorithm algorithm;
3445 enum mult_variant variant;
3446
3447 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3448 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3449 mode, speed);
3450 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3451 return algorithm.cost.cost;
3452 else
3453 return max_cost;
3454}
3455
3456/* Perform a widening multiplication and return an rtx for the result.
3457 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3458 TARGET is a suggestion for where to store the result (an rtx).
3459 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3460 or smul_widen_optab.
3461
3462 We check specially for a constant integer as OP1, comparing the
3463 cost of a widening multiply against the cost of a sequence of shifts
3464 and adds. */
3465
3466rtx
3467expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3468 int unsignedp, optab this_optab)
3469{
3470 bool speed = optimize_insn_for_speed_p ();
3471 rtx cop1;
3472
3473 if (CONST_INT_P (op1)
3474 && GET_MODE (op0) != VOIDmode
3475 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3476 this_optab == umul_widen_optab))
3477 && CONST_INT_P (cop1)
3478 && (INTVAL (cop1) >= 0
3479 || HWI_COMPUTABLE_MODE_P (mode)))
3480 {
3481 HOST_WIDE_INT coeff = INTVAL (cop1);
3482 int max_cost;
3483 enum mult_variant variant;
3484 struct algorithm algorithm;
3485
3486 if (coeff == 0)
3487 return CONST0_RTX (mode);
3488
3489 /* Special case powers of two. */
3490 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3491 {
3492 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3493 return expand_shift (LSHIFT_EXPR, mode, op0,
3494 floor_log2 (coeff), target, unsignedp);
3495 }
3496
3497 /* Exclude cost of op0 from max_cost to match the cost
3498 calculation of the synth_mult. */
3499 max_cost = mul_widen_cost (speed, mode);
3500 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3501 max_cost))
3502 {
3503 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3504 return expand_mult_const (mode, op0, coeff, target,
3505 &algorithm, variant);
3506 }
3507 }
3508 return expand_binop (mode, this_optab, op0, op1, target,
3509 unsignedp, OPTAB_LIB_WIDEN);
3510}
3511
3512/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3513 replace division by D, and put the least significant N bits of the result
3514 in *MULTIPLIER_PTR and return the most significant bit.
3515
3516 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3517 needed precision is in PRECISION (should be <= N).
3518
3519 PRECISION should be as small as possible so this function can choose
3520 multiplier more freely.
3521
3522 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3523 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3524
3525 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3526 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3527
3528unsigned HOST_WIDE_INT
3529choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3530 unsigned HOST_WIDE_INT *multiplier_ptr,
3531 int *post_shift_ptr, int *lgup_ptr)
3532{
3533 int lgup, post_shift;
3534 int pow, pow2;
3535
3536 /* lgup = ceil(log2(divisor)); */
3537 lgup = ceil_log2 (d);
3538
3539 gcc_assert (lgup <= n);
3540
3541 pow = n + lgup;
3542 pow2 = n + lgup - precision;
3543
3544 /* mlow = 2^(N + lgup)/d */
3545 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3546 wide_int mlow = wi::udiv_trunc (val, d);
3547
3548 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3549 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3550 wide_int mhigh = wi::udiv_trunc (val, d);
3551
3552 /* If precision == N, then mlow, mhigh exceed 2^N
3553 (but they do not exceed 2^(N+1)). */
3554
3555 /* Reduce to lowest terms. */
3556 for (post_shift = lgup; post_shift > 0; post_shift--)
3557 {
3558 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3559 HOST_BITS_PER_WIDE_INT);
3560 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3561 HOST_BITS_PER_WIDE_INT);
3562 if (ml_lo >= mh_lo)
3563 break;
3564
3565 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3566 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3567 }
3568
3569 *post_shift_ptr = post_shift;
3570 *lgup_ptr = lgup;
3571 if (n < HOST_BITS_PER_WIDE_INT)
3572 {
3573 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3574 *multiplier_ptr = mhigh.to_uhwi () & mask;
3575 return mhigh.to_uhwi () >= mask;
3576 }
3577 else
3578 {
3579 *multiplier_ptr = mhigh.to_uhwi ();
3580 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3581 }
3582}
3583
3584/* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3585 congruent to 1 (mod 2**N). */
3586
3587static unsigned HOST_WIDE_INT
3588invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3589{
3590 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3591
3592 /* The algorithm notes that the choice y = x satisfies
3593 x*y == 1 mod 2^3, since x is assumed odd.
3594 Each iteration doubles the number of bits of significance in y. */
3595
3596 unsigned HOST_WIDE_INT mask;
3597 unsigned HOST_WIDE_INT y = x;
3598 int nbit = 3;
3599
3600 mask = (n == HOST_BITS_PER_WIDE_INT
3601 ? HOST_WIDE_INT_M1U
3602 : (HOST_WIDE_INT_1U << n) - 1);
3603
3604 while (nbit < n)
3605 {
3606 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3607 nbit *= 2;
3608 }
3609 return y;
3610}
3611
3612/* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3613 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3614 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3615 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3616 become signed.
3617
3618 The result is put in TARGET if that is convenient.
3619
3620 MODE is the mode of operation. */
3621
3622rtx
3623expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3624 rtx op1, rtx target, int unsignedp)
3625{
3626 rtx tem;
3627 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3628
3629 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3630 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3631 tem = expand_and (mode, tem, op1, NULL_RTX);
3632 adj_operand
3633 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3634 adj_operand);
3635
3636 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3637 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3638 tem = expand_and (mode, tem, op0, NULL_RTX);
3639 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3640 target);
3641
3642 return target;
3643}
3644
3645/* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3646
3647static rtx
3648extract_high_half (scalar_int_mode mode, rtx op)
3649{
3650 if (mode == word_mode)
3651 return gen_highpart (mode, op);
3652
3653 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3654
3655 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3656 GET_MODE_BITSIZE (mode), 0, 1);
3657 return convert_modes (mode, wider_mode, op, 0);
3658}
3659
/* Like expmed_mult_highpart, but only consider using a multiplication
   optab.  OP1 is an rtx for the constant operand.  Tries progressively
   more expensive strategies, each gated on its estimated cost fitting
   within MAX_COST; returns 0 if none applies.  */

static rtx
expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
			    rtx target, int unsignedp, int max_cost)
{
  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
  optab moptab;
  rtx tem;
  int size;
  bool speed = optimize_insn_for_speed_p ();

  scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();

  size = GET_MODE_BITSIZE (mode);

  /* Firstly, try using a multiplication insn that only generates the needed
     high part of the product, and in the sign flavor of unsignedp.  */
  if (mul_highpart_cost (speed, mode) < max_cost)
    {
      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
			  unsignedp, OPTAB_DIRECT);
      if (tem)
	return tem;
    }

  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
     Need to adjust the result after the multiplication.  The budget
     includes the two sign shifts and up to four adds of the fixup.  */
  if (size - 1 < BITS_PER_WORD
      && (mul_highpart_cost (speed, mode)
	  + 2 * shift_cost (speed, mode, size-1)
	  + 4 * add_cost (speed, mode) < max_cost))
    {
      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
			  unsignedp, OPTAB_DIRECT);
      if (tem)
	/* We used the wrong signedness.  Adjust the result.  */
	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
					    tem, unsignedp);
    }

  /* Try widening multiplication.  */
  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
  if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && mul_widen_cost (speed, wider_mode) < max_cost)
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
			  unsignedp, OPTAB_WIDEN);
      if (tem)
	return extract_high_half (mode, tem);
    }

  /* Try widening the mode and perform a non-widening multiplication.  */
  if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
	  < max_cost))
    {
      rtx_insn *insns;
      rtx wop0, wop1;

      /* We need to widen the operands, for example to ensure the
	 constant multiplier is correctly sign or zero extended.
	 Use a sequence to clean-up any instructions emitted by
	 the conversions if things don't work out.  */
      start_sequence ();
      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
			  unsignedp, OPTAB_WIDEN);
      insns = get_insns ();
      end_sequence ();

      if (tem)
	{
	  emit_insn (insns);
	  return extract_high_half (mode, tem);
	}
    }

  /* Try widening multiplication of opposite signedness, and adjust.  */
  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
  if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_widen_cost (speed, wider_mode)
	  + 2 * shift_cost (speed, mode, size-1)
	  + 4 * add_cost (speed, mode) < max_cost))
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
      if (tem != 0)
	{
	  tem = extract_high_half (mode, tem);
	  /* We used the wrong signedness.  Adjust the result.  */
	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
					      target, unsignedp);
	}
    }

  /* No strategy fit within MAX_COST.  */
  return 0;
}
3764
3765/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3766 putting the high half of the result in TARGET if that is convenient,
   and return where the result is.  If the operation cannot be performed,
3768 0 is returned.
3769
3770 MODE is the mode of operation and result.
3771
3772 UNSIGNEDP nonzero means unsigned multiply.
3773
3774 MAX_COST is the total allowed cost for the expanded RTL. */
3775
3776static rtx
3777expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3778 rtx target, int unsignedp, int max_cost)
3779{
3780 unsigned HOST_WIDE_INT cnst1;
3781 int extra_cost;
3782 bool sign_adjust = false;
3783 enum mult_variant variant;
3784 struct algorithm alg;
3785 rtx tem;
3786 bool speed = optimize_insn_for_speed_p ();
3787
  /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
3789 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3790
3791 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3792
3793 /* We can't optimize modes wider than BITS_PER_WORD.
3794 ??? We might be able to perform double-word arithmetic if
3795 mode == word_mode, however all the cost calculations in
3796 synth_mult etc. assume single-word operations. */
3797 scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3798 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3799 return expmed_mult_highpart_optab (mode, op0, op1, target,
3800<