/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987-2024 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Work around tree-optimization/91825.  */
#pragma GCC diagnostic warning "-Wmaybe-uninitialized"

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "predict.h"
#include "memmodel.h"
#include "tm_p.h"
#include "optabs.h"
#include "expmed.h"
#include "regs.h"
#include "emit-rtl.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "dojump.h"
#include "explow.h"
#include "expr.h"
#include "langhooks.h"
#include "tree-vector-builder.h"
#include "recog.h"

struct target_expmed default_target_expmed;
#if SWITCHABLE_TARGET
struct target_expmed *this_target_expmed = &default_target_expmed;
#endif

static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
				      unsigned HOST_WIDE_INT,
				      unsigned HOST_WIDE_INT,
				      poly_uint64, poly_uint64,
				      machine_mode, rtx, bool, bool);
static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   poly_uint64, poly_uint64,
				   rtx, scalar_int_mode, bool);
static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
				     unsigned HOST_WIDE_INT,
				     unsigned HOST_WIDE_INT,
				     rtx, scalar_int_mode, bool);
static void store_split_bit_field (rtx, opt_scalar_int_mode,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   poly_uint64, poly_uint64,
				   rtx, scalar_int_mode, bool);
static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
				       unsigned HOST_WIDE_INT,
				       unsigned HOST_WIDE_INT, int, rtx,
				       machine_mode, machine_mode, bool, bool);
static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
				    unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
				      unsigned HOST_WIDE_INT,
				      unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
				    unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT, int, bool);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode,
			     rtx_code_label *);
static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);

/* Return a constant integer mask value of mode MODE with BITSIZE ones
   followed by BITPOS zeros, or the complement of that if COMPLEMENT.
   The mask is truncated if necessary to the width of mode MODE.  The
   mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */

static inline rtx
mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
{
  return immed_wide_int_const
    (wi::shifted_mask (bitpos, bitsize, complement,
		       GET_MODE_PRECISION (mode)), mode);
}
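
/* For instance, in SImode, BITPOS 4 and BITSIZE 8 give the mask 0xff0
   (ones in bits 4..11); with COMPLEMENT the result is 0xfffff00f.  */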

/* Test whether a value is zero or a power of two.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
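
/* For example, x == 8 (0b1000) gives 0b1000 & 0b0111 == 0, so the test
   succeeds, while x == 6 (0b0110) gives 0b0110 & 0b0101 == 0b0100 != 0,
   so it fails.  x == 0 also succeeds, hence the _OR_ZERO in the name.  */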

struct init_expmed_rtl
{
  rtx reg;
  rtx plus;
  rtx neg;
  rtx mult;
  rtx sdiv;
  rtx udiv;
  rtx sdiv_32;
  rtx smod_32;
  rtx wide_mult;
  rtx wide_lshr;
  rtx wide_trunc;
  rtx shift;
  rtx shift_mult;
  rtx shift_add;
  rtx shift_sub0;
  rtx shift_sub1;
  rtx zext;
  rtx trunc;

  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
};

static void
init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
		      scalar_int_mode from_mode, bool speed)
{
  int to_size, from_size;
  rtx which;

  to_size = GET_MODE_PRECISION (to_mode);
  from_size = GET_MODE_PRECISION (from_mode);

  /* Most partial integers have a precision less than that of the
     "full" integer they require for storage.  If one does not,
     reduce its bit size by one here for comparison purposes.  */
  if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
      && pow2p_hwi (to_size))
    to_size --;
  if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
      && pow2p_hwi (from_size))
    from_size --;

  /* Assume cost of zero-extend and sign-extend is the same.  */
  which = (to_size < from_size ? all->trunc : all->zext);

  PUT_MODE (all->reg, from_mode);
  set_convert_cost (to_mode, from_mode, speed,
		    set_src_cost (which, to_mode, speed));
  /* Restore all->reg's mode.  */
  PUT_MODE (all->reg, to_mode);
}

static void
init_expmed_one_mode (struct init_expmed_rtl *all,
		      machine_mode mode, int speed)
{
  int m, n, mode_bitsize;
  machine_mode mode_from;

  mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

  PUT_MODE (all->reg, mode);
  PUT_MODE (all->plus, mode);
  PUT_MODE (all->neg, mode);
  PUT_MODE (all->mult, mode);
  PUT_MODE (all->sdiv, mode);
  PUT_MODE (all->udiv, mode);
  PUT_MODE (all->sdiv_32, mode);
  PUT_MODE (all->smod_32, mode);
  PUT_MODE (all->wide_trunc, mode);
  PUT_MODE (all->shift, mode);
  PUT_MODE (all->shift_mult, mode);
  PUT_MODE (all->shift_add, mode);
  PUT_MODE (all->shift_sub0, mode);
  PUT_MODE (all->shift_sub1, mode);
  PUT_MODE (all->zext, mode);
  PUT_MODE (all->trunc, mode);

  set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
  set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
  set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
  set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
  set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));

  set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
				     <= 2 * add_cost (speed, mode)));
  set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
				     <= 4 * add_cost (speed, mode)));

  set_shift_cost (speed, mode, 0, 0);
  {
    int cost = add_cost (speed, mode);
    set_shiftadd_cost (speed, mode, 0, cost);
    set_shiftsub0_cost (speed, mode, 0, cost);
    set_shiftsub1_cost (speed, mode, 0, cost);
  }

  n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
  for (m = 1; m < n; m++)
    {
      XEXP (all->shift, 1) = all->cint[m];
      XEXP (all->shift_mult, 1) = all->pow2[m];

      set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
      set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
						       speed));
      set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
							speed));
      set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
							speed));
    }

  scalar_int_mode int_mode_to;
  if (is_a <scalar_int_mode> (mode, &int_mode_to))
    {
      for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
	   mode_from = (machine_mode)(mode_from + 1))
	init_expmed_one_conv (all, int_mode_to,
			      as_a <scalar_int_mode> (mode_from), speed);

      scalar_int_mode wider_mode;
      if (GET_MODE_CLASS (int_mode_to) == MODE_INT
	  && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
	{
	  PUT_MODE (all->reg, mode);
	  PUT_MODE (all->zext, wider_mode);
	  PUT_MODE (all->wide_mult, wider_mode);
	  PUT_MODE (all->wide_lshr, wider_mode);
	  XEXP (all->wide_lshr, 1)
	    = gen_int_shift_amount (wider_mode, mode_bitsize);

	  set_mul_widen_cost (speed, wider_mode,
			      set_src_cost (all->wide_mult, wider_mode,
					    speed));
	  set_mul_highpart_cost (speed, int_mode_to,
				 set_src_cost (all->wide_trunc,
					       int_mode_to, speed));
	}
    }
}

void
init_expmed (void)
{
  struct init_expmed_rtl all;
  machine_mode mode = QImode;
  int m, speed;

  memset (&all, 0, sizeof all);
  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
      all.cint[m] = GEN_INT (m);
    }

  /* Avoid using hard regs in ways which may be unsupported.  */
  all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
  all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
  all.neg = gen_rtx_NEG (mode, all.reg);
  all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
  all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
  all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
  all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
  all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
  all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
  all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
  all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
  all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
  all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
  all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
  all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
  all.trunc = gen_rtx_TRUNCATE (mode, all.reg);

  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));

      for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
	   mode = (machine_mode)(mode + 1))
	init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_PARTIAL_INT != VOIDmode)
	for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
	     mode = (machine_mode)(mode + 1))
	  init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_VECTOR_INT != VOIDmode)
	for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
	     mode = (machine_mode)(mode + 1))
	  init_expmed_one_mode (&all, mode, speed);
    }

  if (alg_hash_used_p ())
    {
      struct alg_hash_entry *p = alg_hash_entry_ptr (0);
      memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
    }
  else
    set_alg_hash_used_p (true);
  default_rtl_profile ();

  ggc_free (all.trunc);
  ggc_free (all.shift_sub1);
  ggc_free (all.shift_sub0);
  ggc_free (all.shift_add);
  ggc_free (all.shift_mult);
  ggc_free (all.shift);
  ggc_free (all.wide_trunc);
  ggc_free (all.wide_lshr);
  ggc_free (all.wide_mult);
  ggc_free (all.zext);
  ggc_free (all.smod_32);
  ggc_free (all.sdiv_32);
  ggc_free (all.udiv);
  ggc_free (all.sdiv);
  ggc_free (all.mult);
  ggc_free (all.neg);
  ggc_free (all.plus);
  ggc_free (all.reg);
}

/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (machine_mode mode, rtx x)
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}

/* Whether reverse storage order is supported on the target.  */
static int reverse_storage_order_supported = -1;

/* Check whether reverse storage order is supported on the target.  */

static void
check_reverse_storage_order_support (void)
{
  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    {
      reverse_storage_order_supported = 0;
      sorry ("reverse scalar storage order");
    }
  else
    reverse_storage_order_supported = 1;
}

/* Whether reverse FP storage order is supported on the target.  */
static int reverse_float_storage_order_supported = -1;

/* Check whether reverse FP storage order is supported on the target.  */

static void
check_reverse_float_storage_order_support (void)
{
  if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    {
      reverse_float_storage_order_supported = 0;
      sorry ("reverse floating-point scalar storage order");
    }
  else
    reverse_float_storage_order_supported = 1;
}

/* Return an rtx representing the value of X with reverse storage order.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
flip_storage_order (machine_mode mode, rtx x)
{
  scalar_int_mode int_mode;
  rtx result;

  if (mode == QImode)
    return x;

  if (COMPLEX_MODE_P (mode))
    {
      rtx real = read_complex_part (x, false);
      rtx imag = read_complex_part (x, true);

      real = flip_storage_order (GET_MODE_INNER (mode), real);
      imag = flip_storage_order (GET_MODE_INNER (mode), imag);

      return gen_rtx_CONCAT (mode, real, imag);
    }

  if (UNLIKELY (reverse_storage_order_supported < 0))
    check_reverse_storage_order_support ();

  if (!is_a <scalar_int_mode> (mode, &int_mode))
    {
      if (FLOAT_MODE_P (mode)
	  && UNLIKELY (reverse_float_storage_order_supported < 0))
	check_reverse_float_storage_order_support ();

      if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode)
	  || !targetm.scalar_mode_supported_p (int_mode))
	{
	  sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
	  return x;
	}
      x = gen_lowpart (int_mode, x);
    }

  result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
  if (result == 0)
    result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);

  if (int_mode != mode)
    result = gen_lowpart (mode, result);

  return result;
}
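
/* For example, flipping the storage order of the SImode constant
   0x12345678 yields 0x78563412: the bytes are swapped end-to-end,
   which is why non-integral modes are first punned to a same-sized
   integer mode and byte-swapped there.  */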

/* If MODE is set, adjust bitfield memory MEM so that it points to the
   first unit of mode MODE that contains a bitfield of size BITSIZE at
   bit position BITNUM.  If MODE is not set, return a BLKmode reference
   to every byte in the bitfield.  Set *NEW_BITNUM to the bit position
   of the field within the new memory.  */

static rtx
narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
		      unsigned HOST_WIDE_INT bitsize,
		      unsigned HOST_WIDE_INT bitnum,
		      unsigned HOST_WIDE_INT *new_bitnum)
{
  scalar_int_mode imode;
  if (mode.exists (&imode))
    {
      unsigned int unit = GET_MODE_BITSIZE (imode);
      *new_bitnum = bitnum % unit;
      HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
      return adjust_bitfield_address (mem, imode, offset);
    }
  else
    {
      *new_bitnum = bitnum % BITS_PER_UNIT;
      HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
      HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
			    / BITS_PER_UNIT);
      return adjust_bitfield_address_size (mem, BLKmode, offset, size);
    }
}
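
/* For example, narrowing to HImode (16 bits) with BITNUM 37 sets
   *NEW_BITNUM to 37 % 16 == 5 and advances the address by
   (37 - 5) / 8 == 4 bytes.  */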

/* The caller wants to perform insertion or extraction PATTERN on a
   bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
   BITREGION_START and BITREGION_END are as for store_bit_field
   and FIELDMODE is the natural mode of the field.

   Search for a mode that is compatible with the memory access
   restrictions and (where applicable) with a register insertion or
   extraction.  Return the new memory on success, storing the adjusted
   bit position in *NEW_BITNUM.  Return null otherwise.  */

static rtx
adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
			      rtx op0, HOST_WIDE_INT bitsize,
			      HOST_WIDE_INT bitnum,
			      poly_uint64 bitregion_start,
			      poly_uint64 bitregion_end,
			      machine_mode fieldmode,
			      unsigned HOST_WIDE_INT *new_bitnum)
{
  bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
				bitregion_end, MEM_ALIGN (op0),
				MEM_VOLATILE_P (op0));
  scalar_int_mode best_mode;
  if (iter.next_mode (&best_mode))
    {
      /* We can use a memory in BEST_MODE.  See whether this is true for
	 any wider modes.  All other things being equal, we prefer to
	 use the widest mode possible because it tends to expose more
	 CSE opportunities.  */
      if (!iter.prefer_smaller_modes ())
	{
	  /* Limit the search to the mode required by the corresponding
	     register insertion or extraction instruction, if any.  */
	  scalar_int_mode limit_mode = word_mode;
	  extraction_insn insn;
	  if (get_best_reg_extraction_insn (&insn, pattern,
					    GET_MODE_BITSIZE (best_mode),
					    fieldmode))
	    limit_mode = insn.field_mode;

	  scalar_int_mode wider_mode;
	  while (iter.next_mode (&wider_mode)
		 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
	    best_mode = wider_mode;
	}
      return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
				   new_bitnum);
    }
  return NULL_RTX;
}

/* Return true if a bitfield of size BITSIZE at bit number BITNUM within
   a structure of mode STRUCT_MODE represents a lowpart subreg.  The subreg
   offset is then BITNUM / BITS_PER_UNIT.  */

static bool
lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
		     machine_mode struct_mode)
{
  poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
  if (BYTES_BIG_ENDIAN)
    return (multiple_p (bitnum, BITS_PER_UNIT)
	    && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
		|| multiple_p (bitnum + bitsize,
			       regsize * BITS_PER_UNIT)));
  else
    return multiple_p (bitnum, regsize * BITS_PER_UNIT);
}
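
/* For instance, on a little-endian target whose registers hold 8 bytes,
   BITNUM 64 starts a lowpart subreg (it is a multiple of 64 bits),
   whereas BITNUM 32 does not.  */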

/* Return true if -fstrict-volatile-bitfields applies to an access of OP0
   containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
   Return false if the access would touch memory outside the range
   BITREGION_START to BITREGION_END for conformance to the C++ memory
   model.  */

static bool
strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
			    unsigned HOST_WIDE_INT bitnum,
			    scalar_int_mode fieldmode,
			    poly_uint64 bitregion_start,
			    poly_uint64 bitregion_end)
{
  unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);

  /* -fstrict-volatile-bitfields must be enabled and we must have a
     volatile MEM.  */
  if (!MEM_P (op0)
      || !MEM_VOLATILE_P (op0)
      || flag_strict_volatile_bitfields <= 0)
    return false;

  /* The bit size must not be larger than the field mode, and
     the field mode must not be larger than a word.  */
  if (bitsize > modesize || modesize > BITS_PER_WORD)
    return false;

  /* Check for cases of unaligned fields that must be split.  */
  if (bitnum % modesize + bitsize > modesize)
    return false;

  /* The memory must be sufficiently aligned for a MODESIZE access.
     This condition guarantees that the memory access will not
     touch anything after the end of the structure.  */
  if (MEM_ALIGN (op0) < modesize)
    return false;

  /* Check for cases where the C++ memory model applies.  */
  if (maybe_ne (bitregion_end, 0U)
      && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
	  || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
		       bitregion_end)))
    return false;

  return true;
}

/* Return true if OP0 is a memory and if a bitfield of size BITSIZE at
   bit number BITNUM can be treated as a simple value of mode MODE.
   Store the byte offset in *BYTENUM if so.  */

static bool
simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
		       machine_mode mode, poly_uint64 *bytenum)
{
  return (MEM_P (op0)
	  && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
	  && known_eq (bitsize, GET_MODE_BITSIZE (mode))
	  && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
	      || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
		  && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
}
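
/* For example, a 32-bit field starting at bit 64 of a sufficiently
   aligned MEM is read or written as a plain SImode access at byte
   offset 8.  */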

/* Try to use instruction INSV to store VALUE into a field of OP0.
   If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
   BLKmode MEM.  VALUE_MODE is the mode of VALUE.  BITSIZE and BITNUM
   are as for store_bit_field.  */

static bool
store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
			    opt_scalar_int_mode op0_mode,
			    unsigned HOST_WIDE_INT bitsize,
			    unsigned HOST_WIDE_INT bitnum,
			    rtx value, scalar_int_mode value_mode)
{
  class expand_operand ops[4];
  rtx value1;
  rtx xop0 = op0;
  rtx_insn *last = get_last_insn ();
  bool copy_back = false;

  scalar_int_mode op_mode = insv->field_mode;
  unsigned int unit = GET_MODE_BITSIZE (op_mode);
  if (bitsize == 0 || bitsize > unit)
    return false;

  if (MEM_P (xop0))
    /* Get a reference to the first byte of the field.  */
    xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
				 &bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in OP_MODE.  */
      if (BYTES_BIG_ENDIAN)
	bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());

      /* If xop0 is a register, we need it in OP_MODE
	 to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
	{
	  /* If such a SUBREG can't be created, give up.  */
	  if (!validate_subreg (op_mode, GET_MODE (SUBREG_REG (xop0)),
				SUBREG_REG (xop0), SUBREG_BYTE (xop0)))
	    return false;
	  /* We can't just change the mode, because this might clobber op0,
	     and we will need the original value of op0 if insv fails.  */
	  xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0),
				 SUBREG_BYTE (xop0));
	}
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
	xop0 = gen_lowpart_SUBREG (op_mode, xop0);
    }

  /* If the destination is a paradoxical subreg such that we need a
     truncate to the inner mode, perform the insertion on a temporary and
     truncate the result to the original destination.  Note that we can't
     just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
     X) 0)) is (reg:N X).  */
  if (GET_CODE (xop0) == SUBREG
      && REG_P (SUBREG_REG (xop0))
      && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
					 op_mode))
    {
      rtx tem = gen_reg_rtx (op_mode);
      emit_move_insn (tem, xop0);
      xop0 = tem;
      copy_back = true;
    }

  /* There is a similar overflow check at the start of store_bit_field_1,
     but that only handles the case where the field lies completely
     outside the register.  The field can also lie partially in the
     register, in which case we need to adjust BITSIZE for the partial
     overflow.  Without this fix, pr48335-2.c would be broken on
     big-endian targets that have bit-insert instructions, such as arm
     and aarch64.  */
  if (bitsize + bitnum > unit && bitnum < unit)
    {
      warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
	       "destination object, data truncated into %wu-bit",
	       bitsize, unit - bitnum);
      bitsize = unit - bitnum;
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are inserting into.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;
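  /* For example, with a 32-bit UNIT, an 8-bit field at bit 4 ends up at
     bit 20 when counted from the other end: 32 - 8 - 4 == 20.  */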

  /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
  value1 = value;
  if (value_mode != op_mode)
    {
      if (GET_MODE_BITSIZE (value_mode) >= bitsize)
	{
	  rtx tmp;
	  /* Optimization: Don't bother really extending VALUE
	     if it has all the bits we will actually use.  However,
	     if we must narrow it, be sure we do it correctly.  */

	  if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
	    {
	      tmp = simplify_subreg (op_mode, value1, value_mode, 0);
	      if (! tmp)
		tmp = simplify_gen_subreg (op_mode,
					   force_reg (value_mode, value1),
					   value_mode, 0);
	    }
	  else
	    {
	      tmp = gen_lowpart_if_possible (op_mode, value1);
	      if (! tmp)
		tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
	    }
	  value1 = tmp;
	}
      else if (CONST_INT_P (value))
	value1 = gen_int_mode (INTVAL (value), op_mode);
      else
	/* Parse phase is supposed to make VALUE's data type
	   match that of the component reference, which is a type
	   at least as wide as the field; so VALUE should have
	   a mode that corresponds to that type.  */
	gcc_assert (CONSTANT_P (value));
    }

  create_fixed_operand (&ops[0], xop0);
  create_integer_operand (&ops[1], bitsize);
  create_integer_operand (&ops[2], bitnum);
  create_input_operand (&ops[3], value1, op_mode);
  if (maybe_expand_insn (insv->icode, 4, ops))
    {
      if (copy_back)
	convert_move (op0, xop0, true);
      return true;
    }
  delete_insns_since (last);
  return false;
}

/* A subroutine of store_bit_field, with the same arguments.  Return true
   if the operation could be implemented.

   If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
   no other way of implementing the operation.  If FALLBACK_P is false,
   return false instead.

   If UNDEFINED_P is true, then STR_RTX is undefined and may be set
   using a subreg instead.  */

static bool
store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
		   poly_uint64 bitregion_start, poly_uint64 bitregion_end,
		   machine_mode fieldmode,
		   rtx value, bool reverse, bool fallback_p, bool undefined_p)
{
  rtx op0 = str_rtx;

  while (GET_CODE (op0) == SUBREG)
    {
      bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* No action is needed if the target is a register and if the field
     lies completely outside that register.  This can occur if the source
     code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
    return true;

  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
  machine_mode outermode = GET_MODE (op0);
  scalar_mode innermode = GET_MODE_INNER (outermode);
  poly_uint64 pos;
  if (VECTOR_MODE_P (outermode)
      && !MEM_P (op0)
      && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
      && fieldmode == innermode
      && known_eq (bitsize, GET_MODE_PRECISION (innermode))
      && multiple_p (bitnum, GET_MODE_PRECISION (innermode), &pos))
    {
      class expand_operand ops[3];
      enum insn_code icode = optab_handler (vec_set_optab, outermode);

      create_fixed_operand (&ops[0], op0);
      create_input_operand (&ops[1], value, innermode);
      create_integer_operand (&ops[2], pos);
      if (maybe_expand_insn (icode, 3, ops))
	return true;
    }
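
  /* For instance, storing a 32-bit value at bit 64 of a V4SImode
     register is element insertion at position 2 (64 / 32).  */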

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.  */
  if (!MEM_P (op0)
      && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
    {
      /* Use the subreg machinery either to narrow OP0 to the required
	 words or to cope with mode punning between equal-sized modes.
	 In the latter case, use subreg on the rhs side, not lhs.  */
      rtx sub;
      poly_uint64 bytenum;
      poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
      if (known_eq (bitnum, 0U)
	  && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
	{
	  sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
	  if (sub)
	    {
	      if (reverse)
		sub = flip_storage_order (GET_MODE (op0), sub);
	      emit_move_insn (op0, sub);
	      return true;
	    }
	}
      else if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
	       && (undefined_p
		   || (multiple_p (bitnum, regsize * BITS_PER_UNIT)
		       && multiple_p (bitsize, regsize * BITS_PER_UNIT)))
	       && known_ge (GET_MODE_BITSIZE (GET_MODE (op0)), bitsize))
	{
	  sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0), bytenum);
	  if (sub)
	    {
	      if (reverse)
		value = flip_storage_order (fieldmode, value);
	      emit_move_insn (sub, value);
	      return true;
	    }
	}
    }

  /* If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */
  poly_uint64 bytenum;
  if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
    {
      op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
      if (reverse)
	value = flip_storage_order (fieldmode, value);
      emit_move_insn (op0, value);
      return true;
    }

  /* It's possible we'll need to handle other cases here for
     polynomial bitnum and bitsize.  */

  /* From here on we need to be looking at a fixed-size insertion.  */
  unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
  unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
  scalar_int_mode imode;
  if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
    {
      if (MEM_P (op0))
	op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
					    0, MEM_SIZE (op0));
      else if (!op0_mode.exists ())
	{
	  if (ibitnum == 0
	      && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
	      && MEM_P (value)
	      && !reverse)
	    {
	      value = adjust_address (value, GET_MODE (op0), 0);
	      emit_move_insn (op0, value);
	      return true;
	    }
	  if (!fallback_p)
	    return false;
	  rtx temp = assign_stack_temp (GET_MODE (op0),
					GET_MODE_SIZE (GET_MODE (op0)));
	  emit_move_insn (temp, op0);
	  store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
			     reverse, fallback_p, undefined_p);
	  emit_move_insn (op0, temp);
	  return true;
	}
      else
	op0 = gen_lowpart (op0_mode.require (), op0);
    }

  return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
				   bitregion_start, bitregion_end,
				   fieldmode, value, reverse, fallback_p);
}

/* Subroutine of store_bit_field_1, with the same arguments, except
   that BITSIZE and BITNUM are constant.  Handle cases specific to
   integral modes.  If OP0_MODE is defined, it is the mode of OP0,
   otherwise OP0 is a BLKmode MEM.  */

static bool
store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
			  unsigned HOST_WIDE_INT bitsize,
			  unsigned HOST_WIDE_INT bitnum,
			  poly_uint64 bitregion_start,
			  poly_uint64 bitregion_end,
			  machine_mode fieldmode,
			  rtx value, bool reverse, bool fallback_p)
{
  /* Storing an lsb-aligned field in a register
     can be done with a movstrict instruction.  */

  if (!MEM_P (op0)
      && !reverse
      && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
      && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
      && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
    {
      class expand_operand ops[2];
      enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
      rtx arg0 = op0;
      unsigned HOST_WIDE_INT subreg_off;

      if (GET_CODE (arg0) == SUBREG)
	{
	  /* Else we've got some float mode source being extracted into
	     a different float mode destination -- this combination of
	     subregs results in Severe Tire Damage.  */
	  gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
		      || GET_MODE_CLASS (fieldmode) == MODE_INT
		      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
	  arg0 = SUBREG_REG (arg0);
	}

      subreg_off = bitnum / BITS_PER_UNIT;
      if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off)
	  /* STRICT_LOW_PART must have a non-paradoxical subreg as
	     operand.  */
	  && !paradoxical_subreg_p (fieldmode, GET_MODE (arg0)))
	{
	  arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);

	  create_fixed_operand (&ops[0], arg0);
	  /* Shrink the source operand to FIELDMODE.  */
	  create_convert_operand_to (&ops[1], value, fieldmode, false);
	  if (maybe_expand_insn (icode, 2, ops))
	    return true;
	}
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.
	 However, only do that if the value is not BLKmode.  */

      const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      rtx_insn *last;

      /* This is the mode we must force value to, so that there will be enough
	 subwords to extract.  Note that fieldmode will often (always?) be
	 VOIDmode, because that is what store_field uses to indicate that this
	 is a bit field, but passing VOIDmode to operand_subword_force
	 is not allowed.

	 The mode must be fixed-size, since insertions into variable-sized
	 objects are meant to be handled before calling this function.  */
      fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
      if (value_mode == VOIDmode)
	value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);

      last = get_last_insn ();
      for (int i = 0; i < nwords; i++)
	{
	  /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
	     except maybe for the last iteration.  */
	  const unsigned HOST_WIDE_INT new_bitsize
	    = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
	  /* Bit offset from the starting bit number in the target.  */
	  const unsigned int bit_offset
	    = backwards ^ reverse
	      ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0)
	      : i * BITS_PER_WORD;
	  /* Starting word number in the value.  */
	  const unsigned int wordnum
	    = backwards
	      ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1)
	      : i;
	  /* The chunk of the value in word_mode.  We use bit-field extraction
	     in BLKmode to handle unaligned memory references and to shift the
	     last chunk right on big-endian machines if need be.  */
	  rtx value_word
	    = fieldmode == BLKmode
	      ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD,
				   1, NULL_RTX, word_mode, word_mode, false,
				   NULL)
	      : operand_subword_force (value, wordnum, value_mode);

	  if (!store_bit_field_1 (op0, new_bitsize,
				  bitnum + bit_offset,
				  bitregion_start, bitregion_end,
				  word_mode,
				  value_word, reverse, fallback_p, false))
	    {
	      delete_insns_since (last);
	      return false;
	    }
	}
      return true;
    }
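
  /* For example, storing a 96-bit field on a 32-bit little-endian target
     takes three word-sized stores: word 0 of the value at bit_offset 0,
     word 1 at bit_offset 32 and word 2 at bit_offset 64; a
     WORDS_BIG_ENDIAN target walks the same offsets in reverse.  */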

  /* If VALUE has a floating-point or complex mode, access it as an
     integer of the corresponding size.  This can occur on a machine
     with 64 bit registers that uses SFmode for float.  It can also
     occur for unaligned float or complex fields.  */
  rtx orig_value = value;
  scalar_int_mode value_mode;
  if (GET_MODE (value) == VOIDmode)
    /* By this point we've dealt with values that are bigger than a word,
       so word_mode is a conservatively correct choice.  */
    value_mode = word_mode;
  else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
    {
      value_mode = int_mode_for_mode (GET_MODE (value)).require ();
      value = gen_reg_rtx (value_mode);
      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
    }

  /* If OP0 is a multi-word register, narrow it to the affected word.
     If the region spans two words, defer to store_split_bit_field.
     Don't do this if op0 is a single hard register wider than word
     such as a float or vector register.  */
  if (!MEM_P (op0)
      && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
      && (!REG_P (op0)
	  || !HARD_REGISTER_P (op0)
	  || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
    {
      if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
	{
	  if (!fallback_p)
	    return false;

	  store_split_bit_field (op0, op0_mode, bitsize, bitnum,
				 bitregion_start, bitregion_end,
				 value, value_mode, reverse);
	  return true;
	}
      op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
      gcc_assert (op0);
      op0_mode = word_mode;
      bitnum %= BITS_PER_WORD;
    }

  /* From here on we can assume that the field to be stored in fits
     within a word.  If the destination is a register, it too fits
     in a word.  */

  extraction_insn insv;
  if (!MEM_P (op0)
      && !reverse
      && get_best_reg_extraction_insn (&insv, EP_insv,
				       GET_MODE_BITSIZE (op0_mode.require ()),
				       fieldmode)
      && store_bit_field_using_insv (&insv, op0, op0_mode,
				     bitsize, bitnum, value, value_mode))
    return true;

  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (MEM_P (op0) && !reverse)
    {
      if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
					fieldmode)
	  && store_bit_field_using_insv (&insv, op0, op0_mode,
					 bitsize, bitnum, value, value_mode))
	return true;

      rtx_insn *last = get_last_insn ();

      /* Try loading part of OP0 into a register, inserting the bitfield
	 into that, and then copying the result back to OP0.  */
      unsigned HOST_WIDE_INT bitpos;
      rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
					       bitregion_start, bitregion_end,
					       fieldmode, &bitpos);
      if (xop0)
	{
	  rtx tempreg = copy_to_reg (xop0);
	  if (store_bit_field_1 (tempreg, bitsize, bitpos,
				 bitregion_start, bitregion_end,
				 fieldmode, orig_value, reverse, false, false))
	    {
	      emit_move_insn (xop0, tempreg);
	      return true;
	    }
	  delete_insns_since (last);
	}
    }

  if (!fallback_p)
    return false;

  store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
			 bitregion_end, value, value_mode, reverse);
  return true;
}

/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.

   BITREGION_START is bitpos of the first bitfield in this region.
   BITREGION_END is the bitpos of the ending bitfield in this region.
   These two fields are 0, if the C++ memory model does not apply,
   or we are not interested in keeping track of bitfield regions.

   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.

   If REVERSE is true, the store is to be done in reverse order.

   If UNDEFINED_P is true then STR_RTX is currently undefined.  */

void
store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
		 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
		 machine_mode fieldmode,
		 rtx value, bool reverse, bool undefined_p)
{
  /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
  unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
  scalar_int_mode int_mode;
  if (bitsize.is_constant (&ibitsize)
      && bitnum.is_constant (&ibitnum)
      && is_a <scalar_int_mode> (fieldmode, &int_mode)
      && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
				     bitregion_start, bitregion_end))
    {
      /* Storing of a full word can be done with a simple store.
	 We know here that the field can be accessed with one single
	 instruction.  For targets that support unaligned memory,
	 an unaligned access may be necessary.  */
      if (ibitsize == GET_MODE_BITSIZE (int_mode))
	{
	  str_rtx = adjust_bitfield_address (str_rtx, int_mode,
					     ibitnum / BITS_PER_UNIT);
	  if (reverse)
	    value = flip_storage_order (int_mode, value);
	  gcc_assert (ibitnum % BITS_PER_UNIT == 0);
	  emit_move_insn (str_rtx, value);
	}
      else
	{
	  rtx temp;

	  str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
					  ibitnum, &ibitnum);
	  gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
	  temp = copy_to_reg (str_rtx);
	  if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
				  int_mode, value, reverse, true, undefined_p))
	    gcc_unreachable ();

	  emit_move_insn (str_rtx, temp);
	}

      return;
    }

  /* Under the C++11 memory model, we must not touch bits outside the
     bit region.  Adjust the address to start at the beginning of the
     bit region.  */
  if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
    {
      scalar_int_mode best_mode;
      machine_mode addr_mode = VOIDmode;

      poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
      bitnum -= bitregion_start;
      poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
      bitregion_end -= bitregion_start;
      bitregion_start = 0;
      if (bitsize.is_constant (&ibitsize)
	  && bitnum.is_constant (&ibitnum)
	  && get_best_mode (ibitsize, ibitnum,
			    bitregion_start, bitregion_end,
			    MEM_ALIGN (str_rtx), INT_MAX,
			    MEM_VOLATILE_P (str_rtx), &best_mode))
	addr_mode = best_mode;
      str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
					      offset, size);
    }

  if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
			  bitregion_start, bitregion_end,
			  fieldmode, value, reverse, true, undefined_p))
    gcc_unreachable ();
}

/* Use shifts and boolean operations to store VALUE into a bit field of
   width BITSIZE in OP0, starting at bit BITNUM.  If OP0_MODE is defined,
   it is the mode of OP0, otherwise OP0 is a BLKmode MEM.  VALUE_MODE is
   the mode of VALUE.

   If REVERSE is true, the store is to be done in reverse order.  */

static void
store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
		       unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitnum,
		       poly_uint64 bitregion_start, poly_uint64 bitregion_end,
		       rtx value, scalar_int_mode value_mode, bool reverse)
{
  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  scalar_int_mode best_mode;
  if (MEM_P (op0))
    {
      unsigned int max_bitsize = BITS_PER_WORD;
      scalar_int_mode imode;
      if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
	max_bitsize = GET_MODE_BITSIZE (imode);

      if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
			  MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
			  &best_mode))
	{
	  /* The only way this should occur is if the field spans word
	     boundaries.  */
	  store_split_bit_field (op0, op0_mode, bitsize, bitnum,
				 bitregion_start, bitregion_end,
				 value, value_mode, reverse);
	  return;
	}

      op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
    }
  else
    best_mode = op0_mode.require ();

  store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
			   value, value_mode, reverse);
}

/* Helper function for store_fixed_bit_field, stores
   the bit field always using MODE, which is the mode of OP0.  The other
   arguments are as for store_fixed_bit_field.  */

static void
store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
			 unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitnum,
			 rtx value, scalar_int_mode value_mode, bool reverse)
{
  rtx temp;
  int all_zero = 0;
  int all_one = 0;

  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
     for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */

  if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
    /* BITNUM is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;

  /* Now BITNUM is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (CONST_INT_P (value))
    {
      unsigned HOST_WIDE_INT v = UINTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
	v &= (HOST_WIDE_INT_1U << bitsize) - 1;

      if (v == 0)
	all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
		&& v == (HOST_WIDE_INT_1U << bitsize) - 1)
	       || (bitsize == HOST_BITS_PER_WIDE_INT
		   && v == HOST_WIDE_INT_M1U))
	all_one = 1;

      value = lshift_value (mode, v, bitnum);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
		      && bitnum + bitsize != GET_MODE_BITSIZE (mode));

      if (value_mode != mode)
	value = convert_to_mode (mode, value, 1);

      if (must_and)
	value = expand_binop (mode, and_optab, value,
			      mask_rtx (mode, 0, bitsize, 0),
			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitnum > 0)
	value = expand_shift (LSHIFT_EXPR, mode, value,
			      bitnum, NULL_RTX, 1);
    }

  if (reverse)
    value = flip_storage_order (mode, value);

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
      if (reverse)
	mask = flip_storage_order (mode, mask);
      temp = expand_binop (mode, and_optab, temp, mask,
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  if (op0 != temp)
    {
      op0 = copy_rtx (op0);
      emit_move_insn (op0, temp);
    }
}
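
/* For example, storing the constant 5 into a 3-bit field at bit 4 of a
   word computes temp = (op0 & ~(7 << 4)) | (5 << 4): the AND clears
   bits 4..6 and the IOR then deposits the shifted value.  */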
1348 | |
1349 | /* Store a bit field that is split across multiple accessible memory objects. |
1350 | |
1351 | OP0 is the REG, SUBREG or MEM rtx for the first of the objects. |
1352 | BITSIZE is the field width; BITPOS the position of its first bit |
1353 | (within the word). |
1354 | VALUE is the value to store, which has mode VALUE_MODE. |
1355 | If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is |
1356 | a BLKmode MEM. |
1357 | |
1358 | If REVERSE is true, the store is to be done in reverse order. |
1359 | |
1360 | This does not yet handle fields wider than BITS_PER_WORD. */ |
1361 | |
1362 | static void |
1363 | store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode, |
1364 | unsigned HOST_WIDE_INT bitsize, |
1365 | unsigned HOST_WIDE_INT bitpos, |
1366 | poly_uint64 bitregion_start, poly_uint64 bitregion_end, |
1367 | rtx value, scalar_int_mode value_mode, bool reverse) |
1368 | { |
1369 | unsigned int unit, total_bits, bitsdone = 0; |
1370 | |
1371 | /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that |
1372 | much at a time. */ |
1373 | if (REG_P (op0) || GET_CODE (op0) == SUBREG) |
1374 | unit = BITS_PER_WORD; |
1375 | else |
1376 | unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); |
1377 | |
1378 | /* If OP0 is a memory with a mode, then UNIT must not be larger than |
1379 | OP0's mode as well. Otherwise, store_fixed_bit_field will call us |
1380 | again, and we will mutually recurse forever. */ |
1381 | if (MEM_P (op0) && op0_mode.exists ()) |
1382 | unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ())); |
1383 | |
1384 | /* If VALUE is a constant other than a CONST_INT, get it into a register in |
1385 | WORD_MODE. If we can do this using gen_lowpart_common, do so. Note |
1386 | that VALUE might be a floating-point constant. */ |
1387 | if (CONSTANT_P (value) && !CONST_INT_P (value)) |
1388 | { |
1389 | rtx word = gen_lowpart_common (word_mode, value); |
1390 | |
1391 | if (word && (value != word)) |
1392 | value = word; |
1393 | else |
1394 | value = gen_lowpart_common (word_mode, force_reg (value_mode, value)); |
1395 | value_mode = word_mode; |
1396 | } |
1397 | |
1398 | total_bits = GET_MODE_BITSIZE (mode: value_mode); |
1399 | |
1400 | while (bitsdone < bitsize) |
1401 | { |
1402 | unsigned HOST_WIDE_INT thissize; |
1403 | unsigned HOST_WIDE_INT thispos; |
1404 | unsigned HOST_WIDE_INT offset; |
1405 | rtx part; |
1406 | |
1407 | offset = (bitpos + bitsdone) / unit; |
1408 | thispos = (bitpos + bitsdone) % unit; |
1409 | |
1410 | /* When region of bytes we can touch is restricted, decrease |
1411 | UNIT close to the end of the region as needed. If op0 is a REG |
1412 | or SUBREG of REG, don't do this, as there can't be data races |
1413 | on a register and we can expand shorter code in some cases. */ |
1414 | if (maybe_ne (a: bitregion_end, b: 0U) |
1415 | && unit > BITS_PER_UNIT |
1416 | && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1) |
1417 | && !REG_P (op0) |
1418 | && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0)))) |
1419 | { |
1420 | unit = unit / 2; |
1421 | continue; |
1422 | } |
1423 | |
1424 | /* THISSIZE must not overrun a word boundary. Otherwise, |
1425 | store_fixed_bit_field will call us again, and we will mutually |
1426 | recurse forever. */ |
1427 | thissize = MIN (bitsize - bitsdone, BITS_PER_WORD); |
1428 | thissize = MIN (thissize, unit - thispos); |
1429 | |
1430 | if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN) |
1431 | { |
1432 | /* Fetch successively less significant portions. */ |
1433 | if (CONST_INT_P (value)) |
1434 | part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value)) |
1435 | >> (bitsize - bitsdone - thissize)) |
1436 | & ((HOST_WIDE_INT_1 << thissize) - 1)); |
1437 | /* Likewise, but the source is little-endian. */ |
1438 | else if (reverse) |
1439 | part = extract_fixed_bit_field (word_mode, value, value_mode, |
1440 | thissize, |
1441 | bitsize - bitsdone - thissize, |
1442 | NULL_RTX, 1, false); |
1443 | else |
1444 | /* The args are chosen so that the last part includes the |
1445 | lsb. Give extract_bit_field the value it needs (with |
1446 | endianness compensation) to fetch the piece we want. */ |
1447 | part = extract_fixed_bit_field (word_mode, value, value_mode, |
1448 | thissize, |
1449 | total_bits - bitsize + bitsdone, |
1450 | NULL_RTX, 1, false); |
1451 | } |
1452 | else |
1453 | { |
1454 | /* Fetch successively more significant portions. */ |
1455 | if (CONST_INT_P (value)) |
1456 | part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value)) |
1457 | >> bitsdone) |
1458 | & ((HOST_WIDE_INT_1 << thissize) - 1)); |
1459 | /* Likewise, but the source is big-endian. */ |
1460 | else if (reverse) |
1461 | part = extract_fixed_bit_field (word_mode, value, value_mode, |
1462 | thissize, |
1463 | total_bits - bitsdone - thissize, |
1464 | NULL_RTX, 1, false); |
1465 | else |
1466 | part = extract_fixed_bit_field (word_mode, value, value_mode, |
1467 | thissize, bitsdone, NULL_RTX, |
1468 | 1, false); |
1469 | } |
1470 | |
1471 | /* If OP0 is a register, then handle OFFSET here. */ |
1472 | rtx op0_piece = op0; |
1473 | opt_scalar_int_mode op0_piece_mode = op0_mode; |
1474 | if (SUBREG_P (op0) || REG_P (op0)) |
1475 | { |
1476 | scalar_int_mode imode; |
	  if (op0_mode.exists (&imode)
	      && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1479 | { |
1480 | if (offset) |
1481 | op0_piece = const0_rtx; |
1482 | } |
1483 | else |
1484 | { |
1485 | op0_piece = operand_subword_force (op0, |
1486 | offset * unit / BITS_PER_WORD, |
1487 | GET_MODE (op0)); |
1488 | op0_piece_mode = word_mode; |
1489 | } |
1490 | offset &= BITS_PER_WORD / unit - 1; |
1491 | } |
1492 | |
1493 | /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx, |
1494 | it is just an out-of-bounds access. Ignore it. */ |
1495 | if (op0_piece != const0_rtx) |
	store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
			       offset * unit + thispos, bitregion_start,
			       bitregion_end, part, word_mode, reverse);
1499 | bitsdone += thissize; |
1500 | } |
1501 | } |
1502 | |
1503 | /* A subroutine of extract_bit_field_1 that converts return value X |
1504 | to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments |
1505 | to extract_bit_field. */ |
1506 | |
static rtx
convert_extracted_bit_field (rtx x, machine_mode mode,
			     machine_mode tmode, bool unsignedp)
1510 | { |
1511 | if (GET_MODE (x) == tmode || GET_MODE (x) == mode) |
1512 | return x; |
1513 | |
1514 | /* If the x mode is not a scalar integral, first convert to the |
1515 | integer mode of that size and then access it as a floating-point |
1516 | value via a SUBREG. */ |
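  /* E.g., for an SFmode result we assemble the bits in SImode and
     then view the SImode register as SFmode.  */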
1517 | if (!SCALAR_INT_MODE_P (tmode)) |
1518 | { |
1519 | scalar_int_mode int_mode = int_mode_for_mode (tmode).require (); |
1520 | x = convert_to_mode (int_mode, x, unsignedp); |
1521 | x = force_reg (int_mode, x); |
1522 | return gen_lowpart (tmode, x); |
1523 | } |
1524 | |
1525 | return convert_to_mode (tmode, x, unsignedp); |
1526 | } |
1527 | |
1528 | /* Try to use an ext(z)v pattern to extract a field from OP0. |
1529 | Return the extracted value on success, otherwise return null. |
1530 | EXTV describes the extraction instruction to use. If OP0_MODE |
1531 | is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM. |
1532 | The other arguments are as for extract_bit_field. */ |
1533 | |
static rtx
extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
			      opt_scalar_int_mode op0_mode,
			      unsigned HOST_WIDE_INT bitsize,
			      unsigned HOST_WIDE_INT bitnum,
			      int unsignedp, rtx target,
			      machine_mode mode, machine_mode tmode)
1541 | { |
1542 | class expand_operand ops[4]; |
1543 | rtx spec_target = target; |
1544 | rtx spec_target_subreg = 0; |
1545 | scalar_int_mode ext_mode = extv->field_mode; |
  unsigned unit = GET_MODE_BITSIZE (ext_mode);
1547 | |
1548 | if (bitsize == 0 || unit < bitsize) |
1549 | return NULL_RTX; |
1550 | |
1551 | if (MEM_P (op0)) |
1552 | /* Get a reference to the first byte of the field. */ |
    op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
				&bitnum);
1555 | else |
1556 | { |
1557 | /* Convert from counting within OP0 to counting in EXT_MODE. */ |
1558 | if (BYTES_BIG_ENDIAN) |
	bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1560 | |
1561 | /* If op0 is a register, we need it in EXT_MODE to make it |
1562 | acceptable to the format of ext(z)v. */ |
1563 | if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode) |
1564 | return NULL_RTX; |
1565 | if (REG_P (op0) && op0_mode.require () != ext_mode) |
1566 | op0 = gen_lowpart_SUBREG (ext_mode, op0); |
1567 | } |
1568 | |
1569 | /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count |
1570 | "backwards" from the size of the unit we are extracting from. |
1571 | Otherwise, we count bits from the most significant on a |
1572 | BYTES/BITS_BIG_ENDIAN machine. */ |
1573 | |
1574 | if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) |
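    /* E.g., with a 32-bit EXT_MODE, an 8-bit field at BITNUM 4 becomes
       BITNUM 32 - 8 - 4 == 20.  */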
1575 | bitnum = unit - bitsize - bitnum; |
1576 | |
1577 | if (target == 0) |
1578 | target = spec_target = gen_reg_rtx (tmode); |
1579 | |
1580 | if (GET_MODE (target) != ext_mode) |
1581 | { |
1582 | rtx temp; |
1583 | /* Don't use LHS paradoxical subreg if explicit truncation is needed |
1584 | between the mode of the extraction (word_mode) and the target |
1585 | mode. Instead, create a temporary and use convert_move to set |
1586 | the target. */ |
1587 | if (REG_P (target) |
1588 | && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode) |
1589 | && (temp = gen_lowpart_if_possible (ext_mode, target))) |
1590 | { |
1591 | target = temp; |
	  if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1593 | spec_target_subreg = target; |
1594 | } |
1595 | else |
1596 | target = gen_reg_rtx (ext_mode); |
1597 | } |
1598 | |
  create_output_operand (&ops[0], target, ext_mode);
  create_fixed_operand (&ops[1], op0);
1601 | create_integer_operand (&ops[2], bitsize); |
1602 | create_integer_operand (&ops[3], bitnum); |
  if (maybe_expand_insn (extv->icode, 4, ops))
1604 | { |
1605 | target = ops[0].value; |
1606 | if (target == spec_target) |
1607 | return target; |
1608 | if (target == spec_target_subreg) |
1609 | return spec_target; |
      return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1611 | } |
1612 | return NULL_RTX; |
1613 | } |
1614 | |
1615 | /* See whether it would be valid to extract the part of OP0 with |
1616 | mode OP0_MODE described by BITNUM and BITSIZE into a value of |
1617 | mode MODE using a subreg operation. |
1618 | Return the subreg if so, otherwise return null. */ |
1619 | |
static rtx
extract_bit_field_as_subreg (machine_mode mode, rtx op0,
			     machine_mode op0_mode,
			     poly_uint64 bitsize, poly_uint64 bitnum)
1624 | { |
1625 | poly_uint64 bytenum; |
  if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
      && known_eq (bitsize, GET_MODE_BITSIZE (mode))
      && lowpart_bit_field_p (bitnum, bitsize, op0_mode)
      && TRULY_NOOP_TRUNCATION_MODES_P (mode, op0_mode))
    return simplify_gen_subreg (mode, op0, op0_mode, bytenum);
1631 | return NULL_RTX; |
1632 | } |
1633 | |
1634 | /* A subroutine of extract_bit_field, with the same arguments. |
1635 | If UNSIGNEDP is -1, the result need not be sign or zero extended. |
1636 | If FALLBACK_P is true, fall back to extract_fixed_bit_field |
1637 | if we can find no other means of implementing the operation. |
   If FALLBACK_P is false, return NULL instead.  */
1639 | |
static rtx
extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
		     int unsignedp, rtx target, machine_mode mode,
		     machine_mode tmode, bool reverse, bool fallback_p,
		     rtx *alt_rtl)
1645 | { |
1646 | rtx op0 = str_rtx; |
1647 | machine_mode mode1; |
1648 | |
1649 | if (tmode == VOIDmode) |
1650 | tmode = mode; |
1651 | |
1652 | while (GET_CODE (op0) == SUBREG) |
1653 | { |
1654 | bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT; |
1655 | op0 = SUBREG_REG (op0); |
1656 | } |
1657 | |
1658 | /* If we have an out-of-bounds access to a register, just return an |
1659 | uninitialized register of the required mode. This can occur if the |
1660 | source code contains an out-of-bounds access to a small array. */ |
1661 | if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0)))) |
1662 | return gen_reg_rtx (tmode); |
1663 | |
1664 | if (REG_P (op0) |
1665 | && mode == GET_MODE (op0) |
1666 | && known_eq (bitnum, 0U) |
1667 | && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0)))) |
1668 | { |
1669 | if (reverse) |
	op0 = flip_storage_order (mode, op0);
1671 | /* We're trying to extract a full register from itself. */ |
1672 | return op0; |
1673 | } |
1674 | |
1675 | /* First try to check for vector from vector extractions. */ |
1676 | if (VECTOR_MODE_P (GET_MODE (op0)) |
1677 | && !MEM_P (op0) |
1678 | && VECTOR_MODE_P (tmode) |
1679 | && known_eq (bitsize, GET_MODE_PRECISION (tmode)) |
1680 | && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode))) |
1681 | { |
1682 | machine_mode new_mode = GET_MODE (op0); |
1683 | if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)) |
1684 | { |
1685 | scalar_mode inner_mode = GET_MODE_INNER (tmode); |
1686 | poly_uint64 nunits; |
	  if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
			   GET_MODE_UNIT_BITSIZE (tmode), &nunits)
	      || !related_vector_mode (tmode, inner_mode,
				       nunits).exists (&new_mode)
	      || maybe_ne (GET_MODE_SIZE (new_mode),
			   GET_MODE_SIZE (GET_MODE (op0))))
1693 | new_mode = VOIDmode; |
1694 | } |
1695 | poly_uint64 pos; |
1696 | if (new_mode != VOIDmode |
	  && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
	      != CODE_FOR_nothing)
	  && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1700 | { |
1701 | class expand_operand ops[3]; |
1702 | machine_mode outermode = new_mode; |
1703 | machine_mode innermode = tmode; |
1704 | enum insn_code icode |
	    = convert_optab_handler (vec_extract_optab, outermode, innermode);
1706 | |
1707 | if (new_mode != GET_MODE (op0)) |
1708 | op0 = gen_lowpart (new_mode, op0); |
	  create_output_operand (&ops[0], target, innermode);
	  ops[0].target = 1;
	  create_input_operand (&ops[1], op0, outermode);
	  create_integer_operand (&ops[2], pos);
	  if (maybe_expand_insn (icode, 3, ops))
1714 | { |
1715 | if (alt_rtl && ops[0].target) |
1716 | *alt_rtl = target; |
1717 | target = ops[0].value; |
1718 | if (GET_MODE (target) != mode) |
1719 | return gen_lowpart (tmode, target); |
1720 | return target; |
1721 | } |
1722 | } |
1723 | } |
1724 | |
1725 | /* See if we can get a better vector mode before extracting. */ |
1726 | if (VECTOR_MODE_P (GET_MODE (op0)) |
1727 | && !MEM_P (op0) |
1728 | && GET_MODE_INNER (GET_MODE (op0)) != tmode) |
1729 | { |
1730 | machine_mode new_mode; |
1731 | |
1732 | if (GET_MODE_CLASS (tmode) == MODE_FLOAT) |
1733 | new_mode = MIN_MODE_VECTOR_FLOAT; |
1734 | else if (GET_MODE_CLASS (tmode) == MODE_FRACT) |
1735 | new_mode = MIN_MODE_VECTOR_FRACT; |
1736 | else if (GET_MODE_CLASS (tmode) == MODE_UFRACT) |
1737 | new_mode = MIN_MODE_VECTOR_UFRACT; |
1738 | else if (GET_MODE_CLASS (tmode) == MODE_ACCUM) |
1739 | new_mode = MIN_MODE_VECTOR_ACCUM; |
1740 | else if (GET_MODE_CLASS (tmode) == MODE_UACCUM) |
1741 | new_mode = MIN_MODE_VECTOR_UACCUM; |
1742 | else |
1743 | new_mode = MIN_MODE_VECTOR_INT; |
1744 | |
1745 | FOR_EACH_MODE_FROM (new_mode, new_mode) |
1746 | if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0))) |
1747 | && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode)) |
1748 | && known_eq (bitsize, GET_MODE_UNIT_PRECISION (new_mode)) |
	  && multiple_p (bitnum, GET_MODE_UNIT_PRECISION (new_mode))
1750 | && targetm.vector_mode_supported_p (new_mode) |
1751 | && targetm.modes_tieable_p (GET_MODE (op0), new_mode)) |
1752 | break; |
1753 | if (new_mode != VOIDmode) |
1754 | op0 = gen_lowpart (new_mode, op0); |
1755 | } |
1756 | |
1757 | /* Use vec_extract patterns for extracting parts of vectors whenever |
1758 | available. If that fails, see whether the current modes and bitregion |
1759 | give a natural subreg. */ |
1760 | machine_mode outermode = GET_MODE (op0); |
1761 | if (VECTOR_MODE_P (outermode) && !MEM_P (op0)) |
1762 | { |
1763 | scalar_mode innermode = GET_MODE_INNER (outermode); |
1764 | |
1765 | enum insn_code icode |
	= convert_optab_handler (vec_extract_optab, outermode, innermode);
1767 | |
1768 | poly_uint64 pos; |
1769 | if (icode != CODE_FOR_nothing |
1770 | && known_eq (bitsize, GET_MODE_PRECISION (innermode)) |
	  && multiple_p (bitnum, GET_MODE_PRECISION (innermode), &pos))
1772 | { |
1773 | class expand_operand ops[3]; |
1774 | |
	  create_output_operand (&ops[0], target,
				 insn_data[icode].operand[0].mode);
	  ops[0].target = 1;
	  create_input_operand (&ops[1], op0, outermode);
	  create_integer_operand (&ops[2], pos);
	  if (maybe_expand_insn (icode, 3, ops))
1781 | { |
1782 | if (alt_rtl && ops[0].target) |
1783 | *alt_rtl = target; |
1784 | target = ops[0].value; |
1785 | if (GET_MODE (target) != mode) |
1786 | return gen_lowpart (tmode, target); |
1787 | return target; |
1788 | } |
1789 | } |
1790 | /* Using subregs is useful if we're extracting one register vector |
1791 | from a multi-register vector. extract_bit_field_as_subreg checks |
1792 | for valid bitsize and bitnum, so we don't need to do that here. */ |
1793 | if (VECTOR_MODE_P (mode)) |
1794 | { |
	  rtx sub = extract_bit_field_as_subreg (mode, op0, outermode,
						 bitsize, bitnum);
1797 | if (sub) |
1798 | return sub; |
1799 | } |
1800 | } |
1801 | |
1802 | /* Make sure we are playing with integral modes. Pun with subregs |
1803 | if we aren't. */ |
1804 | opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0)); |
1805 | scalar_int_mode imode; |
  if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1807 | { |
1808 | if (MEM_P (op0)) |
1809 | op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (), |
1810 | 0, MEM_SIZE (op0)); |
      else if (op0_mode.exists (&imode))
1812 | { |
1813 | op0 = gen_lowpart (imode, op0); |
1814 | |
1815 | /* If we got a SUBREG, force it into a register since we |
1816 | aren't going to be able to do another SUBREG on it. */ |
1817 | if (GET_CODE (op0) == SUBREG) |
1818 | op0 = force_reg (imode, op0); |
1819 | } |
1820 | else |
1821 | { |
1822 | poly_int64 size = GET_MODE_SIZE (GET_MODE (op0)); |
1823 | rtx mem = assign_stack_temp (GET_MODE (op0), size); |
1824 | emit_move_insn (mem, op0); |
1825 | op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size); |
1826 | } |
1827 | } |
1828 | |
1829 | /* ??? We currently assume TARGET is at least as big as BITSIZE. |
1830 | If that's wrong, the solution is to test for it and set TARGET to 0 |
1831 | if needed. */ |
1832 | |
1833 | /* Get the mode of the field to use for atomic access or subreg |
1834 | conversion. */ |
1835 | if (!SCALAR_INT_MODE_P (tmode) |
      || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1837 | mode1 = mode; |
1838 | gcc_assert (mode1 != BLKmode); |
1839 | |
1840 | /* Extraction of a full MODE1 value can be done with a subreg as long |
1841 | as the least significant bit of the value is the least significant |
1842 | bit of either OP0 or a word of OP0. */ |
  if (!MEM_P (op0) && !reverse && op0_mode.exists (&imode))
    {
      rtx sub = extract_bit_field_as_subreg (mode1, op0, imode,
					     bitsize, bitnum);
      if (sub)
	return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1849 | } |
1850 | |
1851 | /* Extraction of a full MODE1 value can be done with a load as long as |
1852 | the field is on a byte boundary and is sufficiently aligned. */ |
1853 | poly_uint64 bytenum; |
  if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1855 | { |
1856 | op0 = adjust_bitfield_address (op0, mode1, bytenum); |
1857 | if (reverse) |
	op0 = flip_storage_order (mode1, op0);
      return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1860 | } |
1861 | |
1862 | /* If we have a memory source and a non-constant bit offset, restrict |
1863 | the memory to the referenced bytes. This is a worst-case fallback |
1864 | but is useful for things like vector booleans. */ |
1865 | if (MEM_P (op0) && !bitnum.is_constant ()) |
1866 | { |
1867 | bytenum = bits_to_bytes_round_down (bitnum); |
1868 | bitnum = num_trailing_bits (bitnum); |
1869 | poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize); |
1870 | op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize); |
1871 | op0_mode = opt_scalar_int_mode (); |
1872 | } |
1873 | |
1874 | /* It's possible we'll need to handle other cases here for |
1875 | polynomial bitnum and bitsize. */ |
1876 | |
1877 | /* From here on we need to be looking at a fixed-size insertion. */ |
1878 | return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (), |
1879 | bitnum.to_constant (), unsignedp, |
1880 | target, mode, tmode, reverse, fallback_p); |
1881 | } |
1882 | |
1883 | /* Subroutine of extract_bit_field_1, with the same arguments, except |
1884 | that BITSIZE and BITNUM are constant. Handle cases specific to |
1885 | integral modes. If OP0_MODE is defined, it is the mode of OP0, |
1886 | otherwise OP0 is a BLKmode MEM. */ |
1887 | |
static rtx
extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
			    unsigned HOST_WIDE_INT bitsize,
			    unsigned HOST_WIDE_INT bitnum, int unsignedp,
			    rtx target, machine_mode mode, machine_mode tmode,
			    bool reverse, bool fallback_p)
1894 | { |
1895 | /* Handle fields bigger than a word. */ |
1896 | |
1897 | if (bitsize > BITS_PER_WORD) |
1898 | { |
1899 | /* Here we transfer the words of the field |
1900 | in the order least significant first. |
1901 | This is because the most significant word is the one which may |
1902 | be less than full. */ |
1903 | |
1904 | const bool backwards = WORDS_BIG_ENDIAN; |
1905 | unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; |
1906 | unsigned int i; |
1907 | rtx_insn *last; |
1908 | |
1909 | if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target)) |
1910 | target = gen_reg_rtx (mode); |
1911 | |
1912 | /* In case we're about to clobber a base register or something |
1913 | (see gcc.c-torture/execute/20040625-1.c). */ |
1914 | if (reg_mentioned_p (target, op0)) |
1915 | target = gen_reg_rtx (mode); |
1916 | |
1917 | /* Indicate for flow that the entire target reg is being set. */ |
1918 | emit_clobber (target); |
1919 | |
1920 | /* The mode must be fixed-size, since extract_bit_field_1 handles |
1921 | extractions from variable-sized objects before calling this |
1922 | function. */ |
1923 | unsigned int target_size |
1924 | = GET_MODE_SIZE (GET_MODE (target)).to_constant (); |
1925 | last = get_last_insn (); |
1926 | for (i = 0; i < nwords; i++) |
1927 | { |
1928 | /* If I is 0, use the low-order word in both field and target; |
1929 | if I is 1, use the next to lowest word; and so on. */ |
1930 | /* Word number in TARGET to use. */ |
1931 | unsigned int wordnum |
1932 | = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i); |
1933 | /* Offset from start of field in OP0. */ |
1934 | unsigned int bit_offset = (backwards ^ reverse |
1935 | ? MAX ((int) bitsize - ((int) i + 1) |
1936 | * BITS_PER_WORD, |
1937 | 0) |
1938 | : (int) i * BITS_PER_WORD); |
1939 | rtx target_part = operand_subword (target, wordnum, 1, VOIDmode); |
1940 | rtx result_part |
	= extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
					 bitsize - i * BITS_PER_WORD),
			       bitnum + bit_offset,
			       (unsignedp ? 1 : -1), target_part,
			       mode, word_mode, reverse, fallback_p, NULL);
1946 | |
1947 | gcc_assert (target_part); |
1948 | if (!result_part) |
1949 | { |
1950 | delete_insns_since (last); |
1951 | return NULL; |
1952 | } |
1953 | |
1954 | if (result_part != target_part) |
1955 | emit_move_insn (target_part, result_part); |
1956 | } |
1957 | |
1958 | if (unsignedp) |
1959 | { |
1960 | /* Unless we've filled TARGET, the upper regs in a multi-reg value |
1961 | need to be zero'd out. */ |
1962 | if (target_size > nwords * UNITS_PER_WORD) |
1963 | { |
1964 | unsigned int i, total_words; |
1965 | |
1966 | total_words = target_size / UNITS_PER_WORD; |
1967 | for (i = nwords; i < total_words; i++) |
1968 | emit_move_insn |
1969 | (operand_subword (target, |
1970 | backwards ? total_words - i - 1 : i, |
1971 | 1, VOIDmode), |
1972 | const0_rtx); |
1973 | } |
1974 | return target; |
1975 | } |
1976 | |
1977 | /* Signed bit field: sign-extend with two arithmetic shifts. */ |
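    /* E.g., extracting a 40-bit field into DImode shifts left by
       64 - 40 == 24 and then arithmetic-shifts right by 24.  */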
1978 | target = expand_shift (LSHIFT_EXPR, mode, target, |
1979 | GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0); |
1980 | return expand_shift (RSHIFT_EXPR, mode, target, |
1981 | GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0); |
1982 | } |
1983 | |
1984 | /* If OP0 is a multi-word register, narrow it to the affected word. |
1985 | If the region spans two words, defer to extract_split_bit_field. */ |
  if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1987 | { |
1988 | if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD) |
1989 | { |
1990 | if (!fallback_p) |
1991 | return NULL_RTX; |
1992 | target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum, |
1993 | unsignedp, reverse); |
	  return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1995 | } |
1996 | /* If OP0 is a hard register, copy it to a pseudo before calling |
1997 | simplify_gen_subreg. */ |
1998 | if (REG_P (op0) && HARD_REGISTER_P (op0)) |
1999 | op0 = copy_to_reg (op0); |
      op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
2002 | op0_mode = word_mode; |
2003 | bitnum %= BITS_PER_WORD; |
2004 | } |
2005 | |
2006 | /* From here on we know the desired field is smaller than a word. |
2007 | If OP0 is a register, it too fits within a word. */ |
2008 | enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv; |
2009 | extraction_insn extv; |
2010 | if (!MEM_P (op0) |
2011 | && !reverse |
2012 | /* ??? We could limit the structure size to the part of OP0 that |
2013 | contains the field, with appropriate checks for endianness |
2014 | and TARGET_TRULY_NOOP_TRUNCATION. */ |
2015 | && get_best_reg_extraction_insn (&extv, pattern, |
				       GET_MODE_BITSIZE (op0_mode.require ()),
2017 | tmode)) |
2018 | { |
      rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
						 bitsize, bitnum,
						 unsignedp, target, mode,
						 tmode);
2023 | if (result) |
2024 | return result; |
2025 | } |
2026 | |
2027 | /* If OP0 is a memory, try copying it to a register and seeing if a |
2028 | cheap register alternative is available. */ |
  if (MEM_P (op0) && !reverse)
2030 | { |
2031 | if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum, |
2032 | tmode)) |
2033 | { |
	  rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
						     bitsize, bitnum,
						     unsignedp, target, mode,
						     tmode);
2038 | if (result) |
2039 | return result; |
2040 | } |
2041 | |
2042 | rtx_insn *last = get_last_insn (); |
2043 | |
2044 | /* Try loading part of OP0 into a register and extracting the |
2045 | bitfield from that. */ |
2046 | unsigned HOST_WIDE_INT bitpos; |
      rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
					       0, 0, tmode, &bitpos);
2049 | if (xop0) |
2050 | { |
2051 | xop0 = copy_to_reg (xop0); |
	  rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
					    unsignedp, target,
					    mode, tmode, reverse, false, NULL);
2055 | if (result) |
2056 | return result; |
2057 | delete_insns_since (last); |
2058 | } |
2059 | } |
2060 | |
2061 | if (!fallback_p) |
2062 | return NULL; |
2063 | |
2064 | /* Find a correspondingly-sized integer field, so we can apply |
2065 | shifts and masks to it. */ |
2066 | scalar_int_mode int_mode; |
  if (!int_mode_for_mode (tmode).exists (&int_mode))
2068 | /* If this fails, we should probably push op0 out to memory and then |
2069 | do a load. */ |
2070 | int_mode = int_mode_for_mode (mode).require (); |
2071 | |
2072 | target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize, |
2073 | bitnum, target, unsignedp, reverse); |
2074 | |
2075 | /* Complex values must be reversed piecewise, so we need to undo the global |
2076 | reversal, convert to the complex mode and reverse again. */ |
2077 | if (reverse && COMPLEX_MODE_P (tmode)) |
2078 | { |
      target = flip_storage_order (int_mode, target);
      target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
      target = flip_storage_order (tmode, target);
    }
  else
    target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2085 | |
2086 | return target; |
2087 | } |
2088 | |
/* Generate code to extract a bit-field from STR_RTX
2090 | containing BITSIZE bits, starting at BITNUM, |
2091 | and put it in TARGET if possible (if TARGET is nonzero). |
2092 | Regardless of TARGET, we return the rtx for where the value is placed. |
2093 | |
2094 | STR_RTX is the structure containing the byte (a REG or MEM). |
2095 | UNSIGNEDP is nonzero if this is an unsigned bit field. |
2096 | MODE is the natural mode of the field value once extracted. |
2097 | TMODE is the mode the caller would like the value to have; |
2098 | but the value may be returned with type MODE instead. |
2099 | |
2100 | If REVERSE is true, the extraction is to be done in reverse order. |
2101 | |
2102 | If a TARGET is specified and we can store in it at no extra cost, |
2103 | we do so, and return TARGET. |
2104 | Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred |
2105 | if they are equally easy. |
2106 | |
2107 | If the result can be stored at TARGET, and ALT_RTL is non-NULL, |
   then *ALT_RTL is set to TARGET (before legitimization).  */
2109 | |
rtx
extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
		   int unsignedp, rtx target, machine_mode mode,
		   machine_mode tmode, bool reverse, rtx *alt_rtl)
2114 | { |
2115 | machine_mode mode1; |
2116 | |
2117 | /* Handle -fstrict-volatile-bitfields in the cases where it applies. */ |
  if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
    mode1 = GET_MODE (str_rtx);
  else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2121 | mode1 = GET_MODE (target); |
2122 | else |
2123 | mode1 = tmode; |
2124 | |
2125 | unsigned HOST_WIDE_INT ibitsize, ibitnum; |
2126 | scalar_int_mode int_mode; |
  if (bitsize.is_constant (&ibitsize)
      && bitnum.is_constant (&ibitnum)
      && is_a <scalar_int_mode> (mode1, &int_mode)
      && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
				     int_mode, 0, 0))
2132 | { |
2133 | /* Extraction of a full INT_MODE value can be done with a simple load. |
2134 | We know here that the field can be accessed with one single |
2135 | instruction. For targets that support unaligned memory, |
2136 | an unaligned access may be necessary. */ |
      if (ibitsize == GET_MODE_BITSIZE (int_mode))
2138 | { |
2139 | rtx result = adjust_bitfield_address (str_rtx, int_mode, |
2140 | ibitnum / BITS_PER_UNIT); |
2141 | if (reverse) |
	    result = flip_storage_order (int_mode, result);
2143 | gcc_assert (ibitnum % BITS_PER_UNIT == 0); |
	  return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2145 | } |
2146 | |
      str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
				      &ibitnum);
2149 | gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode)); |
2150 | str_rtx = copy_to_reg (str_rtx); |
      return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
				  target, mode, tmode, reverse, true, alt_rtl);
2153 | } |
2154 | |
2155 | return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, |
			      target, mode, tmode, reverse, true, alt_rtl);
2157 | } |
2158 | |
2159 | /* Use shifts and boolean operations to extract a field of BITSIZE bits |
2160 | from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0, |
2161 | otherwise OP0 is a BLKmode MEM. |
2162 | |
2163 | UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value). |
2164 | If REVERSE is true, the extraction is to be done in reverse order. |
2165 | |
2166 | If TARGET is nonzero, attempts to store the value there |
2167 | and return TARGET, but this is not guaranteed. |
2168 | If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */ |
2169 | |
static rtx
extract_fixed_bit_field (machine_mode tmode, rtx op0,
			 opt_scalar_int_mode op0_mode,
			 unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitnum, rtx target,
			 int unsignedp, bool reverse)
2176 | { |
2177 | scalar_int_mode mode; |
2178 | if (MEM_P (op0)) |
2179 | { |
2180 | if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), |
2181 | BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode)) |
2182 | /* The only way this should occur is if the field spans word |
2183 | boundaries. */ |
2184 | return extract_split_bit_field (op0, op0_mode, bitsize, bitnum, |
2185 | unsignedp, reverse); |
2186 | |
      op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2188 | } |
2189 | else |
2190 | mode = op0_mode.require (); |
2191 | |
2192 | return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum, |
2193 | target, unsignedp, reverse); |
2194 | } |
2195 | |
2196 | /* Helper function for extract_fixed_bit_field, extracts |
2197 | the bit field always using MODE, which is the mode of OP0. |
2198 | If UNSIGNEDP is -1, the result need not be sign or zero extended. |
2199 | The other arguments are as for extract_fixed_bit_field. */ |
2200 | |
static rtx
extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
			   unsigned HOST_WIDE_INT bitsize,
			   unsigned HOST_WIDE_INT bitnum, rtx target,
			   int unsignedp, bool reverse)
2206 | { |
2207 | /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode) |
2208 | for invalid input, such as extract equivalent of f5 from |
2209 | gcc.dg/pr48335-2.c. */ |
2210 | |
2211 | if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN) |
2212 | /* BITNUM is the distance between our msb and that of OP0. |
2213 | Convert it to the distance from the lsb. */ |
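    /* E.g., an 8-bit field at BITNUM 8 of a 32-bit MODE becomes
       BITNUM 32 - 8 - 8 == 16.  */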
2214 | bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum; |
2215 | |
2216 | /* Now BITNUM is always the distance between the field's lsb and that of OP0. |
2217 | We have reduced the big-endian case to the little-endian case. */ |
2218 | if (reverse) |
    op0 = flip_storage_order (mode, op0);
2220 | |
2221 | if (unsignedp) |
2222 | { |
2223 | if (bitnum) |
2224 | { |
2225 | /* If the field does not already start at the lsb, |
2226 | shift it so it does. */ |
2227 | /* Maybe propagate the target for the shift. */ |
2228 | rtx subtarget = (target != 0 && REG_P (target) ? target : 0); |
2229 | if (tmode != mode) |
2230 | subtarget = 0; |
2231 | op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1); |
2232 | } |
2233 | /* Convert the value to the desired mode. TMODE must also be a |
2234 | scalar integer for this conversion to make sense, since we |
2235 | shouldn't reinterpret the bits. */ |
      scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2237 | if (mode != new_mode) |
2238 | op0 = convert_to_mode (new_mode, op0, 1); |
2239 | |
2240 | /* Unless the msb of the field used to be the msb when we shifted, |
2241 | mask out the upper bits. */ |
2242 | |
2243 | if (GET_MODE_BITSIZE (mode) != bitnum + bitsize |
2244 | && unsignedp != -1) |
2245 | return expand_binop (new_mode, and_optab, op0, |
			     mask_rtx (new_mode, 0, bitsize, 0),
2247 | target, 1, OPTAB_LIB_WIDEN); |
2248 | return op0; |
2249 | } |
2250 | |
2251 | /* To extract a signed bit-field, first shift its msb to the msb of the word, |
2252 | then arithmetic-shift its lsb to the lsb of the word. */ |
2253 | op0 = force_reg (mode, op0); |
2254 | |
2255 | /* Find the narrowest integer mode that contains the field. */ |
2256 | |
2257 | opt_scalar_int_mode mode_iter; |
2258 | FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT) |
    if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2260 | break; |
2261 | |
2262 | mode = mode_iter.require (); |
2263 | op0 = convert_to_mode (mode, op0, 0); |
2264 | |
2265 | if (mode != tmode) |
2266 | target = 0; |
2267 | |
2268 | if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum)) |
2269 | { |
2270 | int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum); |
2271 | /* Maybe propagate the target for the shift. */ |
2272 | rtx subtarget = (target != 0 && REG_P (target) ? target : 0); |
2273 | op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1); |
2274 | } |
2275 | |
2276 | return expand_shift (RSHIFT_EXPR, mode, op0, |
2277 | GET_MODE_BITSIZE (mode) - bitsize, target, 0); |
2278 | } |
2279 | |
2280 | /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value |
2281 | VALUE << BITPOS. */ |
2282 | |
2283 | static rtx |
2284 | lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value, |
2285 | int bitpos) |
2286 | { |
  return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2288 | } |
2289 | |
2290 | /* Extract a bit field that is split across two words |
2291 | and return an RTX for the result. |
2292 | |
2293 | OP0 is the REG, SUBREG or MEM rtx for the first of the two words. |
2294 | BITSIZE is the field width; BITPOS, position of its first bit, in the word. |
2295 | UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. |
2296 | If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is |
2297 | a BLKmode MEM. |
2298 | |
2299 | If REVERSE is true, the extraction is to be done in reverse order. */ |
2300 | |
static rtx
extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
			 unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitpos, int unsignedp,
			 bool reverse)
2306 | { |
2307 | unsigned int unit; |
2308 | unsigned int bitsdone = 0; |
2309 | rtx result = NULL_RTX; |
2310 | int first = 1; |
2311 | |
2312 | /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that |
2313 | much at a time. */ |
2314 | if (REG_P (op0) || GET_CODE (op0) == SUBREG) |
2315 | unit = BITS_PER_WORD; |
2316 | else |
2317 | unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); |
2318 | |
2319 | while (bitsdone < bitsize) |
2320 | { |
2321 | unsigned HOST_WIDE_INT thissize; |
2322 | rtx part; |
2323 | unsigned HOST_WIDE_INT thispos; |
2324 | unsigned HOST_WIDE_INT offset; |
2325 | |
2326 | offset = (bitpos + bitsdone) / unit; |
2327 | thispos = (bitpos + bitsdone) % unit; |
2328 | |
2329 | /* THISSIZE must not overrun a word boundary. Otherwise, |
2330 | extract_fixed_bit_field will call us again, and we will mutually |
2331 | recurse forever. */ |
2332 | thissize = MIN (bitsize - bitsdone, BITS_PER_WORD); |
2333 | thissize = MIN (thissize, unit - thispos); |
2334 | |
2335 | /* If OP0 is a register, then handle OFFSET here. */ |
2336 | rtx op0_piece = op0; |
2337 | opt_scalar_int_mode op0_piece_mode = op0_mode; |
2338 | if (SUBREG_P (op0) || REG_P (op0)) |
2339 | { |
2340 | op0_piece = operand_subword_force (op0, offset, op0_mode.require ()); |
2341 | op0_piece_mode = word_mode; |
2342 | offset = 0; |
2343 | } |
2344 | |
2345 | /* Extract the parts in bit-counting order, |
	 whose meaning is determined by BYTES_BIG_ENDIAN.
2347 | OFFSET is in UNITs, and UNIT is in bits. */ |
      part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
				      thissize, offset * unit + thispos,
				      0, 1, reverse);
2351 | bitsdone += thissize; |
2352 | |
2353 | /* Shift this part into place for the result. */ |
2354 | if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN) |
2355 | { |
2356 | if (bitsize != bitsdone) |
2357 | part = expand_shift (LSHIFT_EXPR, word_mode, part, |
2358 | bitsize - bitsdone, 0, 1); |
2359 | } |
2360 | else |
2361 | { |
2362 | if (bitsdone != thissize) |
2363 | part = expand_shift (LSHIFT_EXPR, word_mode, part, |
2364 | bitsdone - thissize, 0, 1); |
2365 | } |
2366 | |
2367 | if (first) |
2368 | result = part; |
2369 | else |
2370 | /* Combine the parts with bitwise or. This works |
2371 | because we extracted each part as an unsigned bit field. */ |
2372 | result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1, |
2373 | OPTAB_LIB_WIDEN); |
2374 | |
2375 | first = 0; |
2376 | } |
2377 | |
2378 | /* Unsigned bit field: we are done. */ |
2379 | if (unsignedp) |
2380 | return result; |
2381 | /* Signed bit field: sign-extend with two arithmetic shifts. */ |
2382 | result = expand_shift (LSHIFT_EXPR, word_mode, result, |
2383 | BITS_PER_WORD - bitsize, NULL_RTX, 0); |
2384 | return expand_shift (RSHIFT_EXPR, word_mode, result, |
2385 | BITS_PER_WORD - bitsize, NULL_RTX, 0); |
2386 | } |
2387 | |
2388 | /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving |
2389 | the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than |
2390 | MODE, fill the upper bits with zeros. Fail if the layout of either |
2391 | mode is unknown (as for CC modes) or if the extraction would involve |
2392 | unprofitable mode punning. Return the value on success, otherwise |
2393 | return null. |
2394 | |
2395 | This is different from gen_lowpart* in these respects: |
2396 | |
2397 | - the returned value must always be considered an rvalue |
2398 | |
2399 | - when MODE is wider than SRC_MODE, the extraction involves |
2400 | a zero extension |
2401 | |
2402 | - when MODE is smaller than SRC_MODE, the extraction involves |
2403 | a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION). |
2404 | |
2405 | In other words, this routine performs a computation, whereas the |
2406 | gen_lowpart* routines are conceptually lvalue or rvalue subreg |
2407 | operations. */ |
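/* E.g., extracting SImode from a DImode SRC yields the low 32 bits,
   while extracting DImode from an SImode SRC zero-extends to 64 bits.  */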
2408 | |
rtx
extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2411 | { |
2412 | scalar_int_mode int_mode, src_int_mode; |
2413 | |
2414 | if (mode == src_mode) |
2415 | return src; |
2416 | |
2417 | if (CONSTANT_P (src)) |
2418 | { |
2419 | /* simplify_gen_subreg can't be used here, as if simplify_subreg |
2420 | fails, it will happily create (subreg (symbol_ref)) or similar |
2421 | invalid SUBREGs. */ |
      poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
      rtx ret = simplify_subreg (mode, src, src_mode, byte);
2424 | if (ret) |
2425 | return ret; |
2426 | |
2427 | if (GET_MODE (src) == VOIDmode |
2428 | || !validate_subreg (mode, src_mode, src, byte)) |
2429 | return NULL_RTX; |
2430 | |
2431 | src = force_reg (GET_MODE (src), src); |
2432 | return gen_rtx_SUBREG (mode, src, byte); |
2433 | } |
2434 | |
2435 | if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC) |
2436 | return NULL_RTX; |
2437 | |
2438 | if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode)) |
2439 | && targetm.modes_tieable_p (mode, src_mode)) |
2440 | { |
2441 | rtx x = gen_lowpart_common (mode, src); |
2442 | if (x) |
2443 | return x; |
2444 | } |
2445 | |
  if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
      || !int_mode_for_mode (mode).exists (&int_mode))
2448 | return NULL_RTX; |
2449 | |
2450 | if (!targetm.modes_tieable_p (src_int_mode, src_mode)) |
2451 | return NULL_RTX; |
2452 | if (!targetm.modes_tieable_p (int_mode, mode)) |
2453 | return NULL_RTX; |
2454 | |
2455 | src = gen_lowpart (src_int_mode, src); |
2456 | if (!validate_subreg (int_mode, src_int_mode, src, |
			subreg_lowpart_offset (int_mode, src_int_mode)))
2458 | return NULL_RTX; |
2459 | |
  src = convert_modes (int_mode, src_int_mode, src, true);
2461 | src = gen_lowpart (mode, src); |
2462 | return src; |
2463 | } |
2464 | |
2465 | /* Add INC into TARGET. */ |
2466 | |
2467 | void |
2468 | expand_inc (rtx target, rtx inc) |
2469 | { |
2470 | rtx value = expand_binop (GET_MODE (target), add_optab, |
2471 | target, inc, |
2472 | target, 0, OPTAB_LIB_WIDEN); |
2473 | if (value != target) |
2474 | emit_move_insn (target, value); |
2475 | } |
2476 | |
2477 | /* Subtract DEC from TARGET. */ |
2478 | |
2479 | void |
2480 | expand_dec (rtx target, rtx dec) |
2481 | { |
2482 | rtx value = expand_binop (GET_MODE (target), sub_optab, |
2483 | target, dec, |
2484 | target, 0, OPTAB_LIB_WIDEN); |
2485 | if (value != target) |
2486 | emit_move_insn (target, value); |
2487 | } |
2488 | |
2489 | /* Output a shift instruction for expression code CODE, |
2490 | with SHIFTED being the rtx for the value to shift, |
2491 | and AMOUNT the rtx for the amount to shift by. |
2492 | Store the result in the rtx TARGET, if that is convenient. |
2493 | If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. |
2494 | Return the rtx for where the value is. |
2495 | If that cannot be done, abort the compilation unless MAY_FAIL is true, |
2496 | in which case 0 is returned. */ |
2497 | |
2498 | static rtx |
2499 | expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted, |
2500 | rtx amount, rtx target, int unsignedp, bool may_fail = false) |
2501 | { |
2502 | rtx op1, temp = 0; |
2503 | int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR); |
2504 | int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR); |
2505 | optab lshift_optab = ashl_optab; |
2506 | optab rshift_arith_optab = ashr_optab; |
2507 | optab rshift_uns_optab = lshr_optab; |
2508 | optab lrotate_optab = rotl_optab; |
2509 | optab rrotate_optab = rotr_optab; |
2510 | machine_mode op1_mode; |
2511 | scalar_mode scalar_mode = GET_MODE_INNER (mode); |
2512 | int attempt; |
2513 | bool speed = optimize_insn_for_speed_p (); |
2514 | |
2515 | op1 = amount; |
2516 | op1_mode = GET_MODE (op1); |
2517 | |
2518 | /* Determine whether the shift/rotate amount is a vector, or scalar. If the |
2519 | shift amount is a vector, use the vector/vector shift patterns. */ |
2520 | if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode)) |
2521 | { |
2522 | lshift_optab = vashl_optab; |
2523 | rshift_arith_optab = vashr_optab; |
2524 | rshift_uns_optab = vlshr_optab; |
2525 | lrotate_optab = vrotl_optab; |
2526 | rrotate_optab = vrotr_optab; |
2527 | } |
2528 | |
2529 | /* Previously detected shift-counts computed by NEGATE_EXPR |
2530 | and shifted in the other direction; but that does not work |
2531 | on all machines. */ |
2532 | |
2533 | if (SHIFT_COUNT_TRUNCATED) |
2534 | { |
2535 | if (CONST_INT_P (op1) |
2536 | && ((unsigned HOST_WIDE_INT) INTVAL (op1) >= |
	      (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2538 | op1 = gen_int_shift_amount (mode, |
2539 | (unsigned HOST_WIDE_INT) INTVAL (op1) |
				    % GET_MODE_BITSIZE (scalar_mode));
2541 | else if (GET_CODE (op1) == SUBREG |
2542 | && subreg_lowpart_p (op1) |
2543 | && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1))) |
2544 | && SCALAR_INT_MODE_P (GET_MODE (op1))) |
2545 | op1 = SUBREG_REG (op1); |
2546 | } |
2547 | |
2548 | /* Canonicalize rotates by constant amount. We may canonicalize |
2549 | to reduce the immediate or if the ISA can rotate by constants |
     in only one direction.  */
2551 | if (rotate && reverse_rotate_by_imm_p (scalar_mode, left, op1)) |
2552 | { |
      op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2554 | - INTVAL (op1))); |
2555 | left = !left; |
2556 | code = left ? LROTATE_EXPR : RROTATE_EXPR; |
2557 | } |
2558 | |
  /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2560 | Note that this is not the case for bigger values. For instance a rotation |
2561 | of 0x01020304 by 16 bits gives 0x03040102 which is different from |
2562 | 0x04030201 (bswapsi). */ |
2563 | if (rotate |
2564 | && CONST_INT_P (op1) |
2565 | && INTVAL (op1) == BITS_PER_UNIT |
      && GET_MODE_SIZE (scalar_mode) == 2
      && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2568 | return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp); |
2569 | |
2570 | if (op1 == const0_rtx) |
2571 | return shifted; |
2572 | |
  /* Check whether it's cheaper to implement a left shift by a constant
2574 | bit count by a sequence of additions. */ |
2575 | if (code == LSHIFT_EXPR |
2576 | && CONST_INT_P (op1) |
2577 | && INTVAL (op1) > 0 |
      && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2579 | && INTVAL (op1) < MAX_BITS_PER_WORD |
2580 | && (shift_cost (speed, mode, INTVAL (op1)) |
2581 | > INTVAL (op1) * add_cost (speed, mode)) |
2582 | && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST) |
2583 | { |
2584 | int i; |
2585 | for (i = 0; i < INTVAL (op1); i++) |
2586 | { |
2587 | temp = force_reg (mode, shifted); |
2588 | shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX, |
2589 | unsignedp, OPTAB_LIB_WIDEN); |
2590 | } |
2591 | return shifted; |
2592 | } |
2593 | |
2594 | for (attempt = 0; temp == 0 && attempt < 3; attempt++) |
2595 | { |
2596 | enum optab_methods methods; |
2597 | |
2598 | if (attempt == 0) |
2599 | methods = OPTAB_DIRECT; |
2600 | else if (attempt == 1) |
2601 | methods = OPTAB_WIDEN; |
2602 | else |
2603 | methods = OPTAB_LIB_WIDEN; |
2604 | |
2605 | if (rotate) |
2606 | { |
2607 | /* Widening does not work for rotation. */ |
2608 | if (methods == OPTAB_WIDEN) |
2609 | continue; |
2610 | else if (methods == OPTAB_LIB_WIDEN) |
2611 | { |
2612 | /* If we have been unable to open-code this by a rotation, |
2613 | do it as the IOR of two shifts. I.e., to rotate A |
2614 | by N bits, compute |
2615 | (A << N) | ((unsigned) A >> ((-N) & (C - 1))) |
2616 | where C is the bitsize of A. |
2617 | |
2618 | It is theoretically possible that the target machine might |
2619 | not be able to perform either shift and hence we would |
2620 | be making two libcalls rather than just the one for the |
2621 | shift (similarly if IOR could not be done). We will allow |
2622 | this extremely unlikely lossage to avoid complicating the |
2623 | code below. */ |
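	      /* E.g., rotating a 32-bit A left by 8 computes
		 (A << 8) | ((unsigned) A >> 24).  */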
2624 | |
2625 | rtx subtarget = target == shifted ? 0 : target; |
2626 | rtx new_amount, other_amount; |
2627 | rtx temp1; |
2628 | |
2629 | new_amount = op1; |
2630 | if (op1 == const0_rtx) |
2631 | return shifted; |
2632 | else if (CONST_INT_P (op1)) |
2633 | other_amount = gen_int_shift_amount |
	      (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2635 | else |
2636 | { |
2637 | other_amount |
		= simplify_gen_unary (NEG, GET_MODE (op1),
				      op1, GET_MODE (op1));
	      HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
	      other_amount
		= simplify_gen_binary (AND, GET_MODE (op1), other_amount,
				       gen_int_mode (mask, GET_MODE (op1)));
2644 | } |
2645 | |
2646 | shifted = force_reg (mode, shifted); |
2647 | |
	      temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
				     mode, shifted, new_amount, 0, 1);
	      temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
				      mode, shifted, other_amount,
				      subtarget, 1);
2653 | return expand_binop (mode, ior_optab, temp, temp1, target, |
2654 | unsignedp, methods); |
2655 | } |
2656 | |
2657 | temp = expand_binop (mode, |
2658 | left ? lrotate_optab : rrotate_optab, |
2659 | shifted, op1, target, unsignedp, methods); |
2660 | } |
2661 | else if (unsignedp) |
2662 | temp = expand_binop (mode, |
2663 | left ? lshift_optab : rshift_uns_optab, |
2664 | shifted, op1, target, unsignedp, methods); |
2665 | |
2666 | /* Do arithmetic shifts. |
2667 | Also, if we are going to widen the operand, we can just as well |
2668 | use an arithmetic right-shift instead of a logical one. */ |
2669 | if (temp == 0 && ! rotate |
2670 | && (! unsignedp || (! left && methods == OPTAB_WIDEN))) |
2671 | { |
2672 | enum optab_methods methods1 = methods; |
2673 | |
2674 | /* If trying to widen a log shift to an arithmetic shift, |
2675 | don't accept an arithmetic shift of the same size. */ |
2676 | if (unsignedp) |
2677 | methods1 = OPTAB_MUST_WIDEN; |
2678 | |
2679 | /* Arithmetic shift */ |
2680 | |
2681 | temp = expand_binop (mode, |
2682 | left ? lshift_optab : rshift_arith_optab, |
2683 | shifted, op1, target, unsignedp, methods1); |
2684 | } |
2685 | |
2686 | /* We used to try extzv here for logical right shifts, but that was |
2687 | only useful for one machine, the VAX, and caused poor code |
2688 | generation there for lshrdi3, so the code was deleted and a |
2689 | define_expand for lshrsi3 was added to vax.md. */ |
2690 | } |
2691 | |
2692 | gcc_assert (temp != NULL_RTX || may_fail); |
2693 | return temp; |
2694 | } |
2695 | |
2696 | /* Output a shift instruction for expression code CODE, |
2697 | with SHIFTED being the rtx for the value to shift, |
2698 | and AMOUNT the amount to shift by. |
2699 | Store the result in the rtx TARGET, if that is convenient. |
2700 | If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. |
2701 | Return the rtx for where the value is. */ |
2702 | |
2703 | rtx |
2704 | expand_shift (enum tree_code code, machine_mode mode, rtx shifted, |
2705 | poly_int64 amount, rtx target, int unsignedp) |
2706 | { |
2707 | return expand_shift_1 (code, mode, shifted, |
			 gen_int_shift_amount (mode, amount),
2709 | target, unsignedp); |
2710 | } |
2711 | |
2712 | /* Likewise, but return 0 if that cannot be done. */ |
2713 | |
2714 | rtx |
2715 | maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted, |
2716 | int amount, rtx target, int unsignedp) |
2717 | { |
2718 | return expand_shift_1 (code, mode, |
			 shifted, GEN_INT (amount), target, unsignedp, true);
2720 | } |
2721 | |
2722 | /* Output a shift instruction for expression code CODE, |
2723 | with SHIFTED being the rtx for the value to shift, |
2724 | and AMOUNT the tree for the amount to shift by. |
2725 | Store the result in the rtx TARGET, if that is convenient. |
2726 | If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. |
2727 | Return the rtx for where the value is. */ |
2728 | |
2729 | rtx |
2730 | expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted, |
2731 | tree amount, rtx target, int unsignedp) |
2732 | { |
2733 | return expand_shift_1 (code, mode, |
			 shifted, expand_normal (amount), target, unsignedp);
2735 | } |
2736 | |
2737 | |
2738 | static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, |
2739 | const struct mult_cost *, machine_mode mode); |
2740 | static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx, |
2741 | const struct algorithm *, enum mult_variant); |
2742 | static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int); |
2743 | static rtx extract_high_half (scalar_int_mode, rtx); |
2744 | static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int); |
2745 | static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx, |
2746 | int, int); |
2747 | /* Compute and return the best algorithm for multiplying by T. |
   The algorithm must cost less than COST_LIMIT.
   If retval.cost >= COST_LIMIT, no algorithm was found and all
   other fields of the returned struct are undefined.
2751 | MODE is the machine mode of the multiplication. */ |
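/* E.g., for T == 10 one candidate algorithm is T == 5 followed by a
   left shift of 1, with T == 5 in turn synthesized as (x << 2) + x.  */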
2752 | |
2753 | static void |
2754 | synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, |
2755 | const struct mult_cost *cost_limit, machine_mode mode) |
2756 | { |
2757 | int m; |
2758 | struct algorithm *alg_in, *best_alg; |
2759 | struct mult_cost best_cost; |
2760 | struct mult_cost new_limit; |
2761 | int op_cost, op_latency; |
2762 | unsigned HOST_WIDE_INT orig_t = t; |
2763 | unsigned HOST_WIDE_INT q; |
2764 | int maxm, hash_index; |
2765 | bool cache_hit = false; |
2766 | enum alg_code cache_alg = alg_zero; |
2767 | bool speed = optimize_insn_for_speed_p (); |
2768 | scalar_int_mode imode; |
2769 | struct alg_hash_entry *entry_ptr; |
2770 | |
2771 | /* Indicate that no algorithm is yet found. If no algorithm |
2772 | is found, this value will be returned and indicate failure. */ |
2773 | alg_out->cost.cost = cost_limit->cost + 1; |
2774 | alg_out->cost.latency = cost_limit->latency + 1; |
2775 | |
2776 | if (cost_limit->cost < 0 |
2777 | || (cost_limit->cost == 0 && cost_limit->latency <= 0)) |
2778 | return; |
2779 | |
2780 | /* Be prepared for vector modes. */ |
2781 | imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode)); |
2782 | |
2783 | maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode)); |
2784 | |
2785 | /* Restrict the bits of "t" to the multiplication's mode. */ |
2786 | t &= GET_MODE_MASK (imode); |
2787 | |
2788 | /* t == 1 can be done in zero cost. */ |
2789 | if (t == 1) |
2790 | { |
2791 | alg_out->ops = 1; |
2792 | alg_out->cost.cost = 0; |
2793 | alg_out->cost.latency = 0; |
2794 | alg_out->op[0] = alg_m; |
2795 | return; |
2796 | } |
2797 | |
2798 | /* t == 0 sometimes has a cost. If it does and it exceeds our limit, |
2799 | fail now. */ |
2800 | if (t == 0) |
2801 | { |
2802 | if (MULT_COST_LESS (cost_limit, zero_cost (speed))) |
2803 | return; |
2804 | else |
2805 | { |
2806 | alg_out->ops = 1; |
2807 | alg_out->cost.cost = zero_cost (speed); |
2808 | alg_out->cost.latency = zero_cost (speed); |
2809 | alg_out->op[0] = alg_zero; |
2810 | return; |
2811 | } |
2812 | } |
2813 | |
2814 | /* We'll be needing a couple extra algorithm structures now. */ |
2815 | |
2816 | alg_in = XALLOCA (struct algorithm); |
2817 | best_alg = XALLOCA (struct algorithm); |
2818 | best_cost = *cost_limit; |
2819 | |
2820 | /* Compute the hash index. */ |
2821 | hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES; |
2822 | |
2823 | /* See if we already know what to do for T. */ |
  entry_ptr = alg_hash_entry_ptr (hash_index);
2825 | if (entry_ptr->t == t |
2826 | && entry_ptr->mode == mode |
2827 | && entry_ptr->speed == speed |
2828 | && entry_ptr->alg != alg_unknown) |
2829 | { |
2830 | cache_alg = entry_ptr->alg; |
2831 | |
2832 | if (cache_alg == alg_impossible) |
2833 | { |
2834 | /* The cache tells us that it's impossible to synthesize |
2835 | multiplication by T within entry_ptr->cost. */ |
2836 | if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit)) |
2837 | /* COST_LIMIT is at least as restrictive as the one |
2838 | recorded in the hash table, in which case we have no |
2839 | hope of synthesizing a multiplication. Just |
2840 | return. */ |
2841 | return; |
2842 | |
2843 | /* If we get here, COST_LIMIT is less restrictive than the |
2844 | one recorded in the hash table, so we may be able to |
2845 | synthesize a multiplication. Proceed as if we didn't |
2846 | have the cache entry. */ |
2847 | } |
2848 | else |
2849 | { |
2850 | if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost)) |
2851 | /* The cached algorithm shows that this multiplication |
2852 | requires more cost than COST_LIMIT. Just return. This |
2853 | way, we don't clobber this cache entry with |
2854 | alg_impossible but retain useful information. */ |
2855 | return; |
2856 | |
2857 | cache_hit = true; |
2858 | |
2859 | switch (cache_alg) |
2860 | { |
2861 | case alg_shift: |
2862 | goto do_alg_shift; |
2863 | |
2864 | case alg_add_t_m2: |
2865 | case alg_sub_t_m2: |
2866 | goto do_alg_addsub_t_m2; |
2867 | |
2868 | case alg_add_factor: |
2869 | case alg_sub_factor: |
2870 | goto do_alg_addsub_factor; |
2871 | |
2872 | case alg_add_t2_m: |
2873 | goto do_alg_add_t2_m; |
2874 | |
2875 | case alg_sub_t2_m: |
2876 | goto do_alg_sub_t2_m; |
2877 | |
2878 | default: |
2879 | gcc_unreachable (); |
2880 | } |
2881 | } |
2882 | } |
2883 | |
2884 | /* If we have a group of zero bits at the low-order part of T, try |
2885 | multiplying by the remaining bits and then doing a shift. */ |
2886 | |
2887 | if ((t & 1) == 0) |
2888 | { |
2889 | do_alg_shift: |
      m = ctz_or_zero (t); /* m = number of low zero bits */
2891 | if (m < maxm) |
2892 | { |
2893 | q = t >> m; |
2894 | /* The function expand_shift will choose between a shift and |
2895 | a sequence of additions, so the observed cost is given as |
2896 | MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */ |
2897 | op_cost = m * add_cost (speed, mode); |
          if (shift_cost (speed, mode, m) < op_cost)
            op_cost = shift_cost (speed, mode, m);
2900 | new_limit.cost = best_cost.cost - op_cost; |
2901 | new_limit.latency = best_cost.latency - op_cost; |
          synth_mult (alg_in, q, &new_limit, mode);
2903 | |
2904 | alg_in->cost.cost += op_cost; |
2905 | alg_in->cost.latency += op_cost; |
2906 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) |
2907 | { |
2908 | best_cost = alg_in->cost; |
              std::swap (alg_in, best_alg);
2910 | best_alg->log[best_alg->ops] = m; |
2911 | best_alg->op[best_alg->ops] = alg_shift; |
2912 | } |
2913 | |
2914 | /* See if treating ORIG_T as a signed number yields a better |
2915 | sequence. Try this sequence only for a negative ORIG_T |
2916 | as it would be useless for a non-negative ORIG_T. */ |
2917 | if ((HOST_WIDE_INT) orig_t < 0) |
2918 | { |
2919 | /* Shift ORIG_T as follows because a right shift of a |
2920 | negative-valued signed type is implementation |
2921 | defined. */ |
2922 | q = ~(~orig_t >> m); |
2923 | /* The function expand_shift will choose between a shift |
2924 | and a sequence of additions, so the observed cost is |
2925 | given as MIN (m * add_cost(speed, mode), |
2926 | shift_cost(speed, mode, m)). */ |
2927 | op_cost = m * add_cost (speed, mode); |
              if (shift_cost (speed, mode, m) < op_cost)
                op_cost = shift_cost (speed, mode, m);
              new_limit.cost = best_cost.cost - op_cost;
              new_limit.latency = best_cost.latency - op_cost;
              synth_mult (alg_in, q, &new_limit, mode);
2933 | |
2934 | alg_in->cost.cost += op_cost; |
2935 | alg_in->cost.latency += op_cost; |
2936 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) |
2937 | { |
2938 | best_cost = alg_in->cost; |
                  std::swap (alg_in, best_alg);
2940 | best_alg->log[best_alg->ops] = m; |
2941 | best_alg->op[best_alg->ops] = alg_shift; |
2942 | } |
2943 | } |
2944 | } |
2945 | if (cache_hit) |
2946 | goto done; |
2947 | } |
2948 | |
2949 | /* If we have an odd number, add or subtract one. */ |
2950 | if ((t & 1) != 0) |
2951 | { |
2952 | unsigned HOST_WIDE_INT w; |
2953 | |
2954 | do_alg_addsub_t_m2: |
2955 | for (w = 1; (w & t) != 0; w <<= 1) |
2956 | ; |
2957 | /* If T was -1, then W will be zero after the loop. This is another |
2958 | case where T ends with ...111. Handling this with (T + 1) and |
2959 | subtract 1 produces slightly better code and results in algorithm |
2960 | selection much faster than treating it like the ...0111 case |
2961 | below. */ |
2962 | if (w == 0 |
2963 | || (w > 2 |
2964 | /* Reject the case where t is 3. |
2965 | Thus we prefer addition in that case. */ |
2966 | && t != 3)) |
2967 | { |
2968 | /* T ends with ...111. Multiply by (T + 1) and subtract T. */ |
2969 | |
2970 | op_cost = add_cost (speed, mode); |
2971 | new_limit.cost = best_cost.cost - op_cost; |
2972 | new_limit.latency = best_cost.latency - op_cost; |
          synth_mult (alg_in, t + 1, &new_limit, mode);
2974 | |
2975 | alg_in->cost.cost += op_cost; |
2976 | alg_in->cost.latency += op_cost; |
2977 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) |
2978 | { |
2979 | best_cost = alg_in->cost; |
              std::swap (alg_in, best_alg);
2981 | best_alg->log[best_alg->ops] = 0; |
2982 | best_alg->op[best_alg->ops] = alg_sub_t_m2; |
2983 | } |
2984 | } |
2985 | else |
2986 | { |
2987 | /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */ |
2988 | |
2989 | op_cost = add_cost (speed, mode); |
2990 | new_limit.cost = best_cost.cost - op_cost; |
2991 | new_limit.latency = best_cost.latency - op_cost; |
          synth_mult (alg_in, t - 1, &new_limit, mode);
2993 | |
2994 | alg_in->cost.cost += op_cost; |
2995 | alg_in->cost.latency += op_cost; |
2996 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) |
2997 | { |
2998 | best_cost = alg_in->cost; |
              std::swap (alg_in, best_alg);
3000 | best_alg->log[best_alg->ops] = 0; |
3001 | best_alg->op[best_alg->ops] = alg_add_t_m2; |
3002 | } |
3003 | } |
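      /* Illustrative examples: t = 7 (binary ...111) is handled by the
         first branch above as x*8 - x, while t = 5 (binary ...101) is
         handled by the second as x*4 + x.  */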
3004 | |
3005 | /* We may be able to calculate a * -7, a * -15, a * -31, etc |
3006 | quickly with a - a * n for some appropriate constant n. */ |
      m = exact_log2 (-orig_t + 1);
      if (m >= 0 && m < maxm)
        {
          op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
          /* If the target has a cheap shift-and-subtract insn use
             that in preference to a shift insn followed by a sub insn.
             Assume that the shift-and-sub is "atomic" with a latency
             equal to its cost, otherwise assume that on superscalar
             hardware the shift may be executed concurrently with the
             earlier steps in the algorithm.  */
          if (shiftsub1_cost (speed, mode, m) <= op_cost)
            {
              op_cost = shiftsub1_cost (speed, mode, m);
3020 | op_latency = op_cost; |
3021 | } |
3022 | else |
3023 | op_latency = add_cost (speed, mode); |
3024 | |
3025 | new_limit.cost = best_cost.cost - op_cost; |
3026 | new_limit.latency = best_cost.latency - op_latency; |
          synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
                      &new_limit, mode);
3029 | |
3030 | alg_in->cost.cost += op_cost; |
3031 | alg_in->cost.latency += op_latency; |
3032 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) |
3033 | { |
3034 | best_cost = alg_in->cost; |
              std::swap (alg_in, best_alg);
3036 | best_alg->log[best_alg->ops] = m; |
3037 | best_alg->op[best_alg->ops] = alg_sub_t_m2; |
3038 | } |
3039 | } |
3040 | |
3041 | if (cache_hit) |
3042 | goto done; |
3043 | } |
3044 | |
3045 | /* Look for factors of t of the form |
3046 | t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)). |
3047 | If we find such a factor, we can multiply by t using an algorithm that |
3048 | multiplies by q, shift the result by m and add/subtract it to itself. |
3049 | |
3050 | We search for large factors first and loop down, even if large factors |
3051 | are less probable than small; if we find a large factor we will find a |
3052 | good sequence quickly, and therefore be able to prune (by decreasing |
3053 | COST_LIMIT) the search. */ |
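  /* For instance, t = 45 = (2**3 + 1) * (2**2 + 1): first synthesize
     y = (x << 3) + x = 9*x, then (y << 2) + y = 45*x, two
     shift-and-add steps.  */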
3054 | |
3055 | do_alg_addsub_factor: |
  for (m = floor_log2 (t - 1); m >= 2; m--)
3057 | { |
3058 | unsigned HOST_WIDE_INT d; |
3059 | |
3060 | d = (HOST_WIDE_INT_1U << m) + 1; |
3061 | if (t % d == 0 && t > d && m < maxm |
3062 | && (!cache_hit || cache_alg == alg_add_factor)) |
3063 | { |
          op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
          if (shiftadd_cost (speed, mode, m) <= op_cost)
            op_cost = shiftadd_cost (speed, mode, m);

          op_latency = op_cost;

          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_latency;
          synth_mult (alg_in, t / d, &new_limit, mode);
3074 | |
3075 | alg_in->cost.cost += op_cost; |
3076 | alg_in->cost.latency += op_latency; |
3077 | if (alg_in->cost.latency < op_cost) |
3078 | alg_in->cost.latency = op_cost; |
3079 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) |
3080 | { |
3081 | best_cost = alg_in->cost; |
              std::swap (alg_in, best_alg);
3083 | best_alg->log[best_alg->ops] = m; |
3084 | best_alg->op[best_alg->ops] = alg_add_factor; |
3085 | } |
3086 | /* Other factors will have been taken care of in the recursion. */ |
3087 | break; |
3088 | } |
3089 | |
3090 | d = (HOST_WIDE_INT_1U << m) - 1; |
3091 | if (t % d == 0 && t > d && m < maxm |
3092 | && (!cache_hit || cache_alg == alg_sub_factor)) |
3093 | { |
          op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
          if (shiftsub0_cost (speed, mode, m) <= op_cost)
            op_cost = shiftsub0_cost (speed, mode, m);

          op_latency = op_cost;

          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_latency;
          synth_mult (alg_in, t / d, &new_limit, mode);
3103 | |
3104 | alg_in->cost.cost += op_cost; |
3105 | alg_in->cost.latency += op_latency; |
3106 | if (alg_in->cost.latency < op_cost) |
3107 | alg_in->cost.latency = op_cost; |
3108 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) |
3109 | { |
3110 | best_cost = alg_in->cost; |
              std::swap (alg_in, best_alg);
3112 | best_alg->log[best_alg->ops] = m; |
3113 | best_alg->op[best_alg->ops] = alg_sub_factor; |
3114 | } |
3115 | break; |
3116 | } |
3117 | } |
3118 | if (cache_hit) |
3119 | goto done; |
3120 | |
3121 | /* Try shift-and-add (load effective address) instructions, |
3122 | i.e. do a*3, a*5, a*9. */ |
3123 | if ((t & 1) != 0) |
3124 | { |
3125 | do_alg_add_t2_m: |
3126 | q = t - 1; |
      m = ctz_hwi (q);
      if (q && m < maxm)
        {
          op_cost = shiftadd_cost (speed, mode, m);
          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_cost;
          synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3134 | |
3135 | alg_in->cost.cost += op_cost; |
3136 | alg_in->cost.latency += op_cost; |
3137 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) |
3138 | { |
3139 | best_cost = alg_in->cost; |
              std::swap (alg_in, best_alg);
3141 | best_alg->log[best_alg->ops] = m; |
3142 | best_alg->op[best_alg->ops] = alg_add_t2_m; |
3143 | } |
3144 | } |
3145 | if (cache_hit) |
3146 | goto done; |
3147 | |
3148 | do_alg_sub_t2_m: |
3149 | q = t + 1; |
      m = ctz_hwi (q);
      if (q && m < maxm)
        {
          op_cost = shiftsub0_cost (speed, mode, m);
          new_limit.cost = best_cost.cost - op_cost;
          new_limit.latency = best_cost.latency - op_cost;
          synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3157 | |
3158 | alg_in->cost.cost += op_cost; |
3159 | alg_in->cost.latency += op_cost; |
3160 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) |
3161 | { |
3162 | best_cost = alg_in->cost; |
              std::swap (alg_in, best_alg);
3164 | best_alg->log[best_alg->ops] = m; |
3165 | best_alg->op[best_alg->ops] = alg_sub_t2_m; |
3166 | } |
3167 | } |
3168 | if (cache_hit) |
3169 | goto done; |
3170 | } |
3171 | |
3172 | done: |
3173 | /* If best_cost has not decreased, we have not found any algorithm. */ |
3174 | if (!CHEAPER_MULT_COST (&best_cost, cost_limit)) |
3175 | { |
3176 | /* We failed to find an algorithm. Record alg_impossible for |
3177 | this case (that is, <T, MODE, COST_LIMIT>) so that next time |
3178 | we are asked to find an algorithm for T within the same or |
3179 | lower COST_LIMIT, we can immediately return to the |
3180 | caller. */ |
3181 | entry_ptr->t = t; |
3182 | entry_ptr->mode = mode; |
3183 | entry_ptr->speed = speed; |
3184 | entry_ptr->alg = alg_impossible; |
3185 | entry_ptr->cost = *cost_limit; |
3186 | return; |
3187 | } |
3188 | |
3189 | /* Cache the result. */ |
3190 | if (!cache_hit) |
3191 | { |
3192 | entry_ptr->t = t; |
3193 | entry_ptr->mode = mode; |
3194 | entry_ptr->speed = speed; |
3195 | entry_ptr->alg = best_alg->op[best_alg->ops]; |
3196 | entry_ptr->cost.cost = best_cost.cost; |
3197 | entry_ptr->cost.latency = best_cost.latency; |
3198 | } |
3199 | |
  /* If the sequence we found is too long for `struct algorithm'
     to record, make this search fail.  */
3202 | if (best_alg->ops == MAX_BITS_PER_WORD) |
3203 | return; |
3204 | |
3205 | /* Copy the algorithm from temporary space to the space at alg_out. |
3206 | We avoid using structure assignment because the majority of |
3207 | best_alg is normally undefined, and this is a critical function. */ |
3208 | alg_out->ops = best_alg->ops + 1; |
3209 | alg_out->cost = best_cost; |
  memcpy (alg_out->op, best_alg->op,
          alg_out->ops * sizeof *alg_out->op);
  memcpy (alg_out->log, best_alg->log,
          alg_out->ops * sizeof *alg_out->log);
3214 | } |
3215 | |
3216 | /* Find the cheapest way of multiplying a value of mode MODE by VAL. |
3217 | Try three variations: |
3218 | |
3219 | - a shift/add sequence based on VAL itself |
3220 | - a shift/add sequence based on -VAL, followed by a negation |
3221 | - a shift/add sequence based on VAL - 1, followed by an addition. |
3222 | |
3223 | Return true if the cheapest of these cost less than MULT_COST, |
3224 | describing the algorithm in *ALG and final fixup in *VARIANT. */ |
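/* For example, VAL = 7 might be synthesized directly as (x << 3) - x,
   while for VAL = -7 the negate_variant (synthesize x*7, then negate)
   is often cheaper than any direct sequence; the cost comparisons
   below decide between such variants.  */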
3225 | |
3226 | bool |
3227 | choose_mult_variant (machine_mode mode, HOST_WIDE_INT val, |
3228 | struct algorithm *alg, enum mult_variant *variant, |
3229 | int mult_cost) |
3230 | { |
3231 | struct algorithm alg2; |
3232 | struct mult_cost limit; |
3233 | int op_cost; |
3234 | bool speed = optimize_insn_for_speed_p (); |
3235 | |
3236 | /* Fail quickly for impossible bounds. */ |
3237 | if (mult_cost < 0) |
3238 | return false; |
3239 | |
3240 | /* Ensure that mult_cost provides a reasonable upper bound. |
3241 | Any constant multiplication can be performed with less |
3242 | than 2 * bits additions. */ |
3243 | op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode); |
3244 | if (mult_cost > op_cost) |
3245 | mult_cost = op_cost; |
3246 | |
3247 | *variant = basic_variant; |
3248 | limit.cost = mult_cost; |
3249 | limit.latency = mult_cost; |
  synth_mult (alg, val, &limit, mode);
3251 | |
3252 | /* This works only if the inverted value actually fits in an |
3253 | `unsigned int' */ |
3254 | if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode)) |
3255 | { |
3256 | op_cost = neg_cost (speed, mode); |
3257 | if (MULT_COST_LESS (&alg->cost, mult_cost)) |
3258 | { |
3259 | limit.cost = alg->cost.cost - op_cost; |
3260 | limit.latency = alg->cost.latency - op_cost; |
3261 | } |
3262 | else |
3263 | { |
3264 | limit.cost = mult_cost - op_cost; |
3265 | limit.latency = mult_cost - op_cost; |
3266 | } |
3267 | |
      synth_mult (&alg2, -val, &limit, mode);
3269 | alg2.cost.cost += op_cost; |
3270 | alg2.cost.latency += op_cost; |
3271 | if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost)) |
3272 | *alg = alg2, *variant = negate_variant; |
3273 | } |
3274 | |
3275 | /* This proves very useful for division-by-constant. */ |
3276 | op_cost = add_cost (speed, mode); |
3277 | if (MULT_COST_LESS (&alg->cost, mult_cost)) |
3278 | { |
3279 | limit.cost = alg->cost.cost - op_cost; |
3280 | limit.latency = alg->cost.latency - op_cost; |
3281 | } |
3282 | else |
3283 | { |
3284 | limit.cost = mult_cost - op_cost; |
3285 | limit.latency = mult_cost - op_cost; |
3286 | } |
3287 | |
3288 | if (val != HOST_WIDE_INT_MIN |
3289 | || GET_MODE_UNIT_PRECISION (mode) == HOST_BITS_PER_WIDE_INT) |
3290 | { |
      synth_mult (&alg2, val - HOST_WIDE_INT_1U, &limit, mode);
3292 | alg2.cost.cost += op_cost; |
3293 | alg2.cost.latency += op_cost; |
3294 | if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost)) |
3295 | *alg = alg2, *variant = add_variant; |
3296 | } |
3297 | |
3298 | return MULT_COST_LESS (&alg->cost, mult_cost); |
3299 | } |
3300 | |
3301 | /* A subroutine of expand_mult, used for constant multiplications. |
3302 | Multiply OP0 by VAL in mode MODE, storing the result in TARGET if |
3303 | convenient. Use the shift/add sequence described by ALG and apply |
3304 | the final fixup specified by VARIANT. */ |
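/* As an illustration, one possible algorithm for VAL = 10 is
   { alg_m, alg_add_t2_m (log 2), alg_shift (log 1) }: ACCUM = x,
   then ACCUM = (ACCUM << 2) + x = 5*x, then ACCUM <<= 1 = 10*x,
   with VAL_SO_FAR tracking 1 -> 5 -> 10 as a consistency check.  */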
3305 | |
3306 | static rtx |
3307 | expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val, |
3308 | rtx target, const struct algorithm *alg, |
3309 | enum mult_variant variant) |
3310 | { |
3311 | unsigned HOST_WIDE_INT val_so_far; |
3312 | rtx_insn *insn; |
3313 | rtx accum, tem; |
3314 | int opno; |
3315 | machine_mode nmode; |
3316 | |
3317 | /* Avoid referencing memory over and over and invalid sharing |
3318 | on SUBREGs. */ |
3319 | op0 = force_reg (mode, op0); |
3320 | |
3321 | /* ACCUM starts out either as OP0 or as a zero, depending on |
3322 | the first operation. */ |
3323 | |
3324 | if (alg->op[0] == alg_zero) |
3325 | { |
3326 | accum = copy_to_mode_reg (mode, CONST0_RTX (mode)); |
3327 | val_so_far = 0; |
3328 | } |
3329 | else if (alg->op[0] == alg_m) |
3330 | { |
3331 | accum = copy_to_mode_reg (mode, op0); |
3332 | val_so_far = 1; |
3333 | } |
3334 | else |
3335 | gcc_unreachable (); |
3336 | |
3337 | for (opno = 1; opno < alg->ops; opno++) |
3338 | { |
3339 | int log = alg->log[opno]; |
3340 | rtx shift_subtarget = optimize ? 0 : accum; |
3341 | rtx add_target |
3342 | = (opno == alg->ops - 1 && target != 0 && variant != add_variant |
3343 | && !optimize) |
3344 | ? target : 0; |
3345 | rtx accum_target = optimize ? 0 : accum; |
3346 | rtx accum_inner; |
3347 | |
3348 | switch (alg->op[opno]) |
3349 | { |
3350 | case alg_shift: |
          tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3352 | /* REG_EQUAL note will be attached to the following insn. */ |
3353 | emit_move_insn (accum, tem); |
3354 | val_so_far <<= log; |
3355 | break; |
3356 | |
3357 | case alg_add_t_m2: |
          tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3359 | accum = force_operand (gen_rtx_PLUS (mode, accum, tem), |
3360 | add_target ? add_target : accum_target); |
3361 | val_so_far += HOST_WIDE_INT_1U << log; |
3362 | break; |
3363 | |
3364 | case alg_sub_t_m2: |
          tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3366 | accum = force_operand (gen_rtx_MINUS (mode, accum, tem), |
3367 | add_target ? add_target : accum_target); |
3368 | val_so_far -= HOST_WIDE_INT_1U << log; |
3369 | break; |
3370 | |
3371 | case alg_add_t2_m: |
          accum = expand_shift (LSHIFT_EXPR, mode, accum,
                                log, shift_subtarget, 0);
3374 | accum = force_operand (gen_rtx_PLUS (mode, accum, op0), |
3375 | add_target ? add_target : accum_target); |
3376 | val_so_far = (val_so_far << log) + 1; |
3377 | break; |
3378 | |
3379 | case alg_sub_t2_m: |
          accum = expand_shift (LSHIFT_EXPR, mode, accum,
                                log, shift_subtarget, 0);
3382 | accum = force_operand (gen_rtx_MINUS (mode, accum, op0), |
3383 | add_target ? add_target : accum_target); |
3384 | val_so_far = (val_so_far << log) - 1; |
3385 | break; |
3386 | |
3387 | case alg_add_factor: |
          tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3389 | accum = force_operand (gen_rtx_PLUS (mode, accum, tem), |
3390 | add_target ? add_target : accum_target); |
3391 | val_so_far += val_so_far << log; |
3392 | break; |
3393 | |
3394 | case alg_sub_factor: |
          tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3396 | accum = force_operand (gen_rtx_MINUS (mode, tem, accum), |
3397 | (add_target |
3398 | ? add_target : (optimize ? 0 : tem))); |
3399 | val_so_far = (val_so_far << log) - val_so_far; |
3400 | break; |
3401 | |
3402 | default: |
3403 | gcc_unreachable (); |
3404 | } |
3405 | |
3406 | if (SCALAR_INT_MODE_P (mode)) |
3407 | { |
3408 | /* Write a REG_EQUAL note on the last insn so that we can cse |
3409 | multiplication sequences. Note that if ACCUM is a SUBREG, |
3410 | we've set the inner register and must properly indicate that. */ |
3411 | tem = op0, nmode = mode; |
3412 | accum_inner = accum; |
3413 | if (GET_CODE (accum) == SUBREG) |
3414 | { |
3415 | accum_inner = SUBREG_REG (accum); |
3416 | nmode = GET_MODE (accum_inner); |
3417 | tem = gen_lowpart (nmode, op0); |
3418 | } |
3419 | |
3420 | /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG. |
3421 | In that case, only the low bits of accum would be guaranteed to |
3422 | be equal to the content of the REG_EQUAL note, the upper bits |
3423 | can be anything. */ |
          if (!paradoxical_subreg_p (tem))
3425 | { |
3426 | insn = get_last_insn (); |
              wide_int wval_so_far
                = wi::uhwi (val_so_far,
                            GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3430 | rtx c = immed_wide_int_const (wval_so_far, nmode); |
3431 | set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c), |
3432 | accum_inner); |
3433 | } |
3434 | } |
3435 | } |
3436 | |
3437 | if (variant == negate_variant) |
3438 | { |
3439 | val_so_far = -val_so_far; |
3440 | accum = expand_unop (mode, neg_optab, accum, target, 0); |
3441 | } |
3442 | else if (variant == add_variant) |
3443 | { |
3444 | val_so_far = val_so_far + 1; |
3445 | accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target); |
3446 | } |
3447 | |
3448 | /* Compare only the bits of val and val_so_far that are significant |
3449 | in the result mode, to avoid sign-/zero-extension confusion. */ |
3450 | nmode = GET_MODE_INNER (mode); |
3451 | val &= GET_MODE_MASK (nmode); |
3452 | val_so_far &= GET_MODE_MASK (nmode); |
3453 | gcc_assert (val == (HOST_WIDE_INT) val_so_far); |
3454 | |
3455 | return accum; |
3456 | } |
3457 | |
3458 | /* Perform a multiplication and return an rtx for the result. |
3459 | MODE is mode of value; OP0 and OP1 are what to multiply (rtx's); |
3460 | TARGET is a suggestion for where to store the result (an rtx). |
3461 | |
3462 | We check specially for a constant integer as OP1. |
3463 | If you want this check for OP0 as well, then before calling |
3464 | you should swap the two operands if OP0 would be constant. */ |
3465 | |
3466 | rtx |
3467 | expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target, |
3468 | int unsignedp, bool no_libcall) |
3469 | { |
3470 | enum mult_variant variant; |
3471 | struct algorithm algorithm; |
3472 | rtx scalar_op1; |
3473 | int max_cost; |
3474 | bool speed = optimize_insn_for_speed_p (); |
3475 | bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp; |
3476 | |
3477 | if (CONSTANT_P (op0)) |
    std::swap (op0, op1);
3479 | |
3480 | /* For vectors, there are several simplifications that can be made if |
3481 | all elements of the vector constant are identical. */ |
  scalar_op1 = unwrap_const_vec_duplicate (op1);
3483 | |
3484 | if (INTEGRAL_MODE_P (mode)) |
3485 | { |
3486 | rtx fake_reg; |
3487 | HOST_WIDE_INT coeff; |
3488 | bool is_neg; |
3489 | int mode_bitsize; |
3490 | |
3491 | if (op1 == CONST0_RTX (mode)) |
3492 | return op1; |
3493 | if (op1 == CONST1_RTX (mode)) |
3494 | return op0; |
3495 | if (op1 == CONSTM1_RTX (mode)) |
3496 | return expand_unop (mode, do_trapv ? negv_optab : neg_optab, |
3497 | op0, target, 0); |
3498 | |
3499 | if (do_trapv) |
3500 | goto skip_synth; |
3501 | |
      /* If mode is integer vector mode, check if the backend supports
         vector lshift (by scalar or vector) at all.  If not, we can't use
         synthesized multiply.  */
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
          && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
          && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3508 | goto skip_synth; |
3509 | |
3510 | /* These are the operations that are potentially turned into |
3511 | a sequence of shifts and additions. */ |
3512 | mode_bitsize = GET_MODE_UNIT_BITSIZE (mode); |
3513 | |
3514 | /* synth_mult does an `unsigned int' multiply. As long as the mode is |
3515 | less than or equal in size to `unsigned int' this doesn't matter. |
3516 | If the mode is larger than `unsigned int', then synth_mult works |
3517 | only if the constant value exactly fits in an `unsigned int' without |
3518 | any truncation. This means that multiplying by negative values does |
3519 | not work; results are off by 2^32 on a 32 bit machine. */ |
3520 | if (CONST_INT_P (scalar_op1)) |
3521 | { |
3522 | coeff = INTVAL (scalar_op1); |
3523 | is_neg = coeff < 0; |
3524 | } |
3525 | #if TARGET_SUPPORTS_WIDE_INT |
3526 | else if (CONST_WIDE_INT_P (scalar_op1)) |
3527 | #else |
3528 | else if (CONST_DOUBLE_AS_INT_P (scalar_op1)) |
3529 | #endif |
3530 | { |
3531 | int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode)); |
3532 | /* Perfect power of 2 (other than 1, which is handled above). */ |
3533 | if (shift > 0) |
            return expand_shift (LSHIFT_EXPR, mode, op0,
                                 shift, target, unsignedp);
3536 | else |
3537 | goto skip_synth; |
3538 | } |
3539 | else |
3540 | goto skip_synth; |
3541 | |
3542 | /* We used to test optimize here, on the grounds that it's better to |
3543 | produce a smaller program when -O is not used. But this causes |
3544 | such a terrible slowdown sometimes that it seems better to always |
3545 | use synth_mult. */ |
3546 | |
3547 | /* Special case powers of two. */ |
3548 | if (EXACT_POWER_OF_2_OR_ZERO_P (coeff) |
3549 | && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)) |
        return expand_shift (LSHIFT_EXPR, mode, op0,
                             floor_log2 (coeff), target, unsignedp);
3552 | |
3553 | fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); |
3554 | |
3555 | /* Attempt to handle multiplication of DImode values by negative |
3556 | coefficients, by performing the multiplication by a positive |
3557 | multiplier and then inverting the result. */ |
3558 | if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT) |
3559 | { |
          /* It's safe to use -coeff even for INT_MIN, as the
             result is interpreted as an unsigned coefficient.
             Exclude cost of op0 from max_cost to match the cost
             calculation of the synth_mult.  */
          coeff = -(unsigned HOST_WIDE_INT) coeff;
          max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
                                    mode, speed)
                      - neg_cost (speed, mode));
3568 | if (max_cost <= 0) |
3569 | goto skip_synth; |
3570 | |
3571 | /* Special case powers of two. */ |
3572 | if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)) |
3573 | { |
              rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
                                       floor_log2 (coeff), target, unsignedp);
3576 | return expand_unop (mode, neg_optab, temp, target, 0); |
3577 | } |
3578 | |
          if (choose_mult_variant (mode, coeff, &algorithm, &variant,
                                   max_cost))
            {
              rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
                                            &algorithm, variant);
3584 | return expand_unop (mode, neg_optab, temp, target, 0); |
3585 | } |
3586 | goto skip_synth; |
3587 | } |
3588 | |
3589 | /* Exclude cost of op0 from max_cost to match the cost |
3590 | calculation of the synth_mult. */ |
      max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
      if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
        return expand_mult_const (mode, op0, coeff, target,
                                  &algorithm, variant);
3595 | } |
3596 | skip_synth: |
3597 | |
3598 | /* Expand x*2.0 as x+x. */ |
3599 | if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1) |
3600 | && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2)) |
3601 | { |
3602 | op0 = force_reg (GET_MODE (op0), op0); |
3603 | return expand_binop (mode, add_optab, op0, op0, |
3604 | target, unsignedp, |
3605 | no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN); |
3606 | } |
3607 | |
3608 | /* This used to use umul_optab if unsigned, but for non-widening multiply |
3609 | there is no difference between signed and unsigned. */ |
3610 | op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab, |
3611 | op0, op1, target, unsignedp, |
3612 | no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN); |
3613 | gcc_assert (op0 || no_libcall); |
3614 | return op0; |
3615 | } |
3616 | |
3617 | /* Return a cost estimate for multiplying a register by the given |
3618 | COEFFicient in the given MODE and SPEED. */ |
3619 | |
3620 | int |
3621 | mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed) |
3622 | { |
3623 | int max_cost; |
3624 | struct algorithm algorithm; |
3625 | enum mult_variant variant; |
3626 | |
3627 | rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); |
  max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
                           mode, speed);
  if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3631 | return algorithm.cost.cost; |
3632 | else |
3633 | return max_cost; |
3634 | } |
3635 | |
3636 | /* Perform a widening multiplication and return an rtx for the result. |
3637 | MODE is mode of value; OP0 and OP1 are what to multiply (rtx's); |
3638 | TARGET is a suggestion for where to store the result (an rtx). |
3639 | THIS_OPTAB is the optab we should use, it must be either umul_widen_optab |
3640 | or smul_widen_optab. |
3641 | |
3642 | We check specially for a constant integer as OP1, comparing the |
3643 | cost of a widening multiply against the cost of a sequence of shifts |
3644 | and adds. */ |
3645 | |
3646 | rtx |
3647 | expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target, |
3648 | int unsignedp, optab this_optab) |
3649 | { |
3650 | bool speed = optimize_insn_for_speed_p (); |
3651 | rtx cop1; |
3652 | |
3653 | if (CONST_INT_P (op1) |
3654 | && GET_MODE (op0) != VOIDmode |
      && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
                                this_optab == umul_widen_optab))
3657 | && CONST_INT_P (cop1) |
3658 | && (INTVAL (cop1) >= 0 |
3659 | || HWI_COMPUTABLE_MODE_P (mode))) |
3660 | { |
3661 | HOST_WIDE_INT coeff = INTVAL (cop1); |
3662 | int max_cost; |
3663 | enum mult_variant variant; |
3664 | struct algorithm algorithm; |
3665 | |
3666 | if (coeff == 0) |
3667 | return CONST0_RTX (mode); |
3668 | |
3669 | /* Special case powers of two. */ |
3670 | if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)) |
3671 | { |
3672 | op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab); |
          return expand_shift (LSHIFT_EXPR, mode, op0,
                               floor_log2 (coeff), target, unsignedp);
3675 | } |
3676 | |
3677 | /* Exclude cost of op0 from max_cost to match the cost |
3678 | calculation of the synth_mult. */ |
3679 | max_cost = mul_widen_cost (speed, mode); |
      if (choose_mult_variant (mode, coeff, &algorithm, &variant,
                               max_cost))
        {
          op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
          return expand_mult_const (mode, op0, coeff, target,
                                    &algorithm, variant);
3686 | } |
3687 | } |
3688 | return expand_binop (mode, this_optab, op0, op1, target, |
3689 | unsignedp, OPTAB_LIB_WIDEN); |
3690 | } |
3691 | |
3692 | /* Choose a minimal N + 1 bit approximation to 1/D that can be used to |
3693 | replace division by D, and put the least significant N bits of the result |
3694 | in *MULTIPLIER_PTR and return the most significant bit. |
3695 | |
3696 | The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the |
3697 | needed precision is in PRECISION (should be <= N). |
3698 | |
3699 | PRECISION should be as small as possible so this function can choose |
3700 | multiplier more freely. |
3701 | |
3702 | The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that |
3703 | is to be used for a final right shift is placed in *POST_SHIFT_PTR. |
3704 | |
3705 | Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR), |
3706 | where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */ |
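/* Worked example (assuming 64-bit HOST_WIDE_INT): for D = 7 with
   N = PRECISION = 32, lgup = 3, mlow = 2^35/7 = 0x124924924 and
   mhigh = (2^35 + 8)/7 = 0x124924925; no low bits can be dropped, so
   *MULTIPLIER_PTR = 0x24924925, *POST_SHIFT_PTR = 3, and the returned
   most significant bit is 1, i.e. the full multiplier is the 33-bit
   value 0x124924925.  */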
3707 | |
3708 | unsigned HOST_WIDE_INT |
3709 | choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision, |
3710 | unsigned HOST_WIDE_INT *multiplier_ptr, |
3711 | int *post_shift_ptr, int *lgup_ptr) |
3712 | { |
3713 | int lgup, post_shift; |
3714 | int pow, pow2; |
3715 | |
3716 | /* lgup = ceil(log2(divisor)); */ |
  lgup = ceil_log2 (d);
3718 | |
3719 | gcc_assert (lgup <= n); |
3720 | |
3721 | pow = n + lgup; |
3722 | pow2 = n + lgup - precision; |
3723 | |
3724 | /* mlow = 2^(N + lgup)/d */ |
  wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
  wide_int mlow = wi::udiv_trunc (val, d);

  /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
  val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
  wide_int mhigh = wi::udiv_trunc (val, d);
3731 | |
3732 | /* If precision == N, then mlow, mhigh exceed 2^N |
3733 | (but they do not exceed 2^(N+1)). */ |
3734 | |
3735 | /* Reduce to lowest terms. */ |
3736 | for (post_shift = lgup; post_shift > 0; post_shift--) |
3737 | { |
      unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
                                                       HOST_BITS_PER_WIDE_INT);
      unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
                                                       HOST_BITS_PER_WIDE_INT);
      if (ml_lo >= mh_lo)
        break;

      mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
      mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3747 | } |
3748 | |
3749 | *post_shift_ptr = post_shift; |
3750 | *lgup_ptr = lgup; |
3751 | if (n < HOST_BITS_PER_WIDE_INT) |
3752 | { |
3753 | unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1; |
3754 | *multiplier_ptr = mhigh.to_uhwi () & mask; |
3755 | return mhigh.to_uhwi () > mask; |
3756 | } |
3757 | else |
3758 | { |
3759 | *multiplier_ptr = mhigh.to_uhwi (); |
      return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3761 | } |
3762 | } |
3763 | |
3764 | /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is |
3765 | congruent to 1 (mod 2**N). */ |
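/* E.g. for X = 3 and N = 8 the iteration converges to Y = 171,
   and indeed 3 * 171 = 513 == 1 (mod 256).  */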
3766 | |
3767 | static unsigned HOST_WIDE_INT |
3768 | invert_mod2n (unsigned HOST_WIDE_INT x, int n) |
3769 | { |
3770 | /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */ |
3771 | |
3772 | /* The algorithm notes that the choice y = x satisfies |
3773 | x*y == 1 mod 2^3, since x is assumed odd. |
3774 | Each iteration doubles the number of bits of significance in y. */ |
3775 | |
3776 | unsigned HOST_WIDE_INT mask; |
3777 | unsigned HOST_WIDE_INT y = x; |
3778 | int nbit = 3; |
3779 | |
3780 | mask = (n == HOST_BITS_PER_WIDE_INT |
3781 | ? HOST_WIDE_INT_M1U |
3782 | : (HOST_WIDE_INT_1U << n) - 1); |
3783 | |
3784 | while (nbit < n) |
3785 | { |
3786 | y = y * (2 - x*y) & mask; /* Modulo 2^N */ |
3787 | nbit *= 2; |
3788 | } |
3789 | return y; |
3790 | } |
3791 | |
3792 | /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness |
3793 | flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the |
3794 | product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product |
3795 | to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to |
3796 | become signed. |
3797 | |
3798 | The result is put in TARGET if that is convenient. |
3799 | |
3800 | MODE is the mode of operation. */ |
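/* The adjustment used below follows from the identity, for N-bit x, y:
     signed_high (x, y) = unsigned_high (x, y)
                          - (x < 0 ? y : 0) - (y < 0 ? x : 0)
   (modulo 2^N), since reading a negative N-bit value as unsigned
   adds 2^N to it.  */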
3801 | |
3802 | rtx |
3803 | expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0, |
3804 | rtx op1, rtx target, int unsignedp) |
3805 | { |
3806 | rtx tem; |
3807 | enum rtx_code adj_code = unsignedp ? PLUS : MINUS; |
3808 | |
  tem = expand_shift (RSHIFT_EXPR, mode, op0,
                      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3811 | tem = expand_and (mode, tem, op1, NULL_RTX); |
3812 | adj_operand |
3813 | = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem), |
3814 | adj_operand); |
3815 | |
  tem = expand_shift (RSHIFT_EXPR, mode, op1,
                      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3818 | tem = expand_and (mode, tem, op0, NULL_RTX); |
3819 | target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem), |
3820 | target); |
3821 | |
3822 | return target; |
3823 | } |
3824 | |
3825 | /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */ |
3826 | |
3827 | static rtx |
extract_high_half (scalar_int_mode mode, rtx op)
3829 | { |
3830 | if (mode == word_mode) |
3831 | return gen_highpart (mode, op); |
3832 | |
  scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();

  op = expand_shift (RSHIFT_EXPR, wider_mode, op,
                     GET_MODE_BITSIZE (mode), 0, 1);
  return convert_modes (mode, wider_mode, op, 0);
3838 | } |
3839 | |
3840 | /* Like expmed_mult_highpart, but only consider using a multiplication |
3841 | optab. OP1 is an rtx for the constant operand. */ |
3842 | |
3843 | static rtx |
3844 | expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1, |
3845 | rtx target, int unsignedp, int max_cost) |
3846 | { |
3847 | rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode); |
3848 | optab moptab; |
3849 | rtx tem; |
3850 | int size; |
3851 | bool speed = optimize_insn_for_speed_p (); |
3852 | |
  scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3854 | |
3855 | size = GET_MODE_BITSIZE (mode); |
3856 | |
3857 | /* Firstly, try using a multiplication insn that only generates the needed |
3858 | high part of the product, and in the sign flavor of unsignedp. */ |
3859 | if (mul_highpart_cost (speed, mode) < max_cost) |
3860 | { |
3861 | moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab; |
3862 | tem = expand_binop (mode, moptab, op0, narrow_op1, target, |
3863 | unsignedp, OPTAB_DIRECT); |
3864 | if (tem) |
3865 | return tem; |
3866 | } |
3867 | |
3868 | /* Secondly, same as above, but use sign flavor opposite of unsignedp. |
3869 | Need to adjust the result after the multiplication. */ |
3870 | if (size - 1 < BITS_PER_WORD |
3871 | && (mul_highpart_cost (speed, mode) |
          + 2 * shift_cost (speed, mode, size-1)
3873 | + 4 * add_cost (speed, mode) < max_cost)) |
3874 | { |
3875 | moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab; |
3876 | tem = expand_binop (mode, moptab, op0, narrow_op1, target, |
3877 | unsignedp, OPTAB_DIRECT); |
3878 | if (tem) |
3879 | /* We used the wrong signedness. Adjust the result. */ |
      return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
                                          tem, unsignedp);
3882 | } |
3883 | |
3884 | /* Try widening multiplication. */ |
3885 | moptab = unsignedp ? umul_widen_optab : smul_widen_optab; |
  if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && mul_widen_cost (speed, wider_mode) < max_cost)
3888 | { |
3889 | tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0, |
3890 | unsignedp, OPTAB_WIDEN); |
3891 | if (tem) |
      return extract_high_half (mode, tem);
3893 | } |
3894 | |
3895 | /* Try widening the mode and perform a non-widening multiplication. */ |
  if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3899 | < max_cost)) |
3900 | { |
3901 | rtx_insn *insns; |
3902 | rtx wop0, wop1; |
3903 | |
      /* We need to widen the operands, for example to ensure the
         constant multiplier is correctly sign or zero extended.
         Use a sequence to clean up any instructions emitted by
         the conversions if things don't work out.  */
      start_sequence ();
      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3911 | tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0, |
3912 | unsignedp, OPTAB_WIDEN); |
3913 | insns = get_insns (); |
3914 | end_sequence (); |
3915 | |
3916 | if (tem) |
3917 | { |
3918 | emit_insn (insns); |
          return extract_high_half (mode, tem);
3920 | } |
3921 | } |
3922 | |
3923 | /* Try widening multiplication of opposite signedness, and adjust. */ |
3924 | moptab = unsignedp ? smul_widen_optab : umul_widen_optab; |
  if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_widen_cost (speed, wider_mode)
          + 2 * shift_cost (speed, mode, size-1)
3929 | + 4 * add_cost (speed, mode) < max_cost)) |
3930 | { |
3931 | tem = expand_binop (wider_mode, moptab, op0, narrow_op1, |
3932 | NULL_RTX, ! unsignedp, OPTAB_WIDEN); |
3933 | if (tem != 0) |
3934 | { |
          tem = extract_high_half (mode, tem);
          /* We used the wrong signedness.  Adjust the result.  */
          return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3938 | target, unsignedp); |
3939 | } |
3940 | } |
3941 | |
3942 | return 0; |
3943 | } |
3944 | |
3945 | /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant), |
3946 | putting the high half of the result in TARGET if that is convenient, |
3947 | and return where the result is. If the operation cannot be performed, |
3948 | 0 is returned. |
3949 | |
3950 | MODE is the mode of operation and result. |
3951 | |
3952 | UNSIGNEDP nonzero means unsigned multiply. |
3953 | |
3954 | MAX_COST is the total allowed cost for the expanded RTL. */ |
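/* E.g. for 32-bit operands this is the top 32 bits of the full 64-bit
   product, what a widening multiply followed by a right shift of 32
   would compute; the code below looks for the cheapest way to obtain
   just that half.  */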
3955 | |
3956 | static rtx |
3957 | expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1, |
3958 | rtx target, int unsignedp, int max_cost) |
3959 | { |
3960 | unsigned HOST_WIDE_INT cnst1; |
  int extra_cost;
3962 | bool sign_adjust = false; |
3963 | enum mult_variant variant; |
3964 | struct algorithm alg; |
3965 | rtx tem; |
3966 | bool speed = optimize_insn_for_speed_p (); |
3967 | |
  /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
3969 | gcc_assert (HWI_COMPUTABLE_MODE_P (mode)); |
3970 | |
3971 | cnst1 = INTVAL (op1) & GET_MODE_MASK (mode); |
3972 | |
3973 | /* We can't optimize modes wider than BITS_PER_WORD. |
3974 | ??? We might be able to perform double-word arithmetic if |
3975 | mode == word_mode, however all the cost calculations in |
3976 | synth_mult etc. assume single-word operations. */ |
  scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
  if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3979 | return expmed_mult_highpart_optab (mode, op0, op1, target, |
3980 | unsignedp, max_cost); |
3981 | |
  extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3983 | |
3984 | /* Check whether we try to multiply by a negative constant. */ |
3985 | if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1)) |
3986 | { |
3987 | sign_adjust = true; |
3988 | extra_cost += add_cost (speed, mode); |
3989 | } |
3990 | |
3991 | /* See whether shift/add multiplication is cheap enough. */ |
  if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
                           max_cost - extra_cost))
3994 | { |
3995 | /* See whether the specialized multiplication optabs are |
3996 | cheaper than the shift/add version. */ |
      tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
                                        alg.cost.cost + extra_cost);
3999 | if (tem) |
4000 | return tem; |
4001 | |
4002 | tem = convert_to_mode (wider_mode, op0, unsignedp); |
      tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
      tem = extract_high_half (mode, tem);
4005 | |
4006 | /* Adjust result for signedness. */ |
4007 | if (sign_adjust) |
4008 | tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem); |
4009 | |
4010 | return tem; |
4011 | } |
4012 | return expmed_mult_highpart_optab (mode, op0, op1, target, |
4013 | unsignedp, max_cost); |
4014 | } |
4015 | |
4016 | |
4017 | /* Expand signed modulus of OP0 by a power of two D in mode MODE. */ |
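/* E.g. for D = 4 this computes op0 % 4 with the sign of op0, so an
   op0 of -7 yields -3, matching C's truncating division; the
   branchless path below gets there via a sign mask rather than a
   compare and branch.  */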
4018 | |
4019 | static rtx |
4020 | expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d) |
4021 | { |
4022 | rtx result, temp, shift; |
4023 | rtx_code_label *label; |
4024 | int logd; |
4025 | int prec = GET_MODE_PRECISION (mode); |
4026 | |
  logd = floor_log2 (d);
4028 | result = gen_reg_rtx (mode); |
4029 | |
4030 | /* Avoid conditional branches when they're expensive. */ |
4031 | if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2 |
4032 | && optimize_insn_for_speed_p ()) |
4033 | { |
4034 | rtx signmask = emit_store_flag (result, LT, op0, const0_rtx, |
4035 | mode, 0, -1); |
4036 | if (signmask) |
4037 | { |
4038 | HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1; |
4039 | signmask = force_reg (mode, signmask); |
4040 | shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd); |
4041 | |
          /* Use the rtx_cost of a LSHIFTRT instruction to determine
             which instruction sequence to use.  If logical right shifts
             are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
             use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */

          temp = gen_rtx_LSHIFTRT (mode, result, shift);
          if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
              || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
                  > COSTS_N_INSNS (2)))
4051 | { |
4052 | temp = expand_binop (mode, xor_optab, op0, signmask, |
4053 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
4054 | temp = expand_binop (mode, sub_optab, temp, signmask, |
4055 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
4056 | temp = expand_binop (mode, and_optab, temp, |
4057 | gen_int_mode (masklow, mode), |
4058 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
4059 | temp = expand_binop (mode, xor_optab, temp, signmask, |
4060 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
4061 | temp = expand_binop (mode, sub_optab, temp, signmask, |
4062 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
4063 | } |
4064 | else |
4065 | { |
4066 | signmask = expand_binop (mode, lshr_optab, signmask, shift, |
4067 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
4068 | signmask = force_reg (mode, signmask); |
4069 | |
4070 | temp = expand_binop (mode, add_optab, op0, signmask, |
4071 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
4072 | temp = expand_binop (mode, and_optab, temp, |
4073 | gen_int_mode (masklow, mode), |
4074 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
4075 | temp = expand_binop (mode, sub_optab, temp, signmask, |
4076 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |
4077 | } |
4078 | return temp; |
4079 | } |
4080 | } |
4081 | |
4082 | /* Mask contains the mode's signbit and the significant bits of the |
4083 | modulus. By including the signbit in the operation, many targets |
4084 | can avoid an explicit compare operation in the following comparison |
4085 | against zero. */ |
  wide_int mask = wi::mask (logd, false, prec);
  mask = wi::set_bit (mask, prec - 1);
4088 | |
4089 | temp = expand_binop (mode, and_optab, op0, |
4090 | immed_wide_int_const (mask, mode), |
4091 | result, 1, OPTAB_LIB_WIDEN); |
4092 | if (temp != result) |
4093 | emit_move_insn (result, temp); |
4094 | |
4095 | label = gen_label_rtx (); |
4096 | do_cmp_and_jump (result, const0_rtx, GE, mode, label); |
4097 | |
4098 | temp = expand_binop (mode, sub_optab, result, const1_rtx, result, |
4099 | 0, OPTAB_LIB_WIDEN); |
4100 | |
  mask = wi::mask (logd, true, prec);
4102 | temp = expand_binop (mode, ior_optab, temp, |
4103 | immed_wide_int_const (mask, mode), |
4104 | result, 1, OPTAB_LIB_WIDEN); |
4105 | temp = expand_binop (mode, add_optab, temp, const1_rtx, result, |
4106 | 0, OPTAB_LIB_WIDEN); |
4107 | if (temp != result) |
4108 | emit_move_insn (result, temp); |
4109 | emit_label (label); |
4110 | return result; |
4111 | } |
4112 | |
4113 | /* Expand signed division of OP0 by a power of two D in mode MODE. |
4114 | This routine is only called for positive values of D. */ |
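/* The underlying identity: for D = 2**logd, signed op0 / D equals
   (op0 + (op0 < 0 ? D - 1 : 0)) >> logd.  E.g. -9 / 8 becomes
   (-9 + 7) >> 3 = -1, the truncated quotient.  */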
4115 | |
4116 | static rtx |
4117 | expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d) |
4118 | { |
4119 | rtx temp; |
4120 | rtx_code_label *label; |
4121 | int logd; |
4122 | |
  logd = floor_log2 (d);
4124 | |
4125 | if (d == 2 |
4126 | && BRANCH_COST (optimize_insn_for_speed_p (), |
4127 | false) >= 1) |
4128 | { |
4129 | temp = gen_reg_rtx (mode); |
4130 | temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1); |
4131 | if (temp != NULL_RTX) |
4132 | { |
4133 | temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX, |
4134 | 0, OPTAB_LIB_WIDEN); |
          return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4136 | } |
4137 | } |
4138 | |
4139 | if (HAVE_conditional_move |
4140 | && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2) |
4141 | { |
4142 | rtx temp2; |
4143 | |
4144 | start_sequence (); |
4145 | temp2 = copy_to_mode_reg (mode, op0); |
4146 | temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode), |
4147 | NULL_RTX, 0, OPTAB_LIB_WIDEN); |
4148 | temp = force_reg (mode, temp); |
4149 | |
4150 | /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */ |
      temp2 = emit_conditional_move (temp2, { LT, temp2, const0_rtx, mode },
                                     temp, temp2, mode, 0);
4153 | if (temp2) |
4154 | { |
4155 | rtx_insn *seq = get_insns (); |
4156 | end_sequence (); |
4157 | emit_insn (seq); |
          return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4159 | } |
4160 | end_sequence (); |
4161 | } |
4162 | |
4163 | if (BRANCH_COST (optimize_insn_for_speed_p (), |
4164 | false) >= 2) |
4165 | { |
4166 | int ushift = GET_MODE_BITSIZE (mode) - logd; |
4167 | |
4168 | temp = gen_reg_rtx (mode); |
4169 | temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1); |
4170 | if (temp != NULL_RTX) |
4171 | { |
4172 | if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD |
              || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4174 | > COSTS_N_INSNS (1)) |
4175 | temp = expand_binop (mode, and_optab, temp, |
4176 | gen_int_mode (d - 1, mode), |
4177 | NULL_RTX, 0, OPTAB_LIB_WIDEN); |
4178 | else |
            temp = expand_shift (RSHIFT_EXPR, mode, temp,
                                 ushift, NULL_RTX, 1);
          temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
                               0, OPTAB_LIB_WIDEN);
          return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4184 | } |
4185 | } |
4186 | |
4187 | label = gen_label_rtx (); |
4188 | temp = copy_to_mode_reg (mode, op0); |
4189 | do_cmp_and_jump (temp, const0_rtx, GE, mode, label); |
  expand_inc (temp, gen_int_mode (d - 1, mode));
  emit_label (label);
  return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4193 | } |
4194 | |
4195 | /* Emit the code to divide OP0 by OP1, putting the result in TARGET |
4196 | if that is convenient, and returning where the result is. |
4197 | You may request either the quotient or the remainder as the result; |
4198 | specify REM_FLAG nonzero to get the remainder. |
4199 | |
4200 | CODE is the expression code for which kind of division this is; |
4201 | it controls how rounding is done. MODE is the machine mode to use. |
4202 | UNSIGNEDP nonzero means do unsigned division. */ |
4203 | |
4204 | /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI |
4205 | and then correct it by or'ing in missing high bits |
4206 | if result of ANDI is nonzero. |
4207 | For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result. |
4208 | This could optimize to a bfexts instruction. |
4209 | But C doesn't use these operations, so their optimizations are |
4210 | left for later. */ |
4211 | /* ??? For modulo, we don't actually need the highpart of the first product, |
4212 | the low part will do nicely. And for small divisors, the second multiply |
4213 | can also be a low-part only multiply or even be completely left out. |
4214 | E.g. to calculate the remainder of a division by 3 with a 32 bit |
4215 | multiply, multiply with 0x55555556 and extract the upper two bits; |
4216 | the result is exact for inputs up to 0x1fffffff. |
4217 | The input range can be reduced by using cross-sum rules. |
4218 | For odd divisors >= 3, the following table gives right shift counts |
4219 | so that if a number is shifted by an integer multiple of the given |
4220 | amount, the remainder stays the same: |
4221 | 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20, |
4222 | 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0, |
4223 | 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0, |
4224 | 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33, |
4225 | 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12 |
4226 | |
4227 | Cross-sum rules for even numbers can be derived by leaving as many bits |
4228 | to the right alone as the divisor has zeros to the right. |
4229 | E.g. if x is an unsigned 32 bit number: |
4230 | (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28 |
4231 | */ |
4232 | |
4233 | rtx |
4234 | expand_divmod (int rem_flag, enum tree_code code, machine_mode mode, |
4235 | rtx op0, rtx op1, rtx target, int unsignedp, |
4236 | enum optab_methods methods) |
4237 | { |
4238 | machine_mode compute_mode; |
4239 | rtx tquotient; |
4240 | rtx quotient = 0, remainder = 0; |
4241 | rtx_insn *last; |
4242 | rtx_insn *insn; |
4243 | optab optab1, optab2; |
4244 | int op1_is_constant, op1_is_pow2 = 0; |
  int max_cost, extra_cost;
4246 | static HOST_WIDE_INT last_div_const = 0; |
4247 | bool speed = optimize_insn_for_speed_p (); |
4248 | |
4249 | op1_is_constant = CONST_INT_P (op1); |
4250 | if (op1_is_constant) |
4251 | { |
4252 | wide_int ext_op1 = rtx_mode_t (op1, mode); |
4253 | op1_is_pow2 = (wi::popcount (ext_op1) == 1 |
4254 | || (! unsignedp |
                       && wi::popcount (wi::neg (ext_op1)) == 1));
4256 | } |
4257 | |
4258 | /* |
4259 | This is the structure of expand_divmod: |
4260 | |
4261 | First comes code to fix up the operands so we can perform the operations |
4262 | correctly and efficiently. |
4263 | |
4264 | Second comes a switch statement with code specific for each rounding mode. |
4265 | For some special operands this code emits all RTL for the desired |
4266 | operation, for other cases, it generates only a quotient and stores it in |
4267 | QUOTIENT. The case for trunc division/remainder might leave quotient = 0, |
4268 | to indicate that it has not done anything. |
4269 | |
4270 | Last comes code that finishes the operation. If QUOTIENT is set and |
4271 | REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If |
4272 | QUOTIENT is not set, it is computed using trunc rounding. |
4273 | |
4274 | We try to generate special code for division and remainder when OP1 is a |
4275 | constant. If |OP1| = 2**n we can use shifts and some other fast |
4276 | operations. For other values of OP1, we compute a carefully selected |
4277 | fixed-point approximation m = 1/OP1, and generate code that multiplies OP0 |
4278 | by m. |
4279 | |
4280 | In all cases but EXACT_DIV_EXPR, this multiplication requires the upper |
4281 | half of the product. Different strategies for generating the product are |
4282 | implemented in expmed_mult_highpart. |
4283 | |
4284 | If what we actually want is the remainder, we generate that by another |
4285 | by-constant multiplication and a subtraction. */ |
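
  /* For instance (illustrative, not emitted literally): with 32-bit
     operands, OP0 / 10 is the high half of the 64-bit product
     OP0 * 0xcccccccd, shifted right a further 3 bits, since
     0xcccccccd is 2^35 / 10 rounded up; choose_multiplier picks such
     constants and expmed_mult_highpart emits the multiply.  */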
4286 | |
4287 | /* We shouldn't be called with OP1 == const1_rtx, but some of the |
4288 | code below will malfunction if we are, so check here and handle |
4289 | the special case if so. */ |
4290 | if (op1 == const1_rtx) |
4291 | return rem_flag ? const0_rtx : op0; |
4292 | |
4293 | /* When dividing by -1, we could get an overflow. |
4294 | negv_optab can handle overflows. */ |
4295 | if (! unsignedp && op1 == constm1_rtx) |
4296 | { |
4297 | if (rem_flag) |
4298 | return const0_rtx; |
4299 | return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT |
4300 | ? negv_optab : neg_optab, op0, target, 0); |
4301 | } |
4302 | |
4303 | if (target |
4304 | /* Don't use the function value register as a target |
4305 | since we have to read it as well as write it, |
4306 | and function-inlining gets confused by this. */ |
4307 | && ((REG_P (target) && REG_FUNCTION_VALUE_P (target)) |
4308 | /* Don't clobber an operand while doing a multi-step calculation. */ |
4309 | || ((rem_flag || op1_is_constant) |
4310 | && (reg_mentioned_p (target, op0) |
4311 | || (MEM_P (op0) && MEM_P (target)))) |
4312 | || reg_mentioned_p (target, op1) |
4313 | || (MEM_P (op1) && MEM_P (target)))) |
4314 | target = 0; |
4315 | |
4316 | /* Get the mode in which to perform this computation. Normally it will |
4317 | be MODE, but sometimes we can't do the desired operation in MODE. |
4318 | If so, pick a wider mode in which we can do the operation. Convert |
4319 | to that mode at the start to avoid repeated conversions. |
4320 | |
4321 | First see what operations we need. These depend on the expression |
4322 | we are evaluating. (We assume that divxx3 insns exist under the |
     same conditions as modxx3 insns do, and that these insns don't normally
4324 | fail. If these assumptions are not correct, we may generate less |
4325 | efficient code in some cases.) |
4326 | |
4327 | Then see if we find a mode in which we can open-code that operation |
4328 | (either a division, modulus, or shift). Finally, check for the smallest |
4329 | mode for which we can do the operation with a library call. */ |
4330 | |
4331 | /* We might want to refine this now that we have division-by-constant |
4332 | optimization. Since expmed_mult_highpart tries so many variants, it is |
4333 | not straightforward to generalize this. Maybe we should make an array |
4334 | of possible modes in init_expmed? Save this for GCC 2.7. */ |
4335 | |
4336 | optab1 = (op1_is_pow2 |
4337 | ? (unsignedp ? lshr_optab : ashr_optab) |
4338 | : (unsignedp ? udiv_optab : sdiv_optab)); |
4339 | optab2 = (op1_is_pow2 ? optab1 |
4340 | : (unsignedp ? udivmod_optab : sdivmod_optab)); |
4341 | |
4342 | if (methods == OPTAB_WIDEN || methods == OPTAB_LIB_WIDEN) |
4343 | { |
4344 | FOR_EACH_MODE_FROM (compute_mode, mode) |
4345 | if (optab_handler (op: optab1, mode: compute_mode) != CODE_FOR_nothing |
4346 | || optab_handler (op: optab2, mode: compute_mode) != CODE_FOR_nothing) |
4347 | break; |
4348 | |
4349 | if (compute_mode == VOIDmode && methods == OPTAB_LIB_WIDEN) |
4350 | FOR_EACH_MODE_FROM (compute_mode, mode) |
4351 | if (optab_libfunc (optab1, compute_mode) |
4352 | || optab_libfunc (optab2, compute_mode)) |
4353 | break; |
4354 | } |
4355 | else |
4356 | compute_mode = mode; |
4357 | |
4358 | /* If we still couldn't find a mode, use MODE, but expand_binop will |
4359 | probably die. */ |
4360 | if (compute_mode == VOIDmode) |
4361 | compute_mode = mode; |
4362 | |
4363 | if (target && GET_MODE (target) == compute_mode) |
4364 | tquotient = target; |
4365 | else |
4366 | tquotient = gen_reg_rtx (compute_mode); |
4367 | |
4368 | #if 0 |
4369 | /* It should be possible to restrict the precision to GET_MODE_BITSIZE |
4370 | (mode), and thereby get better code when OP1 is a constant. Do that |
4371 | later. It will require going over all usages of SIZE below. */ |
4372 | size = GET_MODE_BITSIZE (mode); |
4373 | #endif |
4374 | |
4375 | /* Only deduct something for a REM if the last divide done was |
4376 | for a different constant. Then set the constant of the last |
4377 | divide. */ |
4378 | max_cost = (unsignedp |
4379 | ? udiv_cost (speed, mode: compute_mode) |
4380 | : sdiv_cost (speed, mode: compute_mode)); |
4381 | if (rem_flag && ! (last_div_const != 0 && op1_is_constant |
4382 | && INTVAL (op1) == last_div_const)) |
4383 | max_cost -= (mul_cost (speed, mode: compute_mode) |
4384 | + add_cost (speed, mode: compute_mode)); |
4385 | |
4386 | last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0; |
4387 | |
4388 | /* Now convert to the best mode to use. */ |
4389 | if (compute_mode != mode) |
4390 | { |
4391 | op0 = convert_modes (mode: compute_mode, oldmode: mode, x: op0, unsignedp); |
4392 | op1 = convert_modes (mode: compute_mode, oldmode: mode, x: op1, unsignedp); |
4393 | |
4394 | /* convert_modes may have placed op1 into a register, so we |
4395 | must recompute the following. */ |
4396 | op1_is_constant = CONST_INT_P (op1); |
4397 | if (op1_is_constant) |
4398 | { |
4399 | wide_int ext_op1 = rtx_mode_t (op1, compute_mode); |
4400 | op1_is_pow2 = (wi::popcount (ext_op1) == 1 |
4401 | || (! unsignedp |
4402 | && wi::popcount (wi::neg (x: ext_op1)) == 1)); |
4403 | } |
4404 | else |
4405 | op1_is_pow2 = 0; |
4406 | } |
4407 | |
4408 | /* If one of the operands is a volatile MEM, copy it into a register. */ |
4409 | |
4410 | if (MEM_P (op0) && MEM_VOLATILE_P (op0)) |
4411 | op0 = force_reg (compute_mode, op0); |
4412 | if (MEM_P (op1) && MEM_VOLATILE_P (op1)) |
4413 | op1 = force_reg (compute_mode, op1); |
4414 | |
4415 | /* If we need the remainder or if OP1 is constant, we need to |
4416 | put OP0 in a register in case it has any queued subexpressions. */ |
4417 | if (rem_flag || op1_is_constant) |
4418 | op0 = force_reg (compute_mode, op0); |
4419 | |
4420 | last = get_last_insn (); |
4421 | |
4422 | /* Promote floor rounding to trunc rounding for unsigned operations. */ |
4423 | if (unsignedp) |
4424 | { |
4425 | if (code == FLOOR_DIV_EXPR) |
4426 | code = TRUNC_DIV_EXPR; |
4427 | if (code == FLOOR_MOD_EXPR) |
4428 | code = TRUNC_MOD_EXPR; |
4429 | if (code == EXACT_DIV_EXPR && op1_is_pow2) |
4430 | code = TRUNC_DIV_EXPR; |
4431 | } |
4432 | |
4433 | if (op1 != const0_rtx) |
4434 | switch (code) |
4435 | { |
4436 | case TRUNC_MOD_EXPR: |
4437 | case TRUNC_DIV_EXPR: |
4438 | if (op1_is_constant) |
4439 | { |
4440 | scalar_int_mode int_mode = as_a <scalar_int_mode> (m: compute_mode); |
4441 | int size = GET_MODE_BITSIZE (mode: int_mode); |
4442 | if (unsignedp) |
4443 | { |
4444 | unsigned HOST_WIDE_INT mh, ml; |
4445 | int pre_shift, post_shift; |
4446 | int dummy; |
4447 | wide_int wd = rtx_mode_t (op1, int_mode); |
4448 | unsigned HOST_WIDE_INT d = wd.to_uhwi (); |
4449 | |
4450 | if (wi::popcount (wd) == 1) |
4451 | { |
4452 | pre_shift = floor_log2 (x: d); |
4453 | if (rem_flag) |
4454 | { |
4455 | unsigned HOST_WIDE_INT mask |
4456 | = (HOST_WIDE_INT_1U << pre_shift) - 1; |
4457 | remainder |
4458 | = expand_binop (int_mode, and_optab, op0, |
4459 | gen_int_mode (mask, int_mode), |
4460 | remainder, 1, methods); |
4461 | if (remainder) |
4462 | return gen_lowpart (mode, remainder); |
4463 | } |
4464 | quotient = expand_shift (code: RSHIFT_EXPR, mode: int_mode, shifted: op0, |
4465 | amount: pre_shift, target: tquotient, unsignedp: 1); |
4466 | } |
4467 | else if (size <= HOST_BITS_PER_WIDE_INT) |
4468 | { |
4469 | if (d >= (HOST_WIDE_INT_1U << (size - 1))) |
4470 | { |
4471 | /* Most significant bit of divisor is set; emit an scc |
4472 | insn. */ |
4473 | quotient = emit_store_flag_force (tquotient, GEU, op0, op1, |
4474 | int_mode, 1, 1); |
4475 | } |
4476 | else |
4477 | { |
4478 | /* Find a suitable multiplier and right shift count |
4479 | instead of multiplying with D. */ |
4480 | |
4481 | mh = choose_multiplier (d, n: size, precision: size, |
4482 | multiplier_ptr: &ml, post_shift_ptr: &post_shift, lgup_ptr: &dummy); |
4483 | |
4484 | /* If the suggested multiplier is more than SIZE bits, |
4485 | we can do better for even divisors, using an |
4486 | initial right shift. */ |
4487 | if (mh != 0 && (d & 1) == 0) |
4488 | { |
4489 | pre_shift = ctz_or_zero (x: d); |
4490 | mh = choose_multiplier (d: d >> pre_shift, n: size, |
4491 | precision: size - pre_shift, |
4492 | multiplier_ptr: &ml, post_shift_ptr: &post_shift, lgup_ptr: &dummy); |
4493 | gcc_assert (!mh); |
4494 | } |
4495 | else |
4496 | pre_shift = 0; |
4497 | |
4498 | if (mh != 0) |
4499 | { |
4500 | rtx t1, t2, t3, t4; |
4501 | |
4502 | if (post_shift - 1 >= BITS_PER_WORD) |
4503 | goto fail1; |
4504 | |
4505 | extra_cost |
4506 | = (shift_cost (speed, mode: int_mode, bits: post_shift - 1) |
4507 | + shift_cost (speed, mode: int_mode, bits: 1) |
4508 | + 2 * add_cost (speed, mode: int_mode)); |
4509 | t1 = expmed_mult_highpart |
4510 | (mode: int_mode, op0, op1: gen_int_mode (ml, int_mode), |
4511 | NULL_RTX, unsignedp: 1, max_cost: max_cost - extra_cost); |
4512 | if (t1 == 0) |
4513 | goto fail1; |
4514 | t2 = force_operand (gen_rtx_MINUS (int_mode, |
4515 | op0, t1), |
4516 | NULL_RTX); |
4517 | t3 = expand_shift (code: RSHIFT_EXPR, mode: int_mode, |
4518 | shifted: t2, amount: 1, NULL_RTX, unsignedp: 1); |
4519 | t4 = force_operand (gen_rtx_PLUS (int_mode, |
4520 | t1, t3), |
4521 | NULL_RTX); |
4522 | quotient = expand_shift |
4523 | (code: RSHIFT_EXPR, mode: int_mode, shifted: t4, |
4524 | amount: post_shift - 1, target: tquotient, unsignedp: 1); |
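		      /* E.g. (illustrative) for 32-bit operands and
			 D == 7 the multiplier needs 33 bits, and the
			 sequence above amounts to
			   t1 = mulhi (OP0, 0x24924925);
			   quotient = (((OP0 - t1) >> 1) + t1) >> 2;
			 the classic fixup for oversized multipliers.  */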
4525 | } |
4526 | else |
4527 | { |
4528 | rtx t1, t2; |
4529 | |
4530 | if (pre_shift >= BITS_PER_WORD |
4531 | || post_shift >= BITS_PER_WORD) |
4532 | goto fail1; |
4533 | |
4534 | t1 = expand_shift |
4535 | (code: RSHIFT_EXPR, mode: int_mode, shifted: op0, |
4536 | amount: pre_shift, NULL_RTX, unsignedp: 1); |
4537 | extra_cost |
4538 | = (shift_cost (speed, mode: int_mode, bits: pre_shift) |
4539 | + shift_cost (speed, mode: int_mode, bits: post_shift)); |
4540 | t2 = expmed_mult_highpart |
4541 | (mode: int_mode, op0: t1, |
4542 | op1: gen_int_mode (ml, int_mode), |
4543 | NULL_RTX, unsignedp: 1, max_cost: max_cost - extra_cost); |
4544 | if (t2 == 0) |
4545 | goto fail1; |
4546 | quotient = expand_shift |
4547 | (code: RSHIFT_EXPR, mode: int_mode, shifted: t2, |
4548 | amount: post_shift, target: tquotient, unsignedp: 1); |
4549 | } |
4550 | } |
4551 | } |
	    else /* Mode too wide to use the tricky code.  */
4553 | break; |
4554 | |
4555 | insn = get_last_insn (); |
4556 | if (insn != last) |
4557 | set_dst_reg_note (insn, REG_EQUAL, |
4558 | gen_rtx_UDIV (int_mode, op0, op1), |
4559 | quotient); |
4560 | } |
4561 | else /* TRUNC_DIV, signed */ |
4562 | { |
4563 | unsigned HOST_WIDE_INT ml; |
4564 | int lgup, post_shift; |
4565 | rtx mlr; |
4566 | HOST_WIDE_INT d = INTVAL (op1); |
4567 | unsigned HOST_WIDE_INT abs_d; |
4568 | |
4569 | /* Not prepared to handle division/remainder by |
4570 | 0xffffffffffffffff8000000000000000 etc. */ |
4571 | if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT) |
4572 | break; |
4573 | |
4574 | /* Since d might be INT_MIN, we have to cast to |
4575 | unsigned HOST_WIDE_INT before negating to avoid |
4576 | undefined signed overflow. */ |
4577 | abs_d = (d >= 0 |
4578 | ? (unsigned HOST_WIDE_INT) d |
4579 | : - (unsigned HOST_WIDE_INT) d); |
4580 | |
4581 | /* n rem d = n rem -d */ |
4582 | if (rem_flag && d < 0) |
4583 | { |
4584 | d = abs_d; |
4585 | op1 = gen_int_mode (abs_d, int_mode); |
4586 | } |
4587 | |
4588 | if (d == 1) |
4589 | quotient = op0; |
4590 | else if (d == -1) |
4591 | quotient = expand_unop (int_mode, neg_optab, op0, |
4592 | tquotient, 0); |
4593 | else if (size <= HOST_BITS_PER_WIDE_INT |
4594 | && abs_d == HOST_WIDE_INT_1U << (size - 1)) |
4595 | { |
4596 | /* This case is not handled correctly below. */ |
4597 | quotient = emit_store_flag (tquotient, EQ, op0, op1, |
4598 | int_mode, 1, 1); |
4599 | if (quotient == 0) |
4600 | goto fail1; |
4601 | } |
4602 | else if (EXACT_POWER_OF_2_OR_ZERO_P (d) |
4603 | && (size <= HOST_BITS_PER_WIDE_INT || d >= 0) |
4604 | && (rem_flag |
4605 | ? smod_pow2_cheap (speed, mode: int_mode) |
4606 | : sdiv_pow2_cheap (speed, mode: int_mode)) |
4607 | /* We assume that cheap metric is true if the |
4608 | optab has an expander for this mode. */ |
4609 | && ((optab_handler (op: (rem_flag ? smod_optab |
4610 | : sdiv_optab), |
4611 | mode: int_mode) |
4612 | != CODE_FOR_nothing) |
4613 | || (optab_handler (op: sdivmod_optab, mode: int_mode) |
4614 | != CODE_FOR_nothing))) |
4615 | ; |
4616 | else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)) |
4617 | { |
4618 | if (rem_flag) |
4619 | { |
4620 | remainder = expand_smod_pow2 (mode: int_mode, op0, d); |
4621 | if (remainder) |
4622 | return gen_lowpart (mode, remainder); |
4623 | } |
4624 | |
4625 | if (sdiv_pow2_cheap (speed, mode: int_mode) |
4626 | && ((optab_handler (op: sdiv_optab, mode: int_mode) |
4627 | != CODE_FOR_nothing) |
4628 | || (optab_handler (op: sdivmod_optab, mode: int_mode) |
4629 | != CODE_FOR_nothing))) |
4630 | quotient = expand_divmod (rem_flag: 0, code: TRUNC_DIV_EXPR, |
4631 | mode: int_mode, op0, |
4632 | op1: gen_int_mode (abs_d, |
4633 | int_mode), |
4634 | NULL_RTX, unsignedp: 0); |
4635 | else |
4636 | quotient = expand_sdiv_pow2 (mode: int_mode, op0, d: abs_d); |
4637 | |
4638 | /* We have computed OP0 / abs(OP1). If OP1 is negative, |
4639 | negate the quotient. */ |
4640 | if (d < 0) |
4641 | { |
4642 | insn = get_last_insn (); |
4643 | if (insn != last |
4644 | && abs_d < (HOST_WIDE_INT_1U |
4645 | << (HOST_BITS_PER_WIDE_INT - 1))) |
4646 | set_dst_reg_note (insn, REG_EQUAL, |
4647 | gen_rtx_DIV (int_mode, op0, |
4648 | gen_int_mode |
4649 | (abs_d, |
4650 | int_mode)), |
4651 | quotient); |
4652 | |
4653 | quotient = expand_unop (int_mode, neg_optab, |
4654 | quotient, quotient, 0); |
4655 | } |
4656 | } |
4657 | else if (size <= HOST_BITS_PER_WIDE_INT) |
4658 | { |
4659 | choose_multiplier (d: abs_d, n: size, precision: size - 1, |
4660 | multiplier_ptr: &ml, post_shift_ptr: &post_shift, lgup_ptr: &lgup); |
4661 | if (ml < HOST_WIDE_INT_1U << (size - 1)) |
4662 | { |
4663 | rtx t1, t2, t3; |
4664 | |
4665 | if (post_shift >= BITS_PER_WORD |
4666 | || size - 1 >= BITS_PER_WORD) |
4667 | goto fail1; |
4668 | |
4669 | extra_cost = (shift_cost (speed, mode: int_mode, bits: post_shift) |
4670 | + shift_cost (speed, mode: int_mode, bits: size - 1) |
4671 | + add_cost (speed, mode: int_mode)); |
4672 | t1 = expmed_mult_highpart |
4673 | (mode: int_mode, op0, op1: gen_int_mode (ml, int_mode), |
4674 | NULL_RTX, unsignedp: 0, max_cost: max_cost - extra_cost); |
4675 | if (t1 == 0) |
4676 | goto fail1; |
4677 | t2 = expand_shift |
4678 | (code: RSHIFT_EXPR, mode: int_mode, shifted: t1, |
4679 | amount: post_shift, NULL_RTX, unsignedp: 0); |
4680 | t3 = expand_shift |
4681 | (code: RSHIFT_EXPR, mode: int_mode, shifted: op0, |
4682 | amount: size - 1, NULL_RTX, unsignedp: 0); |
4683 | if (d < 0) |
4684 | quotient |
4685 | = force_operand (gen_rtx_MINUS (int_mode, t3, t2), |
4686 | tquotient); |
4687 | else |
4688 | quotient |
4689 | = force_operand (gen_rtx_MINUS (int_mode, t2, t3), |
4690 | tquotient); |
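		      /* E.g. (illustrative) for 32-bit operands and
			 D == 3, ml == 0x55555556 and post_shift == 0,
			 so the quotient above is
			   mulhi (OP0, 0x55555556) - (OP0 >> 31).  */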
4691 | } |
4692 | else |
4693 | { |
4694 | rtx t1, t2, t3, t4; |
4695 | |
4696 | if (post_shift >= BITS_PER_WORD |
4697 | || size - 1 >= BITS_PER_WORD) |
4698 | goto fail1; |
4699 | |
4700 | ml |= HOST_WIDE_INT_M1U << (size - 1); |
4701 | mlr = gen_int_mode (ml, int_mode); |
4702 | extra_cost = (shift_cost (speed, mode: int_mode, bits: post_shift) |
4703 | + shift_cost (speed, mode: int_mode, bits: size - 1) |
4704 | + 2 * add_cost (speed, mode: int_mode)); |
4705 | t1 = expmed_mult_highpart (mode: int_mode, op0, op1: mlr, |
4706 | NULL_RTX, unsignedp: 0, |
4707 | max_cost: max_cost - extra_cost); |
4708 | if (t1 == 0) |
4709 | goto fail1; |
4710 | t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0), |
4711 | NULL_RTX); |
4712 | t3 = expand_shift |
4713 | (code: RSHIFT_EXPR, mode: int_mode, shifted: t2, |
4714 | amount: post_shift, NULL_RTX, unsignedp: 0); |
4715 | t4 = expand_shift |
4716 | (code: RSHIFT_EXPR, mode: int_mode, shifted: op0, |
4717 | amount: size - 1, NULL_RTX, unsignedp: 0); |
4718 | if (d < 0) |
4719 | quotient |
4720 | = force_operand (gen_rtx_MINUS (int_mode, t4, t3), |
4721 | tquotient); |
4722 | else |
4723 | quotient |
4724 | = force_operand (gen_rtx_MINUS (int_mode, t3, t4), |
4725 | tquotient); |
4726 | } |
4727 | } |
	    else /* Mode too wide to use the tricky code.  */
4729 | break; |
4730 | |
4731 | insn = get_last_insn (); |
4732 | if (insn != last) |
4733 | set_dst_reg_note (insn, REG_EQUAL, |
4734 | gen_rtx_DIV (int_mode, op0, op1), |
4735 | quotient); |
4736 | } |
4737 | break; |
4738 | } |
4739 | fail1: |
4740 | delete_insns_since (last); |
4741 | break; |
4742 | |
4743 | case FLOOR_DIV_EXPR: |
4744 | case FLOOR_MOD_EXPR: |
4745 | /* We will come here only for signed operations. */ |
4746 | if (op1_is_constant && HWI_COMPUTABLE_MODE_P (mode: compute_mode)) |
4747 | { |
4748 | scalar_int_mode int_mode = as_a <scalar_int_mode> (m: compute_mode); |
4749 | int size = GET_MODE_BITSIZE (mode: int_mode); |
4750 | unsigned HOST_WIDE_INT mh, ml; |
4751 | int pre_shift, lgup, post_shift; |
4752 | HOST_WIDE_INT d = INTVAL (op1); |
4753 | |
4754 | if (d > 0) |
4755 | { |
4756 | /* We could just as easily deal with negative constants here, |
4757 | but it does not seem worth the trouble for GCC 2.6. */ |
4758 | if (EXACT_POWER_OF_2_OR_ZERO_P (d)) |
4759 | { |
4760 | pre_shift = floor_log2 (x: d); |
4761 | if (rem_flag) |
4762 | { |
4763 | unsigned HOST_WIDE_INT mask |
4764 | = (HOST_WIDE_INT_1U << pre_shift) - 1; |
4765 | remainder = expand_binop |
4766 | (int_mode, and_optab, op0, |
4767 | gen_int_mode (mask, int_mode), |
4768 | remainder, 0, methods); |
4769 | if (remainder) |
4770 | return gen_lowpart (mode, remainder); |
4771 | } |
4772 | quotient = expand_shift |
4773 | (code: RSHIFT_EXPR, mode: int_mode, shifted: op0, |
4774 | amount: pre_shift, target: tquotient, unsignedp: 0); |
4775 | } |
4776 | else |
4777 | { |
4778 | rtx t1, t2, t3, t4; |
4779 | |
4780 | mh = choose_multiplier (d, n: size, precision: size - 1, |
4781 | multiplier_ptr: &ml, post_shift_ptr: &post_shift, lgup_ptr: &lgup); |
4782 | gcc_assert (!mh); |
4783 | |
4784 | if (post_shift < BITS_PER_WORD |
4785 | && size - 1 < BITS_PER_WORD) |
4786 | { |
4787 | t1 = expand_shift |
4788 | (code: RSHIFT_EXPR, mode: int_mode, shifted: op0, |
4789 | amount: size - 1, NULL_RTX, unsignedp: 0); |
4790 | t2 = expand_binop (int_mode, xor_optab, op0, t1, |
4791 | NULL_RTX, 0, OPTAB_WIDEN); |
4792 | extra_cost = (shift_cost (speed, mode: int_mode, bits: post_shift) |
4793 | + shift_cost (speed, mode: int_mode, bits: size - 1) |
4794 | + 2 * add_cost (speed, mode: int_mode)); |
4795 | t3 = expmed_mult_highpart |
4796 | (mode: int_mode, op0: t2, op1: gen_int_mode (ml, int_mode), |
4797 | NULL_RTX, unsignedp: 1, max_cost: max_cost - extra_cost); |
4798 | if (t3 != 0) |
4799 | { |
4800 | t4 = expand_shift |
4801 | (code: RSHIFT_EXPR, mode: int_mode, shifted: t3, |
4802 | amount: post_shift, NULL_RTX, unsignedp: 1); |
4803 | quotient = expand_binop (int_mode, xor_optab, |
4804 | t4, t1, tquotient, 0, |
4805 | OPTAB_WIDEN); |
4806 | } |
4807 | } |
4808 | } |
4809 | } |
4810 | else |
4811 | { |
4812 | rtx nsign, t1, t2, t3, t4; |
4813 | t1 = force_operand (gen_rtx_PLUS (int_mode, |
4814 | op0, constm1_rtx), NULL_RTX); |
4815 | t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX, |
4816 | 0, OPTAB_WIDEN); |
4817 | nsign = expand_shift (code: RSHIFT_EXPR, mode: int_mode, shifted: t2, |
4818 | amount: size - 1, NULL_RTX, unsignedp: 0); |
4819 | t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign), |
4820 | NULL_RTX); |
4821 | t4 = expand_divmod (rem_flag: 0, code: TRUNC_DIV_EXPR, mode: int_mode, op0: t3, op1, |
4822 | NULL_RTX, unsignedp: 0); |
4823 | if (t4) |
4824 | { |
4825 | rtx t5; |
4826 | t5 = expand_unop (int_mode, one_cmpl_optab, nsign, |
4827 | NULL_RTX, 0); |
4828 | quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5), |
4829 | tquotient); |
4830 | } |
4831 | } |
4832 | } |
4833 | |
4834 | if (quotient != 0) |
4835 | break; |
4836 | delete_insns_since (last); |
4837 | |
4838 | /* Try using an instruction that produces both the quotient and |
4839 | remainder, using truncation. We can easily compensate the quotient |
4840 | or remainder to get floor rounding, once we have the remainder. |
	 Notice that we also compute the final remainder value here,
4842 | and return the result right away. */ |
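      /* E.g. (illustrative) for OP0 == -7, OP1 == 2, truncation gives
	 q == -3, r == -1; r is nonzero and the operand signs differ, so
	 the fixup below yields the floor result q == -4, r == 1.  */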
4843 | if (target == 0 || GET_MODE (target) != compute_mode) |
4844 | target = gen_reg_rtx (compute_mode); |
4845 | |
4846 | if (rem_flag) |
4847 | { |
4848 | remainder |
4849 | = REG_P (target) ? target : gen_reg_rtx (compute_mode); |
4850 | quotient = gen_reg_rtx (compute_mode); |
4851 | } |
4852 | else |
4853 | { |
4854 | quotient |
4855 | = REG_P (target) ? target : gen_reg_rtx (compute_mode); |
4856 | remainder = gen_reg_rtx (compute_mode); |
4857 | } |
4858 | |
4859 | if (expand_twoval_binop (sdivmod_optab, op0, op1, |
4860 | quotient, remainder, 0)) |
4861 | { |
4862 | /* This could be computed with a branch-less sequence. |
4863 | Save that for later. */ |
4864 | rtx tem; |
4865 | rtx_code_label *label = gen_label_rtx (); |
4866 | do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label); |
4867 | tem = expand_binop (compute_mode, xor_optab, op0, op1, |
4868 | NULL_RTX, 0, OPTAB_WIDEN); |
4869 | do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label); |
4870 | expand_dec (target: quotient, const1_rtx); |
4871 | expand_inc (target: remainder, inc: op1); |
4872 | emit_label (label); |
4873 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
4874 | } |
4875 | |
4876 | /* No luck with division elimination or divmod. Have to do it |
4877 | by conditionally adjusting op0 *and* the result. */ |
4878 | { |
4879 | rtx_code_label *label1, *label2, *label3, *label4, *label5; |
4880 | rtx adjusted_op0; |
4881 | rtx tem; |
4882 | |
4883 | quotient = gen_reg_rtx (compute_mode); |
4884 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); |
4885 | label1 = gen_label_rtx (); |
4886 | label2 = gen_label_rtx (); |
4887 | label3 = gen_label_rtx (); |
4888 | label4 = gen_label_rtx (); |
4889 | label5 = gen_label_rtx (); |
4890 | do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); |
4891 | do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1); |
4892 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4893 | quotient, 0, methods); |
4894 | if (tem != quotient) |
4895 | emit_move_insn (quotient, tem); |
4896 | emit_jump_insn (targetm.gen_jump (label5)); |
4897 | emit_barrier (); |
4898 | emit_label (label1); |
4899 | expand_inc (target: adjusted_op0, const1_rtx); |
4900 | emit_jump_insn (targetm.gen_jump (label4)); |
4901 | emit_barrier (); |
4902 | emit_label (label2); |
4903 | do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3); |
4904 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4905 | quotient, 0, methods); |
4906 | if (tem != quotient) |
4907 | emit_move_insn (quotient, tem); |
4908 | emit_jump_insn (targetm.gen_jump (label5)); |
4909 | emit_barrier (); |
4910 | emit_label (label3); |
4911 | expand_dec (target: adjusted_op0, const1_rtx); |
4912 | emit_label (label4); |
4913 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
4914 | quotient, 0, methods); |
4915 | if (tem != quotient) |
4916 | emit_move_insn (quotient, tem); |
4917 | expand_dec (target: quotient, const1_rtx); |
4918 | emit_label (label5); |
4919 | } |
4920 | break; |
4921 | |
4922 | case CEIL_DIV_EXPR: |
4923 | case CEIL_MOD_EXPR: |
4924 | if (unsignedp) |
4925 | { |
4926 | if (op1_is_constant |
4927 | && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) |
4928 | && (HWI_COMPUTABLE_MODE_P (mode: compute_mode) |
4929 | || INTVAL (op1) >= 0)) |
4930 | { |
4931 | scalar_int_mode int_mode |
4932 | = as_a <scalar_int_mode> (m: compute_mode); |
4933 | rtx t1, t2, t3; |
4934 | unsigned HOST_WIDE_INT d = INTVAL (op1); |
4935 | t1 = expand_shift (code: RSHIFT_EXPR, mode: int_mode, shifted: op0, |
4936 | amount: floor_log2 (x: d), target: tquotient, unsignedp: 1); |
4937 | t2 = expand_binop (int_mode, and_optab, op0, |
4938 | gen_int_mode (d - 1, int_mode), |
4939 | NULL_RTX, 1, methods); |
4940 | t3 = gen_reg_rtx (int_mode); |
4941 | t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1); |
4942 | if (t3 == 0) |
4943 | { |
4944 | rtx_code_label *lab; |
4945 | lab = gen_label_rtx (); |
4946 | do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab); |
4947 | expand_inc (target: t1, const1_rtx); |
4948 | emit_label (lab); |
4949 | quotient = t1; |
4950 | } |
4951 | else |
4952 | quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3), |
4953 | tquotient); |
4954 | break; |
4955 | } |
4956 | |
4957 | /* Try using an instruction that produces both the quotient and |
4958 | remainder, using truncation. We can easily compensate the |
4959 | quotient or remainder to get ceiling rounding, once we have the |
	     remainder.  Notice that we also compute the final remainder
4961 | value here, and return the result right away. */ |
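	  /* E.g. (illustrative) 7u CEIL_DIV 2u: truncation gives q == 3,
	     r == 1; r is nonzero, so q is bumped to 4 and OP1 is
	     subtracted from r.  */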
4962 | if (target == 0 || GET_MODE (target) != compute_mode) |
4963 | target = gen_reg_rtx (compute_mode); |
4964 | |
4965 | if (rem_flag) |
4966 | { |
4967 | remainder = (REG_P (target) |
4968 | ? target : gen_reg_rtx (compute_mode)); |
4969 | quotient = gen_reg_rtx (compute_mode); |
4970 | } |
4971 | else |
4972 | { |
4973 | quotient = (REG_P (target) |
4974 | ? target : gen_reg_rtx (compute_mode)); |
4975 | remainder = gen_reg_rtx (compute_mode); |
4976 | } |
4977 | |
4978 | if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, |
4979 | remainder, 1)) |
4980 | { |
4981 | /* This could be computed with a branch-less sequence. |
4982 | Save that for later. */ |
4983 | rtx_code_label *label = gen_label_rtx (); |
4984 | do_cmp_and_jump (remainder, const0_rtx, EQ, |
4985 | compute_mode, label); |
4986 | expand_inc (target: quotient, const1_rtx); |
4987 | expand_dec (target: remainder, dec: op1); |
4988 | emit_label (label); |
4989 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
4990 | } |
4991 | |
4992 | /* No luck with division elimination or divmod. Have to do it |
4993 | by conditionally adjusting op0 *and* the result. */ |
4994 | { |
4995 | rtx_code_label *label1, *label2; |
4996 | rtx adjusted_op0, tem; |
4997 | |
4998 | quotient = gen_reg_rtx (compute_mode); |
4999 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); |
5000 | label1 = gen_label_rtx (); |
5001 | label2 = gen_label_rtx (); |
5002 | do_cmp_and_jump (adjusted_op0, const0_rtx, NE, |
5003 | compute_mode, label1); |
5004 | emit_move_insn (quotient, const0_rtx); |
5005 | emit_jump_insn (targetm.gen_jump (label2)); |
5006 | emit_barrier (); |
5007 | emit_label (label1); |
5008 | expand_dec (target: adjusted_op0, const1_rtx); |
5009 | tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1, |
5010 | quotient, 1, methods); |
5011 | if (tem != quotient) |
5012 | emit_move_insn (quotient, tem); |
5013 | expand_inc (target: quotient, const1_rtx); |
5014 | emit_label (label2); |
5015 | } |
5016 | } |
5017 | else /* signed */ |
5018 | { |
5019 | if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) |
5020 | && INTVAL (op1) >= 0) |
5021 | { |
5022 | /* This is extremely similar to the code for the unsigned case |
5023 | above. For 2.7 we should merge these variants, but for |
5024 | 2.6.1 I don't want to touch the code for unsigned since that |
	     gets used in C.  The signed case will only be used by other
5026 | languages (Ada). */ |
5027 | |
5028 | rtx t1, t2, t3; |
5029 | unsigned HOST_WIDE_INT d = INTVAL (op1); |
5030 | t1 = expand_shift (code: RSHIFT_EXPR, mode: compute_mode, shifted: op0, |
5031 | amount: floor_log2 (x: d), target: tquotient, unsignedp: 0); |
5032 | t2 = expand_binop (compute_mode, and_optab, op0, |
5033 | gen_int_mode (d - 1, compute_mode), |
5034 | NULL_RTX, 1, methods); |
5035 | t3 = gen_reg_rtx (compute_mode); |
5036 | t3 = emit_store_flag (t3, NE, t2, const0_rtx, |
5037 | compute_mode, 1, 1); |
5038 | if (t3 == 0) |
5039 | { |
5040 | rtx_code_label *lab; |
5041 | lab = gen_label_rtx (); |
5042 | do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab); |
5043 | expand_inc (target: t1, const1_rtx); |
5044 | emit_label (lab); |
5045 | quotient = t1; |
5046 | } |
5047 | else |
5048 | quotient = force_operand (gen_rtx_PLUS (compute_mode, |
5049 | t1, t3), |
5050 | tquotient); |
5051 | break; |
5052 | } |
5053 | |
5054 | /* Try using an instruction that produces both the quotient and |
5055 | remainder, using truncation. We can easily compensate the |
5056 | quotient or remainder to get ceiling rounding, once we have the |
	     remainder.  Notice that we also compute the final remainder
5058 | value here, and return the result right away. */ |
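	  /* E.g. (illustrative) -7 CEIL_DIV 2 is -3, already the
	     truncating quotient; the XOR sign test below therefore skips
	     the adjustment whenever the operand signs differ.  */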
5059 | if (target == 0 || GET_MODE (target) != compute_mode) |
5060 | target = gen_reg_rtx (compute_mode); |
5061 | if (rem_flag) |
5062 | { |
	      remainder = (REG_P (target)
5064 | ? target : gen_reg_rtx (compute_mode)); |
5065 | quotient = gen_reg_rtx (compute_mode); |
5066 | } |
5067 | else |
5068 | { |
5069 | quotient = (REG_P (target) |
5070 | ? target : gen_reg_rtx (compute_mode)); |
5071 | remainder = gen_reg_rtx (compute_mode); |
5072 | } |
5073 | |
5074 | if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, |
5075 | remainder, 0)) |
5076 | { |
5077 | /* This could be computed with a branch-less sequence. |
5078 | Save that for later. */ |
5079 | rtx tem; |
5080 | rtx_code_label *label = gen_label_rtx (); |
5081 | do_cmp_and_jump (remainder, const0_rtx, EQ, |
5082 | compute_mode, label); |
5083 | tem = expand_binop (compute_mode, xor_optab, op0, op1, |
5084 | NULL_RTX, 0, OPTAB_WIDEN); |
5085 | do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label); |
5086 | expand_inc (target: quotient, const1_rtx); |
5087 | expand_dec (target: remainder, dec: op1); |
5088 | emit_label (label); |
5089 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
5090 | } |
5091 | |
5092 | /* No luck with division elimination or divmod. Have to do it |
5093 | by conditionally adjusting op0 *and* the result. */ |
5094 | { |
5095 | rtx_code_label *label1, *label2, *label3, *label4, *label5; |
5096 | rtx adjusted_op0; |
5097 | rtx tem; |
5098 | |
5099 | quotient = gen_reg_rtx (compute_mode); |
5100 | adjusted_op0 = copy_to_mode_reg (compute_mode, op0); |
5101 | label1 = gen_label_rtx (); |
5102 | label2 = gen_label_rtx (); |
5103 | label3 = gen_label_rtx (); |
5104 | label4 = gen_label_rtx (); |
5105 | label5 = gen_label_rtx (); |
5106 | do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2); |
5107 | do_cmp_and_jump (adjusted_op0, const0_rtx, GT, |
5108 | compute_mode, label1); |
5109 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
5110 | quotient, 0, methods); |
5111 | if (tem != quotient) |
5112 | emit_move_insn (quotient, tem); |
5113 | emit_jump_insn (targetm.gen_jump (label5)); |
5114 | emit_barrier (); |
5115 | emit_label (label1); |
5116 | expand_dec (target: adjusted_op0, const1_rtx); |
5117 | emit_jump_insn (targetm.gen_jump (label4)); |
5118 | emit_barrier (); |
5119 | emit_label (label2); |
5120 | do_cmp_and_jump (adjusted_op0, const0_rtx, LT, |
5121 | compute_mode, label3); |
5122 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
5123 | quotient, 0, methods); |
5124 | if (tem != quotient) |
5125 | emit_move_insn (quotient, tem); |
5126 | emit_jump_insn (targetm.gen_jump (label5)); |
5127 | emit_barrier (); |
5128 | emit_label (label3); |
5129 | expand_inc (target: adjusted_op0, const1_rtx); |
5130 | emit_label (label4); |
5131 | tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1, |
5132 | quotient, 0, methods); |
5133 | if (tem != quotient) |
5134 | emit_move_insn (quotient, tem); |
5135 | expand_inc (target: quotient, const1_rtx); |
5136 | emit_label (label5); |
5137 | } |
5138 | } |
5139 | break; |
5140 | |
5141 | case EXACT_DIV_EXPR: |
5142 | if (op1_is_constant && HWI_COMPUTABLE_MODE_P (mode: compute_mode)) |
5143 | { |
5144 | scalar_int_mode int_mode = as_a <scalar_int_mode> (m: compute_mode); |
5145 | int size = GET_MODE_BITSIZE (mode: int_mode); |
5146 | HOST_WIDE_INT d = INTVAL (op1); |
5147 | unsigned HOST_WIDE_INT ml; |
5148 | int pre_shift; |
5149 | rtx t1; |
5150 | |
5151 | pre_shift = ctz_or_zero (x: d); |
5152 | ml = invert_mod2n (x: d >> pre_shift, n: size); |
5153 | t1 = expand_shift (code: RSHIFT_EXPR, mode: int_mode, shifted: op0, |
5154 | amount: pre_shift, NULL_RTX, unsignedp); |
5155 | quotient = expand_mult (mode: int_mode, op0: t1, op1: gen_int_mode (ml, int_mode), |
5156 | NULL_RTX, unsignedp: 1); |
5157 | |
5158 | insn = get_last_insn (); |
5159 | set_dst_reg_note (insn, REG_EQUAL, |
5160 | gen_rtx_fmt_ee (unsignedp ? UDIV : DIV, |
5161 | int_mode, op0, op1), |
5162 | quotient); |
5163 | } |
5164 | break; |
5165 | |
5166 | case ROUND_DIV_EXPR: |
5167 | case ROUND_MOD_EXPR: |
5168 | if (unsignedp) |
5169 | { |
5170 | scalar_int_mode int_mode = as_a <scalar_int_mode> (m: compute_mode); |
5171 | rtx tem; |
5172 | rtx_code_label *label; |
5173 | label = gen_label_rtx (); |
5174 | quotient = gen_reg_rtx (int_mode); |
5175 | remainder = gen_reg_rtx (int_mode); |
	  if (expand_twoval_binop (udivmod_optab, op0, op1,
				   quotient, remainder, 1) == 0)
5177 | { |
5178 | rtx tem; |
5179 | quotient = expand_binop (int_mode, udiv_optab, op0, op1, |
5180 | quotient, 1, methods); |
5181 | tem = expand_mult (mode: int_mode, op0: quotient, op1, NULL_RTX, unsignedp: 1); |
5182 | remainder = expand_binop (int_mode, sub_optab, op0, tem, |
5183 | remainder, 1, methods); |
5184 | } |
5185 | tem = plus_constant (int_mode, op1, -1); |
5186 | tem = expand_shift (code: RSHIFT_EXPR, mode: int_mode, shifted: tem, amount: 1, NULL_RTX, unsignedp: 1); |
5187 | do_cmp_and_jump (remainder, tem, LEU, int_mode, label); |
5188 | expand_inc (target: quotient, const1_rtx); |
5189 | expand_dec (target: remainder, dec: op1); |
5190 | emit_label (label); |
5191 | } |
5192 | else |
5193 | { |
5194 | scalar_int_mode int_mode = as_a <scalar_int_mode> (m: compute_mode); |
5195 | int size = GET_MODE_BITSIZE (mode: int_mode); |
5196 | rtx abs_rem, abs_op1, tem, mask; |
5197 | rtx_code_label *label; |
5198 | label = gen_label_rtx (); |
5199 | quotient = gen_reg_rtx (int_mode); |
5200 | remainder = gen_reg_rtx (int_mode); |
	  if (expand_twoval_binop (sdivmod_optab, op0, op1,
				   quotient, remainder, 0) == 0)
5202 | { |
5203 | rtx tem; |
5204 | quotient = expand_binop (int_mode, sdiv_optab, op0, op1, |
5205 | quotient, 0, methods); |
5206 | tem = expand_mult (mode: int_mode, op0: quotient, op1, NULL_RTX, unsignedp: 0); |
5207 | remainder = expand_binop (int_mode, sub_optab, op0, tem, |
5208 | remainder, 0, methods); |
5209 | } |
5210 | abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0); |
5211 | abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0); |
5212 | tem = expand_shift (code: LSHIFT_EXPR, mode: int_mode, shifted: abs_rem, |
5213 | amount: 1, NULL_RTX, unsignedp: 1); |
5214 | do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label); |
5215 | tem = expand_binop (int_mode, xor_optab, op0, op1, |
5216 | NULL_RTX, 0, OPTAB_WIDEN); |
5217 | mask = expand_shift (code: RSHIFT_EXPR, mode: int_mode, shifted: tem, |
5218 | amount: size - 1, NULL_RTX, unsignedp: 0); |
5219 | tem = expand_binop (int_mode, xor_optab, mask, const1_rtx, |
5220 | NULL_RTX, 0, OPTAB_WIDEN); |
5221 | tem = expand_binop (int_mode, sub_optab, tem, mask, |
5222 | NULL_RTX, 0, OPTAB_WIDEN); |
5223 | expand_inc (target: quotient, inc: tem); |
5224 | tem = expand_binop (int_mode, xor_optab, mask, op1, |
5225 | NULL_RTX, 0, OPTAB_WIDEN); |
5226 | tem = expand_binop (int_mode, sub_optab, tem, mask, |
5227 | NULL_RTX, 0, OPTAB_WIDEN); |
5228 | expand_dec (target: remainder, dec: tem); |
5229 | emit_label (label); |
5230 | } |
5231 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
5232 | |
5233 | default: |
5234 | gcc_unreachable (); |
5235 | } |
5236 | |
5237 | if (quotient == 0) |
5238 | { |
5239 | if (target && GET_MODE (target) != compute_mode) |
5240 | target = 0; |
5241 | |
5242 | if (rem_flag) |
5243 | { |
5244 | /* Try to produce the remainder without producing the quotient. |
5245 | If we seem to have a divmod pattern that does not require widening, |
5246 | don't try widening here. We should really have a WIDEN argument |
5247 | to expand_twoval_binop, since what we'd really like to do here is |
5248 | 1) try a mod insn in compute_mode |
5249 | 2) try a divmod insn in compute_mode |
5250 | 3) try a div insn in compute_mode and multiply-subtract to get |
5251 | remainder |
5252 | 4) try the same things with widening allowed. */ |
5253 | remainder |
5254 | = sign_expand_binop (compute_mode, umod_optab, smod_optab, |
5255 | op0, op1, target, |
5256 | unsignedp, |
5257 | ((optab_handler (op: optab2, mode: compute_mode) |
5258 | != CODE_FOR_nothing) |
5259 | ? OPTAB_DIRECT : OPTAB_WIDEN)); |
5260 | if (remainder == 0) |
5261 | { |
5262 | /* No luck there. Can we do remainder and divide at once |
5263 | without a library call? */ |
5264 | remainder = gen_reg_rtx (compute_mode); |
5265 | if (! expand_twoval_binop ((unsignedp |
5266 | ? udivmod_optab |
5267 | : sdivmod_optab), |
5268 | op0, op1, |
5269 | NULL_RTX, remainder, unsignedp)) |
5270 | remainder = 0; |
5271 | } |
5272 | |
5273 | if (remainder) |
5274 | return gen_lowpart (mode, remainder); |
5275 | } |
5276 | |
5277 | /* Produce the quotient. Try a quotient insn, but not a library call. |
5278 | If we have a divmod in this mode, use it in preference to widening |
5279 | the div (for this test we assume it will not fail). Note that optab2 |
5280 | is set to the one of the two optabs that the call below will use. */ |
5281 | quotient |
5282 | = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab, |
5283 | op0, op1, rem_flag ? NULL_RTX : target, |
5284 | unsignedp, |
5285 | ((optab_handler (op: optab2, mode: compute_mode) |
5286 | != CODE_FOR_nothing) |
5287 | ? OPTAB_DIRECT : OPTAB_WIDEN)); |
5288 | |
5289 | if (quotient == 0) |
5290 | { |
5291 | /* No luck there. Try a quotient-and-remainder insn, |
5292 | keeping the quotient alone. */ |
5293 | quotient = gen_reg_rtx (compute_mode); |
5294 | if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab, |
5295 | op0, op1, |
5296 | quotient, NULL_RTX, unsignedp)) |
5297 | { |
5298 | quotient = 0; |
5299 | if (! rem_flag) |
5300 | /* Still no luck. If we are not computing the remainder, |
5301 | use a library call for the quotient. */ |
5302 | quotient = sign_expand_binop (compute_mode, |
5303 | udiv_optab, sdiv_optab, |
5304 | op0, op1, target, |
5305 | unsignedp, methods); |
5306 | } |
5307 | } |
5308 | } |
5309 | |
5310 | if (rem_flag) |
5311 | { |
5312 | if (target && GET_MODE (target) != compute_mode) |
5313 | target = 0; |
5314 | |
5315 | if (quotient == 0) |
5316 | { |
5317 | /* No divide instruction either. Use library for remainder. */ |
5318 | remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab, |
5319 | op0, op1, target, |
5320 | unsignedp, methods); |
5321 | /* No remainder function. Try a quotient-and-remainder |
5322 | function, keeping the remainder. */ |
5323 | if (!remainder |
5324 | && (methods == OPTAB_LIB || methods == OPTAB_LIB_WIDEN)) |
5325 | { |
5326 | remainder = gen_reg_rtx (compute_mode); |
5327 | if (!expand_twoval_binop_libfunc |
5328 | (unsignedp ? udivmod_optab : sdivmod_optab, |
5329 | op0, op1, |
5330 | NULL_RTX, remainder, |
5331 | unsignedp ? UMOD : MOD)) |
5332 | remainder = NULL_RTX; |
5333 | } |
5334 | } |
5335 | else |
5336 | { |
5337 | /* We divided. Now finish doing X - Y * (X / Y). */ |
5338 | remainder = expand_mult (mode: compute_mode, op0: quotient, op1, |
5339 | NULL_RTX, unsignedp); |
5340 | remainder = expand_binop (compute_mode, sub_optab, op0, |
5341 | remainder, target, unsignedp, |
5342 | methods); |
5343 | } |
5344 | } |
5345 | |
5346 | if (methods != OPTAB_LIB_WIDEN |
5347 | && (rem_flag ? remainder : quotient) == NULL_RTX) |
5348 | return NULL_RTX; |
5349 | |
5350 | return gen_lowpart (mode, rem_flag ? remainder : quotient); |
5351 | } |
5352 | |
/* Return a tree node with data type TYPE, describing the value of X.
   Usually this is a VAR_DECL, if there is no obvious better choice.
   X may be an expression; however, we only support those expressions
   generated by loop.c.  */
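
/* For example (an illustrative sketch): given a REG R of SImode,
   make_tree (integer_type_node, gen_rtx_PLUS (SImode, R, const1_rtx))
   produces a folded PLUS_EXPR of a VAR_DECL wrapping R and the
   integer constant one.  */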
5357 | |
5358 | tree |
5359 | make_tree (tree type, rtx x) |
5360 | { |
5361 | tree t; |
5362 | |
5363 | switch (GET_CODE (x)) |
5364 | { |
5365 | case CONST_INT: |
5366 | case CONST_WIDE_INT: |
5367 | t = wide_int_to_tree (type, cst: rtx_mode_t (x, TYPE_MODE (type))); |
5368 | return t; |
5369 | |
5370 | case CONST_DOUBLE: |
5371 | STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT); |
5372 | if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode) |
5373 | t = wide_int_to_tree (type, |
5374 | cst: wide_int::from_array (val: &CONST_DOUBLE_LOW (x), len: 2, |
5375 | HOST_BITS_PER_WIDE_INT * 2)); |
5376 | else |
5377 | t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x)); |
5378 | |
5379 | return t; |
5380 | |
5381 | case CONST_VECTOR: |
5382 | { |
5383 | unsigned int npatterns = CONST_VECTOR_NPATTERNS (x); |
5384 | unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x); |
5385 | tree itype = TREE_TYPE (type); |
5386 | |
5387 | /* Build a tree with vector elements. */ |
5388 | tree_vector_builder elts (type, npatterns, nelts_per_pattern); |
5389 | unsigned int count = elts.encoded_nelts (); |
5390 | for (unsigned int i = 0; i < count; ++i) |
5391 | { |
5392 | rtx elt = CONST_VECTOR_ELT (x, i); |
5393 | elts.quick_push (obj: make_tree (type: itype, x: elt)); |
5394 | } |
5395 | |
5396 | return elts.build (); |
5397 | } |
5398 | |
5399 | case PLUS: |
5400 | return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)), |
5401 | make_tree (type, XEXP (x, 1))); |
5402 | |
5403 | case MINUS: |
5404 | return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)), |
5405 | make_tree (type, XEXP (x, 1))); |
5406 | |
5407 | case NEG: |
5408 | return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))); |
5409 | |
5410 | case MULT: |
5411 | return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)), |
5412 | make_tree (type, XEXP (x, 1))); |
5413 | |
5414 | case ASHIFT: |
5415 | return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)), |
5416 | make_tree (type, XEXP (x, 1))); |
5417 | |
5418 | case LSHIFTRT: |
5419 | t = unsigned_type_for (type); |
5420 | return fold_convert (type, build2 (RSHIFT_EXPR, t, |
5421 | make_tree (t, XEXP (x, 0)), |
5422 | make_tree (type, XEXP (x, 1)))); |
5423 | |
5424 | case ASHIFTRT: |
5425 | t = signed_type_for (type); |
5426 | return fold_convert (type, build2 (RSHIFT_EXPR, t, |
5427 | make_tree (t, XEXP (x, 0)), |
5428 | make_tree (type, XEXP (x, 1)))); |
5429 | |
5430 | case DIV: |
5431 | if (TREE_CODE (type) != REAL_TYPE) |
5432 | t = signed_type_for (type); |
5433 | else |
5434 | t = type; |
5435 | |
5436 | return fold_convert (type, build2 (TRUNC_DIV_EXPR, t, |
5437 | make_tree (t, XEXP (x, 0)), |
5438 | make_tree (t, XEXP (x, 1)))); |
5439 | case UDIV: |
5440 | t = unsigned_type_for (type); |
5441 | return fold_convert (type, build2 (TRUNC_DIV_EXPR, t, |
5442 | make_tree (t, XEXP (x, 0)), |
5443 | make_tree (t, XEXP (x, 1)))); |
5444 | |
5445 | case SIGN_EXTEND: |
5446 | case ZERO_EXTEND: |
5447 | t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)), |
5448 | GET_CODE (x) == ZERO_EXTEND); |
5449 | return fold_convert (type, make_tree (t, XEXP (x, 0))); |
5450 | |
5451 | case CONST: |
5452 | return make_tree (type, XEXP (x, 0)); |
5453 | |
5454 | case SYMBOL_REF: |
5455 | t = SYMBOL_REF_DECL (x); |
5456 | if (t) |
5457 | return fold_convert (type, build_fold_addr_expr (t)); |
5458 | /* fall through. */ |
5459 | |
5460 | default: |
5461 | if (CONST_POLY_INT_P (x)) |
5462 | return wide_int_to_tree (type: t, cst: const_poly_int_value (x)); |
5463 | |
5464 | t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type); |
5465 | |
5466 | /* If TYPE is a POINTER_TYPE, we might need to convert X from |
5467 | address mode to pointer mode. */ |
5468 | if (POINTER_TYPE_P (type)) |
5469 | x = convert_memory_address_addr_space |
5470 | (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type))); |
5471 | |
5472 | /* Note that we do *not* use SET_DECL_RTL here, because we do not |
5473 | want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */ |
5474 | t->decl_with_rtl.rtl = x; |
5475 | |
5476 | return t; |
5477 | } |
5478 | } |
5479 | |
5480 | /* Compute the logical-and of OP0 and OP1, storing it in TARGET |
5481 | and returning TARGET. |
5482 | |
5483 | If TARGET is 0, a pseudo-register or constant is returned. */ |
5484 | |
5485 | rtx |
5486 | expand_and (machine_mode mode, rtx op0, rtx op1, rtx target) |
5487 | { |
5488 | rtx tem = 0; |
5489 | |
5490 | if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode) |
5491 | tem = simplify_binary_operation (code: AND, mode, op0, op1); |
5492 | if (tem == 0) |
5493 | tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN); |
5494 | |
5495 | if (target == 0) |
5496 | target = tem; |
5497 | else if (tem != target) |
5498 | emit_move_insn (target, tem); |
5499 | return target; |
5500 | } |
5501 | |
5502 | /* Helper function for emit_store_flag. */ |
5503 | rtx |
5504 | emit_cstore (rtx target, enum insn_code icode, enum rtx_code code, |
5505 | machine_mode mode, machine_mode compare_mode, |
5506 | int unsignedp, rtx x, rtx y, int normalizep, |
5507 | machine_mode target_mode) |
5508 | { |
5509 | class expand_operand ops[4]; |
5510 | rtx op0, comparison, subtarget; |
5511 | rtx_insn *last; |
5512 | scalar_int_mode result_mode = targetm.cstore_mode (icode); |
5513 | scalar_int_mode int_target_mode; |
5514 | |
5515 | last = get_last_insn (); |
5516 | x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp); |
5517 | y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp); |
5518 | if (!x || !y) |
5519 | { |
5520 | delete_insns_since (last); |
5521 | return NULL_RTX; |
5522 | } |
5523 | |
5524 | if (target_mode == VOIDmode) |
5525 | int_target_mode = result_mode; |
5526 | else |
5527 | int_target_mode = as_a <scalar_int_mode> (m: target_mode); |
5528 | if (!target) |
5529 | target = gen_reg_rtx (int_target_mode); |
5530 | |
5531 | comparison = gen_rtx_fmt_ee (code, result_mode, x, y); |
5532 | |
5533 | create_output_operand (op: &ops[0], optimize ? NULL_RTX : target, mode: result_mode); |
5534 | create_fixed_operand (op: &ops[1], x: comparison); |
5535 | create_fixed_operand (op: &ops[2], x); |
5536 | create_fixed_operand (op: &ops[3], x: y); |
5537 | if (!maybe_expand_insn (icode, nops: 4, ops)) |
5538 | { |
5539 | delete_insns_since (last); |
5540 | return NULL_RTX; |
5541 | } |
5542 | subtarget = ops[0].value; |
5543 | |
5544 | /* If we are converting to a wider mode, first convert to |
5545 | INT_TARGET_MODE, then normalize. This produces better combining |
5546 | opportunities on machines that have a SIGN_EXTRACT when we are |
5547 | testing a single bit. This mostly benefits the 68k. |
5548 | |
5549 | If STORE_FLAG_VALUE does not have the sign bit set when |
5550 | interpreted in MODE, we can do this conversion as unsigned, which |
5551 | is usually more efficient. */ |
5552 | if (GET_MODE_PRECISION (mode: int_target_mode) > GET_MODE_PRECISION (mode: result_mode)) |
5553 | { |
5554 | gcc_assert (GET_MODE_PRECISION (result_mode) != 1 |
5555 | || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1); |
5556 | |
5557 | bool unsignedp = (STORE_FLAG_VALUE >= 0); |
5558 | convert_move (target, subtarget, unsignedp); |
5559 | |
5560 | op0 = target; |
5561 | result_mode = int_target_mode; |
5562 | } |
5563 | else |
5564 | op0 = subtarget; |
5565 | |
5566 | /* If we want to keep subexpressions around, don't reuse our last |
5567 | target. */ |
5568 | if (optimize) |
5569 | subtarget = 0; |
5570 | |
5571 | /* Now normalize to the proper value in MODE. Sometimes we don't |
5572 | have to do anything. */ |
5573 | if (normalizep == 0 || normalizep == STORE_FLAG_VALUE) |
5574 | ; |
5575 | /* STORE_FLAG_VALUE might be the most negative number, so write |
     the comparison this way to avoid a compile-time warning.  */
5577 | else if (- normalizep == STORE_FLAG_VALUE) |
5578 | op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0); |
5579 | |
5580 | /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes |
5581 | it hard to use a value of just the sign bit due to ANSI integer |
5582 | constant typing rules. */ |
5583 | else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE)) |
5584 | op0 = expand_shift (code: RSHIFT_EXPR, mode: result_mode, shifted: op0, |
5585 | amount: GET_MODE_BITSIZE (mode: result_mode) - 1, target: subtarget, |
5586 | unsignedp: normalizep == 1); |
5587 | else |
5588 | { |
5589 | gcc_assert (STORE_FLAG_VALUE & 1); |
5590 | |
5591 | op0 = expand_and (mode: result_mode, op0, const1_rtx, target: subtarget); |
5592 | if (normalizep == -1) |
5593 | op0 = expand_unop (result_mode, neg_optab, op0, op0, 0); |
5594 | } |
5595 | |
5596 | /* If we were converting to a smaller mode, do the conversion now. */ |
5597 | if (int_target_mode != result_mode) |
5598 | { |
5599 | convert_move (target, op0, 0); |
5600 | return target; |
5601 | } |
5602 | else |
5603 | return op0; |
5604 | } |
5605 | |
5606 | |
5607 | /* A subroutine of emit_store_flag only including "tricks" that do not |
5608 | need a recursive call. These are kept separate to avoid infinite |
5609 | loops. */ |
5610 | |
5611 | static rtx |
5612 | emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1, |
5613 | machine_mode mode, int unsignedp, int normalizep, |
5614 | machine_mode target_mode) |
5615 | { |
5616 | rtx subtarget; |
5617 | enum insn_code icode; |
5618 | machine_mode compare_mode; |
5619 | enum mode_class mclass; |
5620 | enum rtx_code scode; |
5621 | |
5622 | if (unsignedp) |
5623 | code = unsigned_condition (code); |
5624 | scode = swap_condition (code); |
5625 | |
5626 | /* If one operand is constant, make it the second one. Only do this |
5627 | if the other operand is not constant as well. */ |
5628 | |
5629 | if (swap_commutative_operands_p (op0, op1)) |
5630 | { |
5631 | std::swap (a&: op0, b&: op1); |
5632 | code = swap_condition (code); |
5633 | } |
5634 | |
5635 | if (mode == VOIDmode) |
5636 | mode = GET_MODE (op0); |
5637 | |
5638 | if (CONST_SCALAR_INT_P (op1)) |
5639 | canonicalize_comparison (mode, &code, &op1); |
5640 | |
5641 | /* For some comparisons with 1 and -1, we can convert this to |
5642 | comparisons with zero. This will often produce more opportunities for |
5643 | store-flag insns. */ |
5644 | |
5645 | switch (code) |
5646 | { |
5647 | case LT: |
5648 | if (op1 == const1_rtx) |
5649 | op1 = const0_rtx, code = LE; |
5650 | break; |
5651 | case LE: |
5652 | if (op1 == constm1_rtx) |
5653 | op1 = const0_rtx, code = LT; |
5654 | break; |
5655 | case GE: |
5656 | if (op1 == const1_rtx) |
5657 | op1 = const0_rtx, code = GT; |
5658 | break; |
5659 | case GT: |
5660 | if (op1 == constm1_rtx) |
5661 | op1 = const0_rtx, code = GE; |
5662 | break; |
5663 | case GEU: |
5664 | if (op1 == const1_rtx) |
5665 | op1 = const0_rtx, code = NE; |
5666 | break; |
5667 | case LTU: |
5668 | if (op1 == const1_rtx) |
5669 | op1 = const0_rtx, code = EQ; |
5670 | break; |
5671 | default: |
5672 | break; |
5673 | } |
5674 | |
5675 | /* If this is A < 0 or A >= 0, we can do this by taking the ones |
5676 | complement of A (for GE) and shifting the sign bit to the low bit. */ |
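  /* E.g. (illustrative) for 32-bit A with STORE_FLAG_VALUE == 1,
     (A < 0) becomes (unsigned) A >> 31 and (A >= 0) becomes
     (unsigned) ~A >> 31.  */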
5677 | scalar_int_mode int_mode; |
5678 | if (op1 == const0_rtx && (code == LT || code == GE) |
5679 | && is_int_mode (mode, int_mode: &int_mode) |
5680 | && (normalizep || STORE_FLAG_VALUE == 1 |
5681 | || val_signbit_p (int_mode, STORE_FLAG_VALUE))) |
5682 | { |
5683 | scalar_int_mode int_target_mode; |
5684 | subtarget = target; |
5685 | |
5686 | if (!target) |
5687 | int_target_mode = int_mode; |
5688 | else |
5689 | { |
5690 | /* If the result is to be wider than OP0, it is best to convert it |
5691 | first. If it is to be narrower, it is *incorrect* to convert it |
5692 | first. */ |
5693 | int_target_mode = as_a <scalar_int_mode> (m: target_mode); |
5694 | if (GET_MODE_SIZE (mode: int_target_mode) > GET_MODE_SIZE (mode: int_mode)) |
5695 | { |
5696 | op0 = convert_modes (mode: int_target_mode, oldmode: int_mode, x: op0, unsignedp: 0); |
5697 | int_mode = int_target_mode; |
5698 | } |
5699 | } |
5700 | |
5701 | if (int_target_mode != int_mode) |
5702 | subtarget = 0; |
5703 | |
5704 | if (code == GE) |
5705 | op0 = expand_unop (int_mode, one_cmpl_optab, op0, |
5706 | ((STORE_FLAG_VALUE == 1 || normalizep) |
5707 | ? 0 : subtarget), 0); |
5708 | |
5709 | if (STORE_FLAG_VALUE == 1 || normalizep) |
5710 | /* If we are supposed to produce a 0/1 value, we want to do |
5711 | a logical shift from the sign bit to the low-order bit; for |
5712 | a -1/0 value, we do an arithmetic shift. */ |
5713 | op0 = expand_shift (code: RSHIFT_EXPR, mode: int_mode, shifted: op0, |
5714 | amount: GET_MODE_BITSIZE (mode: int_mode) - 1, |
5715 | target: subtarget, unsignedp: normalizep != -1); |
5716 | |
5717 | if (int_mode != int_target_mode) |
5718 | op0 = convert_modes (mode: int_target_mode, oldmode: int_mode, x: op0, unsignedp: 0); |
5719 | |
5720 | return op0; |
5721 | } |
5722 | |
5723 | /* Next try expanding this via the backend's cstore<mode>4. */ |
5724 | mclass = GET_MODE_CLASS (mode); |
5725 | FOR_EACH_WIDER_MODE_FROM (compare_mode, mode) |
5726 | { |
5727 | machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode; |
5728 | icode = optab_handler (op: cstore_optab, mode: optab_mode); |
5729 | if (icode != CODE_FOR_nothing) |
5730 | { |
5731 | do_pending_stack_adjust (); |
5732 | rtx tem = emit_cstore (target, icode, code, mode, compare_mode, |
5733 | unsignedp, x: op0, y: op1, normalizep, target_mode); |
5734 | if (tem) |
5735 | return tem; |
5736 | |
5737 | if (GET_MODE_CLASS (mode) == MODE_FLOAT) |
5738 | { |
5739 | tem = emit_cstore (target, icode, code: scode, mode, compare_mode, |
5740 | unsignedp, x: op1, y: op0, normalizep, target_mode); |
5741 | if (tem) |
5742 | return tem; |
5743 | } |
5744 | break; |
5745 | } |
5746 | } |
5747 | |
5748 | /* If we are comparing a double-word integer with zero or -1, we can |
5749 | convert the comparison into one involving a single word. */ |
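  /* E.g. (illustrative) for a double-word X with word halves HI and LO:
     X == 0 becomes (LO | HI) == 0, X == -1 becomes (LO & HI) == -1, and
     X < 0 tests only the sign bit of HI.  */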
  if (is_int_mode (mode, &int_mode)
      && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
      && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
    {
      rtx tem;
      if ((code == EQ || code == NE)
	  && (op1 == const0_rtx || op1 == constm1_rtx))
	{
	  rtx op00, op01;

	  /* Do a logical OR or AND of the two words and compare the
	     result.  */
	  op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
	  op01 = simplify_gen_subreg (word_mode, op0, int_mode,
				      UNITS_PER_WORD);
	  tem = expand_binop (word_mode,
			      op1 == const0_rtx ? ior_optab : and_optab,
			      op00, op01, NULL_RTX, unsignedp,
			      OPTAB_DIRECT);

	  if (tem != 0)
	    tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
				   unsignedp, normalizep);
	}
      else if ((code == LT || code == GE) && op1 == const0_rtx)
	{
	  rtx op0h;

	  /* If testing the sign bit, can just test on high word.  */
	  op0h = simplify_gen_subreg (word_mode, op0, int_mode,
				      subreg_highpart_offset (word_mode,
							      int_mode));
	  tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
				 unsignedp, normalizep);
	}
      else
	tem = NULL_RTX;

      if (tem)
	{
	  if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
	    return tem;
	  if (!target)
	    target = gen_reg_rtx (target_mode);

	  convert_move (target, tem,
			!val_signbit_known_set_p (word_mode,
						  (normalizep ? normalizep
						   : STORE_FLAG_VALUE)));
	  return target;
	}
    }

  return 0;
}

/* Subroutine of emit_store_flag that handles cases in which the operands
   are scalar integers.  SUBTARGET is the target to use for temporary
   operations and TRUEVAL is the value to store when the condition is
   true.  All other arguments are as for emit_store_flag.  */

rtx
emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
		     rtx op1, scalar_int_mode mode, int unsignedp,
		     int normalizep, rtx trueval)
{
  machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
  rtx_insn *last = get_last_insn ();

  /* If this is an equality comparison of integers, we can try to exclusive-or
     (or subtract) the two operands and use a recursive call to try the
     comparison with zero.  Don't do any of these cases if branches are
     very cheap.  */
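  /* That is, "a == b" is tried as "(a ^ b) == 0" (or "(a - b) == 0" when
     no usable xor exists), which the recursive call may then handle with
     one of the zero-comparison tricks below.  */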

  if ((code == EQ || code == NE) && op1 != const0_rtx)
    {
      rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
			      OPTAB_WIDEN);

      if (tem == 0)
	tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
			    OPTAB_WIDEN);
      if (tem != 0)
	tem = emit_store_flag (target, code, tem, const0_rtx,
			       mode, unsignedp, normalizep);
      if (tem != 0)
	return tem;

      delete_insns_since (last);
    }

  /* For integer comparisons, try the reverse comparison.  However, for
     small X, if we would have to extend it anyway, implementing "X != 0"
     as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
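  /* E.g. for a zero-extended QImode X held in a 32-bit register,
     -(int)X is negative exactly when X is nonzero, so the arithmetic
     shift leaves -1 for "X != 0" and 0 otherwise.  */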
  rtx_code rcode = reverse_condition (code);
  if (can_compare_p (rcode, mode, ccp_store_flag)
      && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
	    && code == NE
	    && GET_MODE_SIZE (mode) < UNITS_PER_WORD
	    && op1 == const0_rtx))
    {
      int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
		      || (STORE_FLAG_VALUE == -1 && normalizep == 1));
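
      /* E.g. with STORE_FLAG_VALUE == 1 and NORMALIZEP == -1, the
	 reversed comparison yields 1 exactly when the original is false;
	 adding -1 then maps that 1/0 result to the wanted 0/-1.  */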

      /* Again, for the reverse comparison, use either an addition or a XOR.  */
      if (want_add
	  && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
		       optimize_insn_for_speed_p ()) == 0)
	{
	  rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				       STORE_FLAG_VALUE, target_mode);
	  if (tem != 0)
	    tem = expand_binop (target_mode, add_optab, tem,
				gen_int_mode (normalizep, target_mode),
				target, 0, OPTAB_WIDEN);
	  if (tem != 0)
	    return tem;
	}
      else if (!want_add
	       && rtx_cost (trueval, mode, XOR, 1,
			    optimize_insn_for_speed_p ()) == 0)
	{
	  rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				       normalizep, target_mode);
	  if (tem != 0)
	    tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
				INTVAL (trueval) >= 0, OPTAB_WIDEN);
	  if (tem != 0)
	    return tem;
	}

      delete_insns_since (last);
    }

  /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
     the constant zero.  Reject all other comparisons at this point.  Only
     do LE and GT if branches are expensive since they are expensive on
     2-operand machines.  */

  if (op1 != const0_rtx
      || (code != EQ && code != NE
	  && (BRANCH_COST (optimize_insn_for_speed_p (),
			   false) <= 1 || (code != LE && code != GT))))
    return 0;

  /* Try to put the result of the comparison in the sign bit.  Assume we can't
     do the necessary operation below.  */

  rtx tem = 0;

  /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
     the sign bit set.  */
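
  /* E.g. A = 3: 3 | 2 leaves the sign bit clear; A = 0: 0 | -1 = -1 sets
     it; A = -5: both operands are negative, so it is set.  The borrow in
     A - 1 is what drags A = 0 negative.  */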

  if (code == LE)
    {
      /* This is destructive, so SUBTARGET can't be OP0.  */
      if (rtx_equal_p (subtarget, op0))
	subtarget = 0;

      tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
			  OPTAB_WIDEN);
      if (tem)
	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
			    OPTAB_WIDEN);
    }

  /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
     number of bits in the mode of OP0, minus one.  */
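
  /* E.g. in SImode: A = 5 gives (5 >> 31) - 5 = -5 (sign bit set);
     A = 0 gives 0 - 0 = 0; A = -7 gives -1 - (-7) = 6 (sign bit clear).
     The sign bit of the result is thus exactly "A > 0".  */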

  if (code == GT)
    {
      if (rtx_equal_p (subtarget, op0))
	subtarget = 0;

      tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
				GET_MODE_BITSIZE (mode) - 1,
				subtarget, 0);
      if (tem)
	tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
			    OPTAB_WIDEN);
    }

  if (code == EQ || code == NE)
    {
      /* For EQ or NE, one way to do the comparison is to apply an operation
	 that converts the operand into a positive number if it is nonzero
	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
	 for NE we negate.  This puts the result in the sign bit.  Then we
	 normalize with a shift, if needed.

	 Two operations that can do the above actions are ABS and FFS, so try
	 them.  If that doesn't work, and MODE is smaller than a full word,
	 we can use zero-extension to the wider mode (an unsigned conversion)
	 as the operation.  */

      /* Note that ABS doesn't yield a positive number for INT_MIN, but
	 that is compensated by the subsequent overflow when subtracting
	 one / negating.  */
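
      /* E.g. A = 5: abs gives 5, so NE's negation yields -5 (sign bit
	 set) and EQ's subtraction yields 4 (sign bit clear); A = 0: abs
	 gives 0, so NE yields 0 and EQ yields -1 (sign bit set).  */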

      if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
      else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
      else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
	{
	  tem = convert_modes (word_mode, mode, op0, 1);
	  mode = word_mode;
	}

      if (tem != 0)
	{
	  if (code == EQ)
	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
				0, OPTAB_WIDEN);
	  else
	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
	}

      /* If we couldn't do it that way, for NE we can "or" the two's complement
	 of the value with itself.  For EQ, we take the one's complement of
	 that "or", which is an extra insn, so we only handle EQ if branches
	 are expensive.  */
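
      /* One of A and -A is negative for any nonzero A (both are for the
	 most negative value), so the sign bit of (-A | A) is exactly
	 "A != 0"; e.g. A = 6: -6 | 6 is negative, while 0 | 0 = 0.  */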

      if (tem == 0
	  && (code == NE
	      || BRANCH_COST (optimize_insn_for_speed_p (),
			      false) > 1))
	{
	  if (rtx_equal_p (subtarget, op0))
	    subtarget = 0;

	  tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
	  tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
			      OPTAB_WIDEN);

	  if (tem && code == EQ)
	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
	}
    }

  if (tem && normalizep)
    tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
			      GET_MODE_BITSIZE (mode) - 1,
			      subtarget, normalizep == 1);

  if (tem)
    {
      if (!target)
	;
      else if (GET_MODE (tem) != target_mode)
	{
	  convert_move (target, tem, 0);
	  tem = target;
	}
      else if (!subtarget)
	{
	  emit_move_insn (target, tem);
	  tem = target;
	}
    }
  else
    delete_insns_since (last);

  return tem;
}

/* Emit a store-flags instruction for comparison CODE on OP0 and OP1
   and storing in TARGET.  Normally return TARGET.
   Return 0 if that cannot be done.

   MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
   it is VOIDmode, they cannot both be CONST_INT.

   UNSIGNEDP is for the case where we have to widen the operands
   to perform the operation.  It says to use zero-extension.

   NORMALIZEP is 1 if we should convert the result to be either zero
   or one.  NORMALIZEP is -1 if we should convert the result to be
   either zero or -1.  If NORMALIZEP is zero, the result will be left
   "raw" out of the scc insn.  */

rtx
emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
		 machine_mode mode, int unsignedp, int normalizep)
{
  machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
  enum rtx_code rcode;
  rtx subtarget;
  rtx tem, trueval;
  rtx_insn *last;

  /* If we compare constants, we shouldn't use a store-flag operation,
     but a constant load.  We can get there via the vanilla route that
     usually generates a compare-branch sequence, but will in this case
     fold the comparison to a constant, and thus elide the branch.  */
  if (CONSTANT_P (op0) && CONSTANT_P (op1))
    return NULL_RTX;

  tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
			   target_mode);
  if (tem)
    return tem;

  /* If we reached here, we can't do this with a scc insn, however there
     are some comparisons that can be done in other ways.  Don't do any
     of these cases if branches are very cheap.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
    return 0;

  /* See what we need to return.  We can only return a 1, -1, or the
     sign bit.  */

  if (normalizep == 0)
    {
      if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
	normalizep = STORE_FLAG_VALUE;

      else if (val_signbit_p (mode, STORE_FLAG_VALUE))
	;
      else
	return 0;
    }

  last = get_last_insn ();

  /* If optimizing, use different pseudo registers for each insn, instead
     of reusing the same pseudo.  This leads to better CSE, but slows
     down the compiler, since there are more pseudos.  */
  subtarget = (!optimize
	       && (target_mode == mode)) ? target : NULL_RTX;
  trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);

  /* For floating-point comparisons, try the reverse comparison or try
     changing the "orderedness" of the comparison.  */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      enum rtx_code first_code;
      bool and_them;

      rcode = reverse_condition_maybe_unordered (code);
      if (can_compare_p (rcode, mode, ccp_store_flag)
	  && (code == ORDERED || code == UNORDERED
	      || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
	      || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
	{
	  int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
			  || (STORE_FLAG_VALUE == -1 && normalizep == 1));

	  /* For the reverse comparison, use either an addition or a XOR.  */
	  if (want_add
	      && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
			   optimize_insn_for_speed_p ()) == 0)
	    {
	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				       STORE_FLAG_VALUE, target_mode);
	      if (tem)
		return expand_binop (target_mode, add_optab, tem,
				     gen_int_mode (normalizep, target_mode),
				     target, 0, OPTAB_WIDEN);
	    }
	  else if (!want_add
		   && rtx_cost (trueval, mode, XOR, 1,
				optimize_insn_for_speed_p ()) == 0)
	    {
	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
				       normalizep, target_mode);
	      if (tem)
		return expand_binop (target_mode, xor_optab, tem, trueval,
				     target, INTVAL (trueval) >= 0,
				     OPTAB_WIDEN);
	    }
	}

      delete_insns_since (last);

      /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
      if (code == ORDERED || code == UNORDERED)
	return 0;

      and_them = split_comparison (code, mode, &first_code, &code);
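
      /* E.g. LT is handled as "ordered && less" (AND_THEM true), while
	 UNLT is handled as "unordered || less" (AND_THEM false).  */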

      /* If there are no NaNs, the first comparison should always fall through.
	 Effectively change the comparison to the other one.  */
      if (!HONOR_NANS (mode))
	{
	  gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
	  return emit_store_flag_1 (target, code, op0, op1, mode, 0,
				    normalizep, target_mode);
	}

      if (!HAVE_conditional_move)
	return 0;

      /* Do not turn a trapping comparison into a non-trapping one.  */
      if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
	  && flag_trapping_math)
	return 0;

      /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
	 conditional move.  */
      tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
			       normalizep, target_mode);
      if (tem == 0)
	return 0;

      if (and_them)
	tem = emit_conditional_move (target, { code, op0, op1, mode },
				     tem, const0_rtx, GET_MODE (tem), 0);
      else
	tem = emit_conditional_move (target, { code, op0, op1, mode },
				     trueval, tem, GET_MODE (tem), 0);

      if (tem == 0)
	delete_insns_since (last);
      return tem;
    }

  /* The remaining tricks only apply to integer comparisons.  */

  scalar_int_mode int_mode;
  if (is_int_mode (mode, &int_mode))
    return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
				unsignedp, normalizep, trueval);

  return 0;
}

/* Like emit_store_flag, but always succeeds.  */

rtx
emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
		       machine_mode mode, int unsignedp, int normalizep)
{
  rtx tem;
  rtx_code_label *label;
  rtx trueval, falseval;

  /* First see if emit_store_flag can do the job.  */
  tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
  if (tem != 0)
    return tem;

  /* If one operand is constant, make it the second one.  Only do this
     if the other operand is not constant as well.  */
  if (swap_commutative_operands_p (op0, op1))
    {
      std::swap (op0, op1);
      code = swap_condition (code);
    }

  if (mode == VOIDmode)
    mode = GET_MODE (op0);

  if (!target)
    target = gen_reg_rtx (word_mode);

  /* If this failed, we have to do this with set/compare/jump/set code.
     For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
  trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
  if (code == NE
      && GET_MODE_CLASS (mode) == MODE_INT
      && REG_P (target)
      && op0 == target
      && op1 == const0_rtx)
    {
      label = gen_label_rtx ();
      do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
			       NULL_RTX, NULL, label,
			       profile_probability::uninitialized ());
      emit_move_insn (target, trueval);
      emit_label (label);
      return target;
    }

  if (!REG_P (target)
      || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
    target = gen_reg_rtx (GET_MODE (target));

  /* Jump in the right direction if the target cannot implement CODE
     but can jump on its reverse condition.  */
  falseval = const0_rtx;
  if (! can_compare_p (code, mode, ccp_jump)
      && (! FLOAT_MODE_P (mode)
	  || code == ORDERED || code == UNORDERED
	  || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
	  || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
    {
      enum rtx_code rcode;
      if (FLOAT_MODE_P (mode))
	rcode = reverse_condition_maybe_unordered (code);
      else
	rcode = reverse_condition (code);

      /* Canonicalize to UNORDERED for the libcall.  */
      if (can_compare_p (rcode, mode, ccp_jump)
	  || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
	{
	  falseval = trueval;
	  trueval = const0_rtx;
	  code = rcode;
	}
    }
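  /* The sequence emitted below behaves like:

       target = trueval;
       if (op0 <CODE> op1)
	 goto label;
       target = falseval;
     label:

     so TARGET keeps TRUEVAL exactly when the comparison holds.  */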
  emit_move_insn (target, trueval);
  label = gen_label_rtx ();
  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
			   label, profile_probability::uninitialized ());

  emit_move_insn (target, falseval);
  emit_label (label);

  return target;
}

/* Helper function for canonicalize_cmp_for_target.  Swap between inclusive
   and exclusive ranges in order to create an equivalent comparison.  See
   canonicalize_cmp_for_target for the possible cases.  */

static enum rtx_code
equivalent_cmp_code (enum rtx_code code)
{
  switch (code)
    {
    case GT:
      return GE;
    case GE:
      return GT;
    case LT:
      return LE;
    case LE:
      return LT;
    case GTU:
      return GEU;
    case GEU:
      return GTU;
    case LTU:
      return LEU;
    case LEU:
      return LTU;

    default:
      return code;
    }
}

/* Choose the more appropriate immediate in scalar integer comparisons.  The
   purpose of this is to end up with an immediate which can be loaded into a
   register in fewer moves, if possible.

   For each integer comparison there exists an equivalent choice:
     i)   a >  b or a >= b + 1
     ii)  a <= b or a <  b + 1
     iii) a >= b or a >  b - 1
     iv)  a <  b or a <= b - 1
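
   For example, if an immediate of 4096 is cheaper to materialize than
   4095 on the target, "a > 4095" (case i) can be rewritten as
   "a >= 4096".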

   MODE is the mode of the first operand.
   CODE points to the comparison code.
   IMM points to the rtx containing the immediate.  *IMM must satisfy
   CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
   on exit.  */

void
canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
{
  if (!SCALAR_INT_MODE_P (mode))
    return;

  int to_add = 0;
  enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;

  /* Extract the immediate value from the rtx.  */
  wide_int imm_val = rtx_mode_t (*imm, mode);

  if (*code == GT || *code == GTU || *code == LE || *code == LEU)
    to_add = 1;
  else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
    to_add = -1;
  else
    return;

  /* Check for overflow/underflow in the case of signed values and
     wrapping around in the case of unsigned values.  If any occur
     cancel the optimization.  */
  wi::overflow_type overflow = wi::OVF_NONE;
  wide_int imm_modif;

  if (to_add == 1)
    imm_modif = wi::add (imm_val, 1, sgn, &overflow);
  else
    imm_modif = wi::sub (imm_val, 1, sgn, &overflow);

  if (overflow)
    return;

  /* The following creates a pseudo; if we cannot do that, bail out.  */
  if (!can_create_pseudo_p ())
    return;

  rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
  rtx new_imm = immed_wide_int_const (imm_modif, mode);

  rtx_insn *old_rtx = gen_move_insn (reg, *imm);
  rtx_insn *new_rtx = gen_move_insn (reg, new_imm);

  /* Update the immediate and the code.  */
  if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
    {
      *code = equivalent_cmp_code (*code);
      *imm = new_imm;
    }
}


/* Perform a possibly multi-word comparison and conditional jump to LABEL
   if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.  This is
   now a thin wrapper around do_compare_rtx_and_jump.  */

static void
do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
		 rtx_code_label *label)
{
  int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
  do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
			   NULL, label, profile_probability::uninitialized ());
}