1 | /* Medium-level subroutines: convert bit-field store and extract |
---|---|

2 | and shifts, multiplies and divides to rtl instructions. |

3 | Copyright (C) 1987-2017 Free Software Foundation, Inc. |

4 | |

5 | This file is part of GCC. |

6 | |

7 | GCC is free software; you can redistribute it and/or modify it under |

8 | the terms of the GNU General Public License as published by the Free |

9 | Software Foundation; either version 3, or (at your option) any later |

10 | version. |

11 | |

12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |

13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |

14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |

15 | for more details. |

16 | |

17 | You should have received a copy of the GNU General Public License |

18 | along with GCC; see the file COPYING3. If not see |

19 | <http://www.gnu.org/licenses/>. */ |

20 | |

21 | |

22 | #include "config.h" |

23 | #include "system.h" |

24 | #include "coretypes.h" |

25 | #include "backend.h" |

26 | #include "target.h" |

27 | #include "rtl.h" |

28 | #include "tree.h" |

29 | #include "predict.h" |

30 | #include "memmodel.h" |

31 | #include "tm_p.h" |

32 | #include "expmed.h" |

33 | #include "optabs.h" |

34 | #include "regs.h" |

35 | #include "emit-rtl.h" |

36 | #include "diagnostic-core.h" |

37 | #include "fold-const.h" |

38 | #include "stor-layout.h" |

39 | #include "dojump.h" |

40 | #include "explow.h" |

41 | #include "expr.h" |

42 | #include "langhooks.h" |

43 | #include "tree-vector-builder.h" |

44 | |

45 | struct target_expmed default_target_expmed; |

46 | #if SWITCHABLE_TARGET |

47 | struct target_expmed *this_target_expmed = &default_target_expmed; |

48 | #endif |

49 | |

50 | static void store_fixed_bit_field (rtx, opt_scalar_int_mode, |

51 | unsigned HOST_WIDE_INT, |

52 | unsigned HOST_WIDE_INT, |

53 | unsigned HOST_WIDE_INT, |

54 | unsigned HOST_WIDE_INT, |

55 | rtx, scalar_int_mode, bool); |

56 | static void store_fixed_bit_field_1 (rtx, scalar_int_mode, |

57 | unsigned HOST_WIDE_INT, |

58 | unsigned HOST_WIDE_INT, |

59 | rtx, scalar_int_mode, bool); |

60 | static void store_split_bit_field (rtx, opt_scalar_int_mode, |

61 | unsigned HOST_WIDE_INT, |

62 | unsigned HOST_WIDE_INT, |

63 | unsigned HOST_WIDE_INT, |

64 | unsigned HOST_WIDE_INT, |

65 | rtx, scalar_int_mode, bool); |

66 | static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode, |

67 | unsigned HOST_WIDE_INT, |

68 | unsigned HOST_WIDE_INT, rtx, int, bool); |

69 | static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode, |

70 | unsigned HOST_WIDE_INT, |

71 | unsigned HOST_WIDE_INT, rtx, int, bool); |

72 | static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int); |

73 | static rtx extract_split_bit_field (rtx, opt_scalar_int_mode, |

74 | unsigned HOST_WIDE_INT, |

75 | unsigned HOST_WIDE_INT, int, bool); |

76 | static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *); |

77 | static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT); |

78 | static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT); |

79 | |

80 | /* Return a constant integer mask value of mode MODE with BITSIZE ones |

81 | followed by BITPOS zeros, or the complement of that if COMPLEMENT. |

82 | The mask is truncated if necessary to the width of mode MODE. The |

83 | mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */ |

84 | |

85 | static inline rtx |

86 | mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement) |

87 | { |

88 | return immed_wide_int_const |

89 | (wi::shifted_mask (bitpos, bitsize, complement, |

90 | GET_MODE_PRECISION (mode)), mode); |

91 | } |

92 | |

93 | /* Test whether a value is zero of a power of two. */ |

94 | #define EXACT_POWER_OF_2_OR_ZERO_P(x) \ |

95 | (((x) & ((x) - HOST_WIDE_INT_1U)) == 0) |

96 | |

97 | struct init_expmed_rtl |

98 | { |

99 | rtx reg; |

100 | rtx plus; |

101 | rtx neg; |

102 | rtx mult; |

103 | rtx sdiv; |

104 | rtx udiv; |

105 | rtx sdiv_32; |

106 | rtx smod_32; |

107 | rtx wide_mult; |

108 | rtx wide_lshr; |

109 | rtx wide_trunc; |

110 | rtx shift; |

111 | rtx shift_mult; |

112 | rtx shift_add; |

113 | rtx shift_sub0; |

114 | rtx shift_sub1; |

115 | rtx zext; |

116 | rtx trunc; |

117 | |

118 | rtx pow2[MAX_BITS_PER_WORD]; |

119 | rtx cint[MAX_BITS_PER_WORD]; |

120 | }; |

121 | |

122 | static void |

123 | init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode, |

124 | scalar_int_mode from_mode, bool speed) |

125 | { |

126 | int to_size, from_size; |

127 | rtx which; |

128 | |

129 | to_size = GET_MODE_PRECISION (to_mode); |

130 | from_size = GET_MODE_PRECISION (from_mode); |

131 | |

132 | /* Most partial integers have a precision less than the "full" |

133 | integer it requires for storage. In case one doesn't, for |

134 | comparison purposes here, reduce the bit size by one in that |

135 | case. */ |

136 | if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT |

137 | && pow2p_hwi (to_size)) |

138 | to_size --; |

139 | if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT |

140 | && pow2p_hwi (from_size)) |

141 | from_size --; |

142 | |

143 | /* Assume cost of zero-extend and sign-extend is the same. */ |

144 | which = (to_size < from_size ? all->trunc : all->zext); |

145 | |

146 | PUT_MODE (all->reg, from_mode); |

147 | set_convert_cost (to_mode, from_mode, speed, |

148 | set_src_cost (which, to_mode, speed)); |

149 | } |

150 | |

151 | static void |

152 | init_expmed_one_mode (struct init_expmed_rtl *all, |

153 | machine_mode mode, int speed) |

154 | { |

155 | int m, n, mode_bitsize; |

156 | machine_mode mode_from; |

157 | |

158 | mode_bitsize = GET_MODE_UNIT_BITSIZE (mode); |

159 | |

160 | PUT_MODE (all->reg, mode); |

161 | PUT_MODE (all->plus, mode); |

162 | PUT_MODE (all->neg, mode); |

163 | PUT_MODE (all->mult, mode); |

164 | PUT_MODE (all->sdiv, mode); |

165 | PUT_MODE (all->udiv, mode); |

166 | PUT_MODE (all->sdiv_32, mode); |

167 | PUT_MODE (all->smod_32, mode); |

168 | PUT_MODE (all->wide_trunc, mode); |

169 | PUT_MODE (all->shift, mode); |

170 | PUT_MODE (all->shift_mult, mode); |

171 | PUT_MODE (all->shift_add, mode); |

172 | PUT_MODE (all->shift_sub0, mode); |

173 | PUT_MODE (all->shift_sub1, mode); |

174 | PUT_MODE (all->zext, mode); |

175 | PUT_MODE (all->trunc, mode); |

176 | |

177 | set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed)); |

178 | set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed)); |

179 | set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed)); |

180 | set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed)); |

181 | set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed)); |

182 | |

183 | set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed) |

184 | <= 2 * add_cost (speed, mode))); |

185 | set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed) |

186 | <= 4 * add_cost (speed, mode))); |

187 | |

188 | set_shift_cost (speed, mode, 0, 0); |

189 | { |

190 | int cost = add_cost (speed, mode); |

191 | set_shiftadd_cost (speed, mode, 0, cost); |

192 | set_shiftsub0_cost (speed, mode, 0, cost); |

193 | set_shiftsub1_cost (speed, mode, 0, cost); |

194 | } |

195 | |

196 | n = MIN (MAX_BITS_PER_WORD, mode_bitsize); |

197 | for (m = 1; m < n; m++) |

198 | { |

199 | XEXP (all->shift, 1) = all->cint[m]; |

200 | XEXP (all->shift_mult, 1) = all->pow2[m]; |

201 | |

202 | set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed)); |

203 | set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode, |

204 | speed)); |

205 | set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode, |

206 | speed)); |

207 | set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode, |

208 | speed)); |

209 | } |

210 | |

211 | scalar_int_mode int_mode_to; |

212 | if (is_a <scalar_int_mode> (mode, &int_mode_to)) |

213 | { |

214 | for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT; |

215 | mode_from = (machine_mode)(mode_from + 1)) |

216 | init_expmed_one_conv (all, int_mode_to, |

217 | as_a <scalar_int_mode> (mode_from), speed); |

218 | |

219 | scalar_int_mode wider_mode; |

220 | if (GET_MODE_CLASS (int_mode_to) == MODE_INT |

221 | && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode)) |

222 | { |

223 | PUT_MODE (all->zext, wider_mode); |

224 | PUT_MODE (all->wide_mult, wider_mode); |

225 | PUT_MODE (all->wide_lshr, wider_mode); |

226 | XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize); |

227 | |

228 | set_mul_widen_cost (speed, wider_mode, |

229 | set_src_cost (all->wide_mult, wider_mode, speed)); |

230 | set_mul_highpart_cost (speed, int_mode_to, |

231 | set_src_cost (all->wide_trunc, |

232 | int_mode_to, speed)); |

233 | } |

234 | } |

235 | } |

236 | |

237 | void |

238 | init_expmed (void) |

239 | { |

240 | struct init_expmed_rtl all; |

241 | machine_mode mode = QImode; |

242 | int m, speed; |

243 | |

244 | memset (&all, 0, sizeof all); |

245 | for (m = 1; m < MAX_BITS_PER_WORD; m++) |

246 | { |

247 | all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m); |

248 | all.cint[m] = GEN_INT (m); |

249 | } |

250 | |

251 | /* Avoid using hard regs in ways which may be unsupported. */ |

252 | all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); |

253 | all.plus = gen_rtx_PLUS (mode, all.reg, all.reg); |

254 | all.neg = gen_rtx_NEG (mode, all.reg); |

255 | all.mult = gen_rtx_MULT (mode, all.reg, all.reg); |

256 | all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg); |

257 | all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg); |

258 | all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]); |

259 | all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]); |

260 | all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg); |

261 | all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext); |

262 | all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg); |

263 | all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr); |

264 | all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg); |

265 | all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg); |

266 | all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg); |

267 | all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg); |

268 | all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult); |

269 | all.trunc = gen_rtx_TRUNCATE (mode, all.reg); |

270 | |

271 | for (speed = 0; speed < 2; speed++) |

272 | { |

273 | crtl->maybe_hot_insn_p = speed; |

274 | set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed)); |

275 | |

276 | for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT; |

277 | mode = (machine_mode)(mode + 1)) |

278 | init_expmed_one_mode (&all, mode, speed); |

279 | |

280 | if (MIN_MODE_PARTIAL_INT != VOIDmode) |

281 | for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT; |

282 | mode = (machine_mode)(mode + 1)) |

283 | init_expmed_one_mode (&all, mode, speed); |

284 | |

285 | if (MIN_MODE_VECTOR_INT != VOIDmode) |

286 | for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT; |

287 | mode = (machine_mode)(mode + 1)) |

288 | init_expmed_one_mode (&all, mode, speed); |

289 | } |

290 | |

291 | if (alg_hash_used_p ()) |

292 | { |

293 | struct alg_hash_entry *p = alg_hash_entry_ptr (0); |

294 | memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES); |

295 | } |

296 | else |

297 | set_alg_hash_used_p (true); |

298 | default_rtl_profile (); |

299 | |

300 | ggc_free (all.trunc); |

301 | ggc_free (all.shift_sub1); |

302 | ggc_free (all.shift_sub0); |

303 | ggc_free (all.shift_add); |

304 | ggc_free (all.shift_mult); |

305 | ggc_free (all.shift); |

306 | ggc_free (all.wide_trunc); |

307 | ggc_free (all.wide_lshr); |

308 | ggc_free (all.wide_mult); |

309 | ggc_free (all.zext); |

310 | ggc_free (all.smod_32); |

311 | ggc_free (all.sdiv_32); |

312 | ggc_free (all.udiv); |

313 | ggc_free (all.sdiv); |

314 | ggc_free (all.mult); |

315 | ggc_free (all.neg); |

316 | ggc_free (all.plus); |

317 | ggc_free (all.reg); |

318 | } |

319 | |

320 | /* Return an rtx representing minus the value of X. |

321 | MODE is the intended mode of the result, |

322 | useful if X is a CONST_INT. */ |

323 | |

324 | rtx |

325 | negate_rtx (machine_mode mode, rtx x) |

326 | { |

327 | rtx result = simplify_unary_operation (NEG, mode, x, mode); |

328 | |

329 | if (result == 0) |

330 | result = expand_unop (mode, neg_optab, x, NULL_RTX, 0); |

331 | |

332 | return result; |

333 | } |

334 | |

335 | /* Whether reverse storage order is supported on the target. */ |

336 | static int reverse_storage_order_supported = -1; |

337 | |

338 | /* Check whether reverse storage order is supported on the target. */ |

339 | |

340 | static void |

341 | check_reverse_storage_order_support (void) |

342 | { |

343 | if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN) |

344 | { |

345 | reverse_storage_order_supported = 0; |

346 | sorry ("reverse scalar storage order"); |

347 | } |

348 | else |

349 | reverse_storage_order_supported = 1; |

350 | } |

351 | |

352 | /* Whether reverse FP storage order is supported on the target. */ |

353 | static int reverse_float_storage_order_supported = -1; |

354 | |

355 | /* Check whether reverse FP storage order is supported on the target. */ |

356 | |

357 | static void |

358 | check_reverse_float_storage_order_support (void) |

359 | { |

360 | if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN) |

361 | { |

362 | reverse_float_storage_order_supported = 0; |

363 | sorry ("reverse floating-point scalar storage order"); |

364 | } |

365 | else |

366 | reverse_float_storage_order_supported = 1; |

367 | } |

368 | |

369 | /* Return an rtx representing value of X with reverse storage order. |

370 | MODE is the intended mode of the result, |

371 | useful if X is a CONST_INT. */ |

372 | |

373 | rtx |

374 | flip_storage_order (machine_mode mode, rtx x) |

375 | { |

376 | scalar_int_mode int_mode; |

377 | rtx result; |

378 | |

379 | if (mode == QImode) |

380 | return x; |

381 | |

382 | if (COMPLEX_MODE_P (mode)) |

383 | { |

384 | rtx real = read_complex_part (x, false); |

385 | rtx imag = read_complex_part (x, true); |

386 | |

387 | real = flip_storage_order (GET_MODE_INNER (mode), real); |

388 | imag = flip_storage_order (GET_MODE_INNER (mode), imag); |

389 | |

390 | return gen_rtx_CONCAT (mode, real, imag); |

391 | } |

392 | |

393 | if (__builtin_expect (reverse_storage_order_supported < 0, 0)) |

394 | check_reverse_storage_order_support (); |

395 | |

396 | if (!is_a <scalar_int_mode> (mode, &int_mode)) |

397 | { |

398 | if (FLOAT_MODE_P (mode) |

399 | && __builtin_expect (reverse_float_storage_order_supported < 0, 0)) |

400 | check_reverse_float_storage_order_support (); |

401 | |

402 | if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode)) |

403 | { |

404 | sorry ("reverse storage order for %smode", GET_MODE_NAME (mode)); |

405 | return x; |

406 | } |

407 | x = gen_lowpart (int_mode, x); |

408 | } |

409 | |

410 | result = simplify_unary_operation (BSWAP, int_mode, x, int_mode); |

411 | if (result == 0) |

412 | result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1); |

413 | |

414 | if (int_mode != mode) |

415 | result = gen_lowpart (mode, result); |

416 | |

417 | return result; |

418 | } |

419 | |

420 | /* If MODE is set, adjust bitfield memory MEM so that it points to the |

421 | first unit of mode MODE that contains a bitfield of size BITSIZE at |

422 | bit position BITNUM. If MODE is not set, return a BLKmode reference |

423 | to every byte in the bitfield. Set *NEW_BITNUM to the bit position |

424 | of the field within the new memory. */ |

425 | |

426 | static rtx |

427 | narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode, |

428 | unsigned HOST_WIDE_INT bitsize, |

429 | unsigned HOST_WIDE_INT bitnum, |

430 | unsigned HOST_WIDE_INT *new_bitnum) |

431 | { |

432 | scalar_int_mode imode; |

433 | if (mode.exists (&imode)) |

434 | { |

435 | unsigned int unit = GET_MODE_BITSIZE (imode); |

436 | *new_bitnum = bitnum % unit; |

437 | HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT; |

438 | return adjust_bitfield_address (mem, imode, offset); |

439 | } |

440 | else |

441 | { |

442 | *new_bitnum = bitnum % BITS_PER_UNIT; |

443 | HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT; |

444 | HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1) |

445 | / BITS_PER_UNIT); |

446 | return adjust_bitfield_address_size (mem, BLKmode, offset, size); |

447 | } |

448 | } |

449 | |

450 | /* The caller wants to perform insertion or extraction PATTERN on a |

451 | bitfield of size BITSIZE at BITNUM bits into memory operand OP0. |

452 | BITREGION_START and BITREGION_END are as for store_bit_field |

453 | and FIELDMODE is the natural mode of the field. |

454 | |

455 | Search for a mode that is compatible with the memory access |

456 | restrictions and (where applicable) with a register insertion or |

457 | extraction. Return the new memory on success, storing the adjusted |

458 | bit position in *NEW_BITNUM. Return null otherwise. */ |

459 | |

460 | static rtx |

461 | adjust_bit_field_mem_for_reg (enum extraction_pattern pattern, |

462 | rtx op0, HOST_WIDE_INT bitsize, |

463 | HOST_WIDE_INT bitnum, |

464 | unsigned HOST_WIDE_INT bitregion_start, |

465 | unsigned HOST_WIDE_INT bitregion_end, |

466 | machine_mode fieldmode, |

467 | unsigned HOST_WIDE_INT *new_bitnum) |

468 | { |

469 | bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start, |

470 | bitregion_end, MEM_ALIGN (op0), |

471 | MEM_VOLATILE_P (op0)); |

472 | scalar_int_mode best_mode; |

473 | if (iter.next_mode (&best_mode)) |

474 | { |

475 | /* We can use a memory in BEST_MODE. See whether this is true for |

476 | any wider modes. All other things being equal, we prefer to |

477 | use the widest mode possible because it tends to expose more |

478 | CSE opportunities. */ |

479 | if (!iter.prefer_smaller_modes ()) |

480 | { |

481 | /* Limit the search to the mode required by the corresponding |

482 | register insertion or extraction instruction, if any. */ |

483 | scalar_int_mode limit_mode = word_mode; |

484 | extraction_insn insn; |

485 | if (get_best_reg_extraction_insn (&insn, pattern, |

486 | GET_MODE_BITSIZE (best_mode), |

487 | fieldmode)) |

488 | limit_mode = insn.field_mode; |

489 | |

490 | scalar_int_mode wider_mode; |

491 | while (iter.next_mode (&wider_mode) |

492 | && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode)) |

493 | best_mode = wider_mode; |

494 | } |

495 | return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, |

496 | new_bitnum); |

497 | } |

498 | return NULL_RTX; |

499 | } |

500 | |

501 | /* Return true if a bitfield of size BITSIZE at bit number BITNUM within |

502 | a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg |

503 | offset is then BITNUM / BITS_PER_UNIT. */ |

504 | |

505 | static bool |

506 | lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum, |

507 | unsigned HOST_WIDE_INT bitsize, |

508 | machine_mode struct_mode) |

509 | { |

510 | unsigned HOST_WIDE_INT regsize = REGMODE_NATURAL_SIZE (struct_mode); |

511 | if (BYTES_BIG_ENDIAN) |

512 | return (bitnum % BITS_PER_UNIT == 0 |

513 | && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode) |

514 | || (bitnum + bitsize) % (regsize * BITS_PER_UNIT) == 0)); |

515 | else |

516 | return bitnum % (regsize * BITS_PER_UNIT) == 0; |

517 | } |

518 | |

519 | /* Return true if -fstrict-volatile-bitfields applies to an access of OP0 |

520 | containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE. |

521 | Return false if the access would touch memory outside the range |

522 | BITREGION_START to BITREGION_END for conformance to the C++ memory |

523 | model. */ |

524 | |

525 | static bool |

526 | strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize, |

527 | unsigned HOST_WIDE_INT bitnum, |

528 | scalar_int_mode fieldmode, |

529 | unsigned HOST_WIDE_INT bitregion_start, |

530 | unsigned HOST_WIDE_INT bitregion_end) |

531 | { |

532 | unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode); |

533 | |

534 | /* -fstrict-volatile-bitfields must be enabled and we must have a |

535 | volatile MEM. */ |

536 | if (!MEM_P (op0) |

537 | || !MEM_VOLATILE_P (op0) |

538 | || flag_strict_volatile_bitfields <= 0) |

539 | return false; |

540 | |

541 | /* The bit size must not be larger than the field mode, and |

542 | the field mode must not be larger than a word. */ |

543 | if (bitsize > modesize || modesize > BITS_PER_WORD) |

544 | return false; |

545 | |

546 | /* Check for cases of unaligned fields that must be split. */ |

547 | if (bitnum % modesize + bitsize > modesize) |

548 | return false; |

549 | |

550 | /* The memory must be sufficiently aligned for a MODESIZE access. |

551 | This condition guarantees, that the memory access will not |

552 | touch anything after the end of the structure. */ |

553 | if (MEM_ALIGN (op0) < modesize) |

554 | return false; |

555 | |

556 | /* Check for cases where the C++ memory model applies. */ |

557 | if (bitregion_end != 0 |

558 | && (bitnum - bitnum % modesize < bitregion_start |

559 | || bitnum - bitnum % modesize + modesize - 1 > bitregion_end)) |

560 | return false; |

561 | |

562 | return true; |

563 | } |

564 | |

565 | /* Return true if OP is a memory and if a bitfield of size BITSIZE at |

566 | bit number BITNUM can be treated as a simple value of mode MODE. */ |

567 | |

568 | static bool |

569 | simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize, |

570 | unsigned HOST_WIDE_INT bitnum, machine_mode mode) |

571 | { |

572 | return (MEM_P (op0) |

573 | && bitnum % BITS_PER_UNIT == 0 |

574 | && bitsize == GET_MODE_BITSIZE (mode) |

575 | && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0)) |

576 | || (bitnum % GET_MODE_ALIGNMENT (mode) == 0 |

577 | && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode)))); |

578 | } |

579 | |

580 | /* Try to use instruction INSV to store VALUE into a field of OP0. |

581 | If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a |

582 | BLKmode MEM. VALUE_MODE is the mode of VALUE. BITSIZE and BITNUM |

583 | are as for store_bit_field. */ |

584 | |

585 | static bool |

586 | store_bit_field_using_insv (const extraction_insn *insv, rtx op0, |

587 | opt_scalar_int_mode op0_mode, |

588 | unsigned HOST_WIDE_INT bitsize, |

589 | unsigned HOST_WIDE_INT bitnum, |

590 | rtx value, scalar_int_mode value_mode) |

591 | { |

592 | struct expand_operand ops[4]; |

593 | rtx value1; |

594 | rtx xop0 = op0; |

595 | rtx_insn *last = get_last_insn (); |

596 | bool copy_back = false; |

597 | |

598 | scalar_int_mode op_mode = insv->field_mode; |

599 | unsigned int unit = GET_MODE_BITSIZE (op_mode); |

600 | if (bitsize == 0 || bitsize > unit) |

601 | return false; |

602 | |

603 | if (MEM_P (xop0)) |

604 | /* Get a reference to the first byte of the field. */ |

605 | xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum, |

606 | &bitnum); |

607 | else |

608 | { |

609 | /* Convert from counting within OP0 to counting in OP_MODE. */ |

610 | if (BYTES_BIG_ENDIAN) |

611 | bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ()); |

612 | |

613 | /* If xop0 is a register, we need it in OP_MODE |

614 | to make it acceptable to the format of insv. */ |

615 | if (GET_CODE (xop0) == SUBREG) |

616 | /* We can't just change the mode, because this might clobber op0, |

617 | and we will need the original value of op0 if insv fails. */ |

618 | xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0)); |

619 | if (REG_P (xop0) && GET_MODE (xop0) != op_mode) |

620 | xop0 = gen_lowpart_SUBREG (op_mode, xop0); |

621 | } |

622 | |

623 | /* If the destination is a paradoxical subreg such that we need a |

624 | truncate to the inner mode, perform the insertion on a temporary and |

625 | truncate the result to the original destination. Note that we can't |

626 | just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N |

627 | X) 0)) is (reg:N X). */ |

628 | if (GET_CODE (xop0) == SUBREG |

629 | && REG_P (SUBREG_REG (xop0)) |

630 | && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)), |

631 | op_mode)) |

632 | { |

633 | rtx tem = gen_reg_rtx (op_mode); |

634 | emit_move_insn (tem, xop0); |

635 | xop0 = tem; |

636 | copy_back = true; |

637 | } |

638 | |

639 | /* There are similar overflow check at the start of store_bit_field_1, |

640 | but that only check the situation where the field lies completely |

641 | outside the register, while there do have situation where the field |

642 | lies partialy in the register, we need to adjust bitsize for this |

643 | partial overflow situation. Without this fix, pr48335-2.c on big-endian |

644 | will broken on those arch support bit insert instruction, like arm, aarch64 |

645 | etc. */ |

646 | if (bitsize + bitnum > unit && bitnum < unit) |

647 | { |

648 | warning (OPT_Wextra, "write of %wu-bit data outside the bound of " |

649 | "destination object, data truncated into %wu-bit", |

650 | bitsize, unit - bitnum); |

651 | bitsize = unit - bitnum; |

652 | } |

653 | |

654 | /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count |

655 | "backwards" from the size of the unit we are inserting into. |

656 | Otherwise, we count bits from the most significant on a |

657 | BYTES/BITS_BIG_ENDIAN machine. */ |

658 | |

659 | if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) |

660 | bitnum = unit - bitsize - bitnum; |

661 | |

662 | /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */ |

663 | value1 = value; |

664 | if (value_mode != op_mode) |

665 | { |

666 | if (GET_MODE_BITSIZE (value_mode) >= bitsize) |

667 | { |

668 | rtx tmp; |

669 | /* Optimization: Don't bother really extending VALUE |

670 | if it has all the bits we will actually use. However, |

671 | if we must narrow it, be sure we do it correctly. */ |

672 | |

673 | if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode)) |

674 | { |

675 | tmp = simplify_subreg (op_mode, value1, value_mode, 0); |

676 | if (! tmp) |

677 | tmp = simplify_gen_subreg (op_mode, |

678 | force_reg (value_mode, value1), |

679 | value_mode, 0); |

680 | } |

681 | else |

682 | { |

683 | tmp = gen_lowpart_if_possible (op_mode, value1); |

684 | if (! tmp) |

685 | tmp = gen_lowpart (op_mode, force_reg (value_mode, value1)); |

686 | } |

687 | value1 = tmp; |

688 | } |

689 | else if (CONST_INT_P (value)) |

690 | value1 = gen_int_mode (INTVAL (value), op_mode); |

691 | else |

692 | /* Parse phase is supposed to make VALUE's data type |

693 | match that of the component reference, which is a type |

694 | at least as wide as the field; so VALUE should have |

695 | a mode that corresponds to that type. */ |

696 | gcc_assert (CONSTANT_P (value)); |

697 | } |

698 | |

699 | create_fixed_operand (&ops[0], xop0); |

700 | create_integer_operand (&ops[1], bitsize); |

701 | create_integer_operand (&ops[2], bitnum); |

702 | create_input_operand (&ops[3], value1, op_mode); |

703 | if (maybe_expand_insn (insv->icode, 4, ops)) |

704 | { |

705 | if (copy_back) |

706 | convert_move (op0, xop0, true); |

707 | return true; |

708 | } |

709 | delete_insns_since (last); |

710 | return false; |

711 | } |

712 | |

713 | /* A subroutine of store_bit_field, with the same arguments. Return true |

714 | if the operation could be implemented. |

715 | |

716 | If FALLBACK_P is true, fall back to store_fixed_bit_field if we have |

717 | no other way of implementing the operation. If FALLBACK_P is false, |

718 | return false instead. */ |

719 | |

720 | static bool |

721 | store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, |

722 | unsigned HOST_WIDE_INT bitnum, |

723 | unsigned HOST_WIDE_INT bitregion_start, |

724 | unsigned HOST_WIDE_INT bitregion_end, |

725 | machine_mode fieldmode, |

726 | rtx value, bool reverse, bool fallback_p) |

727 | { |

728 | rtx op0 = str_rtx; |

729 | rtx orig_value; |

730 | |

731 | while (GET_CODE (op0) == SUBREG) |

732 | { |

733 | bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT; |

734 | op0 = SUBREG_REG (op0); |

735 | } |

736 | |

737 | /* No action is needed if the target is a register and if the field |

738 | lies completely outside that register. This can occur if the source |

739 | code contains an out-of-bounds access to a small array. */ |

740 | if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0))) |

741 | return true; |

742 | |

743 | /* Use vec_set patterns for inserting parts of vectors whenever |

744 | available. */ |

745 | machine_mode outermode = GET_MODE (op0); |

746 | scalar_mode innermode = GET_MODE_INNER (outermode); |

747 | if (VECTOR_MODE_P (outermode) |

748 | && !MEM_P (op0) |

749 | && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing |

750 | && fieldmode == innermode |

751 | && bitsize == GET_MODE_BITSIZE (innermode) |

752 | && !(bitnum % GET_MODE_BITSIZE (innermode))) |

753 | { |

754 | struct expand_operand ops[3]; |

755 | enum insn_code icode = optab_handler (vec_set_optab, outermode); |

756 | int pos = bitnum / GET_MODE_BITSIZE (innermode); |

757 | |

758 | create_fixed_operand (&ops[0], op0); |

759 | create_input_operand (&ops[1], value, innermode); |

760 | create_integer_operand (&ops[2], pos); |

761 | if (maybe_expand_insn (icode, 3, ops)) |

762 | return true; |

763 | } |

764 | |

765 | /* If the target is a register, overwriting the entire object, or storing |

766 | a full-word or multi-word field can be done with just a SUBREG. */ |

767 | if (!MEM_P (op0) |

768 | && bitsize == GET_MODE_BITSIZE (fieldmode) |

769 | && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0) |

770 | || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0))) |

771 | { |

772 | /* Use the subreg machinery either to narrow OP0 to the required |

773 | words or to cope with mode punning between equal-sized modes. |

774 | In the latter case, use subreg on the rhs side, not lhs. */ |

775 | rtx sub; |

776 | |

777 | if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0))) |

778 | { |

779 | sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0); |

780 | if (sub) |

781 | { |

782 | if (reverse) |

783 | sub = flip_storage_order (GET_MODE (op0), sub); |

784 | emit_move_insn (op0, sub); |

785 | return true; |

786 | } |

787 | } |

788 | else |

789 | { |

790 | sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0), |

791 | bitnum / BITS_PER_UNIT); |

792 | if (sub) |

793 | { |

794 | if (reverse) |

795 | value = flip_storage_order (fieldmode, value); |

796 | emit_move_insn (sub, value); |

797 | return true; |

798 | } |

799 | } |

800 | } |

801 | |

802 | /* If the target is memory, storing any naturally aligned field can be |

803 | done with a simple store. For targets that support fast unaligned |

804 | memory, any naturally sized, unit aligned field can be done directly. */ |

805 | if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode)) |

806 | { |

807 | op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT); |

808 | if (reverse) |

809 | value = flip_storage_order (fieldmode, value); |

810 | emit_move_insn (op0, value); |

811 | return true; |

812 | } |

813 | |

814 | /* Make sure we are playing with integral modes. Pun with subregs |

815 | if we aren't. This must come after the entire register case above, |

816 | since that case is valid for any mode. The following cases are only |

817 | valid for integral modes. */ |

818 | opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0)); |

819 | scalar_int_mode imode; |

820 | if (!op0_mode.exists (&imode) || imode != GET_MODE (op0)) |

821 | { |

822 | if (MEM_P (op0)) |

823 | op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (), |

824 | 0, MEM_SIZE (op0)); |

825 | else |

826 | op0 = gen_lowpart (op0_mode.require (), op0); |

827 | } |

828 | |

829 | /* Storing an lsb-aligned field in a register |

830 | can be done with a movstrict instruction. */ |

831 | |

832 | if (!MEM_P (op0) |

833 | && !reverse |

834 | && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0)) |

835 | && bitsize == GET_MODE_BITSIZE (fieldmode) |

836 | && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing) |

837 | { |

838 | struct expand_operand ops[2]; |

839 | enum insn_code icode = optab_handler (movstrict_optab, fieldmode); |

840 | rtx arg0 = op0; |

841 | unsigned HOST_WIDE_INT subreg_off; |

842 | |

843 | if (GET_CODE (arg0) == SUBREG) |

844 | { |

845 | /* Else we've got some float mode source being extracted into |

846 | a different float mode destination -- this combination of |

847 | subregs results in Severe Tire Damage. */ |

848 | gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode |

849 | || GET_MODE_CLASS (fieldmode) == MODE_INT |

850 | || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT); |

851 | arg0 = SUBREG_REG (arg0); |

852 | } |

853 | |

854 | subreg_off = bitnum / BITS_PER_UNIT; |

855 | if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off)) |

856 | { |

857 | arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off); |

858 | |

859 | create_fixed_operand (&ops[0], arg0); |

860 | /* Shrink the source operand to FIELDMODE. */ |

861 | create_convert_operand_to (&ops[1], value, fieldmode, false); |

862 | if (maybe_expand_insn (icode, 2, ops)) |

863 | return true; |

864 | } |

865 | } |

866 | |

867 | /* Handle fields bigger than a word. */ |

868 | |

869 | if (bitsize > BITS_PER_WORD) |

870 | { |

871 | /* Here we transfer the words of the field |

872 | in the order least significant first. |

873 | This is because the most significant word is the one which may |

874 | be less than full. |

875 | However, only do that if the value is not BLKmode. */ |

876 | |

877 | const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode; |

878 | unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; |

879 | unsigned int i; |

880 | rtx_insn *last; |

881 | |

882 | /* This is the mode we must force value to, so that there will be enough |

883 | subwords to extract. Note that fieldmode will often (always?) be |

884 | VOIDmode, because that is what store_field uses to indicate that this |

885 | is a bit field, but passing VOIDmode to operand_subword_force |

886 | is not allowed. */ |

887 | fieldmode = GET_MODE (value); |

888 | if (fieldmode == VOIDmode) |

889 | fieldmode = smallest_int_mode_for_size (nwords * BITS_PER_WORD); |

890 | |

891 | last = get_last_insn (); |

892 | for (i = 0; i < nwords; i++) |

893 | { |

894 | /* If I is 0, use the low-order word in both field and target; |

895 | if I is 1, use the next to lowest word; and so on. */ |

896 | unsigned int wordnum = (backwards |

897 | ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD |

898 | - i - 1 |

899 | : i); |

900 | unsigned int bit_offset = (backwards ^ reverse |

901 | ? MAX ((int) bitsize - ((int) i + 1) |

902 | * BITS_PER_WORD, |

903 | 0) |

904 | : (int) i * BITS_PER_WORD); |

905 | rtx value_word = operand_subword_force (value, wordnum, fieldmode); |

906 | unsigned HOST_WIDE_INT new_bitsize = |

907 | MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD); |

908 | |

909 | /* If the remaining chunk doesn't have full wordsize we have |

910 | to make sure that for big-endian machines the higher order |

911 | bits are used. */ |

912 | if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards) |

913 | value_word = simplify_expand_binop (word_mode, lshr_optab, |

914 | value_word, |

915 | GEN_INT (BITS_PER_WORD |

916 | - new_bitsize), |

917 | NULL_RTX, true, |

918 | OPTAB_LIB_WIDEN); |

919 | |

920 | if (!store_bit_field_1 (op0, new_bitsize, |

921 | bitnum + bit_offset, |

922 | bitregion_start, bitregion_end, |

923 | word_mode, |

924 | value_word, reverse, fallback_p)) |

925 | { |

926 | delete_insns_since (last); |

927 | return false; |

928 | } |

929 | } |

930 | return true; |

931 | } |

932 | |

933 | /* If VALUE has a floating-point or complex mode, access it as an |

934 | integer of the corresponding size. This can occur on a machine |

935 | with 64 bit registers that uses SFmode for float. It can also |

936 | occur for unaligned float or complex fields. */ |

937 | orig_value = value; |

938 | scalar_int_mode value_mode; |

939 | if (GET_MODE (value) == VOIDmode) |

940 | /* By this point we've dealt with values that are bigger than a word, |

941 | so word_mode is a conservatively correct choice. */ |

942 | value_mode = word_mode; |

943 | else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode)) |

944 | { |

945 | value_mode = int_mode_for_mode (GET_MODE (value)).require (); |

946 | value = gen_reg_rtx (value_mode); |

947 | emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value); |

948 | } |

949 | |

950 | /* If OP0 is a multi-word register, narrow it to the affected word. |

951 | If the region spans two words, defer to store_split_bit_field. |

952 | Don't do this if op0 is a single hard register wider than word |

953 | such as a float or vector register. */ |

954 | if (!MEM_P (op0) |

955 | && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD |

956 | && (!REG_P (op0) |

957 | || !HARD_REGISTER_P (op0) |

958 | || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1)) |

959 | { |

960 | if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD) |

961 | { |

962 | if (!fallback_p) |

963 | return false; |

964 | |

965 | store_split_bit_field (op0, op0_mode, bitsize, bitnum, |

966 | bitregion_start, bitregion_end, |

967 | value, value_mode, reverse); |

968 | return true; |

969 | } |

970 | op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (), |

971 | bitnum / BITS_PER_WORD * UNITS_PER_WORD); |

972 | gcc_assert (op0); |

973 | op0_mode = word_mode; |

974 | bitnum %= BITS_PER_WORD; |

975 | } |

976 | |

977 | /* From here on we can assume that the field to be stored in fits |

978 | within a word. If the destination is a register, it too fits |

979 | in a word. */ |

980 | |

981 | extraction_insn insv; |

982 | if (!MEM_P (op0) |

983 | && !reverse |

984 | && get_best_reg_extraction_insn (&insv, EP_insv, |

985 | GET_MODE_BITSIZE (op0_mode.require ()), |

986 | fieldmode) |

987 | && store_bit_field_using_insv (&insv, op0, op0_mode, |

988 | bitsize, bitnum, value, value_mode)) |

989 | return true; |

990 | |

991 | /* If OP0 is a memory, try copying it to a register and seeing if a |

992 | cheap register alternative is available. */ |

993 | if (MEM_P (op0) && !reverse) |

994 | { |

995 | if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum, |

996 | fieldmode) |

997 | && store_bit_field_using_insv (&insv, op0, op0_mode, |

998 | bitsize, bitnum, value, value_mode)) |

999 | return true; |

1000 | |

1001 | rtx_insn *last = get_last_insn (); |

1002 | |

1003 | /* Try loading part of OP0 into a register, inserting the bitfield |

1004 | into that, and then copying the result back to OP0. */ |

1005 | unsigned HOST_WIDE_INT bitpos; |

1006 | rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum, |

1007 | bitregion_start, bitregion_end, |

1008 | fieldmode, &bitpos); |

1009 | if (xop0) |

1010 | { |

1011 | rtx tempreg = copy_to_reg (xop0); |

1012 | if (store_bit_field_1 (tempreg, bitsize, bitpos, |

1013 | bitregion_start, bitregion_end, |

1014 | fieldmode, orig_value, reverse, false)) |

1015 | { |

1016 | emit_move_insn (xop0, tempreg); |

1017 | return true; |

1018 | } |

1019 | delete_insns_since (last); |

1020 | } |

1021 | } |

1022 | |

1023 | if (!fallback_p) |

1024 | return false; |

1025 | |

1026 | store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start, |

1027 | bitregion_end, value, value_mode, reverse); |

1028 | return true; |

1029 | } |

1030 | |

1031 | /* Generate code to store value from rtx VALUE |

1032 | into a bit-field within structure STR_RTX |

1033 | containing BITSIZE bits starting at bit BITNUM. |

1034 | |

1035 | BITREGION_START is bitpos of the first bitfield in this region. |

1036 | BITREGION_END is the bitpos of the ending bitfield in this region. |

1037 | These two fields are 0, if the C++ memory model does not apply, |

1038 | or we are not interested in keeping track of bitfield regions. |

1039 | |

1040 | FIELDMODE is the machine-mode of the FIELD_DECL node for this field. |

1041 | |

1042 | If REVERSE is true, the store is to be done in reverse order. */ |

1043 | |

1044 | void |

1045 | store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, |

1046 | unsigned HOST_WIDE_INT bitnum, |

1047 | unsigned HOST_WIDE_INT bitregion_start, |

1048 | unsigned HOST_WIDE_INT bitregion_end, |

1049 | machine_mode fieldmode, |

1050 | rtx value, bool reverse) |

1051 | { |

1052 | /* Handle -fstrict-volatile-bitfields in the cases where it applies. */ |

1053 | scalar_int_mode int_mode; |

1054 | if (is_a <scalar_int_mode> (fieldmode, &int_mode) |

1055 | && strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, int_mode, |

1056 | bitregion_start, bitregion_end)) |

1057 | { |

1058 | /* Storing of a full word can be done with a simple store. |

1059 | We know here that the field can be accessed with one single |

1060 | instruction. For targets that support unaligned memory, |

1061 | an unaligned access may be necessary. */ |

1062 | if (bitsize == GET_MODE_BITSIZE (int_mode)) |

1063 | { |

1064 | str_rtx = adjust_bitfield_address (str_rtx, int_mode, |

1065 | bitnum / BITS_PER_UNIT); |

1066 | if (reverse) |

1067 | value = flip_storage_order (int_mode, value); |

1068 | gcc_assert (bitnum % BITS_PER_UNIT == 0); |

1069 | emit_move_insn (str_rtx, value); |

1070 | } |

1071 | else |

1072 | { |

1073 | rtx temp; |

1074 | |

1075 | str_rtx = narrow_bit_field_mem (str_rtx, int_mode, bitsize, bitnum, |

1076 | &bitnum); |

1077 | gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (int_mode)); |

1078 | temp = copy_to_reg (str_rtx); |

1079 | if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0, |

1080 | int_mode, value, reverse, true)) |

1081 | gcc_unreachable (); |

1082 | |

1083 | emit_move_insn (str_rtx, temp); |

1084 | } |

1085 | |

1086 | return; |

1087 | } |

1088 | |

1089 | /* Under the C++0x memory model, we must not touch bits outside the |

1090 | bit region. Adjust the address to start at the beginning of the |

1091 | bit region. */ |

1092 | if (MEM_P (str_rtx) && bitregion_start > 0) |

1093 | { |

1094 | scalar_int_mode best_mode; |

1095 | machine_mode addr_mode = VOIDmode; |

1096 | HOST_WIDE_INT offset, size; |

1097 | |

1098 | gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0); |

1099 | |

1100 | offset = bitregion_start / BITS_PER_UNIT; |

1101 | bitnum -= bitregion_start; |

1102 | size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT; |

1103 | bitregion_end -= bitregion_start; |

1104 | bitregion_start = 0; |

1105 | if (get_best_mode (bitsize, bitnum, |

1106 | bitregion_start, bitregion_end, |

1107 | MEM_ALIGN (str_rtx), INT_MAX, |

1108 | MEM_VOLATILE_P (str_rtx), &best_mode)) |

1109 | addr_mode = best_mode; |

1110 | str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode, |

1111 | offset, size); |

1112 | } |

1113 | |

1114 | if (!store_bit_field_1 (str_rtx, bitsize, bitnum, |

1115 | bitregion_start, bitregion_end, |

1116 | fieldmode, value, reverse, true)) |

1117 | gcc_unreachable (); |

1118 | } |

1119 | |

1120 | /* Use shifts and boolean operations to store VALUE into a bit field of |

1121 | width BITSIZE in OP0, starting at bit BITNUM. If OP0_MODE is defined, |

1122 | it is the mode of OP0, otherwise OP0 is a BLKmode MEM. VALUE_MODE is |

1123 | the mode of VALUE. |

1124 | |

1125 | If REVERSE is true, the store is to be done in reverse order. */ |

1126 | |

1127 | static void |

1128 | store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode, |

1129 | unsigned HOST_WIDE_INT bitsize, |

1130 | unsigned HOST_WIDE_INT bitnum, |

1131 | unsigned HOST_WIDE_INT bitregion_start, |

1132 | unsigned HOST_WIDE_INT bitregion_end, |

1133 | rtx value, scalar_int_mode value_mode, bool reverse) |

1134 | { |

1135 | /* There is a case not handled here: |

1136 | a structure with a known alignment of just a halfword |

1137 | and a field split across two aligned halfwords within the structure. |

1138 | Or likewise a structure with a known alignment of just a byte |

1139 | and a field split across two bytes. |

1140 | Such cases are not supposed to be able to occur. */ |

1141 | |

1142 | scalar_int_mode best_mode; |

1143 | if (MEM_P (op0)) |

1144 | { |

1145 | unsigned int max_bitsize = BITS_PER_WORD; |

1146 | scalar_int_mode imode; |

1147 | if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize) |

1148 | max_bitsize = GET_MODE_BITSIZE (imode); |

1149 | |

1150 | if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end, |

1151 | MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0), |

1152 | &best_mode)) |

1153 | { |

1154 | /* The only way this should occur is if the field spans word |

1155 | boundaries. */ |

1156 | store_split_bit_field (op0, op0_mode, bitsize, bitnum, |

1157 | bitregion_start, bitregion_end, |

1158 | value, value_mode, reverse); |

1159 | return; |

1160 | } |

1161 | |

1162 | op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum); |

1163 | } |

1164 | else |

1165 | best_mode = op0_mode.require (); |

1166 | |

1167 | store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum, |

1168 | value, value_mode, reverse); |

1169 | } |

1170 | |

1171 | /* Helper function for store_fixed_bit_field, stores |

1172 | the bit field always using MODE, which is the mode of OP0. The other |

1173 | arguments are as for store_fixed_bit_field. */ |

1174 | |

1175 | static void |

1176 | store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode, |

1177 | unsigned HOST_WIDE_INT bitsize, |

1178 | unsigned HOST_WIDE_INT bitnum, |

1179 | rtx value, scalar_int_mode value_mode, bool reverse) |

1180 | { |

1181 | rtx temp; |

1182 | int all_zero = 0; |

1183 | int all_one = 0; |

1184 | |

1185 | /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode) |

1186 | for invalid input, such as f5 from gcc.dg/pr48335-2.c. */ |

1187 | |

1188 | if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN) |

1189 | /* BITNUM is the distance between our msb |

1190 | and that of the containing datum. |

1191 | Convert it to the distance from the lsb. */ |

1192 | bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum; |

1193 | |

1194 | /* Now BITNUM is always the distance between our lsb |

1195 | and that of OP0. */ |

1196 | |

1197 | /* Shift VALUE left by BITNUM bits. If VALUE is not constant, |

1198 | we must first convert its mode to MODE. */ |

1199 | |

1200 | if (CONST_INT_P (value)) |

1201 | { |

1202 | unsigned HOST_WIDE_INT v = UINTVAL (value); |

1203 | |

1204 | if (bitsize < HOST_BITS_PER_WIDE_INT) |

1205 | v &= (HOST_WIDE_INT_1U << bitsize) - 1; |

1206 | |

1207 | if (v == 0) |

1208 | all_zero = 1; |

1209 | else if ((bitsize < HOST_BITS_PER_WIDE_INT |

1210 | && v == (HOST_WIDE_INT_1U << bitsize) - 1) |

1211 | || (bitsize == HOST_BITS_PER_WIDE_INT |

1212 | && v == HOST_WIDE_INT_M1U)) |

1213 | all_one = 1; |

1214 | |

1215 | value = lshift_value (mode, v, bitnum); |

1216 | } |

1217 | else |

1218 | { |

1219 | int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize |

1220 | && bitnum + bitsize != GET_MODE_BITSIZE (mode)); |

1221 | |

1222 | if (value_mode != mode) |

1223 | value = convert_to_mode (mode, value, 1); |

1224 | |

1225 | if (must_and) |

1226 | value = expand_binop (mode, and_optab, value, |

1227 | mask_rtx (mode, 0, bitsize, 0), |

1228 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |

1229 | if (bitnum > 0) |

1230 | value = expand_shift (LSHIFT_EXPR, mode, value, |

1231 | bitnum, NULL_RTX, 1); |

1232 | } |

1233 | |

1234 | if (reverse) |

1235 | value = flip_storage_order (mode, value); |

1236 | |

1237 | /* Now clear the chosen bits in OP0, |

1238 | except that if VALUE is -1 we need not bother. */ |

1239 | /* We keep the intermediates in registers to allow CSE to combine |

1240 | consecutive bitfield assignments. */ |

1241 | |

1242 | temp = force_reg (mode, op0); |

1243 | |

1244 | if (! all_one) |

1245 | { |

1246 | rtx mask = mask_rtx (mode, bitnum, bitsize, 1); |

1247 | if (reverse) |

1248 | mask = flip_storage_order (mode, mask); |

1249 | temp = expand_binop (mode, and_optab, temp, mask, |

1250 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |

1251 | temp = force_reg (mode, temp); |

1252 | } |

1253 | |

1254 | /* Now logical-or VALUE into OP0, unless it is zero. */ |

1255 | |

1256 | if (! all_zero) |

1257 | { |

1258 | temp = expand_binop (mode, ior_optab, temp, value, |

1259 | NULL_RTX, 1, OPTAB_LIB_WIDEN); |

1260 | temp = force_reg (mode, temp); |

1261 | } |

1262 | |

1263 | if (op0 != temp) |

1264 | { |

1265 | op0 = copy_rtx (op0); |

1266 | emit_move_insn (op0, temp); |

1267 | } |

1268 | } |

1269 | |

1270 | /* Store a bit field that is split across multiple accessible memory objects. |

1271 | |

1272 | OP0 is the REG, SUBREG or MEM rtx for the first of the objects. |

1273 | BITSIZE is the field width; BITPOS the position of its first bit |

1274 | (within the word). |

1275 | VALUE is the value to store, which has mode VALUE_MODE. |

1276 | If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is |

1277 | a BLKmode MEM. |

1278 | |

1279 | If REVERSE is true, the store is to be done in reverse order. |

1280 | |

1281 | This does not yet handle fields wider than BITS_PER_WORD. */ |

1282 | |

1283 | static void |

1284 | store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode, |

1285 | unsigned HOST_WIDE_INT bitsize, |

1286 | unsigned HOST_WIDE_INT bitpos, |

1287 | unsigned HOST_WIDE_INT bitregion_start, |

1288 | unsigned HOST_WIDE_INT bitregion_end, |

1289 | rtx value, scalar_int_mode value_mode, bool reverse) |

1290 | { |

1291 | unsigned int unit, total_bits, bitsdone = 0; |

1292 | |

1293 | /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that |

1294 | much at a time. */ |

1295 | if (REG_P (op0) || GET_CODE (op0) == SUBREG) |

1296 | unit = BITS_PER_WORD; |

1297 | else |

1298 | unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); |

1299 | |

1300 | /* If OP0 is a memory with a mode, then UNIT must not be larger than |

1301 | OP0's mode as well. Otherwise, store_fixed_bit_field will call us |

1302 | again, and we will mutually recurse forever. */ |

1303 | if (MEM_P (op0) && op0_mode.exists ()) |

1304 | unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ())); |

1305 | |

1306 | /* If VALUE is a constant other than a CONST_INT, get it into a register in |

1307 | WORD_MODE. If we can do this using gen_lowpart_common, do so. Note |

1308 | that VALUE might be a floating-point constant. */ |

1309 | if (CONSTANT_P (value) && !CONST_INT_P (value)) |

1310 | { |

1311 | rtx word = gen_lowpart_common (word_mode, value); |

1312 | |

1313 | if (word && (value != word)) |

1314 | value = word; |

1315 | else |

1316 | value = gen_lowpart_common (word_mode, force_reg (value_mode, value)); |

1317 | value_mode = word_mode; |

1318 | } |

1319 | |

1320 | total_bits = GET_MODE_BITSIZE (value_mode); |

1321 | |

1322 | while (bitsdone < bitsize) |

1323 | { |

1324 | unsigned HOST_WIDE_INT thissize; |

1325 | unsigned HOST_WIDE_INT thispos; |

1326 | unsigned HOST_WIDE_INT offset; |

1327 | rtx part; |

1328 | |

1329 | offset = (bitpos + bitsdone) / unit; |

1330 | thispos = (bitpos + bitsdone) % unit; |

1331 | |

1332 | /* When region of bytes we can touch is restricted, decrease |

1333 | UNIT close to the end of the region as needed. If op0 is a REG |

1334 | or SUBREG of REG, don't do this, as there can't be data races |

1335 | on a register and we can expand shorter code in some cases. */ |

1336 | if (bitregion_end |

1337 | && unit > BITS_PER_UNIT |

1338 | && bitpos + bitsdone - thispos + unit > bitregion_end + 1 |

1339 | && !REG_P (op0) |

1340 | && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0)))) |

1341 | { |

1342 | unit = unit / 2; |

1343 | continue; |

1344 | } |

1345 | |

1346 | /* THISSIZE must not overrun a word boundary. Otherwise, |

1347 | store_fixed_bit_field will call us again, and we will mutually |

1348 | recurse forever. */ |

1349 | thissize = MIN (bitsize - bitsdone, BITS_PER_WORD); |

1350 | thissize = MIN (thissize, unit - thispos); |

1351 | |

1352 | if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN) |

1353 | { |

1354 | /* Fetch successively less significant portions. */ |

1355 | if (CONST_INT_P (value)) |

1356 | part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value)) |

1357 | >> (bitsize - bitsdone - thissize)) |

1358 | & ((HOST_WIDE_INT_1 << thissize) - 1)); |

1359 | /* Likewise, but the source is little-endian. */ |

1360 | else if (reverse) |

1361 | part = extract_fixed_bit_field (word_mode, value, value_mode, |

1362 | thissize, |

1363 | bitsize - bitsdone - thissize, |

1364 | NULL_RTX, 1, false); |

1365 | else |

1366 | /* The args are chosen so that the last part includes the |

1367 | lsb. Give extract_bit_field the value it needs (with |

1368 | endianness compensation) to fetch the piece we want. */ |

1369 | part = extract_fixed_bit_field (word_mode, value, value_mode, |

1370 | thissize, |

1371 | total_bits - bitsize + bitsdone, |

1372 | NULL_RTX, 1, false); |

1373 | } |

1374 | else |

1375 | { |

1376 | /* Fetch successively more significant portions. */ |

1377 | if (CONST_INT_P (value)) |

1378 | part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value)) |

1379 | >> bitsdone) |

1380 | & ((HOST_WIDE_INT_1 << thissize) - 1)); |

1381 | /* Likewise, but the source is big-endian. */ |

1382 | else if (reverse) |

1383 | part = extract_fixed_bit_field (word_mode, value, value_mode, |

1384 | thissize, |

1385 | total_bits - bitsdone - thissize, |

1386 | NULL_RTX, 1, false); |

1387 | else |

1388 | part = extract_fixed_bit_field (word_mode, value, value_mode, |

1389 | thissize, bitsdone, NULL_RTX, |

1390 | 1, false); |

1391 | } |

1392 | |

1393 | /* If OP0 is a register, then handle OFFSET here. */ |

1394 | rtx op0_piece = op0; |

1395 | opt_scalar_int_mode op0_piece_mode = op0_mode; |

1396 | if (SUBREG_P (op0) || REG_P (op0)) |

1397 | { |

1398 | scalar_int_mode imode; |

1399 | if (op0_mode.exists (&imode) |

1400 | && GET_MODE_SIZE (imode) < UNITS_PER_WORD) |

1401 | { |

1402 | if (offset) |

1403 | op0_piece = const0_rtx; |

1404 | } |

1405 | else |

1406 | { |

1407 | op0_piece = operand_subword_force (op0, |

1408 | offset * unit / BITS_PER_WORD, |

1409 | GET_MODE (op0)); |

1410 | op0_piece_mode = word_mode; |

1411 | } |

1412 | offset &= BITS_PER_WORD / unit - 1; |

1413 | } |

1414 | |

1415 | /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx, |

1416 | it is just an out-of-bounds access. Ignore it. */ |

1417 | if (op0_piece != const0_rtx) |

1418 | store_fixed_bit_field (op0_piece, op0_piece_mode, thissize, |

1419 | offset * unit + thispos, bitregion_start, |

1420 | bitregion_end, part, word_mode, reverse); |

1421 | bitsdone += thissize; |

1422 | } |

1423 | } |

1424 | |

1425 | /* A subroutine of extract_bit_field_1 that converts return value X |

1426 | to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments |

1427 | to extract_bit_field. */ |

1428 | |

1429 | static rtx |

1430 | convert_extracted_bit_field (rtx x, machine_mode mode, |

1431 | machine_mode tmode, bool unsignedp) |

1432 | { |

1433 | if (GET_MODE (x) == tmode || GET_MODE (x) == mode) |

1434 | return x; |

1435 | |

1436 | /* If the x mode is not a scalar integral, first convert to the |

1437 | integer mode of that size and then access it as a floating-point |

1438 | value via a SUBREG. */ |

1439 | if (!SCALAR_INT_MODE_P (tmode)) |

1440 | { |

1441 | scalar_int_mode int_mode = int_mode_for_mode (tmode).require (); |

1442 | x = convert_to_mode (int_mode, x, unsignedp); |

1443 | x = force_reg (int_mode, x); |

1444 | return gen_lowpart (tmode, x); |

1445 | } |

1446 | |

1447 | return convert_to_mode (tmode, x, unsignedp); |

1448 | } |

1449 | |

1450 | /* Try to use an ext(z)v pattern to extract a field from OP0. |

1451 | Return the extracted value on success, otherwise return null. |

1452 | EXTV describes the extraction instruction to use. If OP0_MODE |

1453 | is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM. |

1454 | The other arguments are as for extract_bit_field. */ |

1455 | |

1456 | static rtx |

1457 | extract_bit_field_using_extv (const extraction_insn *extv, rtx op0, |

1458 | opt_scalar_int_mode op0_mode, |

1459 | unsigned HOST_WIDE_INT bitsize, |

1460 | unsigned HOST_WIDE_INT bitnum, |

1461 | int unsignedp, rtx target, |

1462 | machine_mode mode, machine_mode tmode) |

1463 | { |

1464 | struct expand_operand ops[4]; |

1465 | rtx spec_target = target; |

1466 | rtx spec_target_subreg = 0; |

1467 | scalar_int_mode ext_mode = extv->field_mode; |

1468 | unsigned unit = GET_MODE_BITSIZE (ext_mode); |

1469 | |

1470 | if (bitsize == 0 || unit < bitsize) |

1471 | return NULL_RTX; |

1472 | |

1473 | if (MEM_P (op0)) |

1474 | /* Get a reference to the first byte of the field. */ |

1475 | op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum, |

1476 | &bitnum); |

1477 | else |

1478 | { |

1479 | /* Convert from counting within OP0 to counting in EXT_MODE. */ |

1480 | if (BYTES_BIG_ENDIAN) |

1481 | bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ()); |

1482 | |

1483 | /* If op0 is a register, we need it in EXT_MODE to make it |

1484 | acceptable to the format of ext(z)v. */ |

1485 | if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode) |

1486 | return NULL_RTX; |

1487 | if (REG_P (op0) && op0_mode.require () != ext_mode) |

1488 | op0 = gen_lowpart_SUBREG (ext_mode, op0); |

1489 | } |

1490 | |

1491 | /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count |

1492 | "backwards" from the size of the unit we are extracting from. |

1493 | Otherwise, we count bits from the most significant on a |

1494 | BYTES/BITS_BIG_ENDIAN machine. */ |

1495 | |

1496 | if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) |

1497 | bitnum = unit - bitsize - bitnum; |
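
| /* E.g. with UNIT == 32, BITSIZE == 8 and BITNUM == 5, the field is |

| renumbered to start at bit 32 - 8 - 5 == 19 from the other end. */ |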

1498 | |

1499 | if (target == 0) |

1500 | target = spec_target = gen_reg_rtx (tmode); |

1501 | |

1502 | if (GET_MODE (target) != ext_mode) |

1503 | { |

1504 | /* Don't use LHS paradoxical subreg if explicit truncation is needed |

1505 | between the mode of the extraction (EXT_MODE) and the target |

1506 | mode. Instead, create a temporary and use convert_move to set |

1507 | the target. */ |

1508 | if (REG_P (target) |

1509 | && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode)) |

1510 | { |

1511 | target = gen_lowpart (ext_mode, target); |

1512 | if (partial_subreg_p (GET_MODE (spec_target), ext_mode)) |

1513 | spec_target_subreg = target; |

1514 | } |

1515 | else |

1516 | target = gen_reg_rtx (ext_mode); |

1517 | } |

1518 | |

1519 | create_output_operand (&ops[0], target, ext_mode); |

1520 | create_fixed_operand (&ops[1], op0); |

1521 | create_integer_operand (&ops[2], bitsize); |

1522 | create_integer_operand (&ops[3], bitnum); |

1523 | if (maybe_expand_insn (extv->icode, 4, ops)) |

1524 | { |

1525 | target = ops[0].value; |

1526 | if (target == spec_target) |

1527 | return target; |

1528 | if (target == spec_target_subreg) |

1529 | return spec_target; |

1530 | return convert_extracted_bit_field (target, mode, tmode, unsignedp); |

1531 | } |

1532 | return NULL_RTX; |

1533 | } |

1534 | |

1535 | /* A subroutine of extract_bit_field, with the same arguments. |

1536 | If FALLBACK_P is true, fall back to extract_fixed_bit_field |

1537 | if we can find no other means of implementing the operation. |

1538 | If FALLBACK_P is false, return NULL instead. */ |

1539 | |

1540 | static rtx |

1541 | extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, |

1542 | unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target, |

1543 | machine_mode mode, machine_mode tmode, |

1544 | bool reverse, bool fallback_p, rtx *alt_rtl) |

1545 | { |

1546 | rtx op0 = str_rtx; |

1547 | machine_mode mode1; |

1548 | |

1549 | if (tmode == VOIDmode) |

1550 | tmode = mode; |

1551 | |

1552 | while (GET_CODE (op0) == SUBREG) |

1553 | { |

1554 | bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT; |

1555 | op0 = SUBREG_REG (op0); |

1556 | } |

1557 | |

1558 | /* If we have an out-of-bounds access to a register, just return an |

1559 | uninitialized register of the required mode. This can occur if the |

1560 | source code contains an out-of-bounds access to a small array. */ |

1561 | if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0))) |

1562 | return gen_reg_rtx (tmode); |

1563 | |

1564 | if (REG_P (op0) |

1565 | && mode == GET_MODE (op0) |

1566 | && bitnum == 0 |

1567 | && bitsize == GET_MODE_BITSIZE (GET_MODE (op0))) |

1568 | { |

1569 | if (reverse) |

1570 | op0 = flip_storage_order (mode, op0); |

1571 | /* We're trying to extract a full register from itself. */ |

1572 | return op0; |

1573 | } |

1574 | |

1575 | /* First try to check for vector from vector extractions. */ |

1576 | if (VECTOR_MODE_P (GET_MODE (op0)) |

1577 | && !MEM_P (op0) |

1578 | && VECTOR_MODE_P (tmode) |

1579 | && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (tmode)) |

1580 | { |

1581 | machine_mode new_mode = GET_MODE (op0); |

1582 | if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)) |

1583 | { |

1584 | scalar_mode inner_mode = GET_MODE_INNER (tmode); |

1585 | unsigned int nunits = (GET_MODE_BITSIZE (GET_MODE (op0)) |

1586 | / GET_MODE_UNIT_BITSIZE (tmode)); |

1587 | if (!mode_for_vector (inner_mode, nunits).exists (&new_mode) |

1588 | || !VECTOR_MODE_P (new_mode) |

1589 | || GET_MODE_SIZE (new_mode) != GET_MODE_SIZE (GET_MODE (op0)) |

1590 | || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode) |

1591 | || !targetm.vector_mode_supported_p (new_mode)) |

1592 | new_mode = VOIDmode; |

1593 | } |

1594 | if (new_mode != VOIDmode |

1595 | && (convert_optab_handler (vec_extract_optab, new_mode, tmode) |

1596 | != CODE_FOR_nothing) |

1597 | && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (tmode) |

1598 | == bitnum / GET_MODE_BITSIZE (tmode))) |

1599 | { |

1600 | struct expand_operand ops[3]; |

1601 | machine_mode outermode = new_mode; |

1602 | machine_mode innermode = tmode; |

1603 | enum insn_code icode |

1604 | = convert_optab_handler (vec_extract_optab, outermode, innermode); |

1605 | unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode); |

1606 | |

1607 | if (new_mode != GET_MODE (op0)) |

1608 | op0 = gen_lowpart (new_mode, op0); |

1609 | create_output_operand (&ops[0], target, innermode); |

1610 | ops[0].target = 1; |

1611 | create_input_operand (&ops[1], op0, outermode); |

1612 | create_integer_operand (&ops[2], pos); |

1613 | if (maybe_expand_insn (icode, 3, ops)) |

1614 | { |

1615 | if (alt_rtl && ops[0].target) |

1616 | *alt_rtl = target; |

1617 | target = ops[0].value; |

1618 | if (GET_MODE (target) != mode) |

1619 | return gen_lowpart (tmode, target); |

1620 | return target; |

1621 | } |

1622 | } |

1623 | } |

1624 | |

1625 | /* See if we can get a better vector mode before extracting. */ |

1626 | if (VECTOR_MODE_P (GET_MODE (op0)) |

1627 | && !MEM_P (op0) |

1628 | && GET_MODE_INNER (GET_MODE (op0)) != tmode) |

1629 | { |

1630 | machine_mode new_mode; |

1631 | |

1632 | if (GET_MODE_CLASS (tmode) == MODE_FLOAT) |

1633 | new_mode = MIN_MODE_VECTOR_FLOAT; |

1634 | else if (GET_MODE_CLASS (tmode) == MODE_FRACT) |

1635 | new_mode = MIN_MODE_VECTOR_FRACT; |

1636 | else if (GET_MODE_CLASS (tmode) == MODE_UFRACT) |

1637 | new_mode = MIN_MODE_VECTOR_UFRACT; |

1638 | else if (GET_MODE_CLASS (tmode) == MODE_ACCUM) |

1639 | new_mode = MIN_MODE_VECTOR_ACCUM; |

1640 | else if (GET_MODE_CLASS (tmode) == MODE_UACCUM) |

1641 | new_mode = MIN_MODE_VECTOR_UACCUM; |

1642 | else |

1643 | new_mode = MIN_MODE_VECTOR_INT; |

1644 | |

1645 | FOR_EACH_MODE_FROM (new_mode, new_mode) |

1646 | if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0)) |

1647 | && GET_MODE_UNIT_SIZE (new_mode) == GET_MODE_SIZE (tmode) |

1648 | && targetm.vector_mode_supported_p (new_mode)) |

1649 | break; |

1650 | if (new_mode != VOIDmode) |

1651 | op0 = gen_lowpart (new_mode, op0); |

1652 | } |

1653 | |

1654 | /* Use vec_extract patterns for extracting parts of vectors whenever |

1655 | available. */ |

1656 | machine_mode outermode = GET_MODE (op0); |

1657 | scalar_mode innermode = GET_MODE_INNER (outermode); |

1658 | if (VECTOR_MODE_P (outermode) |

1659 | && !MEM_P (op0) |

1660 | && (convert_optab_handler (vec_extract_optab, outermode, innermode) |

1661 | != CODE_FOR_nothing) |

1662 | && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (innermode) |

1663 | == bitnum / GET_MODE_BITSIZE (innermode))) |

1664 | { |

1665 | struct expand_operand ops[3]; |

1666 | enum insn_code icode |

1667 | = convert_optab_handler (vec_extract_optab, outermode, innermode); |

1668 | unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode); |

1669 | |

1670 | create_output_operand (&ops[0], target, innermode); |

1671 | ops[0].target = 1; |

1672 | create_input_operand (&ops[1], op0, outermode); |

1673 | create_integer_operand (&ops[2], pos); |

1674 | if (maybe_expand_insn (icode, 3, ops)) |

1675 | { |

1676 | if (alt_rtl && ops[0].target) |

1677 | *alt_rtl = target; |

1678 | target = ops[0].value; |

1679 | if (GET_MODE (target) != mode) |

1680 | return gen_lowpart (tmode, target); |

1681 | return target; |

1682 | } |

1683 | } |

1684 | |

1685 | /* Make sure we are playing with integral modes. Pun with subregs |

1686 | if we aren't. */ |

1687 | opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0)); |

1688 | scalar_int_mode imode; |

1689 | if (!op0_mode.exists (&imode) || imode != GET_MODE (op0)) |

1690 | { |

1691 | if (MEM_P (op0)) |

1692 | op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (), |

1693 | 0, MEM_SIZE (op0)); |

1694 | else if (op0_mode.exists (&imode)) |

1695 | { |

1696 | op0 = gen_lowpart (imode, op0); |

1697 | |

1698 | /* If we got a SUBREG, force it into a register since we |

1699 | aren't going to be able to do another SUBREG on it. */ |

1700 | if (GET_CODE (op0) == SUBREG) |

1701 | op0 = force_reg (imode, op0); |

1702 | } |

1703 | else |

1704 | { |

1705 | HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0)); |

1706 | rtx mem = assign_stack_temp (GET_MODE (op0), size); |

1707 | emit_move_insn (mem, op0); |

1708 | op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size); |

1709 | } |

1710 | } |

1711 | |

1712 | /* ??? We currently assume TARGET is at least as big as BITSIZE. |

1713 | If that's wrong, the solution is to test for it and set TARGET to 0 |

1714 | if needed. */ |

1715 | |

1716 | /* Get the mode of the field to use for atomic access or subreg |

1717 | conversion. */ |

1718 | if (!SCALAR_INT_MODE_P (tmode) |

1719 | || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1)) |

1720 | mode1 = mode; |

1721 | gcc_assert (mode1 != BLKmode); |

1722 | |

1723 | /* Extraction of a full MODE1 value can be done with a subreg as long |

1724 | as the least significant bit of the value is the least significant |

1725 | bit of either OP0 or a word of OP0. */ |

1726 | if (!MEM_P (op0) |

1727 | && !reverse |

1728 | && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ()) |

1729 | && bitsize == GET_MODE_BITSIZE (mode1) |

1730 | && TRULY_NOOP_TRUNCATION_MODES_P (mode1, op0_mode.require ())) |

1731 | { |

1732 | rtx sub = simplify_gen_subreg (mode1, op0, op0_mode.require (), |

1733 | bitnum / BITS_PER_UNIT); |

1734 | if (sub) |

1735 | return convert_extracted_bit_field (sub, mode, tmode, unsignedp); |

1736 | } |

1737 | |

1738 | /* Extraction of a full MODE1 value can be done with a load as long as |

1739 | the field is on a byte boundary and is sufficiently aligned. */ |

1740 | if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1)) |

1741 | { |

1742 | op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT); |

1743 | if (reverse) |

1744 | op0 = flip_storage_order (mode1, op0); |

1745 | return convert_extracted_bit_field (op0, mode, tmode, unsignedp); |

1746 | } |

1747 | |

1748 | /* Handle fields bigger than a word. */ |

1749 | |

1750 | if (bitsize > BITS_PER_WORD) |

1751 | { |

1752 | /* Here we transfer the words of the field |

1753 | in the order least significant first. |

1754 | This is because the most significant word is the one which may |

1755 | be less than full. */ |

1756 | |

1757 | const bool backwards = WORDS_BIG_ENDIAN; |

1758 | unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; |

1759 | unsigned int i; |

1760 | rtx_insn *last; |

1761 | |

1762 | if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target)) |

1763 | target = gen_reg_rtx (mode); |

1764 | |

1765 | /* In case we're about to clobber a base register or something |

1766 | (see gcc.c-torture/execute/20040625-1.c). */ |

1767 | if (reg_mentioned_p (target, str_rtx)) |

1768 | target = gen_reg_rtx (mode); |

1769 | |

1770 | /* Indicate for flow that the entire target reg is being set. */ |

1771 | emit_clobber (target); |

1772 | |

1773 | last = get_last_insn (); |

1774 | for (i = 0; i < nwords; i++) |

1775 | { |

1776 | /* If I is 0, use the low-order word in both field and target; |

1777 | if I is 1, use the next to lowest word; and so on. */ |

1778 | /* Word number in TARGET to use. */ |

1779 | unsigned int wordnum |

1780 | = (backwards |

1781 | ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1 |

1782 | : i); |

1783 | /* Offset from start of field in OP0. */ |

1784 | unsigned int bit_offset = (backwards ^ reverse |

1785 | ? MAX ((int) bitsize - ((int) i + 1) |

1786 | * BITS_PER_WORD, |

1787 | 0) |

1788 | : (int) i * BITS_PER_WORD); |

1789 | rtx target_part = operand_subword (target, wordnum, 1, VOIDmode); |

1790 | rtx result_part |

1791 | = extract_bit_field_1 (op0, MIN (BITS_PER_WORD, |

1792 | bitsize - i * BITS_PER_WORD), |

1793 | bitnum + bit_offset, 1, target_part, |

1794 | mode, word_mode, reverse, fallback_p, NULL); |

1795 | |

1796 | gcc_assert (target_part); |

1797 | if (!result_part) |

1798 | { |

1799 | delete_insns_since (last); |

1800 | return NULL; |

1801 | } |

1802 | |

1803 | if (result_part != target_part) |

1804 | emit_move_insn (target_part, result_part); |

1805 | } |

1806 | |

1807 | if (unsignedp) |

1808 | { |

1809 | /* Unless we've filled TARGET, the upper regs in a multi-reg value |

1810 | need to be zero'd out. */ |

1811 | if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD) |

1812 | { |

1813 | unsigned int i, total_words; |

1814 | |

1815 | total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD; |

1816 | for (i = nwords; i < total_words; i++) |

1817 | emit_move_insn |

1818 | (operand_subword (target, |

1819 | backwards ? total_words - i - 1 : i, |

1820 | 1, VOIDmode), |

1821 | const0_rtx); |

1822 | } |

1823 | return target; |

1824 | } |

1825 | |

1826 | /* Signed bit field: sign-extend with two arithmetic shifts. */ |
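
| /* (E.g. for a 12-bit field in a 32-bit MODE: shift left by 20 so the |

| field's msb becomes bit 31, then arithmetic-shift right by 20 to |

| replicate that sign bit through the upper bits.) */ |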

1827 | target = expand_shift (LSHIFT_EXPR, mode, target, |

1828 | GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0); |

1829 | return expand_shift (RSHIFT_EXPR, mode, target, |

1830 | GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0); |

1831 | } |

1832 | |

1833 | /* If OP0 is a multi-word register, narrow it to the affected word. |

1834 | If the region spans two words, defer to extract_split_bit_field. */ |

1835 | if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD) |

1836 | { |

1837 | if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD) |

1838 | { |

1839 | if (!fallback_p) |

1840 | return NULL_RTX; |

1841 | target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum, |

1842 | unsignedp, reverse); |

1843 | return convert_extracted_bit_field (target, mode, tmode, unsignedp); |

1844 | } |

1845 | op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (), |

1846 | bitnum / BITS_PER_WORD * UNITS_PER_WORD); |

1847 | op0_mode = word_mode; |

1848 | bitnum %= BITS_PER_WORD; |

1849 | } |

1850 | |

1851 | /* From here on we know the desired field is smaller than a word. |

1852 | If OP0 is a register, it too fits within a word. */ |

1853 | enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv; |

1854 | extraction_insn extv; |

1855 | if (!MEM_P (op0) |

1856 | && !reverse |

1857 | /* ??? We could limit the structure size to the part of OP0 that |

1858 | contains the field, with appropriate checks for endianness |

1859 | and TARGET_TRULY_NOOP_TRUNCATION. */ |

1860 | && get_best_reg_extraction_insn (&extv, pattern, |

1861 | GET_MODE_BITSIZE (op0_mode.require ()), |

1862 | tmode)) |

1863 | { |

1864 | rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode, |

1865 | bitsize, bitnum, |

1866 | unsignedp, target, mode, |

1867 | tmode); |

1868 | if (result) |

1869 | return result; |

1870 | } |

1871 | |

1872 | /* If OP0 is a memory, try copying it to a register and seeing if a |

1873 | cheap register alternative is available. */ |

1874 | if (MEM_P (op0) && !reverse) |

1875 | { |

1876 | if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum, |

1877 | tmode)) |

1878 | { |

1879 | rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode, |

1880 | bitsize, bitnum, |

1881 | unsignedp, target, mode, |

1882 | tmode); |

1883 | if (result) |

1884 | return result; |

1885 | } |

1886 | |

1887 | rtx_insn *last = get_last_insn (); |

1888 | |

1889 | /* Try loading part of OP0 into a register and extracting the |

1890 | bitfield from that. */ |

1891 | unsigned HOST_WIDE_INT bitpos; |

1892 | rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum, |

1893 | 0, 0, tmode, &bitpos); |

1894 | if (xop0) |

1895 | { |

1896 | xop0 = copy_to_reg (xop0); |

1897 | rtx result = extract_bit_field_1 (xop0, bitsize, bitpos, |

1898 | unsignedp, target, |

1899 | mode, tmode, reverse, false, NULL); |

1900 | if (result) |

1901 | return result; |

1902 | delete_insns_since (last); |

1903 | } |

1904 | } |

1905 | |

1906 | if (!fallback_p) |

1907 | return NULL; |

1908 | |

1909 | /* Find a correspondingly-sized integer field, so we can apply |

1910 | shifts and masks to it. */ |

1911 | scalar_int_mode int_mode; |

1912 | if (!int_mode_for_mode (tmode).exists (&int_mode)) |

1913 | /* If this fails, we should probably push op0 out to memory and then |

1914 | do a load. */ |

1915 | int_mode = int_mode_for_mode (mode).require (); |

1916 | |

1917 | target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize, |

1918 | bitnum, target, unsignedp, reverse); |

1919 | |

1920 | /* Complex values must be reversed piecewise, so we need to undo the global |

1921 | reversal, convert to the complex mode and reverse again. */ |

1922 | if (reverse && COMPLEX_MODE_P (tmode)) |

1923 | { |

1924 | target = flip_storage_order (int_mode, target); |

1925 | target = convert_extracted_bit_field (target, mode, tmode, unsignedp); |

1926 | target = flip_storage_order (tmode, target); |

1927 | } |

1928 | else |

1929 | target = convert_extracted_bit_field (target, mode, tmode, unsignedp); |

1930 | |

1931 | return target; |

1932 | } |

1933 | |

1934 | /* Generate code to extract a bit-field from STR_RTX |

1935 | containing BITSIZE bits, starting at BITNUM, |

1936 | and put it in TARGET if possible (if TARGET is nonzero). |

1937 | Regardless of TARGET, we return the rtx for where the value is placed. |

1938 | |

1939 | STR_RTX is the structure containing the field (a REG or MEM). |

1940 | UNSIGNEDP is nonzero if this is an unsigned bit field. |

1941 | MODE is the natural mode of the field value once extracted. |

1942 | TMODE is the mode the caller would like the value to have; |

1943 | but the value may be returned with type MODE instead. |

1944 | |

1945 | If REVERSE is true, the extraction is to be done in reverse order. |

1946 | |

1947 | If a TARGET is specified and we can store in it at no extra cost, |

1948 | we do so, and return TARGET. |

1949 | Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred |

1950 | if they are equally easy. */ |
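
| /* A typical call, for illustration only: fetching an unsigned 16-bit |

| field that starts at bit 8 of a SImode register REG, as an HImode |

| value: |

| |

| rtx val = extract_bit_field (reg, 16, 8, 1, NULL_RTX, |

| HImode, HImode, false, NULL); */ |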

1951 | |

1952 | rtx |

1953 | extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, |

1954 | unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target, |

1955 | machine_mode mode, machine_mode tmode, bool reverse, |

1956 | rtx *alt_rtl) |

1957 | { |

1958 | machine_mode mode1; |

1959 | |

1960 | /* Handle -fstrict-volatile-bitfields in the cases where it applies. */ |

1961 | if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0) |

1962 | mode1 = GET_MODE (str_rtx); |

1963 | else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0) |

1964 | mode1 = GET_MODE (target); |

1965 | else |

1966 | mode1 = tmode; |

1967 | |

1968 | scalar_int_mode int_mode; |

1969 | if (is_a <scalar_int_mode> (mode1, &int_mode) |

1970 | && strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, int_mode, 0, 0)) |

1971 | { |

1972 | /* Extraction of a full INT_MODE value can be done with a simple load. |

1973 | We know here that the field can be accessed with one single |

1974 | instruction. For targets that support unaligned memory, |

1975 | an unaligned access may be necessary. */ |

1976 | if (bitsize == GET_MODE_BITSIZE (int_mode)) |

1977 | { |

1978 | rtx result = adjust_bitfield_address (str_rtx, int_mode, |

1979 | bitnum / BITS_PER_UNIT); |

1980 | if (reverse) |

1981 | result = flip_storage_order (int_mode, result); |

1982 | gcc_assert (bitnum % BITS_PER_UNIT == 0); |

1983 | return convert_extracted_bit_field (result, mode, tmode, unsignedp); |

1984 | } |

1985 | |

1986 | str_rtx = narrow_bit_field_mem (str_rtx, int_mode, bitsize, bitnum, |

1987 | &bitnum); |

1988 | gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (int_mode)); |

1989 | str_rtx = copy_to_reg (str_rtx); |

1990 | } |

1991 | |

1992 | return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, |

1993 | target, mode, tmode, reverse, true, alt_rtl); |

1994 | } |

1995 | |

1996 | /* Use shifts and boolean operations to extract a field of BITSIZE bits |

1997 | from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0, |

1998 | otherwise OP0 is a BLKmode MEM. |

1999 | |

2000 | UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value). |

2001 | If REVERSE is true, the extraction is to be done in reverse order. |

2002 | |

2003 | If TARGET is nonzero, attempts to store the value there |

2004 | and return TARGET, but this is not guaranteed. |

2005 | If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */ |

2006 | |

2007 | static rtx |

2008 | extract_fixed_bit_field (machine_mode tmode, rtx op0, |

2009 | opt_scalar_int_mode op0_mode, |

2010 | unsigned HOST_WIDE_INT bitsize, |

2011 | unsigned HOST_WIDE_INT bitnum, rtx target, |

2012 | int unsignedp, bool reverse) |

2013 | { |

2014 | scalar_int_mode mode; |

2015 | if (MEM_P (op0)) |

2016 | { |

2017 | if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), |

2018 | BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode)) |

2019 | /* The only way this should occur is if the field spans word |

2020 | boundaries. */ |

2021 | return extract_split_bit_field (op0, op0_mode, bitsize, bitnum, |

2022 | unsignedp, reverse); |

2023 | |

2024 | op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum); |

2025 | } |

2026 | else |

2027 | mode = op0_mode.require (); |

2028 | |

2029 | return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum, |

2030 | target, unsignedp, reverse); |

2031 | } |

2032 | |

2033 | /* Helper function for extract_fixed_bit_field, extracts |

2034 | the bit field always using MODE, which is the mode of OP0. |

2035 | The other arguments are as for extract_fixed_bit_field. */ |

2036 | |

2037 | static rtx |

2038 | extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode, |

2039 | unsigned HOST_WIDE_INT bitsize, |

2040 | unsigned HOST_WIDE_INT bitnum, rtx target, |

2041 | int unsignedp, bool reverse) |

2042 | { |

2043 | /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode) |

2044 | for invalid input, such as extract equivalent of f5 from |

2045 | gcc.dg/pr48335-2.c. */ |

2046 | |

2047 | if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN) |

2048 | /* BITNUM is the distance between our msb and that of OP0. |

2049 | Convert it to the distance from the lsb. */ |

2050 | bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum; |

2051 | |

2052 | /* Now BITNUM is always the distance between the field's lsb and that of OP0. |

2053 | We have reduced the big-endian case to the little-endian case. */ |

2054 | if (reverse) |

2055 | op0 = flip_storage_order (mode, op0); |

2056 | |

2057 | if (unsignedp) |

2058 | { |

2059 | if (bitnum) |

2060 | { |

2061 | /* If the field does not already start at the lsb, |

2062 | shift it so it does. */ |

2063 | /* Maybe propagate the target for the shift. */ |

2064 | rtx subtarget = (target != 0 && REG_P (target) ? target : 0); |

2065 | if (tmode != mode) |

2066 | subtarget = 0; |

2067 | op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1); |

2068 | } |

2069 | /* Convert the value to the desired mode. TMODE must also be a |

2070 | scalar integer for this conversion to make sense, since we |

2071 | shouldn't reinterpret the bits. */ |

2072 | scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode); |

2073 | if (mode != new_mode) |

2074 | op0 = convert_to_mode (new_mode, op0, 1); |

2075 | |

2076 | /* Unless the msb of the field used to be the msb when we shifted, |

2077 | mask out the upper bits. */ |

2078 | |

2079 | if (GET_MODE_BITSIZE (mode) != bitnum + bitsize) |

2080 | return expand_binop (new_mode, and_optab, op0, |

2081 | mask_rtx (new_mode, 0, bitsize, 0), |

2082 | target, 1, OPTAB_LIB_WIDEN); |

2083 | return op0; |

2084 | } |

2085 | |

2086 | /* To extract a signed bit-field, first shift its msb to the msb of the word, |

2087 | then arithmetic-shift its lsb to the lsb of the word. */ |

2088 | op0 = force_reg (mode, op0); |

2089 | |

2090 | /* Find the narrowest integer mode that contains the field. */ |

2091 | |

2092 | opt_scalar_int_mode mode_iter; |

2093 | FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT) |

2094 | if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum) |

2095 | break; |

2096 | |

2097 | mode = mode_iter.require (); |

2098 | op0 = convert_to_mode (mode, op0, 0); |

2099 | |

2100 | if (mode != tmode) |

2101 | target = 0; |

2102 | |

2103 | if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum)) |

2104 | { |

2105 | int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum); |

2106 | /* Maybe propagate the target for the shift. */ |

2107 | rtx subtarget = (target != 0 && REG_P (target) ? target : 0); |

2108 | op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1); |

2109 | } |

2110 | |

2111 | return expand_shift (RSHIFT_EXPR, mode, op0, |

2112 | GET_MODE_BITSIZE (mode) - bitsize, target, 0); |

2113 | } |

2114 | |

2115 | /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value |

2116 | VALUE << BITPOS. */ |

2117 | |

2118 | static rtx |

2119 | lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value, |

2120 | int bitpos) |

2121 | { |

2122 | return immed_wide_int_const (wi::lshift (value, bitpos), mode); |

2123 | } |

2124 | |

2125 | /* Extract a bit field that is split across two words |

2126 | and return an RTX for the result. |

2127 | |

2128 | OP0 is the REG, SUBREG or MEM rtx for the first of the two words. |

2129 | BITSIZE is the field width; BITPOS, position of its first bit, in the word. |

2130 | UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. |

2131 | If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is |

2132 | a BLKmode MEM. |

2133 | |

2134 | If REVERSE is true, the extraction is to be done in reverse order. */ |
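
| /* For example, with 32-bit words a 10-bit field at BITPOS 28 is |

| assembled from 4 bits of the first word and 6 bits of the second, |

| each piece fetched with extract_fixed_bit_field and combined with |

| IOR below. */ |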

2135 | |

2136 | static rtx |

2137 | extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode, |

2138 | unsigned HOST_WIDE_INT bitsize, |

2139 | unsigned HOST_WIDE_INT bitpos, int unsignedp, |

2140 | bool reverse) |

2141 | { |

2142 | unsigned int unit; |

2143 | unsigned int bitsdone = 0; |

2144 | rtx result = NULL_RTX; |

2145 | int first = 1; |

2146 | |

2147 | /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that |

2148 | much at a time. */ |

2149 | if (REG_P (op0) || GET_CODE (op0) == SUBREG) |

2150 | unit = BITS_PER_WORD; |

2151 | else |

2152 | unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); |

2153 | |

2154 | while (bitsdone < bitsize) |

2155 | { |

2156 | unsigned HOST_WIDE_INT thissize; |

2157 | rtx part; |

2158 | unsigned HOST_WIDE_INT thispos; |

2159 | unsigned HOST_WIDE_INT offset; |

2160 | |

2161 | offset = (bitpos + bitsdone) / unit; |

2162 | thispos = (bitpos + bitsdone) % unit; |

2163 | |

2164 | /* THISSIZE must not overrun a word boundary. Otherwise, |

2165 | extract_fixed_bit_field will call us again, and we will mutually |

2166 | recurse forever. */ |

2167 | thissize = MIN (bitsize - bitsdone, BITS_PER_WORD); |

2168 | thissize = MIN (thissize, unit - thispos); |

2169 | |

2170 | /* If OP0 is a register, then handle OFFSET here. */ |

2171 | rtx op0_piece = op0; |

2172 | opt_scalar_int_mode op0_piece_mode = op0_mode; |

2173 | if (SUBREG_P (op0) || REG_P (op0)) |

2174 | { |

2175 | op0_piece = operand_subword_force (op0, offset, op0_mode.require ()); |

2176 | op0_piece_mode = word_mode; |

2177 | offset = 0; |

2178 | } |

2179 | |

2180 | /* Extract the parts in bit-counting order, |

2181 | whose meaning is determined by BYTES_BIG_ENDIAN (and REVERSE). |

2182 | OFFSET is in UNITs, and UNIT is in bits. */ |

2183 | part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode, |

2184 | thissize, offset * unit + thispos, |

2185 | 0, 1, reverse); |

2186 | bitsdone += thissize; |

2187 | |

2188 | /* Shift this part into place for the result. */ |

2189 | if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN) |

2190 | { |

2191 | if (bitsize != bitsdone) |

2192 | part = expand_shift (LSHIFT_EXPR, word_mode, part, |

2193 | bitsize - bitsdone, 0, 1); |

2194 | } |

2195 | else |

2196 | { |

2197 | if (bitsdone != thissize) |

2198 | part = expand_shift (LSHIFT_EXPR, word_mode, part, |

2199 | bitsdone - thissize, 0, 1); |

2200 | } |

2201 | |

2202 | if (first) |

2203 | result = part; |

2204 | else |

2205 | /* Combine the parts with bitwise or. This works |

2206 | because we extracted each part as an unsigned bit field. */ |

2207 | result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1, |

2208 | OPTAB_LIB_WIDEN); |

2209 | |

2210 | first = 0; |

2211 | } |

2212 | |

2213 | /* Unsigned bit field: we are done. */ |

2214 | if (unsignedp) |

2215 | return result; |

2216 | /* Signed bit field: sign-extend with two arithmetic shifts. */ |

2217 | result = expand_shift (LSHIFT_EXPR, word_mode, result, |

2218 | BITS_PER_WORD - bitsize, NULL_RTX, 0); |

2219 | return expand_shift (RSHIFT_EXPR, word_mode, result, |

2220 | BITS_PER_WORD - bitsize, NULL_RTX, 0); |

2221 | } |

2222 | |

2223 | /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving |

2224 | the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than |

2225 | MODE, fill the upper bits with zeros. Fail if the layout of either |

2226 | mode is unknown (as for CC modes) or if the extraction would involve |

2227 | unprofitable mode punning. Return the value on success, otherwise |

2228 | return null. |

2229 | |

2230 | This is different from gen_lowpart* in these respects: |

2231 | |

2232 | - the returned value must always be considered an rvalue |

2233 | |

2234 | - when MODE is wider than SRC_MODE, the extraction involves |

2235 | a zero extension |

2236 | |

2237 | - when MODE is smaller than SRC_MODE, the extraction involves |

2238 | a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION). |

2239 | |

2240 | In other words, this routine performs a computation, whereas the |

2241 | gen_lowpart* routines are conceptually lvalue or rvalue subreg |

2242 | operations. */ |
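
| /* E.g. extract_low_bits (SImode, SFmode, x) reads the 32 bits of a |

| float as an SImode rvalue (assuming a 32-bit SFmode), provided the |

| target allows the mode punning. */ |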

2243 | |

2244 | rtx |

2245 | extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src) |

2246 | { |

2247 | scalar_int_mode int_mode, src_int_mode; |

2248 | |

2249 | if (mode == src_mode) |

2250 | return src; |

2251 | |

2252 | if (CONSTANT_P (src)) |

2253 | { |

2254 | /* simplify_gen_subreg can't be used here, as if simplify_subreg |

2255 | fails, it will happily create (subreg (symbol_ref)) or similar |

2256 | invalid SUBREGs. */ |

2257 | unsigned int byte = subreg_lowpart_offset (mode, src_mode); |

2258 | rtx ret = simplify_subreg (mode, src, src_mode, byte); |

2259 | if (ret) |

2260 | return ret; |

2261 | |

2262 | if (GET_MODE (src) == VOIDmode |

2263 | || !validate_subreg (mode, src_mode, src, byte)) |

2264 | return NULL_RTX; |

2265 | |

2266 | src = force_reg (GET_MODE (src), src); |

2267 | return gen_rtx_SUBREG (mode, src, byte); |

2268 | } |

2269 | |

2270 | if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC) |

2271 | return NULL_RTX; |

2272 | |

2273 | if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode) |

2274 | && targetm.modes_tieable_p (mode, src_mode)) |

2275 | { |

2276 | rtx x = gen_lowpart_common (mode, src); |

2277 | if (x) |

2278 | return x; |

2279 | } |

2280 | |

2281 | if (!int_mode_for_mode (src_mode).exists (&src_int_mode) |

2282 | || !int_mode_for_mode (mode).exists (&int_mode)) |

2283 | return NULL_RTX; |

2284 | |

2285 | if (!targetm.modes_tieable_p (src_int_mode, src_mode)) |

2286 | return NULL_RTX; |

2287 | if (!targetm.modes_tieable_p (int_mode, mode)) |

2288 | return NULL_RTX; |

2289 | |

2290 | src = gen_lowpart (src_int_mode, src); |

2291 | src = convert_modes (int_mode, src_int_mode, src, true); |

2292 | src = gen_lowpart (mode, src); |

2293 | return src; |

2294 | } |

2295 | |

2296 | /* Add INC into TARGET. */ |

2297 | |

2298 | void |

2299 | expand_inc (rtx target, rtx inc) |

2300 | { |

2301 | rtx value = expand_binop (GET_MODE (target), add_optab, |

2302 | target, inc, |

2303 | target, 0, OPTAB_LIB_WIDEN); |

2304 | if (value != target) |

2305 | emit_move_insn (target, value); |

2306 | } |

2307 | |

2308 | /* Subtract DEC from TARGET. */ |

2309 | |

2310 | void |

2311 | expand_dec (rtx target, rtx dec) |

2312 | { |

2313 | rtx value = expand_binop (GET_MODE (target), sub_optab, |

2314 | target, dec, |

2315 | target, 0, OPTAB_LIB_WIDEN); |

2316 | if (value != target) |

2317 | emit_move_insn (target, value); |

2318 | } |

2319 | |

2320 | /* Output a shift instruction for expression code CODE, |

2321 | with SHIFTED being the rtx for the value to shift, |

2322 | and AMOUNT the rtx for the amount to shift by. |

2323 | Store the result in the rtx TARGET, if that is convenient. |

2324 | If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. |

2325 | Return the rtx for where the value is. |

2326 | If that cannot be done, abort the compilation unless MAY_FAIL is true, |

2327 | in which case 0 is returned. */ |

2328 | |

2329 | static rtx |

2330 | expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted, |

2331 | rtx amount, rtx target, int unsignedp, bool may_fail = false) |

2332 | { |

2333 | rtx op1, temp = 0; |

2334 | int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR); |

2335 | int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR); |

2336 | optab lshift_optab = ashl_optab; |

2337 | optab rshift_arith_optab = ashr_optab; |

2338 | optab rshift_uns_optab = lshr_optab; |

2339 | optab lrotate_optab = rotl_optab; |

2340 | optab rrotate_optab = rotr_optab; |

2341 | machine_mode op1_mode; |

2342 | scalar_mode scalar_mode = GET_MODE_INNER (mode); |

2343 | int attempt; |

2344 | bool speed = optimize_insn_for_speed_p (); |

2345 | |

2346 | op1 = amount; |

2347 | op1_mode = GET_MODE (op1); |

2348 | |

2349 | /* Determine whether the shift/rotate amount is a vector or a scalar. If the |

2350 | shift amount is a vector, use the vector/vector shift patterns. */ |

2351 | if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode)) |

2352 | { |

2353 | lshift_optab = vashl_optab; |

2354 | rshift_arith_optab = vashr_optab; |

2355 | rshift_uns_optab = vlshr_optab; |

2356 | lrotate_optab = vrotl_optab; |

2357 | rrotate_optab = vrotr_optab; |

2358 | } |

2359 | |

2360 | /* Previously detected shift-counts computed by NEGATE_EXPR |

2361 | and shifted in the other direction; but that does not work |

2362 | on all machines. */ |

2363 | |

2364 | if (SHIFT_COUNT_TRUNCATED) |

2365 | { |

2366 | if (CONST_INT_P (op1) |

2367 | && ((unsigned HOST_WIDE_INT) INTVAL (op1) >= |

2368 | (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode))) |

2369 | op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1) |

2370 | % GET_MODE_BITSIZE (scalar_mode)); |

2371 | else if (GET_CODE (op1) == SUBREG |

2372 | && subreg_lowpart_p (op1) |

2373 | && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1))) |

2374 | && SCALAR_INT_MODE_P (GET_MODE (op1))) |

2375 | op1 = SUBREG_REG (op1); |

2376 | } |

2377 | |

2378 | /* Canonicalize rotates by constant amount. If op1 is bitsize / 2, |

2379 | prefer left rotation; if op1 is from bitsize / 2 + 1 to |

2380 | bitsize - 1, use the other direction of rotate with 1 .. bitsize / 2 - 1 |

2381 | amount instead. */ |
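
| /* E.g. in a 32-bit mode, a rotate left by 24 becomes a rotate right |

| by 32 - 24 == 8. */ |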

2382 | if (rotate |

2383 | && CONST_INT_P (op1) |

2384 | && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left, |

2385 | GET_MODE_BITSIZE (scalar_mode) - 1)) |

2386 | { |

2387 | op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1)); |

2388 | left = !left; |

2389 | code = left ? LROTATE_EXPR : RROTATE_EXPR; |

2390 | } |

2391 | |

2392 | /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi. |

2393 | Note that this is not the case for bigger values. For instance a rotation |

2394 | of 0x01020304 by 16 bits gives 0x03040102 which is different from |

2395 | 0x04030201 (bswapsi). */ |
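
| /* (Rotating the 16-bit value 0x0102 by 8 bits gives 0x0201, exactly |

| bswaphi's result.) */ |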

2396 | if (rotate |

2397 | && CONST_INT_P (op1) |

2398 | && INTVAL (op1) == BITS_PER_UNIT |

2399 | && GET_MODE_SIZE (scalar_mode) == 2 |

2400 | && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing) |

2401 | return expand_unop (HImode, bswap_optab, shifted, NULL_RTX, |

2402 | unsignedp); |

2403 | |

2404 | if (op1 == const0_rtx) |

2405 | return shifted; |

2406 | |

2407 | /* Check whether it's cheaper to implement a left shift by a constant |

2408 | bit count by a sequence of additions. */ |
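
| /* E.g. a shift left by 3 becomes three self-additions: |

| x + x == 2x, 2x + 2x == 4x, 4x + 4x == 8x == x << 3. */ |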

2409 | if (code == LSHIFT_EXPR |

2410 | && CONST_INT_P (op1) |

2411 | && INTVAL (op1) > 0 |

2412 | && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode) |

2413 | && INTVAL (op1) < MAX_BITS_PER_WORD |

2414 | && (shift_cost (speed, mode, INTVAL (op1)) |

2415 | > INTVAL (op1) * add_cost (speed, mode)) |

2416 | && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST) |

2417 | { |

2418 | int i; |

2419 | for (i = 0; i < INTVAL (op1); i++) |

2420 | { |

2421 | temp = force_reg (mode, shifted); |

2422 | shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX, |

2423 | unsignedp, OPTAB_LIB_WIDEN); |

2424 | } |

2425 | return shifted; |

2426 | } |

2427 | |

2428 | for (attempt = 0; temp == 0 && attempt < 3; attempt++) |

2429 | { |

2430 | enum optab_methods methods; |

2431 | |

2432 | if (attempt == 0) |

2433 | methods = OPTAB_DIRECT; |

2434 | else if (attempt == 1) |

2435 | methods = OPTAB_WIDEN; |

2436 | else |

2437 | methods = OPTAB_LIB_WIDEN; |

2438 | |

2439 | if (rotate) |

2440 | { |

2441 | /* Widening does not work for rotation. */ |

2442 | if (methods == OPTAB_WIDEN) |

2443 | continue; |

2444 | else if (methods == OPTAB_LIB_WIDEN) |

2445 | { |

2446 | /* If we have been unable to open-code this by a rotation, |

2447 | do it as the IOR of two shifts. I.e., to rotate A |

2448 | by N bits, compute |

2449 | (A << N) | ((unsigned) A >> ((-N) & (C - 1))) |

2450 | where C is the bitsize of A. |

2451 | |

2452 | It is theoretically possible that the target machine might |

2453 | not be able to perform either shift and hence we would |

2454 | be making two libcalls rather than just the one for the |

2455 | shift (similarly if IOR could not be done). We will allow |

2456 | this extremely unlikely lossage to avoid complicating the |

2457 | code below. */ |
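
| /* E.g. for a 32-bit rotate left by N == 8 this computes |

| (A << 8) | ((unsigned) A >> ((-8) & 31)) == (A << 8) | (A >> 24); |

| the AND with C - 1 keeps the right-shift amount within 0 .. 31 even |

| when N is a variable. */ |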

2458 | |

2459 | rtx subtarget = target == shifted ? 0 : target; |

2460 | rtx new_amount, other_amount; |

2461 | rtx temp1; |

2462 | |

2463 | new_amount = op1; |

2464 | if (op1 == const0_rtx) |

2465 | return shifted; |

2466 | else if (CONST_INT_P (op1)) |

2467 | other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode) |

2468 | - INTVAL (op1)); |

2469 | else |

2470 | { |

2471 | other_amount |

2472 | = simplify_gen_unary (NEG, GET_MODE (op1), |

2473 | op1, GET_MODE (op1)); |

2474 | HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1; |

2475 | other_amount |

2476 | = simplify_gen_binary (AND, GET_MODE (op1), other_amount, |

2477 | gen_int_mode (mask, GET_MODE (op1))); |

2478 | } |

2479 | |

2480 | shifted = force_reg (mode, shifted); |

2481 | |

2482 | temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR, |

2483 | mode, shifted, new_amount, 0, 1); |

2484 | temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR, |

2485 | mode, shifted, other_amount, |

2486 | subtarget, 1); |

2487 | return expand_binop (mode, ior_optab, temp, temp1, target, |

2488 | unsignedp, methods); |

2489 | } |

2490 | |

2491 | temp = expand_binop (mode, |

2492 | left ? lrotate_optab : rrotate_optab, |

2493 | shifted, op1, target, unsignedp, methods); |

2494 | } |

2495 | else if (unsignedp) |

2496 | temp = expand_binop (mode, |

2497 | left ? lshift_optab : rshift_uns_optab, |

2498 | shifted, op1, target, unsignedp, methods); |

2499 | |

2500 | /* Do arithmetic shifts. |

2501 | Also, if we are going to widen the operand, we can just as well |

2502 | use an arithmetic right-shift instead of a logical one. */ |

2503 | if (temp == 0 && ! rotate |

2504 | && (! unsignedp || (! left && methods == OPTAB_WIDEN))) |

2505 | { |

2506 | enum optab_methods methods1 = methods; |

2507 | |

2508 | /* If trying to widen a log shift to an arithmetic shift, |

2509 | don't accept an arithmetic shift of the same size. */ |

2510 | if (unsignedp) |

2511 | methods1 = OPTAB_MUST_WIDEN; |

2512 | |

2513 | /* Arithmetic shift */ |

2514 | |

2515 | temp = expand_binop (mode, |

2516 | left ? lshift_optab : rshift_arith_optab, |

2517 | shifted, op1, target, unsignedp, methods1); |

2518 | } |

2519 | |

2520 | /* We used to try extzv here for logical right shifts, but that was |

2521 | only useful for one machine, the VAX, and caused poor code |

2522 | generation there for lshrdi3, so the code was deleted and a |

2523 | define_expand for lshrsi3 was added to vax.md. */ |

2524 | } |

2525 | |

2526 | gcc_assert (temp != NULL_RTX || may_fail); |

2527 | return temp; |

2528 | } |

2529 | |

2530 | /* Output a shift instruction for expression code CODE, |

2531 | with SHIFTED being the rtx for the value to shift, |

2532 | and AMOUNT the amount to shift by. |

2533 | Store the result in the rtx TARGET, if that is convenient. |

2534 | If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. |

2535 | Return the rtx for where the value is. */ |

2536 | |

2537 | rtx |

2538 | expand_shift (enum tree_code code, machine_mode mode, rtx shifted, |

2539 | int amount, rtx target, int unsignedp) |

2540 | { |

2541 | return expand_shift_1 (code, mode, |

2542 | shifted, GEN_INT (amount), target, unsignedp); |

2543 | } |

2544 | |

2545 | /* Likewise, but return 0 if that cannot be done. */ |

2546 | |

2547 | static rtx |

2548 | maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted, |

2549 | int amount, rtx target, int unsignedp) |

2550 | { |

2551 | return expand_shift_1 (code, mode, |

2552 | shifted, GEN_INT (amount), target, unsignedp, true); |

2553 | } |

2554 | |

2555 | /* Output a shift instruction for expression code CODE, |

2556 | with SHIFTED being the rtx for the value to shift, |

2557 | and AMOUNT the tree for the amount to shift by. |

2558 | Store the result in the rtx TARGET, if that is convenient. |

2559 | If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. |

2560 | Return the rtx for where the value is. */ |

2561 | |

2562 | rtx |

2563 | expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted, |

2564 | tree amount, rtx target, int unsignedp) |

2565 | { |

2566 | return expand_shift_1 (code, mode, |

2567 | shifted, expand_normal (amount), target, unsignedp); |

2568 | } |

2569 | |

2570 | |

2571 | static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, |

2572 | const struct mult_cost *, machine_mode mode); |

2573 | static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx, |

2574 | const struct algorithm *, enum mult_variant); |

2575 | static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int); |

2576 | static rtx extract_high_half (scalar_int_mode, rtx); |

2577 | static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int); |

2578 | static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx, |

2579 | int, int); |

2580 | /* Compute and return the best algorithm for multiplying by T. |

2581 | The algorithm must cost less than COST_LIMIT. |

2582 | If retval.cost >= COST_LIMIT, no algorithm was found and all |

2583 | other fields of the returned struct are undefined. |

2584 | MODE is the machine mode of the multiplication. */ |

2585 | |

2586 | static void |

2587 | synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, |

2588 | const struct mult_cost *cost_limit, machine_mode mode) |

2589 | { |

2590 | int m; |

2591 | struct algorithm *alg_in, *best_alg; |

2592 | struct mult_cost best_cost; |

2593 | struct mult_cost new_limit; |

2594 | int op_cost, op_latency; |

2595 | unsigned HOST_WIDE_INT orig_t = t; |

2596 | unsigned HOST_WIDE_INT q; |

2597 | int maxm, hash_index; |

2598 | bool cache_hit = false; |

2599 | enum alg_code cache_alg = alg_zero; |

2600 | bool speed = optimize_insn_for_speed_p (); |

2601 | scalar_int_mode imode; |

2602 | struct alg_hash_entry *entry_ptr; |

2603 | |

2604 | /* Indicate that no algorithm is yet found. If no algorithm |

2605 | is found, this value will be returned and indicate failure. */ |

2606 | alg_out->cost.cost = cost_limit->cost + 1; |

2607 | alg_out->cost.latency = cost_limit->latency + 1; |

2608 | |

2609 | if (cost_limit->cost < 0 |

2610 | || (cost_limit->cost == 0 && cost_limit->latency <= 0)) |

2611 | return; |

2612 | |

2613 | /* Be prepared for vector modes. */ |

2614 | imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode)); |

2615 | |

2616 | maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode)); |

2617 | |

2618 | /* Restrict the bits of "t" to the multiplication's mode. */ |

2619 | t &= GET_MODE_MASK (imode); |

2620 | |

2621 | /* t == 1 can be done in zero cost. */ |

2622 | if (t == 1) |

2623 | { |

2624 | alg_out->ops = 1; |

2625 | alg_out->cost.cost = 0; |

2626 | alg_out->cost.latency = 0; |

2627 | alg_out->op[0] = alg_m; |

2628 | return; |

2629 | } |

2630 | |

2631 | /* t == 0 sometimes has a cost. If it does and it exceeds our limit, |

2632 | fail now. */ |

2633 | if (t == 0) |

2634 | { |

2635 | if (MULT_COST_LESS (cost_limit, zero_cost (speed))) |

2636 | return; |

2637 | else |

2638 | { |

2639 | alg_out->ops = 1; |

2640 | alg_out->cost.cost = zero_cost (speed); |

2641 | alg_out->cost.latency = zero_cost (speed); |

2642 | alg_out->op[0] = alg_zero; |

2643 | return; |

2644 | } |

2645 | } |

2646 | |

2647 | /* We'll be needing a couple extra algorithm structures now. */ |

2648 | |

2649 | alg_in = XALLOCA (struct algorithm); |

2650 | best_alg = XALLOCA (struct algorithm); |

2651 | best_cost = *cost_limit; |

2652 | |

2653 | /* Compute the hash index. */ |

2654 | hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES; |

2655 | |

2656 | /* See if we already know what to do for T. */ |

2657 | entry_ptr = alg_hash_entry_ptr (hash_index); |

2658 | if (entry_ptr->t == t |

2659 | && entry_ptr->mode == mode |

2660 | && entry_ptr->speed == speed |

2661 | && entry_ptr->alg != alg_unknown) |

2662 | { |

2663 | cache_alg = entry_ptr->alg; |

2664 | |

2665 | if (cache_alg == alg_impossible) |

2666 | { |

2667 | /* The cache tells us that it's impossible to synthesize |

2668 | multiplication by T within entry_ptr->cost. */ |

2669 | if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit)) |

2670 | /* COST_LIMIT is at least as restrictive as the one |

2671 | recorded in the hash table, in which case we have no |

2672 | hope of synthesizing a multiplication. Just |

2673 | return. */ |

2674 | return; |

2675 | |

2676 | /* If we get here, COST_LIMIT is less restrictive than the |

2677 | one recorded in the hash table, so we may be able to |

2678 | synthesize a multiplication. Proceed as if we didn't |

2679 | have the cache entry. */ |

2680 | } |

2681 | else |

2682 | { |

2683 | if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost)) |

2684 | /* The cached algorithm shows that this multiplication |

2685 | requires more cost than COST_LIMIT. Just return. This |

2686 | way, we don't clobber this cache entry with |

2687 | alg_impossible but retain useful information. */ |

2688 | return; |

2689 | |

2690 | cache_hit = true; |

2691 | |

2692 | switch (cache_alg) |

2693 | { |

2694 | case alg_shift: |

2695 | goto do_alg_shift; |

2696 | |

2697 | case alg_add_t_m2: |

2698 | case alg_sub_t_m2: |

2699 | goto do_alg_addsub_t_m2; |

2700 | |

2701 | case alg_add_factor: |

2702 | case alg_sub_factor: |

2703 | goto do_alg_addsub_factor; |

2704 | |

2705 | case alg_add_t2_m: |

2706 | goto do_alg_add_t2_m; |

2707 | |

2708 | case alg_sub_t2_m: |

2709 | goto do_alg_sub_t2_m; |

2710 | |

2711 | default: |

2712 | gcc_unreachable (); |

2713 | } |

2714 | } |

2715 | } |

2716 | |

2717 | /* If we have a group of zero bits at the low-order part of T, try |

2718 | multiplying by the remaining bits and then doing a shift. */ |
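
| /* E.g. t == 20 (10100 in binary) is synthesized as x * 5 followed by |

| a shift left by 2, since 20 == 5 << 2. */ |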

2719 | |

2720 | if ((t & 1) == 0) |

2721 | { |

2722 | do_alg_shift: |

2723 | m = ctz_or_zero (t); /* m = number of low zero bits */ |

2724 | if (m < maxm) |

2725 | { |

2726 | q = t >> m; |

2727 | /* The function expand_shift will choose between a shift and |

2728 | a sequence of additions, so the observed cost is given as |

2729 | MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */ |

2730 | op_cost = m * add_cost (speed, mode); |

2731 | if (shift_cost (speed, mode, m) < op_cost) |

2732 | op_cost = shift_cost (speed, mode, m); |

2733 | new_limit.cost = best_cost.cost - op_cost; |

2734 | new_limit.latency = best_cost.latency - op_cost; |

2735 | synth_mult (alg_in, q, &new_limit, mode); |

2736 | |

2737 | alg_in->cost.cost += op_cost; |

2738 | alg_in->cost.latency += op_cost; |

2739 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) |

2740 | { |

2741 | best_cost = alg_in->cost; |

2742 | std::swap (alg_in, best_alg); |

2743 | best_alg->log[best_alg->ops] = m; |

2744 | best_alg->op[best_alg->ops] = alg_shift; |

2745 | } |

2746 | |

2747 | /* See if treating ORIG_T as a signed number yields a better |

2748 | sequence. Try this sequence only for a negative ORIG_T |

2749 | as it would be useless for a non-negative ORIG_T. */ |

2750 | if ((HOST_WIDE_INT) orig_t < 0) |

2751 | { |

2752 | /* Shift ORIG_T as follows because a right shift of a |

2753 | negative-valued signed type is implementation |

2754 | defined. */ |

2755 | q = ~(~orig_t >> m); |

2756 | /* The function expand_shift will choose between a shift |

2757 | and a sequence of additions, so the observed cost is |

2758 | given as MIN (m * add_cost(speed, mode), |

2759 | shift_cost(speed, mode, m)). */ |

2760 | op_cost = m * add_cost (speed, mode); |

2761 | if (shift_cost (speed, mode, m) < op_cost) |

2762 | op_cost = shift_cost (speed, mode, m); |

2763 | new_limit.cost = best_cost.cost - op_cost; |

2764 | new_limit.latency = best_cost.latency - op_cost; |

2765 | synth_mult (alg_in, q, &new_limit, mode); |

2766 | |

2767 | alg_in->cost.cost += op_cost; |

2768 | alg_in->cost.latency += op_cost; |

2769 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) |

2770 | { |

2771 | best_cost = alg_in->cost; |

2772 | std::swap (alg_in, best_alg); |

2773 | best_alg->log[best_alg->ops] = m; |

2774 | best_alg->op[best_alg->ops] = alg_shift; |

2775 | } |

2776 | } |

2777 | } |

2778 | if (cache_hit) |

2779 | goto done; |

2780 | } |

2781 | |

2782 | /* If we have an odd number, add or subtract one. */ |

2783 | if ((t & 1) != 0) |

2784 | { |

2785 | unsigned HOST_WIDE_INT w; |

2786 | |

2787 | do_alg_addsub_t_m2: |

2788 | for (w = 1; (w & t) != 0; w <<= 1) |

2789 | ; |

2790 | /* If T was -1, then W will be zero after the loop. This is another |

2791 | case where T ends with ...111. Handling this by multiplying by |

2792 | (T + 1) and subtracting produces slightly better code and results |

2793 | in algorithm selection much faster than treating it like the ...0111 case |

2794 | below. */ |
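
| /* E.g. t == 7 takes this path: synthesize t + 1 == 8 (a single |

| shift) and subtract, giving 7*x == (x << 3) - x. */ |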

2795 | if (w == 0 |

2796 | || (w > 2 |

2797 | /* Reject the case where t is 3. |

2798 | Thus we prefer addition in that case. */ |

2799 | && t != 3)) |

2800 | { |

2801 | /* T ends with ...111. Multiply by (T + 1) and subtract T. */ |

2802 | |

2803 | op_cost = add_cost (speed, mode); |

2804 | new_limit.cost = best_cost.cost - op_cost; |

2805 | new_limit.latency = best_cost.latency - op_cost; |

2806 | synth_mult (alg_in, t + 1, &new_limit, mode); |

2807 | |

2808 | alg_in->cost.cost += op_cost; |

2809 | alg_in->cost.latency += op_cost; |

2810 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) |

2811 | { |

2812 | best_cost = alg_in->cost; |

2813 | std::swap (alg_in, best_alg); |

2814 | best_alg->log[best_alg->ops] = 0; |

2815 | best_alg->op[best_alg->ops] = alg_sub_t_m2; |

2816 | } |

2817 | } |

2818 | else |

2819 | { |

2820 | /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */ |

2821 | |

2822 | op_cost = add_cost (speed, mode); |

2823 | new_limit.cost = best_cost.cost - op_cost; |

2824 | new_limit.latency = best_cost.latency - op_cost; |

2825 | synth_mult (alg_in, t - 1, &new_limit, mode); |

2826 | |

2827 | alg_in->cost.cost += op_cost; |

2828 | alg_in->cost.latency += op_cost; |

2829 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) |

2830 | { |

2831 | best_cost = alg_in->cost; |

2832 | std::swap (alg_in, best_alg); |

2833 | best_alg->log[best_alg->ops] = 0; |

2834 | best_alg->op[best_alg->ops] = alg_add_t_m2; |

2835 | } |

2836 | } |

2837 | |

2838 | /* We may be able to calculate a * -7, a * -15, a * -31, etc |

2839 | quickly with a - a * n for some appropriate constant n. */ |

2840 | m = exact_log2 (-orig_t + 1); |

2841 | if (m >= 0 && m < maxm) |

2842 | { |

2843 | op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); |

2844 | /* If the target has a cheap shift-and-subtract insn use |

2845 | that in preference to a shift insn followed by a sub insn. |

2846 | Assume that the shift-and-sub is "atomic" with a latency |

2847 | equal to its cost; otherwise assume that on superscalar |

2848 | hardware the shift may be executed concurrently with the |

2849 | earlier steps in the algorithm. */ |

2850 | if (shiftsub1_cost (speed, mode, m) <= op_cost) |

2851 | { |

2852 | op_cost = shiftsub1_cost (speed, mode, m); |

2853 | op_latency = op_cost; |

2854 | } |

2855 | else |

2856 | op_latency = add_cost (speed, mode); |

2857 | |

2858 | new_limit.cost = best_cost.cost - op_cost; |

2859 | new_limit.latency = best_cost.latency - op_latency; |

2860 | synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, |

2861 | &new_limit, mode); |

2862 | |

2863 | alg_in->cost.cost += op_cost; |

2864 | alg_in->cost.latency += op_latency; |

2865 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) |

2866 | { |

2867 | best_cost = alg_in->cost; |

2868 | std::swap (alg_in, best_alg); |

2869 | best_alg->log[best_alg->ops] = m; |

2870 | best_alg->op[best_alg->ops] = alg_sub_t_m2; |

2871 | } |

2872 | } |

2873 | |

2874 | if (cache_hit) |

2875 | goto done; |

2876 | } |

2877 | |

2878 | /* Look for factors of t of the form |

2879 | t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)). |

2880 | If we find such a factor, we can multiply by t using an algorithm that |

2881 | multiplies by q, shift the result by m and add/subtract it to itself. |

2882 | |

2883 | We search for large factors first and loop down, even though large |

2884 | factors are less probable than small ones; if we find a large factor |

2885 | we will find a good sequence quickly, and will therefore be able to |

2886 | prune the search (by decreasing COST_LIMIT). */ |
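/* For instance, t = 28 factors as 7 * 4 with 7 == 2^3 - 1: the loop
   below recurses on q = 4 (a pure shift), and x*28 is then computed
   as ((x*4) << 3) - x*4, an alg_sub_factor step.  */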

2887 | |

2888 | do_alg_addsub_factor: |

2889 | for (m = floor_log2 (t - 1); m >= 2; m--) |

2890 | { |

2891 | unsigned HOST_WIDE_INT d; |

2892 | |

2893 | d = (HOST_WIDE_INT_1U << m) + 1; |

2894 | if (t % d == 0 && t > d && m < maxm |

2895 | && (!cache_hit || cache_alg == alg_add_factor)) |

2896 | { |

2897 | op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); |

2898 | if (shiftadd_cost (speed, mode, m) <= op_cost) |

2899 | op_cost = shiftadd_cost (speed, mode, m); |

2900 | |

2901 | op_latency = op_cost; |

2902 | |

2904 | new_limit.cost = best_cost.cost - op_cost; |

2905 | new_limit.latency = best_cost.latency - op_latency; |

2906 | synth_mult (alg_in, t / d, &new_limit, mode); |

2907 | |

2908 | alg_in->cost.cost += op_cost; |

2909 | alg_in->cost.latency += op_latency; |

2910 | if (alg_in->cost.latency < op_cost) |

2911 | alg_in->cost.latency = op_cost; |

2912 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) |

2913 | { |

2914 | best_cost = alg_in->cost; |

2915 | std::swap (alg_in, best_alg); |

2916 | best_alg->log[best_alg->ops] = m; |

2917 | best_alg->op[best_alg->ops] = alg_add_factor; |

2918 | } |

2919 | /* Other factors will have been taken care of in the recursion. */ |

2920 | break; |

2921 | } |

2922 | |

2923 | d = (HOST_WIDE_INT_1U << m) - 1; |

2924 | if (t % d == 0 && t > d && m < maxm |

2925 | && (!cache_hit || cache_alg == alg_sub_factor)) |

2926 | { |

2927 | op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m); |

2928 | if (shiftsub0_cost (speed, mode, m) <= op_cost) |

2929 | op_cost = shiftsub0_cost (speed, mode, m); |

2930 | |

2931 | op_latency = op_cost; |

2932 | |

2933 | new_limit.cost = best_cost.cost - op_cost; |

2934 | new_limit.latency = best_cost.latency - op_latency; |

2935 | synth_mult (alg_in, t / d, &new_limit, mode); |

2936 | |

2937 | alg_in->cost.cost += op_cost; |

2938 | alg_in->cost.latency += op_latency; |

2939 | if (alg_in->cost.latency < op_cost) |

2940 | alg_in->cost.latency = op_cost; |

2941 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) |

2942 | { |

2943 | best_cost = alg_in->cost; |

2944 | std::swap (alg_in, best_alg); |

2945 | best_alg->log[best_alg->ops] = m; |

2946 | best_alg->op[best_alg->ops] = alg_sub_factor; |

2947 | } |

2948 | break; |

2949 | } |

2950 | } |

2951 | if (cache_hit) |

2952 | goto done; |

2953 | |

2954 | /* Try shift-and-add (load effective address) instructions, |

2955 | i.e. do a*3, a*5, a*9. */ |

2956 | if ((t & 1) != 0) |

2957 | { |

2958 | do_alg_add_t2_m: |

2959 | q = t - 1; |

2960 | m = ctz_hwi (q); |

2961 | if (q && m < maxm) |

2962 | { |

2963 | op_cost = shiftadd_cost (speed, mode, m); |

2964 | new_limit.cost = best_cost.cost - op_cost; |

2965 | new_limit.latency = best_cost.latency - op_cost; |

2966 | synth_mult (alg_in, (t - 1) >> m, &new_limit, mode); |

2967 | |

2968 | alg_in->cost.cost += op_cost; |

2969 | alg_in->cost.latency += op_cost; |

2970 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) |

2971 | { |

2972 | best_cost = alg_in->cost; |

2973 | std::swap (alg_in, best_alg); |

2974 | best_alg->log[best_alg->ops] = m; |

2975 | best_alg->op[best_alg->ops] = alg_add_t2_m; |

2976 | } |

2977 | } |

2978 | if (cache_hit) |

2979 | goto done; |

2980 | |

2981 | do_alg_sub_t2_m: |

2982 | q = t + 1; |

2983 | m = ctz_hwi (q); |

2984 | if (q && m < maxm) |

2985 | { |

2986 | op_cost = shiftsub0_cost (speed, mode, m); |

2987 | new_limit.cost = best_cost.cost - op_cost; |

2988 | new_limit.latency = best_cost.latency - op_cost; |

2989 | synth_mult (alg_in, (t + 1) >> m, &new_limit, mode); |

2990 | |

2991 | alg_in->cost.cost += op_cost; |

2992 | alg_in->cost.latency += op_cost; |

2993 | if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) |

2994 | { |

2995 | best_cost = alg_in->cost; |

2996 | std::swap (alg_in, best_alg); |

2997 | best_alg->log[best_alg->ops] = m; |

2998 | best_alg->op[best_alg->ops] = alg_sub_t2_m; |

2999 | } |

3000 | } |

3001 | if (cache_hit) |

3002 | goto done; |

3003 | } |

3004 | |

3005 | done: |

3006 | /* If best_cost has not decreased, we have not found any algorithm. */ |

3007 | if (!CHEAPER_MULT_COST (&best_cost, cost_limit)) |

3008 | { |

3009 | /* We failed to find an algorithm. Record alg_impossible for |

3010 | this case (that is, <T, MODE, COST_LIMIT>) so that next time |

3011 | we are asked to find an algorithm for T within the same or |

3012 | lower COST_LIMIT, we can immediately return to the |

3013 | caller. */ |

3014 | entry_ptr->t = t; |

3015 | entry_ptr->mode = mode; |

3016 | entry_ptr->speed = speed; |

3017 | entry_ptr->alg = alg_impossible; |

3018 | entry_ptr->cost = *cost_limit; |

3019 | return; |

3020 | } |

3021 | |

3022 | /* Cache the result. */ |

3023 | if (!cache_hit) |

3024 | { |

3025 | entry_ptr->t = t; |

3026 | entry_ptr->mode = mode; |

3027 | entry_ptr->speed = speed; |

3028 | entry_ptr->alg = best_alg->op[best_alg->ops]; |

3029 | entry_ptr->cost.cost = best_cost.cost; |

3030 | entry_ptr->cost.latency = best_cost.latency; |

3031 | } |

3032 | |

3033 | /* If the sequence is too long for `struct algorithm' |

3034 | to record, make this search fail. */ |

3035 | if (best_alg->ops == MAX_BITS_PER_WORD) |

3036 | return; |

3037 | |

3038 | /* Copy the algorithm from temporary space to the space at alg_out. |

3039 | We avoid using structure assignment because the majority of |

3040 | best_alg is normally undefined, and this is a critical function. */ |

3041 | alg_out->ops = best_alg->ops + 1; |

3042 | alg_out->cost = best_cost; |

3043 | memcpy (alg_out->op, best_alg->op, |

3044 | alg_out->ops * sizeof *alg_out->op); |

3045 | memcpy (alg_out->log, best_alg->log, |

3046 | alg_out->ops * sizeof *alg_out->log); |

3047 | } |
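/* A worked example of the search above (an illustrative sketch, not
   part of GCC): for t = 45 the factor loop finds d = 9 == 2^3 + 1 and
   recurses on q = 5, itself synthesized as 4 + 1, so the whole
   multiplication costs two shift-and-add operations.  */
#if 0
static unsigned int
example_mult_by_45 (unsigned int x)
{
  unsigned int q = (x << 2) + x;	/* x * 5: alg_add_t2_m, log 2.  */
  return (q << 3) + q;			/* q * 9: alg_add_factor, log 3.  */
}
#endif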

3048 | |

3049 | /* Find the cheapest way of multiplying a value of mode MODE by VAL. |

3050 | Try three variations: |

3051 | |

3052 | - a shift/add sequence based on VAL itself |

3053 | - a shift/add sequence based on -VAL, followed by a negation |

3054 | - a shift/add sequence based on VAL - 1, followed by an addition. |

3055 | |

3056 | Return true if the cheapest of these cost less than MULT_COST, |

3057 | describing the algorithm in *ALG and final fixup in *VARIANT. */ |

3058 | |

3059 | bool |

3060 | choose_mult_variant (machine_mode mode, HOST_WIDE_INT val, |

3061 | struct algorithm *alg, enum mult_variant *variant, |

3062 | int mult_cost) |

3063 | { |

3064 | struct algorithm alg2; |

3065 | struct mult_cost limit; |

3066 | int op_cost; |

3067 | bool speed = optimize_insn_for_speed_p (); |

3068 | |

3069 | /* Fail quickly for impossible bounds. */ |

3070 | if (mult_cost < 0) |

3071 | return false; |

3072 | |

3073 | /* Ensure that mult_cost provides a reasonable upper bound. |

3074 | Any constant multiplication can be performed with fewer |

3075 | than 2 * bits additions. */ |

3076 | op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode); |

3077 | if (mult_cost > op_cost) |

3078 | mult_cost = op_cost; |

3079 | |

3080 | *variant = basic_variant; |

3081 | limit.cost = mult_cost; |

3082 | limit.latency = mult_cost; |

3083 | synth_mult (alg, val, &limit, mode); |

3084 | |

3085 | /* This works only if the negated value actually fits in an |

3086 | `unsigned int'. */ |

3087 | if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode)) |

3088 | { |

3089 | op_cost = neg_cost (speed, mode); |

3090 | if (MULT_COST_LESS (&alg->cost, mult_cost)) |

3091 | { |

3092 | limit.cost = alg->cost.cost - op_cost; |

3093 | limit.latency = alg->cost.latency - op_cost; |

3094 | } |

3095 | else |

3096 | { |

3097 | limit.cost = mult_cost - op_cost; |

3098 | limit.latency = mult_cost - op_cost; |

3099 | } |

3100 | |

3101 | synth_mult (&alg2, -val, &limit, mode); |

3102 | alg2.cost.cost += op_cost; |

3103 | alg2.cost.latency += op_cost; |

3104 | if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost)) |

3105 | *alg = alg2, *variant = negate_variant; |

3106 | } |

3107 | |

3108 | /* This proves very useful for division-by-constant. */ |

3109 | op_cost = add_cost (speed, mode); |

3110 | if (MULT_COST_LESS (&alg->cost, mult_cost)) |

3111 | { |

3112 | limit.cost = alg->cost.cost - op_cost; |

3113 | limit.latency = alg->cost.latency - op_cost; |

3114 | } |

3115 | else |

3116 | { |

3117 | limit.cost = mult_cost - op_cost; |

3118 | limit.latency = mult_cost - op_cost; |

3119 | } |

3120 | |

3121 | synth_mult (&alg2, val - 1, &limit, mode); |

3122 | alg2.cost.cost += op_cost; |

3123 | alg2.cost.latency += op_cost; |

3124 | if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost)) |

3125 | *alg = alg2, *variant = add_variant; |

3126 | |

3127 | return MULT_COST_LESS (&alg->cost, mult_cost); |

3128 | } |
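/* An illustrative sketch (not GCC code) of the three variants tried
   above; which one wins depends on the target's add and negate costs:
     basic:   x *  5  ->  (x << 2) + x
     negate:  x * -5  ->  -((x << 2) + x)
     add:     x * 11  ->  (((x << 2) + x) << 1) + x, from 11 - 1 == 10.  */
#if 0
static int
example_variants (int x, int which)
{
  switch (which)
    {
    case 0:	/* basic_variant:  x * 5.  */
      return (x << 2) + x;
    case 1:	/* negate_variant: x * -5 computed as -(x * 5).  */
      return -((x << 2) + x);
    default:	/* add_variant:    x * 11 computed as x * 10 + x.  */
      return (((x << 2) + x) << 1) + x;
    }
}
#endif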

3129 | |

3130 | /* A subroutine of expand_mult, used for constant multiplications. |

3131 | Multiply OP0 by VAL in mode MODE, storing the result in TARGET if |

3132 | convenient. Use the shift/add sequence described by ALG and apply |

3133 | the final fixup specified by VARIANT. */ |

3134 | |

3135 | static rtx |

3136 | expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val, |

3137 | rtx target, const struct algorithm *alg, |

3138 | enum mult_variant variant) |

3139 | { |

3140 | unsigned HOST_WIDE_INT val_so_far; |

3141 | rtx_insn *insn; |

3142 | rtx accum, tem; |

3143 | int opno; |

3144 | machine_mode nmode; |

3145 | |

3146 | /* Avoid referencing memory over and over, and avoid invalid |

3147 | sharing on SUBREGs. */ |

3148 | op0 = force_reg (mode, op0); |

3149 | |

3150 | /* ACCUM starts out either as OP0 or as a zero, depending on |

3151 | the first operation. */ |

3152 | |

3153 | if (alg->op[0] == alg_zero) |

3154 | { |

3155 | accum = copy_to_mode_reg (mode, CONST0_RTX (mode)); |

3156 | val_so_far = 0; |

3157 | } |

3158 | else if (alg->op[0] == alg_m) |

3159 | { |

3160 | accum = copy_to_mode_reg (mode, op0); |

3161 | val_so_far = 1; |

3162 | } |

3163 | else |

3164 | gcc_unreachable (); |

3165 | |

3166 | for (opno = 1; opno < alg->ops; opno++) |

3167 | { |

3168 | int log = alg->log[opno]; |

3169 | rtx shift_subtarget = optimize ? 0 : accum; |

3170 | rtx add_target |

3171 | = (opno == alg->ops - 1 && target != 0 && variant != add_variant |

3172 | && !optimize) |

3173 | ? target : 0; |

3174 | rtx accum_target = optimize ? 0 : accum; |

3175 | rtx accum_inner; |

3176 | |

3177 | switch (alg->op[opno]) |

3178 | { |

3179 | case alg_shift: |

3180 | tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0); |

3181 | /* REG_EQUAL note will be attached to the following insn. */ |

3182 | emit_move_insn (accum, tem); |

3183 | val_so_far <<= log; |

3184 | break; |

3185 | |

3186 | case alg_add_t_m2: |

3187 | tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0); |

3188 | accum = force_operand (gen_rtx_PLUS (mode, accum, tem), |

3189 | add_target ? add_target : accum_target); |

3190 | val_so_far += HOST_WIDE_INT_1U << log; |

3191 | break; |

3192 | |

3193 | case alg_sub_t_m2: |

3194 | tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0); |

3195 | accum = force_operand (gen_rtx_MINUS (mode, accum, tem), |

3196 | add_target ? add_target : accum_target); |

3197 | val_so_far -= HOST_WIDE_INT_1U << log; |

3198 | break; |

3199 | |

3200 | case alg_add_t2_m: |

3201 | accum = expand_shift (LSHIFT_EXPR, mode, accum, |

3202 | log, shift_subtarget, 0); |

3203 | accum = force_operand (gen_rtx_PLUS (mode, accum, op0), |

3204 | add_target ? add_target : accum_target); |

3205 | val_so_far = (val_so_far << log) + 1; |

3206 | break; |

3207 | |

3208 | case alg_sub_t2_m: |

3209 | accum = expand_shift (LSHIFT_EXPR, mode, accum, |

3210 | log, shift_subtarget, 0); |

3211 | accum = force_operand (gen_rtx_MINUS (mode, accum, op0), |

3212 | add_target ? add_target : accum_target); |

3213 | val_so_far = (val_so_far << log) - 1; |

3214 | break; |

3215 | |

3216 | case alg_add_factor: |

3217 | tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0); |

3218 | accum = force_operand (gen_rtx_PLUS (mode, accum, tem), |

3219 | add_target ? add_target : accum_target); |

3220 | val_so_far += val_so_far << log; |

3221 | break; |

3222 | |

3223 | case alg_sub_factor: |

3224 | tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0); |

3225 | accum = force_operand (gen_rtx_MINUS (mode, tem, accum), |

3226 | (add_target |

3227 | ? add_target : (optimize ? 0 : tem))); |

3228 | val_so_far = (val_so_far << log) - val_so_far; |

3229 | break; |

3230 | |

3231 | default: |

3232 | gcc_unreachable (); |

3233 | } |

3234 | |

3235 | if (SCALAR_INT_MODE_P (mode)) |

3236 | { |

3237 | /* Write a REG_EQUAL note on the last insn so that we can cse |

3238 | multiplication sequences. Note that if ACCUM is a SUBREG, |

3239 | we've set the inner register and must properly indicate that. */ |

3240 | tem = op0, nmode = mode; |

3241 | accum_inner = accum; |

3242 | if (GET_CODE (accum) == SUBREG) |

3243 | { |

3244 | accum_inner = SUBREG_REG (accum); |

3245 | nmode = GET_MODE (accum_inner); |

3246 | tem = gen_lowpart (nmode, op0); |

3247 | } |

3248 | |

3249 | insn = get_last_insn (); |

3250 | set_dst_reg_note (insn, REG_EQUAL, |

3251 | gen_rtx_MULT (nmode, tem, |

3252 | gen_int_mode (val_so_far, nmode)), |

3253 | accum_inner); |

3254 | } |

3255 | } |

3256 | |

3257 | if (variant == negate_variant) |

3258 | { |

3259 | val_so_far = -val_so_far; |

3260 | accum = expand_unop (mode, neg_optab, accum, target, 0); |

3261 | } |

3262 | else if (variant == add_variant) |

3263 | { |

3264 | val_so_far = val_so_far + 1; |

3265 | accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target); |

3266 | } |

3267 | |

3268 | /* Compare only the bits of val and val_so_far that are significant |

3269 | in the result mode, to avoid sign-/zero-extension confusion. */ |

3270 | nmode = GET_MODE_INNER (mode); |

3271 | val &= GET_MODE_MASK (nmode); |

3272 | val_so_far &= GET_MODE_MASK (nmode); |

3273 | gcc_assert (val == (HOST_WIDE_INT) val_so_far); |

3274 | |

3275 | return accum; |

3276 | } |
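/* A sketch of the bookkeeping above for VAL == 10 (illustrative only,
   not GCC code): the sequence alg_m, alg_add_t2_m (log 2), alg_shift
   (log 1) computes x, (x << 2) + x and then a final shift, while
   val_so_far tracks 1, 5, 10, which the assertion compares to VAL.  */
#if 0
static unsigned int
example_mult_by_10 (unsigned int x)
{
  unsigned int accum = x;	/* alg_m:        val_so_far = 1.  */
  accum = (accum << 2) + x;	/* alg_add_t2_m: val_so_far = 5.  */
  accum <<= 1;			/* alg_shift:    val_so_far = 10.  */
  return accum;
}
#endif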

3277 | |

3278 | /* Perform a multiplication and return an rtx for the result. |

3279 | MODE is mode of value; OP0 and OP1 are what to multiply (rtx's); |

3280 | TARGET is a suggestion for where to store the result (an rtx). |

3281 | |

3282 | We check specially for a constant integer as OP1. |

3283 | If you want this check for OP0 as well, then before calling |

3284 | you should swap the two operands if OP0 would be constant. */ |

3285 | |

3286 | rtx |

3287 | expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target, |

3288 | int unsignedp, bool no_libcall) |

3289 | { |

3290 | enum mult_variant variant; |

3291 | struct algorithm algorithm; |

3292 | rtx scalar_op1; |

3293 | int max_cost; |

3294 | bool speed = optimize_insn_for_speed_p (); |

3295 | bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp; |

3296 | |

3297 | if (CONSTANT_P (op0)) |

3298 | std::swap (op0, op1); |

3299 | |

3300 | /* For vectors, there are several simplifications that can be made if |

3301 | all elements of the vector constant are identical. */ |

3302 | scalar_op1 = unwrap_const_vec_duplicate (op1); |

3303 | |

3304 | if (INTEGRAL_MODE_P (mode)) |

3305 | { |

3306 | rtx fake_reg; |

3307 | HOST_WIDE_INT coeff; |

3308 | bool is_neg; |

3309 | int mode_bitsize; |

3310 | |

3311 | if (op1 == CONST0_RTX (mode)) |

3312 | return op1; |

3313 | if (op1 == CONST1_RTX (mode)) |

3314 | return op0; |

3315 | if (op1 == CONSTM1_RTX (mode)) |

3316 | return expand_unop (mode, do_trapv ? negv_optab : neg_optab, |

3317 | op0, target, 0); |

3318 | |

3319 | if (do_trapv) |

3320 | goto skip_synth; |

3321 | |

3322 | /* If MODE is an integer vector mode, check whether the backend supports |

3323 | vector lshift (by scalar or vector) at all. If not, we can't use |

3324 | a synthesized multiply. */ |

3325 | if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT |

3326 | && optab_handler (vashl_optab, mode) == CODE_FOR_nothing |

3327 | && optab_handler (ashl_optab, mode) == CODE_FOR_nothing) |

3328 | goto skip_synth; |

3329 | |

3330 | /* These are the operations that are potentially turned into |

3331 | a sequence of shifts and additions. */ |

3332 | mode_bitsize = GET_MODE_UNIT_BITSIZE (mode); |

3333 | |

3334 | /* synth_mult does an `unsigned int' multiply. As long as the mode is |

3335 | less than or equal in size to `unsigned int' this doesn't matter. |

3336 | If the mode is larger than `unsigned int', then synth_mult works |

3337 | only if the constant value exactly fits in an `unsigned int' without |

3338 | any truncation. This means that multiplying by negative values does |

3339 | not work; results are off by 2^32 on a 32-bit machine. */ |

3340 | if (CONST_INT_P (scalar_op1)) |

3341 | { |

3342 | coeff = INTVAL (scalar_op1); |

3343 | is_neg = coeff < 0; |

3344 | } |

3345 | #if TARGET_SUPPORTS_WIDE_INT |

3346 | else if (CONST_WIDE_INT_P (scalar_op1)) |

3347 | #else |

3348 | else if (CONST_DOUBLE_AS_INT_P (scalar_op1)) |

3349 | #endif |

3350 | { |

3351 | int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode)); |

3352 | /* Perfect power of 2 (other than 1, which is handled above). */ |

3353 | if (shift > 0) |

3354 | return expand_shift (LSHIFT_EXPR, mode, op0, |

3355 | shift, target, unsignedp); |

3356 | else |

3357 | goto skip_synth; |

3358 | } |

3359 | else |

3360 | goto skip_synth; |

3361 | |

3362 | /* We used to test optimize here, on the grounds that it's better to |

3363 | produce a smaller program when -O is not used. But this causes |

3364 | such a terrible slowdown sometimes that it seems better to always |

3365 | use synth_mult. */ |

3366 | |

3367 | /* Special case powers of two. */ |

3368 | if (EXACT_POWER_OF_2_OR_ZERO_P (coeff) |

3369 | && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)) |

3370 | return expand_shift (LSHIFT_EXPR, mode, op0, |

3371 | floor_log2 (coeff), target, unsignedp); |

3372 | |

3373 | fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); |

3374 | |

3375 | /* Attempt to handle multiplication of DImode values by negative |

3376 | coefficients, by performing the multiplication by a positive |

3377 | multiplier and then inverting the result. */ |

3378 | if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT) |

3379 | { |

3380 | It's safe to use -coeff even for INT_MIN, as the |

3381 | result is interpreted as an unsigned coefficient. |

3382 | Exclude cost of op0 from max_cost to match the cost |

3383 | calculation of the synth_mult. */ |

3384 | coeff = -(unsigned HOST_WIDE_INT) coeff; |

3385 | max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), |

3386 | mode, speed) |

3387 | - neg_cost (speed, mode)); |

3388 | if (max_cost <= 0) |

3389 | goto skip_synth; |

3390 | |

3391 | /* Special case powers of two. */ |

3392 | if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)) |

3393 | { |

3394 | rtx temp = expand_shift (LSHIFT_EXPR, mode, op0, |

3395 | floor_log2 (coeff), target, unsignedp); |

3396 | return expand_unop (mode, neg_optab, temp, target, 0); |

3397 | } |

3398 | |

3399 | if (choose_mult_variant (mode, coeff, &algorithm, &variant, |

3400 | max_cost)) |

3401 | { |

3402 | rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX, |

3403 | &algorithm, variant); |

3404 | return expand_unop (mode, neg_optab, temp, target, 0); |

3405 | } |

3406 | goto skip_synth; |

3407 | } |

3408 | |

3409 | /* Exclude cost of op0 from max_cost to match the cost |

3410 | calculation of the synth_mult. */ |

3411 | max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed); |

3412 | if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost)) |

3413 | return expand_mult_const (mode, op0, coeff, target, |

3414 | &algorithm, variant); |

3415 | } |

3416 | skip_synth: |

3417 | |

3418 | /* Expand x*2.0 as x+x. */ |

3419 | if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1) |

3420 | && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2)) |

3421 | { |

3422 | op0 = force_reg (GET_MODE (op0), op0); |

3423 | return expand_binop (mode, add_optab, op0, op0, |

3424 | target, unsignedp, |

3425 | no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN); |

3426 | } |

3427 | |

3428 | /* This used to use umul_optab if unsigned, but for non-widening multiply |

3429 | there is no difference between signed and unsigned. */ |

3430 | op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab, |

3431 | op0, op1, target, unsignedp, |

3432 | no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN); |

3433 | gcc_assert (op0 || no_libcall); |

3434 | return op0; |

3435 | } |
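/* An illustrative sketch (not GCC code) of the negative-coefficient
   path above: a multiply by -6 becomes a synthesized multiply by 6
   followed by a negation.  */
#if 0
static long long
example_mult_by_minus_6 (long long x)
{
  long long t = (x + (x << 1)) << 1;	/* x * 6 == (x * 3) << 1.  */
  return -t;				/* Negate to get x * -6.  */
}
#endif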

3436 | |

3437 | /* Return a cost estimate for multiplying a register by the given |

3438 | COEFFicient in the given MODE and SPEED. */ |

3439 | |

3440 | int |

3441 | mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed) |

3442 | { |

3443 | int max_cost; |

3444 | struct algorithm algorithm; |

3445 | enum mult_variant variant; |

3446 | |

3447 | rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1); |

3448 | max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), |

3449 | mode, speed); |

3450 | if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost)) |

3451 | return algorithm.cost.cost; |

3452 | else |

3453 | return max_cost; |

3454 | } |

3455 | |

3456 | /* Perform a widening multiplication and return an rtx for the result. |

3457 | MODE is mode of value; OP0 and OP1 are what to multiply (rtx's); |

3458 | TARGET is a suggestion for where to store the result (an rtx). |

3459 | THIS_OPTAB is the optab we should use, it must be either umul_widen_optab |

3460 | or smul_widen_optab. |

3461 | |

3462 | We check specially for a constant integer as OP1, comparing the |

3463 | cost of a widening multiply against the cost of a sequence of shifts |

3464 | and adds. */ |

3465 | |

3466 | rtx |

3467 | expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target, |

3468 | int unsignedp, optab this_optab) |

3469 | { |

3470 | bool speed = optimize_insn_for_speed_p (); |

3471 | rtx cop1; |

3472 | |

3473 | if (CONST_INT_P (op1) |

3474 | && GET_MODE (op0) != VOIDmode |

3475 | && (cop1 = convert_modes (mode, GET_MODE (op0), op1, |

3476 | this_optab == umul_widen_optab)) |

3477 | && CONST_INT_P (cop1) |

3478 | && (INTVAL (cop1) >= 0 |

3479 | || HWI_COMPUTABLE_MODE_P (mode))) |

3480 | { |

3481 | HOST_WIDE_INT coeff = INTVAL (cop1); |

3482 | int max_cost; |

3483 | enum mult_variant variant; |

3484 | struct algorithm algorithm; |

3485 | |

3486 | if (coeff == 0) |

3487 | return CONST0_RTX (mode); |

3488 | |

3489 | /* Special case powers of two. */ |

3490 | if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)) |

3491 | { |

3492 | op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab); |

3493 | return expand_shift (LSHIFT_EXPR, mode, op0, |

3494 | floor_log2 (coeff), target, unsignedp); |

3495 | } |

3496 | |

3497 | /* Exclude cost of op0 from max_cost to match the cost |

3498 | calculation of the synth_mult. */ |

3499 | max_cost = mul_widen_cost (speed, mode); |

3500 | if (choose_mult_variant (mode, coeff, &algorithm, &variant, |

3501 | max_cost)) |

3502 | { |

3503 | op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab); |

3504 | return expand_mult_const (mode, op0, coeff, target, |

3505 | &algorithm, variant); |

3506 | } |

3507 | } |

3508 | return expand_binop (mode, this_optab, op0, op1, target, |

3509 | unsignedp, OPTAB_LIB_WIDEN); |

3510 | } |

3511 | |

3512 | /* Choose a minimal N + 1 bit approximation to 1/D that can be used to |

3513 | replace division by D, and put the least significant N bits of the result |

3514 | in *MULTIPLIER_PTR and return the most significant bit. |

3515 | |

3516 | The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT); the |

3517 | needed precision is in PRECISION (should be <= N). |

3518 | |

3519 | PRECISION should be as small as possible so this function can choose |

3520 | the multiplier more freely. |

3521 | |

3522 | The rounded-up logarithm of D is placed in *LGUP_PTR. A shift count that |

3523 | is to be used for a final right shift is placed in *POST_SHIFT_PTR. |

3524 | |

3525 | Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR), |

3526 | where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */ |

3527 | |

3528 | unsigned HOST_WIDE_INT |

3529 | choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision, |

3530 | unsigned HOST_WIDE_INT *multiplier_ptr, |

3531 | int *post_shift_ptr, int *lgup_ptr) |

3532 | { |

3533 | int lgup, post_shift; |

3534 | int pow, pow2; |

3535 | |

3536 | /* lgup = ceil(log2(divisor)); */ |

3537 | lgup = ceil_log2 (d); |

3538 | |

3539 | gcc_assert (lgup <= n); |

3540 | |

3541 | pow = n + lgup; |

3542 | pow2 = n + lgup - precision; |

3543 | |

3544 | /* mlow = 2^(N + lgup)/d */ |

3545 | wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT); |

3546 | wide_int mlow = wi::udiv_trunc (val, d); |

3547 | |

3548 | /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */ |

3549 | val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT); |

3550 | wide_int mhigh = wi::udiv_trunc (val, d); |

3551 | |

3552 | /* If precision == N, then mlow, mhigh exceed 2^N |

3553 | (but they do not exceed 2^(N+1)). */ |

3554 | |

3555 | /* Reduce to lowest terms. */ |

3556 | for (post_shift = lgup; post_shift > 0; post_shift--) |

3557 | { |

3558 | unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1, |

3559 | HOST_BITS_PER_WIDE_INT); |

3560 | unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1, |

3561 | HOST_BITS_PER_WIDE_INT); |

3562 | if (ml_lo >= mh_lo) |

3563 | break; |

3564 | |

3565 | mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT); |

3566 | mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT); |

3567 | } |

3568 | |

3569 | *post_shift_ptr = post_shift; |

3570 | *lgup_ptr = lgup; |

3571 | if (n < HOST_BITS_PER_WIDE_INT) |

3572 | { |

3573 | unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1; |

3574 | *multiplier_ptr = mhigh.to_uhwi () & mask; |

3575 | return mhigh.to_uhwi () > mask; |

3576 | } |

3577 | else |

3578 | { |

3579 | *multiplier_ptr = mhigh.to_uhwi (); |

3580 | return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1); |

3581 | } |

3582 | } |
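/* A self-contained sketch of the underlying identity (illustrative,
   not GCC code): with lgup = ceil(log2(d)) and the (N + 1)-bit
   multiplier m = ceil(2^(N + lgup) / d), every 0 <= x < 2^N satisfies
   x / d == (x * m) >> (N + lgup).  choose_multiplier computes mhigh
   with a little more slack (controlled by PRECISION), but the check
   below verifies the basic scheme exhaustively for N == 16.  */
#if 0
#include <stdint.h>
#include <assert.h>

static void
example_check_multiplier (uint32_t d)	/* Requires 0 < d < 2^16.  */
{
  int lgup = 0;
  while ((1u << lgup) < d)		/* lgup = ceil(log2(d)).  */
    lgup++;
  uint64_t m = (((uint64_t) 1 << (16 + lgup)) + d - 1) / d;
  for (uint32_t x = 0; x < (1u << 16); x++)
    assert ((x * m) >> (16 + lgup) == x / d);
}
#endif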

3583 | |

3584 | /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is |

3585 | congruent to 1 (mod 2**N). */ |

3586 | |

3587 | static unsigned HOST_WIDE_INT |

3588 | invert_mod2n (unsigned HOST_WIDE_INT x, int n) |

3589 | { |

3590 | /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */ |

3591 | |

3592 | /* The algorithm notes that the choice y = x satisfies |

3593 | x*y == 1 mod 2^3, since x is assumed odd. |

3594 | Each iteration doubles the number of bits of significance in y. */ |

3595 | |

3596 | unsigned HOST_WIDE_INT mask; |

3597 | unsigned HOST_WIDE_INT y = x; |

3598 | int nbit = 3; |

3599 | |

3600 | mask = (n == HOST_BITS_PER_WIDE_INT |

3601 | ? HOST_WIDE_INT_M1U |

3602 | : (HOST_WIDE_INT_1U << n) - 1); |

3603 | |

3604 | while (nbit < n) |

3605 | { |

3606 | y = y * (2 - x*y) & mask; /* Modulo 2^N */ |

3607 | nbit *= 2; |

3608 | } |

3609 | return y; |

3610 | } |
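/* The same Newton/Hensel iteration at full 64-bit width (an
   illustrative sketch, not GCC code): y = x is correct to 3 bits for
   odd x, and each step doubles the number of correct low bits, so
   five iterations suffice for n == 64.  */
#if 0
#include <stdint.h>

static uint64_t
example_invert_mod2_64 (uint64_t x)	/* X must be odd.  */
{
  uint64_t y = x;			/* x * x == 1 (mod 8).  */
  for (int nbit = 3; nbit < 64; nbit *= 2)
    y = y * (2 - x * y);		/* Unsigned wraparound is mod 2^64.  */
  return y;
}

/* example_invert_mod2_64 (7) * 7 == 1 in uint64_t arithmetic.  */
#endif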

3611 | |

3612 | /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1 |

3613 | in the wrong signedness flavor. ADJ_OPERAND is already the high half |

3614 | of the product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed |

3615 | product to become unsigned; if UNSIGNEDP is zero, adjust the unsigned |

3616 | product to become signed. |

3617 | |

3618 | The result is put in TARGET if that is convenient. |

3619 | |

3620 | MODE is the mode of operation. */ |

3621 | |

3622 | rtx |

3623 | expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0, |

3624 | rtx op1, rtx target, int unsignedp) |

3625 | { |

3626 | rtx tem; |

3627 | enum rtx_code adj_code = unsignedp ? PLUS : MINUS; |

3628 | |

3629 | tem = expand_shift (RSHIFT_EXPR, mode, op0, |

3630 | GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0); |

3631 | tem = expand_and (mode, tem, op1, NULL_RTX); |

3632 | adj_operand |

3633 | = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem), |

3634 | adj_operand); |

3635 | |

3636 | tem = expand_shift (RSHIFT_EXPR, mode, op1, |

3637 | GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0); |

3638 | tem = expand_and (mode, tem, op0, NULL_RTX); |

3639 | target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem), |

3640 | target); |

3641 | |

3642 | return target; |

3643 | } |
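/* A 32-bit sketch of the identity implemented above (illustrative,
   not GCC code): the unsigned and signed high products differ by
   ((a >> 31) & b) + ((b >> 31) & a), i.e. by b when a is negative and
   by a when b is negative; the two shift/and/add-or-subtract rounds
   above apply exactly this correction to ADJ_OPERAND.  */
#if 0
#include <stdint.h>

static int32_t
example_smulhi_from_umulhi (int32_t a, int32_t b)
{
  uint32_t hi_u = (uint32_t) (((uint64_t) (uint32_t) a
			       * (uint32_t) b) >> 32);
  hi_u -= (uint32_t) ((a >> 31) & b);	/* Subtract b if a < 0.  */
  hi_u -= (uint32_t) ((b >> 31) & a);	/* Subtract a if b < 0.  */
  return (int32_t) hi_u;		/* The signed high part.  */
}
#endif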

3644 | |

3645 | /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */ |

3646 | |

3647 | static rtx |

3648 | extract_high_half (scalar_int_mode mode, rtx op) |

3649 | { |

3650 | if (mode == word_mode) |

3651 | return gen_highpart (mode, op); |

3652 | |

3653 | scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require (); |

3654 | |

3655 | op = expand_shift (RSHIFT_EXPR, wider_mode, op, |

3656 | GET_MODE_BITSIZE (mode), 0, 1); |

3657 | return convert_modes (mode, wider_mode, op, 0); |

3658 | } |
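/* In C terms the operation above is simply a widening shift followed
   by truncation (illustrative sketch, assuming a 64-bit OP and a
   32-bit MODE):  (uint32_t) (op >> 32).  */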

3659 | |

3660 | /* Like expmed_mult_highpart, but only consider using a multiplication |

3661 | optab. OP1 is an rtx for the constant operand. */ |

3662 | |

3663 | static rtx |

3664 | expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1, |

3665 | rtx target, int unsignedp, int max_cost) |

3666 | { |

3667 | rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode); |

3668 | optab moptab; |

3669 | rtx tem; |

3670 | int size; |

3671 | bool speed = optimize_insn_for_speed_p (); |

3672 | |

3673 | scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require (); |

3674 | |

3675 | size = GET_MODE_BITSIZE (mode); |

3676 | |

3677 | /* Firstly, try using a multiplication insn that only generates the needed |

3678 | high part of the product, and in the sign flavor of unsignedp. */ |

3679 | if (mul_highpart_cost (speed, mode) < max_cost) |

3680 | { |

3681 | moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab; |

3682 | tem = expand_binop (mode, moptab, op0, narrow_op1, target, |

3683 | unsignedp, OPTAB_DIRECT); |

3684 | if (tem) |

3685 | return tem; |

3686 | } |

3687 | |

3688 | /* Secondly, the same as above, but use the sign flavor opposite of |

3689 | unsignedp; we then need to adjust the result after the multiplication. */ |

3690 | if (size - 1 < BITS_PER_WORD |

3691 | && (mul_highpart_cost (speed, mode) |

3692 | + 2 * shift_cost (speed, mode, size-1) |

3693 | + 4 * add_cost (speed, mode) < max_cost)) |

3694 | { |

3695 | moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab; |

3696 | tem = expand_binop (mode, moptab, op0, narrow_op1, target, |

3697 | unsignedp, OPTAB_DIRECT); |

3698 | if (tem) |

3699 | /* We used the wrong signedness. Adjust the result. */ |

3700 | return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1, |

3701 | tem, unsignedp); |

3702 | } |

3703 | |

3704 | /* Try widening multiplication. */ |

3705 | moptab = unsignedp ? umul_widen_optab : smul_widen_optab; |

3706 | if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing |

3707 | && mul_widen_cost (speed, wider_mode) < max_cost) |

3708 | { |

3709 | tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0, |

3710 | unsignedp, OPTAB_WIDEN); |

3711 | if (tem) |

3712 | return extract_high_half (mode, tem); |

3713 | } |

3714 | |

3715 | /* Try widening the mode and performing a non-widening multiplication. */ |

3716 | if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing |

3717 | && size - 1 < BITS_PER_WORD |

3718 | && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1) |

3719 | < max_cost)) |

3720 | { |

3721 | rtx_insn *insns; |

3722 | rtx wop0, wop1; |

3723 | |

3724 | /* We need to widen the operands, for example to ensure the |

3725 | constant multiplier is correctly sign or zero extended. |

3726 | Use a sequence to clean up any instructions emitted by |

3727 | the conversions if things don't work out. */ |

3728 | start_sequence (); |

3729 | wop0 = convert_modes (wider_mode, mode, op0, unsignedp); |

3730 | wop1 = convert_modes (wider_mode, mode, op1, unsignedp); |

3731 | tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0, |

3732 | unsignedp, OPTAB_WIDEN); |

3733 | insns = get_insns (); |

3734 | end_sequence (); |

3735 | |

3736 | if (tem) |

3737 | { |

3738 | emit_insn (insns); |

3739 | return extract_high_half (mode, tem); |

3740 | } |

3741 | } |

3742 | |

3743 | /* Try widening multiplication of opposite signedness, and adjust. */ |

3744 | moptab = unsignedp ? smul_widen_optab : umul_widen_optab; |

3745 | if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing |

3746 | && size - 1 < BITS_PER_WORD |

3747 | && (mul_widen_cost (speed, wider_mode) |

3748 | + 2 * shift_cost (speed, mode, size-1) |

3749 | + 4 * add_cost (speed, mode) < max_cost)) |

3750 | { |

3751 | tem = expand_binop (wider_mode, moptab, op0, narrow_op1, |

3752 | NULL_RTX, ! unsignedp, OPTAB_WIDEN); |

3753 | if (tem != 0) |

3754 | { |

3755 | tem = extract_high_half (mode, tem); |

3756 | /* We used the wrong signedness. Adjust the result. */ |

3757 | return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1, |

3758 | target, unsignedp); |

3759 | } |

3760 | } |

3761 | |

3762 | return 0; |

3763 | } |

3764 | |

3765 | /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant), |

3766 | putting the high half of the result in TARGET if that is convenient, |

3767 | and return where the result is. If the operation cannot be performed, |

3768 | 0 is returned. |

3769 | |

3770 | MODE is the mode of operation and result. |

3771 | |

3772 | UNSIGNEDP nonzero means unsigned multiply. |

3773 | |

3774 | MAX_COST is the total allowed cost for the expanded RTL. */ |

3775 | |

3776 | static rtx |

3777 | expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1, |

3778 | rtx target, int unsignedp, int max_cost) |

3779 | { |

3780 | unsigned HOST_WIDE_INT cnst1; |

3781 | int extra_cost; |

3782 | bool sign_adjust = false; |

3783 | enum mult_variant variant; |

3784 | struct algorithm alg; |

3785 | rtx tem; |

3786 | bool speed = optimize_insn_for_speed_p (); |

3787 | |

3788 | /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */ |

3789 | gcc_assert (HWI_COMPUTABLE_MODE_P (mode)); |

3790 | |

3791 | cnst1 = INTVAL (op1) & GET_MODE_MASK (mode); |

3792 | |

3793 | /* We can't optimize modes wider than BITS_PER_WORD. |

3794 | ??? We might be able to perform double-word arithmetic if |

3795 | mode == word_mode; however, all the cost calculations in |

3796 | synth_mult etc. assume single-word operations. */ |

3797 | scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require (); |

3798 | if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD) |

3799 | return expmed_mult_highpart_optab (mode, op0, op1, target, |

3800< |