1 | /* Global, SSA-based optimizations using mathematical identities. |
---|---|

2 | Copyright (C) 2005-2017 Free Software Foundation, Inc. |

3 | |

4 | This file is part of GCC. |

5 | |

6 | GCC is free software; you can redistribute it and/or modify it |

7 | under the terms of the GNU General Public License as published by the |

8 | Free Software Foundation; either version 3, or (at your option) any |

9 | later version. |

10 | |

11 | GCC is distributed in the hope that it will be useful, but WITHOUT |

12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |

13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |

14 | for more details. |

15 | |

16 | You should have received a copy of the GNU General Public License |

17 | along with GCC; see the file COPYING3. If not see |

18 | <http://www.gnu.org/licenses/>. */ |

19 | |

20 | /* Currently, the only mini-pass in this file tries to CSE reciprocal |

21 | operations. These are common in sequences such as this one: |

22 | |

23 | modulus = sqrt(x*x + y*y + z*z); |

24 | x = x / modulus; |

25 | y = y / modulus; |

26 | z = z / modulus; |

27 | |

28 | that can be optimized to |

29 | |

30 | modulus = sqrt(x*x + y*y + z*z); |

31 | rmodulus = 1.0 / modulus; |

32 | x = x * rmodulus; |

33 | y = y * rmodulus; |

34 | z = z * rmodulus; |

35 | |

36 | We do this for loop invariant divisors, and with this pass whenever |

37 | we notice that a division has the same divisor multiple times. |

38 | |

39 | Of course, like in PRE, we don't insert a division if a dominator |

40 | already has one. However, this cannot be done as an extension of |

41 | PRE for several reasons. |

42 | |

43 | First of all, with some experiments it was found out that the |

44 | transformation is not always useful if there are only two divisions |

45 | by the same divisor. This is probably because modern processors |

46 | can pipeline the divisions; on older, in-order processors it should |

47 | still be effective to optimize two divisions by the same number. |

48 | We make this a param, and it shall be called N in the remainder of |

49 | this comment. |

50 | |

51 | Second, if trapping math is active, we have less freedom on where |

52 | to insert divisions: we can only do so in basic blocks that already |

53 | contain one. (If divisions don't trap, instead, we can insert |

54 | divisions elsewhere, which will be in blocks that are common dominators |

55 | of those that have the division). |

56 | |

57 | We really don't want to compute the reciprocal unless a division will |

58 | be found. To do this, we won't insert the division in a basic block |

59 | that has less than N divisions *post-dominating* it. |

60 | |

61 | The algorithm constructs a subset of the dominator tree, holding the |

62 | blocks containing the divisions and the common dominators to them, |

63 | and walks it twice. The first walk is in post-order, and it annotates |

64 | each block with the number of divisions that post-dominate it: this |

65 | gives information on where divisions can be inserted profitably. |

66 | The second walk is in pre-order, and it inserts divisions as explained |

67 | above, and replaces divisions by multiplications. |

68 | |

69 | In the best case, the cost of the pass is O(n_statements). In the |

70 | worst-case, the cost is due to creating the dominator tree subset, |

71 | with a cost of O(n_basic_blocks ^ 2); however this can only happen |

72 | for n_statements / n_basic_blocks statements. So, the amortized cost |

73 | of creating the dominator tree subset is O(n_basic_blocks) and the |

74 | worst-case cost of the pass is O(n_statements * n_basic_blocks). |

75 | |

76 | More practically, the cost will be small because there are few |

77 | divisions, and they tend to be in the same basic block, so insert_bb |

78 | is called very few times. |

79 | |

80 | If we did this using domwalk.c, an efficient implementation would have |

81 | to work on all the variables in a single pass, because we could not |

82 | work on just a subset of the dominator tree, as we do now, and the |

83 | cost would also be something like O(n_statements * n_basic_blocks). |

84 | The data structures would be more complex in order to work on all the |

85 | variables in a single pass. */ |

86 | |

87 | #include "config.h" |

88 | #include "system.h" |

89 | #include "coretypes.h" |

90 | #include "backend.h" |

91 | #include "target.h" |

92 | #include "rtl.h" |

93 | #include "tree.h" |

94 | #include "gimple.h" |

95 | #include "predict.h" |

96 | #include "alloc-pool.h" |

97 | #include "tree-pass.h" |

98 | #include "ssa.h" |

99 | #include "optabs-tree.h" |

100 | #include "gimple-pretty-print.h" |

101 | #include "alias.h" |

102 | #include "fold-const.h" |

103 | #include "gimple-fold.h" |

104 | #include "gimple-iterator.h" |

105 | #include "gimplify.h" |

106 | #include "gimplify-me.h" |

107 | #include "stor-layout.h" |

108 | #include "tree-cfg.h" |

109 | #include "tree-dfa.h" |

110 | #include "tree-ssa.h" |

111 | #include "builtins.h" |

112 | #include "params.h" |

113 | #include "internal-fn.h" |

114 | #include "case-cfn-macros.h" |

115 | #include "optabs-libfuncs.h" |

116 | #include "tree-eh.h" |

117 | #include "targhooks.h" |

118 | |

/* This structure represents one basic block that either computes a
   division, or is a common dominator for basic blocks that compute a
   division.  */
struct occurrence {
  /* The basic block represented by this structure.  */
  basic_block bb;

  /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
     inserted in BB.  */
  tree recip_def;

  /* If non-NULL, the SSA_NAME holding the definition for a squared
     reciprocal inserted in BB.  */
  tree square_recip_def;

  /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
     was inserted in BB.  */
  gimple *recip_def_stmt;

  /* Pointer to a list of "struct occurrence"s for blocks dominated
     by BB.  */
  struct occurrence *children;

  /* Pointer to the next "struct occurrence"s in the list of blocks
     sharing a common dominator.  */
  struct occurrence *next;

  /* The number of divisions that are in BB before compute_merit.  The
     number of divisions that are in BB or post-dominate it after
     compute_merit.  */
  int num_divisions;

  /* True if the basic block has a division, false if it is a common
     dominator for basic blocks that do.  If it is false and trapping
     math is active, BB is not a candidate for inserting a reciprocal.  */
  bool bb_has_division;
};

156 | |

/* Counters for the reciprocal CSE pass, reported via
   statistics_counter_event at the end of the pass.  */
static struct
{
  /* Number of 1.0/X ops inserted.  */
  int rdivs_inserted;

  /* Number of 1.0/FUNC ops inserted.  */
  int rfuncs_inserted;
} reciprocal_stats;

/* Counters for the sincos CSE pass.  */
static struct
{
  /* Number of cexpi calls inserted.  */
  int inserted;
} sincos_stats;

/* Counters for the widening-multiply pass.  */
static struct
{
  /* Number of widening multiplication ops inserted.  */
  int widen_mults_inserted;

  /* Number of integer multiply-and-accumulate ops inserted.  */
  int maccs_inserted;

  /* Number of fp fused multiply-add ops inserted.  */
  int fmas_inserted;

  /* Number of divmod calls inserted.  */
  int divmod_calls_inserted;
} widen_mul_stats;

/* The instance of "struct occurrence" representing the highest
   interesting block in the dominator tree.  */
static struct occurrence *occ_head;

/* Allocation pool for getting instances of "struct occurrence".  */
static object_allocator<occurrence> *occ_pool;

193 | |

194 | |

195 | |

196 | /* Allocate and return a new struct occurrence for basic block BB, and |

197 | whose children list is headed by CHILDREN. */ |

198 | static struct occurrence * |

199 | occ_new (basic_block bb, struct occurrence *children) |

200 | { |

201 | struct occurrence *occ; |

202 | |

203 | bb->aux = occ = occ_pool->allocate (); |

204 | memset (occ, 0, sizeof (struct occurrence)); |

205 | |

206 | occ->bb = bb; |

207 | occ->children = children; |

208 | return occ; |

209 | } |

210 | |

211 | |

/* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
   list of "struct occurrence"s, one per basic block, having IDOM as
   their common dominator.

   We try to insert NEW_OCC as deep as possible in the tree, and we also
   insert any other block that is a common dominator for BB and one
   block already in the tree.  */

static void
insert_bb (struct occurrence *new_occ, basic_block idom,
	   struct occurrence **p_head)
{
  struct occurrence *occ, **p_occ;

  /* Scan the sibling list under IDOM, relating NEW_OCC's block to each
     entry via their nearest common dominator.  */
  for (p_occ = p_head; (occ = *p_occ) != NULL; )
    {
      basic_block bb = new_occ->bb, occ_bb = occ->bb;
      basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
      if (dom == bb)
	{
	  /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
	     from its list.  */
	  *p_occ = occ->next;
	  occ->next = new_occ->children;
	  new_occ->children = occ;

	  /* Try the next block (it may as well be dominated by BB).  */
	}

      else if (dom == occ_bb)
	{
	  /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
	  insert_bb (new_occ, dom, &occ->children);
	  return;
	}

      else if (dom != idom)
	{
	  /* DOM was not in the tree yet, or occ_new would have been
	     reached through its aux field.  */
	  gcc_assert (!dom->aux);

	  /* There is a dominator between IDOM and BB, add it and make
	     two children out of NEW_OCC and OCC.  First, remove OCC from
	     its list.  */
	  *p_occ = occ->next;
	  new_occ->next = occ;
	  occ->next = NULL;

	  /* None of the previous blocks has DOM as a dominator: if we tail
	     recursed, we would reexamine them uselessly.  Just switch BB with
	     DOM, and go on looking for blocks dominated by DOM.  */
	  new_occ = occ_new (dom, new_occ);
	}

      else
	{
	  /* Nothing special, go on with the next element.  */
	  p_occ = &occ->next;
	}
    }

  /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
  new_occ->next = *p_head;
  *p_head = new_occ;
}

276 | |

277 | /* Register that we found a division in BB. |

278 | IMPORTANCE is a measure of how much weighting to give |

279 | that division. Use IMPORTANCE = 2 to register a single |

280 | division. If the division is going to be found multiple |

281 | times use 1 (as it is with squares). */ |

282 | |

283 | static inline void |

284 | register_division_in (basic_block bb, int importance) |

285 | { |

286 | struct occurrence *occ; |

287 | |

288 | occ = (struct occurrence *) bb->aux; |

289 | if (!occ) |

290 | { |

291 | occ = occ_new (bb, NULL); |

292 | insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head); |

293 | } |

294 | |

295 | occ->bb_has_division = true; |

296 | occ->num_divisions += importance; |

297 | } |

298 | |

299 | |

/* Compute the number of divisions that postdominate each block in OCC and
   its children.  */

static void
compute_merit (struct occurrence *occ)
{
  struct occurrence *occ_child;
  basic_block dom = occ->bb;

  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
    {
      basic_block bb;
      if (occ_child->children)
	compute_merit (occ_child);

      /* With exceptions enabled, DOM itself may terminate early via an
	 EH edge, so judge post-dominance from its single non-EH
	 successor instead.  */
      if (flag_exceptions)
	bb = single_noncomplex_succ (dom);
      else
	bb = dom;

      /* A child's divisions are guaranteed to execute whenever BB does
	 only if the child's block post-dominates BB; only then do they
	 count towards inserting a reciprocal at BB.  */
      if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
	occ->num_divisions += occ_child->num_divisions;
    }
}

324 | |

325 | |

326 | /* Return whether USE_STMT is a floating-point division by DEF. */ |

327 | static inline bool |

328 | is_division_by (gimple *use_stmt, tree def) |

329 | { |

330 | return is_gimple_assign (use_stmt) |

331 | && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR |

332 | && gimple_assign_rhs2 (use_stmt) == def |

333 | /* Do not recognize x / x as valid division, as we are getting |

334 | confused later by replacing all immediate uses x in such |

335 | a stmt. */ |

336 | && gimple_assign_rhs1 (use_stmt) != def; |

337 | } |

338 | |

339 | /* Return whether USE_STMT is DEF * DEF. */ |

340 | static inline bool |

341 | is_square_of (gimple *use_stmt, tree def) |

342 | { |

343 | if (gimple_code (use_stmt) == GIMPLE_ASSIGN |

344 | && gimple_assign_rhs_code (use_stmt) == MULT_EXPR) |

345 | { |

346 | tree op0 = gimple_assign_rhs1 (use_stmt); |

347 | tree op1 = gimple_assign_rhs2 (use_stmt); |

348 | |

349 | return op0 == op1 && op0 == def; |

350 | } |

351 | return 0; |

352 | } |

353 | |

354 | /* Return whether USE_STMT is a floating-point division by |

355 | DEF * DEF. */ |

356 | static inline bool |

357 | is_division_by_square (gimple *use_stmt, tree def) |

358 | { |

359 | if (gimple_code (use_stmt) == GIMPLE_ASSIGN |

360 | && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR |

361 | && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)) |

362 | { |

363 | tree denominator = gimple_assign_rhs2 (use_stmt); |

364 | if (TREE_CODE (denominator) == SSA_NAME) |

365 | { |

366 | return is_square_of (SSA_NAME_DEF_STMT (denominator), def); |

367 | } |

368 | } |

369 | return 0; |

370 | } |

371 | |

372 | /* Walk the subset of the dominator tree rooted at OCC, setting the |

373 | RECIP_DEF field to a definition of 1.0 / DEF that can be used in |

374 | the given basic block. The field may be left NULL, of course, |

375 | if it is not possible or profitable to do the optimization. |

376 | |

377 | DEF_BSI is an iterator pointing at the statement defining DEF. |

378 | If RECIP_DEF is set, a dominator already has a computation that can |

379 | be used. |

380 | |

381 | If should_insert_square_recip is set, then this also inserts |

382 | the square of the reciprocal immediately after the definition |

383 | of the reciprocal. */ |

384 | |

385 | static void |

386 | insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ, |

387 | tree def, tree recip_def, tree square_recip_def, |

388 | int should_insert_square_recip, int threshold) |

389 | { |

390 | tree type; |

391 | gassign *new_stmt, *new_square_stmt; |

392 | gimple_stmt_iterator gsi; |

393 | struct occurrence *occ_child; |

394 | |

395 | if (!recip_def |

396 | && (occ->bb_has_division || !flag_trapping_math) |

397 | /* Divide by two as all divisions are counted twice in |

398 | the costing loop. */ |

399 | && occ->num_divisions / 2 >= threshold) |

400 | { |

401 | /* Make a variable with the replacement and substitute it. */ |

402 | type = TREE_TYPE (def); |

403 | recip_def = create_tmp_reg (type, "reciptmp"); |

404 | new_stmt = gimple_build_assign (recip_def, RDIV_EXPR, |

405 | build_one_cst (type), def); |

406 | |

407 | if (should_insert_square_recip) |

408 | { |

409 | square_recip_def = create_tmp_reg (type, "powmult_reciptmp"); |

410 | new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR, |

411 | recip_def, recip_def); |

412 | } |

413 | |

414 | if (occ->bb_has_division) |

415 | { |

416 | /* Case 1: insert before an existing division. */ |

417 | gsi = gsi_after_labels (occ->bb); |

418 | while (!gsi_end_p (gsi) |

419 | && (!is_division_by (gsi_stmt (gsi), def)) |

420 | && (!is_division_by_square (gsi_stmt (gsi), def))) |

421 | gsi_next (&gsi); |

422 | |

423 | gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); |

424 | } |

425 | else if (def_gsi && occ->bb == def_gsi->bb) |

426 | { |

427 | /* Case 2: insert right after the definition. Note that this will |

428 | never happen if the definition statement can throw, because in |

429 | that case the sole successor of the statement's basic block will |

430 | dominate all the uses as well. */ |

431 | gsi = *def_gsi; |

432 | gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT); |

433 | } |

434 | else |

435 | { |

436 | /* Case 3: insert in a basic block not containing defs/uses. */ |

437 | gsi = gsi_after_labels (occ->bb); |

438 | gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); |

439 | } |

440 | |

441 | /* Regardless of which case the reciprocal as inserted in, |

442 | we insert the square immediately after the reciprocal. */ |

443 | if (should_insert_square_recip) |

444 | gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT); |

445 | |

446 | reciprocal_stats.rdivs_inserted++; |

447 | |

448 | occ->recip_def_stmt = new_stmt; |

449 | } |

450 | |

451 | occ->recip_def = recip_def; |

452 | occ->square_recip_def = square_recip_def; |

453 | for (occ_child = occ->children; occ_child; occ_child = occ_child->next) |

454 | insert_reciprocals (def_gsi, occ_child, def, recip_def, |

455 | square_recip_def, should_insert_square_recip, |

456 | threshold); |

457 | } |

458 | |

459 | /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)). |

460 | Take as argument the use for (x * x). */ |

461 | static inline void |

462 | replace_reciprocal_squares (use_operand_p use_p) |

463 | { |

464 | gimple *use_stmt = USE_STMT (use_p); |

465 | basic_block bb = gimple_bb (use_stmt); |

466 | struct occurrence *occ = (struct occurrence *) bb->aux; |

467 | |

468 | if (optimize_bb_for_speed_p (bb) && occ->square_recip_def |

469 | && occ->recip_def) |

470 | { |

471 | gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); |

472 | gimple_assign_set_rhs_code (use_stmt, MULT_EXPR); |

473 | gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def); |

474 | SET_USE (use_p, occ->square_recip_def); |

475 | fold_stmt_inplace (&gsi); |

476 | update_stmt (use_stmt); |

477 | } |

478 | } |

479 | |

480 | |

481 | /* Replace the division at USE_P with a multiplication by the reciprocal, if |

482 | possible. */ |

483 | |

484 | static inline void |

485 | replace_reciprocal (use_operand_p use_p) |

486 | { |

487 | gimple *use_stmt = USE_STMT (use_p); |

488 | basic_block bb = gimple_bb (use_stmt); |

489 | struct occurrence *occ = (struct occurrence *) bb->aux; |

490 | |

491 | if (optimize_bb_for_speed_p (bb) |

492 | && occ->recip_def && use_stmt != occ->recip_def_stmt) |

493 | { |

494 | gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); |

495 | gimple_assign_set_rhs_code (use_stmt, MULT_EXPR); |

496 | SET_USE (use_p, occ->recip_def); |

497 | fold_stmt_inplace (&gsi); |

498 | update_stmt (use_stmt); |

499 | } |

500 | } |

501 | |

502 | |

503 | /* Free OCC and return one more "struct occurrence" to be freed. */ |

504 | |

505 | static struct occurrence * |

506 | free_bb (struct occurrence *occ) |

507 | { |

508 | struct occurrence *child, *next; |

509 | |

510 | /* First get the two pointers hanging off OCC. */ |

511 | next = occ->next; |

512 | child = occ->children; |

513 | occ->bb->aux = NULL; |

514 | occ_pool->remove (occ); |

515 | |

516 | /* Now ensure that we don't recurse unless it is necessary. */ |

517 | if (!child) |

518 | return next; |

519 | else |

520 | { |

521 | while (next) |

522 | next = free_bb (next); |

523 | |

524 | return child; |

525 | } |

526 | } |

527 | |

528 | |

/* Look for floating-point divisions among DEF's uses, and try to
   replace them by multiplications with the reciprocal.  Add
   as many statements computing the reciprocal as needed.

   DEF_GSI is an iterator pointing at the statement defining DEF (or
   NULL for default definitions and PHI results).

   DEF must be a GIMPLE register of a floating-point type.  */

static void
execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
{
  use_operand_p use_p, square_use_p;
  imm_use_iterator use_iter, square_use_iter;
  tree square_def;
  struct occurrence *occ;
  /* Number of divisions by DEF.  */
  int count = 0;
  int threshold;
  /* Number of divisions by DEF * DEF (each counted twice, see below).  */
  int square_recip_count = 0;
  /* When DEF is itself X * X, the number of divisions by X.  */
  int sqrt_recip_count = 0;

  gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));
  threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));

  /* If this is a square (x * x), we should check whether there are
     enough divisions by x on its own to warrant waiting for that pass.  */
  if (TREE_CODE (def) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (def);

      if (is_gimple_assign (def_stmt)
	  && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
	  && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
	{
	  /* This statement is a square of something.  We should take this
	     into account, as it may be more profitable to not extract
	     the reciprocal here.  */
	  tree op0 = gimple_assign_rhs1 (def_stmt);
	  FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
	    {
	      gimple *use_stmt = USE_STMT (use_p);
	      if (is_division_by (use_stmt, op0))
		sqrt_recip_count ++;
	    }
	}
    }

  /* Register every division by DEF, and every division by a square of
     DEF, into the dominator-tree subset rooted at occ_head.  */
  FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_division_by (use_stmt, def))
	{
	  register_division_in (gimple_bb (use_stmt), 2);
	  count++;
	}

      if (is_square_of (use_stmt, def))
	{
	  square_def = gimple_assign_lhs (use_stmt);
	  FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
	    {
	      gimple *square_use_stmt = USE_STMT (square_use_p);
	      if (is_division_by (square_use_stmt, square_def))
		{
		  /* Halve the relative importance as this is called twice
		     for each division by a square.  */
		  register_division_in (gimple_bb (square_use_stmt), 1);
		  square_recip_count ++;
		}
	    }
	}
    }

  /* Square reciprocals were counted twice above (once per MULT_EXPR
     operand use).  */
  square_recip_count /= 2;

  if (sqrt_recip_count > square_recip_count)
    /* It will be more profitable to extract a 1 / x expression later,
       so it is not worth attempting to extract 1 / (x * x) now.  */
    return;

  /* Do the expensive part only if we can hope to optimize something.  */
  if (count + square_recip_count >= threshold
      && count >= 1)
    {
      gimple *use_stmt;
      for (occ = occ_head; occ; occ = occ->next)
	{
	  compute_merit (occ);
	  insert_reciprocals (def_gsi, occ, def, NULL, NULL,
			      square_recip_count, threshold);
	}

      /* Second pass: rewrite the registered divisions into
	 multiplications by the inserted reciprocals.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
	{
	  if (is_division_by (use_stmt, def))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
		replace_reciprocal (use_p);
	    }
	  else if (square_recip_count > 0
		   && is_square_of (use_stmt, def))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
		{
		  /* Find all uses of the square that are divisions and
		     replace them by multiplications with the inverse.  */
		  imm_use_iterator square_iterator;
		  gimple *powmult_use_stmt = USE_STMT (use_p);
		  tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);

		  FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
					 square_iterator, powmult_def_name)
		    FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
		      {
			gimple *powmult_use_stmt = USE_STMT (square_use_p);
			if (is_division_by (powmult_use_stmt, powmult_def_name))
			  replace_reciprocal_squares (square_use_p);
		      }
		}
	    }
	}
    }

  /* Tear down the dominator-tree subset and clear the bb->aux links.  */
  for (occ = occ_head; occ; )
    occ = free_bb (occ);

  occ_head = NULL;
}

655 | |

656 | /* Return an internal function that implements the reciprocal of CALL, |

657 | or IFN_LAST if there is no such function that the target supports. */ |

658 | |

659 | internal_fn |

660 | internal_fn_reciprocal (gcall *call) |

661 | { |

662 | internal_fn ifn; |

663 | |

664 | switch (gimple_call_combined_fn (call)) |

665 | { |

666 | CASE_CFN_SQRT: |

667 | CASE_CFN_SQRT_FN: |

668 | ifn = IFN_RSQRT; |

669 | break; |

670 | |

671 | default: |

672 | return IFN_LAST; |

673 | } |

674 | |

675 | tree_pair types = direct_internal_fn_types (ifn, call); |

676 | if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED)) |

677 | return IFN_LAST; |

678 | |

679 | return ifn; |

680 | } |

681 | |

682 | /* Go through all the floating-point SSA_NAMEs, and call |

683 | execute_cse_reciprocals_1 on each of them. */ |

684 | namespace { |

685 | |

/* Pass metadata for the reciprocal CSE mini-pass ("recip").  */
const pass_data pass_data_cse_reciprocals =
{
  GIMPLE_PASS, /* type */
  "recip", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_RECIP, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

698 | |

/* Pass wrapper for reciprocal CSE; enabled when optimizing with
   -freciprocal-math.  */
class pass_cse_reciprocals : public gimple_opt_pass
{
public:
  pass_cse_reciprocals (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
  virtual unsigned int execute (function *);

}; // class pass_cse_reciprocals

711 | |

/* Go through all the floating-point SSA_NAMEs of FUN, calling
   execute_cse_reciprocals_1 on each of them, then replace a/func(b)
   with a*rfunc(b) where the target provides a reciprocal of FUNC.  */

unsigned int
pass_cse_reciprocals::execute (function *fun)
{
  basic_block bb;
  tree arg;

  occ_pool = new object_allocator<occurrence> ("dominators for recip");

  memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  /* The pass relies on bb->aux being free for occurrence links.  */
  if (flag_checking)
    FOR_EACH_BB_FN (bb, fun)
      gcc_assert (!bb->aux);

  /* Handle the default definitions of floating-point parameters.  */
  for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
    if (FLOAT_TYPE_P (TREE_TYPE (arg))
	&& is_gimple_reg (arg))
      {
	tree name = ssa_default_def (fun, arg);
	if (name)
	  execute_cse_reciprocals_1 (NULL, name);
      }

  FOR_EACH_BB_FN (bb, fun)
    {
      tree def;

      /* Handle floating-point PHI results.  */
      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  def = PHI_RESULT (phi);
	  if (! virtual_operand_p (def)
	      && FLOAT_TYPE_P (TREE_TYPE (def)))
	    execute_cse_reciprocals_1 (NULL, def);
	}

      /* Handle floating-point definitions by ordinary statements.  */
      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (gimple_has_lhs (stmt)
	      && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
	      && FLOAT_TYPE_P (TREE_TYPE (def))
	      && TREE_CODE (def) == SSA_NAME)
	    execute_cse_reciprocals_1 (&gsi, def);
	}

      if (optimize_bb_for_size_p (bb))
	continue;

      /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (is_gimple_assign (stmt)
	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
	    {
	      tree arg1 = gimple_assign_rhs2 (stmt);
	      gimple *stmt1;

	      if (TREE_CODE (arg1) != SSA_NAME)
		continue;

	      stmt1 = SSA_NAME_DEF_STMT (arg1);

	      if (is_gimple_call (stmt1)
		  && gimple_call_lhs (stmt1))
		{
		  bool fail;
		  imm_use_iterator ui;
		  use_operand_p use_p;
		  tree fndecl = NULL_TREE;

		  /* Prefer an internal-fn reciprocal; fall back to a
		     target builtin reciprocal of a BUILT_IN_MD call.  */
		  gcall *call = as_a <gcall *> (stmt1);
		  internal_fn ifn = internal_fn_reciprocal (call);
		  if (ifn == IFN_LAST)
		    {
		      fndecl = gimple_call_fndecl (call);
		      if (!fndecl
			  || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_MD)
			continue;
		      fndecl = targetm.builtin_reciprocal (fndecl);
		      if (!fndecl)
			continue;
		    }

		  /* Check that all uses of the SSA name are divisions,
		     otherwise replacing the defining statement will do
		     the wrong thing.  */
		  fail = false;
		  FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
		    {
		      gimple *stmt2 = USE_STMT (use_p);
		      if (is_gimple_debug (stmt2))
			continue;
		      if (!is_gimple_assign (stmt2)
			  || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
			  || gimple_assign_rhs1 (stmt2) == arg1
			  || gimple_assign_rhs2 (stmt2) != arg1)
			{
			  fail = true;
			  break;
			}
		    }
		  if (fail)
		    continue;

		  gimple_replace_ssa_lhs (call, arg1);
		  if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
		    {
		      /* The replacement changes the call between internal
			 and ordinary form, so a fresh call statement must
			 be built, carrying over args and virtual operands.  */
		      auto_vec<tree, 4> args;
		      for (unsigned int i = 0;
			   i < gimple_call_num_args (call); i++)
			args.safe_push (gimple_call_arg (call, i));
		      gcall *stmt2;
		      if (ifn == IFN_LAST)
			stmt2 = gimple_build_call_vec (fndecl, args);
		      else
			stmt2 = gimple_build_call_internal_vec (ifn, args);
		      gimple_call_set_lhs (stmt2, arg1);
		      if (gimple_vdef (call))
			{
			  gimple_set_vdef (stmt2, gimple_vdef (call));
			  SSA_NAME_DEF_STMT (gimple_vdef (stmt2)) = stmt2;
			}
		      gimple_call_set_nothrow (stmt2,
					       gimple_call_nothrow_p (call));
		      gimple_set_vuse (stmt2, gimple_vuse (call));
		      gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
		      gsi_replace (&gsi2, stmt2, true);
		    }
		  else
		    {
		      if (ifn == IFN_LAST)
			gimple_call_set_fndecl (call, fndecl);
		      else
			gimple_call_set_internal_fn (call, ifn);
		      update_stmt (call);
		    }
		  reciprocal_stats.rfuncs_inserted++;

		  /* Turn every division by the old result into a
		     multiplication by the reciprocal.  */
		  FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
		    {
		      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
		      gimple_assign_set_rhs_code (stmt, MULT_EXPR);
		      fold_stmt_inplace (&gsi);
		      update_stmt (stmt);
		    }
		}
	    }
	}
    }

  statistics_counter_event (fun, "reciprocal divs inserted",
			    reciprocal_stats.rdivs_inserted);
  statistics_counter_event (fun, "reciprocal functions inserted",
			    reciprocal_stats.rfuncs_inserted);

  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);
  delete occ_pool;
  return 0;
}

881 | |

882 | } // anon namespace |

883 | |

884 | gimple_opt_pass * |

885 | make_pass_cse_reciprocals (gcc::context *ctxt) |

886 | { |

887 | return new pass_cse_reciprocals (ctxt); |

888 | } |

889 | |

890 | /* Records an occurrence at statement USE_STMT in the vector of trees |

891 | STMTS if it is dominated by *TOP_BB or dominates it or this basic block |

892 | is not yet initialized. Returns true if the occurrence was pushed on |

893 | the vector. Adjusts *TOP_BB to be the basic block dominating all |

894 | statements in the vector. */ |

895 | |

896 | static bool |

897 | maybe_record_sincos (vec<gimple *> *stmts, |

898 | basic_block *top_bb, gimple *use_stmt) |

899 | { |

900 | basic_block use_bb = gimple_bb (use_stmt); |

901 | if (*top_bb |

902 | && (*top_bb == use_bb |

903 | || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb))) |

904 | stmts->safe_push (use_stmt); |

905 | else if (!*top_bb |

906 | || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb)) |

907 | { |

908 | stmts->safe_push (use_stmt); |

909 | *top_bb = use_bb; |

910 | } |

911 | else |

912 | return false; |

913 | |

914 | return true; |

915 | } |

916 | |

/* Look for sin, cos and cexpi calls with the same argument NAME and
   create a single call to cexpi CSEing the result in this case.
   We first walk over all immediate uses of the argument collecting
   statements that we can CSE in a vector and in a second pass replace
   the statement rhs with a REALPART or IMAGPART expression on the
   result of the cexpi call we insert before the use statement that
   dominates all other candidates.  Returns true if purging dead EH
   edges changed the CFG.  */

static bool
execute_cse_sincos_1 (tree name)
{
  gimple_stmt_iterator gsi;
  imm_use_iterator use_iter;
  tree fndecl, res, type;
  gimple *def_stmt, *use_stmt, *stmt;
  int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
  auto_vec<gimple *> stmts;
  basic_block top_bb = NULL;
  int i;
  bool cfg_changed = false;

  type = TREE_TYPE (name);
  /* Pass 1: collect the sin/cos/cexpi calls on NAME that are related
     in the dominator tree (maybe_record_sincos rejects unrelated ones
     and tracks the dominating block in TOP_BB).  */
  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
    {
      /* Only calls whose result is actually used are candidates.  */
      if (gimple_code (use_stmt) != GIMPLE_CALL
	  || !gimple_call_lhs (use_stmt))
	continue;

      switch (gimple_call_combined_fn (use_stmt))
	{
	CASE_CFN_COS:
	  seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_CFN_SIN:
	  seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_CFN_CEXPI:
	  seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	default:;
	}
    }

  /* CSE only pays off when at least two distinct flavors were seen.  */
  if (seen_cos + seen_sin + seen_cexpi <= 1)
    return false;

  /* Simply insert cexpi at the beginning of top_bb but not earlier than
     the name def statement.  */
  fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
  if (!fndecl)
    return false;
  stmt = gimple_build_call (fndecl, 1, name);
  res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
  gimple_call_set_lhs (stmt, res);

  def_stmt = SSA_NAME_DEF_STMT (name);
  if (!SSA_NAME_IS_DEFAULT_DEF (name)
      && gimple_code (def_stmt) != GIMPLE_PHI
      && gimple_bb (def_stmt) == top_bb)
    {
      /* NAME is defined by a normal statement inside TOP_BB itself:
	 place the cexpi call immediately after that definition.  */
      gsi = gsi_for_stmt (def_stmt);
      gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
    }
  else
    {
      /* Otherwise NAME's definition dominates TOP_BB (or is a PHI /
	 default def), so the start of TOP_BB, after labels, is safe.  */
      gsi = gsi_after_labels (top_bb);
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
    }
  sincos_stats.inserted++;

  /* And adjust the recorded old call sites.  */
  for (i = 0; stmts.iterate (i, &use_stmt); ++i)
    {
      tree rhs = NULL;

      switch (gimple_call_combined_fn (use_stmt))
	{
	CASE_CFN_COS:
	  rhs = fold_build1 (REALPART_EXPR, type, res);
	  break;

	CASE_CFN_SIN:
	  rhs = fold_build1 (IMAGPART_EXPR, type, res);
	  break;

	CASE_CFN_CEXPI:
	  rhs = res;
	  break;

	default:;
	  gcc_unreachable ();
	}

      /* Replace call with a copy.  */
      stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);

      gsi = gsi_for_stmt (use_stmt);
      gsi_replace (&gsi, stmt, true);
      /* Removing a possibly-throwing call may leave dead EH edges.  */
      if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
	cfg_changed = true;
    }

  return cfg_changed;
}

1024 | |

/* To evaluate powi(x,n), the floating point value x raised to the
   constant integer exponent n, we use a hybrid algorithm that
   combines the "window method" with look-up tables.  For an
   introduction to exponentiation algorithms and "addition chains",
   see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
   "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
   3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
   Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.  */

/* Provide a default value for POWI_MAX_MULTS, the maximum number of
   multiplications to inline before calling the system library's pow
   function.  powi(x,n) requires at worst 2*bits(n)-2 multiplications,
   so this default never requires calling pow, powf or powl.  */

#ifndef POWI_MAX_MULTS
#define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
#endif

/* The size of the "optimal power tree" lookup table.  All
   exponents less than this value are simply looked up in the
   powi_table below.  This threshold is also used to size the
   cache of pseudo registers that hold intermediate results.  */
#define POWI_TABLE_SIZE 256

/* The size, in bits of the window, used in the "window method"
   exponentiation algorithm.  This is equivalent to a radix of
   (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".  */
#define POWI_WINDOW_SIZE 3

/* The following table is an efficient representation of an
   "optimal power tree".  For each value, i, the corresponding
   value, j, in the table states than an optimal evaluation
   sequence for calculating pow(x,i) can be found by evaluating
   pow(x,j)*pow(x,i-j).  An optimal power tree for the first
   100 integers is given in Knuth's "Seminumerical algorithms".
   Note that both powi_lookup_cost and powi_as_mults_1 rely on
   this decomposition terminating at the base cases 0 and 1.  */

static const unsigned char powi_table[POWI_TABLE_SIZE] =
  {
      0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
      4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
      8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
     12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
     16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
     20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
     24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
     28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
     32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
     36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
     40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
     44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
     48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
     52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
     56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
     60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
     64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
     68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
     72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
     76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
     80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
     84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
     88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
     92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
     96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
    100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
    104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
    108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
    112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
    116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
    120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
    124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
  };

1096 | |

1097 | |

1098 | /* Return the number of multiplications required to calculate |

1099 | powi(x,n) where n is less than POWI_TABLE_SIZE. This is a |

1100 | subroutine of powi_cost. CACHE is an array indicating |

1101 | which exponents have already been calculated. */ |

1102 | |

1103 | static int |

1104 | powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache) |

1105 | { |

1106 | /* If we've already calculated this exponent, then this evaluation |

1107 | doesn't require any additional multiplications. */ |

1108 | if (cache[n]) |

1109 | return 0; |

1110 | |

1111 | cache[n] = true; |

1112 | return powi_lookup_cost (n - powi_table[n], cache) |

1113 | + powi_lookup_cost (powi_table[n], cache) + 1; |

1114 | } |

1115 | |

1116 | /* Return the number of multiplications required to calculate |

1117 | powi(x,n) for an arbitrary x, given the exponent N. This |

1118 | function needs to be kept in sync with powi_as_mults below. */ |

1119 | |

1120 | static int |

1121 | powi_cost (HOST_WIDE_INT n) |

1122 | { |

1123 | bool cache[POWI_TABLE_SIZE]; |

1124 | unsigned HOST_WIDE_INT digit; |

1125 | unsigned HOST_WIDE_INT val; |

1126 | int result; |

1127 | |

1128 | if (n == 0) |

1129 | return 0; |

1130 | |

1131 | /* Ignore the reciprocal when calculating the cost. */ |

1132 | val = (n < 0) ? -n : n; |

1133 | |

1134 | /* Initialize the exponent cache. */ |

1135 | memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool)); |

1136 | cache[1] = true; |

1137 | |

1138 | result = 0; |

1139 | |

1140 | while (val >= POWI_TABLE_SIZE) |

1141 | { |

1142 | if (val & 1) |

1143 | { |

1144 | digit = val & ((1 << POWI_WINDOW_SIZE) - 1); |

1145 | result += powi_lookup_cost (digit, cache) |

1146 | + POWI_WINDOW_SIZE + 1; |

1147 | val >>= POWI_WINDOW_SIZE; |

1148 | } |

1149 | else |

1150 | { |

1151 | val >>= 1; |

1152 | result++; |

1153 | } |

1154 | } |

1155 | |

1156 | return result + powi_lookup_cost (val, cache); |

1157 | } |

1158 | |

1159 | /* Recursive subroutine of powi_as_mults. This function takes the |

1160 | array, CACHE, of already calculated exponents and an exponent N and |

1161 | returns a tree that corresponds to CACHE[1]**N, with type TYPE. */ |

1162 | |

1163 | static tree |

1164 | powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type, |

1165 | HOST_WIDE_INT n, tree *cache) |

1166 | { |

1167 | tree op0, op1, ssa_target; |

1168 | unsigned HOST_WIDE_INT digit; |

1169 | gassign *mult_stmt; |

1170 | |

1171 | if (n < POWI_TABLE_SIZE && cache[n]) |

1172 | return cache[n]; |

1173 | |

1174 | ssa_target = make_temp_ssa_name (type, NULL, "powmult"); |

1175 | |

1176 | if (n < POWI_TABLE_SIZE) |

1177 | { |

1178 | cache[n] = ssa_target; |

1179 | op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache); |

1180 | op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache); |

1181 | } |

1182 | else if (n & 1) |

1183 | { |

1184 | digit = n & ((1 << POWI_WINDOW_SIZE) - 1); |

1185 | op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache); |

1186 | op1 = powi_as_mults_1 (gsi, loc, type, digit, cache); |

1187 | } |

1188 | else |

1189 | { |

1190 | op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache); |

1191 | op1 = op0; |

1192 | } |

1193 | |

1194 | mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1); |

1195 | gimple_set_location (mult_stmt, loc); |

1196 | gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT); |

1197 | |

1198 | return ssa_target; |

1199 | } |

1200 | |

1201 | /* Convert ARG0**N to a tree of multiplications of ARG0 with itself. |

1202 | This function needs to be kept in sync with powi_cost above. */ |

1203 | |

1204 | static tree |

1205 | powi_as_mults (gimple_stmt_iterator *gsi, location_t loc, |

1206 | tree arg0, HOST_WIDE_INT n) |

1207 | { |

1208 | tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0); |

1209 | gassign *div_stmt; |

1210 | tree target; |

1211 | |

1212 | if (n == 0) |

1213 | return build_real (type, dconst1); |

1214 | |

1215 | memset (cache, 0, sizeof (cache)); |

1216 | cache[1] = arg0; |

1217 | |

1218 | result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache); |

1219 | if (n >= 0) |

1220 | return result; |

1221 | |

1222 | /* If the original exponent was negative, reciprocate the result. */ |

1223 | target = make_temp_ssa_name (type, NULL, "powmult"); |

1224 | div_stmt = gimple_build_assign (target, RDIV_EXPR, |

1225 | build_real (type, dconst1), result); |

1226 | gimple_set_location (div_stmt, loc); |

1227 | gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT); |

1228 | |

1229 | return target; |

1230 | } |

1231 | |

1232 | /* ARG0 and N are the two arguments to a powi builtin in GSI with |

1233 | location info LOC. If the arguments are appropriate, create an |

1234 | equivalent sequence of statements prior to GSI using an optimal |

1235 | number of multiplications, and return an expession holding the |

1236 | result. */ |

1237 | |

1238 | static tree |

1239 | gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc, |

1240 | tree arg0, HOST_WIDE_INT n) |

1241 | { |

1242 | /* Avoid largest negative number. */ |

1243 | if (n != -n |

1244 | && ((n >= -1 && n <= 2) |

1245 | || (optimize_function_for_speed_p (cfun) |

1246 | && powi_cost (n) <= POWI_MAX_MULTS))) |

1247 | return powi_as_mults (gsi, loc, arg0, n); |

1248 | |

1249 | return NULL_TREE; |

1250 | } |

1251 | |

1252 | /* Build a gimple call statement that calls FN with argument ARG. |

1253 | Set the lhs of the call statement to a fresh SSA name. Insert the |

1254 | statement prior to GSI's current position, and return the fresh |

1255 | SSA name. */ |

1256 | |

1257 | static tree |

1258 | build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc, |

1259 | tree fn, tree arg) |

1260 | { |

1261 | gcall *call_stmt; |

1262 | tree ssa_target; |

1263 | |

1264 | call_stmt = gimple_build_call (fn, 1, arg); |

1265 | ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot"); |

1266 | gimple_set_lhs (call_stmt, ssa_target); |

1267 | gimple_set_location (call_stmt, loc); |

1268 | gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT); |

1269 | |

1270 | return ssa_target; |

1271 | } |

1272 | |

1273 | /* Build a gimple binary operation with the given CODE and arguments |

1274 | ARG0, ARG1, assigning the result to a new SSA name for variable |

1275 | TARGET. Insert the statement prior to GSI's current position, and |

1276 | return the fresh SSA name.*/ |

1277 | |

1278 | static tree |

1279 | build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc, |

1280 | const char *name, enum tree_code code, |

1281 | tree arg0, tree arg1) |

1282 | { |

1283 | tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name); |

1284 | gassign *stmt = gimple_build_assign (result, code, arg0, arg1); |

1285 | gimple_set_location (stmt, loc); |

1286 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); |

1287 | return result; |

1288 | } |

1289 | |

1290 | /* Build a gimple reference operation with the given CODE and argument |

1291 | ARG, assigning the result to a new SSA name of TYPE with NAME. |

1292 | Insert the statement prior to GSI's current position, and return |

1293 | the fresh SSA name. */ |

1294 | |

1295 | static inline tree |

1296 | build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type, |

1297 | const char *name, enum tree_code code, tree arg0) |

1298 | { |

1299 | tree result = make_temp_ssa_name (type, NULL, name); |

1300 | gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0)); |

1301 | gimple_set_location (stmt, loc); |

1302 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); |

1303 | return result; |

1304 | } |

1305 | |

1306 | /* Build a gimple assignment to cast VAL to TYPE. Insert the statement |

1307 | prior to GSI's current position, and return the fresh SSA name. */ |

1308 | |

1309 | static tree |

1310 | build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc, |

1311 | tree type, tree val) |

1312 | { |

1313 | tree result = make_ssa_name (type); |

1314 | gassign *stmt = gimple_build_assign (result, NOP_EXPR, val); |

1315 | gimple_set_location (stmt, loc); |

1316 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); |

1317 | return result; |

1318 | } |

1319 | |

/* Bookkeeping for synthesizing pow (x, frac) as a product of square-root
   chains; filled in by representable_as_half_series_p.  */
struct pow_synth_sqrt_info
{
  /* factors[i] is true iff the term 0.5**(i+1) participates in the sum.
     Storage is provided by the caller.  */
  bool *factors;
  /* One past the index of the last set factor, i.e. the length of the
     deepest sqrt chain required.  */
  unsigned int deepest;
  /* Number of multiplications needed to combine the sqrt chains.  */
  unsigned int num_mults;
};

1326 | |

1327 | /* Return true iff the real value C can be represented as a |

1328 | sum of powers of 0.5 up to N. That is: |

1329 | C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1. |

1330 | Record in INFO the various parameters of the synthesis algorithm such |

1331 | as the factors a[i], the maximum 0.5 power and the number of |

1332 | multiplications that will be required. */ |

1333 | |

1334 | bool |

1335 | representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n, |

1336 | struct pow_synth_sqrt_info *info) |

1337 | { |

1338 | REAL_VALUE_TYPE factor = dconsthalf; |

1339 | REAL_VALUE_TYPE remainder = c; |

1340 | |

1341 | info->deepest = 0; |

1342 | info->num_mults = 0; |

1343 | memset (info->factors, 0, n * sizeof (bool)); |

1344 | |

1345 | for (unsigned i = 0; i < n; i++) |

1346 | { |

1347 | REAL_VALUE_TYPE res; |

1348 | |

1349 | /* If something inexact happened bail out now. */ |

1350 | if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor)) |

1351 | return false; |

1352 | |

1353 | /* We have hit zero. The number is representable as a sum |

1354 | of powers of 0.5. */ |

1355 | if (real_equal (&res, &dconst0)) |

1356 | { |

1357 | info->factors[i] = true; |

1358 | info->deepest = i + 1; |

1359 | return true; |

1360 | } |

1361 | else if (!REAL_VALUE_NEGATIVE (res)) |

1362 | { |

1363 | remainder = res; |

1364 | info->factors[i] = true; |

1365 | info->num_mults++; |

1366 | } |

1367 | else |

1368 | info->factors[i] = false; |

1369 | |

1370 | real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf); |

1371 | } |

1372 | return false; |

1373 | } |

1374 | |

1375 | /* Return the tree corresponding to FN being applied |

1376 | to ARG N times at GSI and LOC. |

1377 | Look up previous results from CACHE if need be. |

1378 | cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */ |

1379 | |

1380 | static tree |

1381 | get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi, |

1382 | tree fn, location_t loc, tree *cache) |

1383 | { |

1384 | tree res = cache[n]; |

1385 | if (!res) |

1386 | { |

1387 | tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache); |

1388 | res = build_and_insert_call (gsi, loc, fn, prev); |

1389 | cache[n] = res; |

1390 | } |

1391 | |

1392 | return res; |

1393 | } |

1394 | |

/* Print to STREAM the repeated application of function FNAME to ARG
   N times.  So, for FNAME = "foo", ARG = "x", N = 2 it would print:
   "foo (foo (x))".  */

static void
print_nested_fn (FILE* stream, const char *fname, const char* arg,
		 unsigned int n)
{
  /* Emit the N opening applications, then the innermost argument,
     then the matching closing parentheses.  */
  for (unsigned int i = 0; i < n; i++)
    fprintf (stream, "%s (", fname);
  fprintf (stream, "%s", arg);
  for (unsigned int i = 0; i < n; i++)
    fprintf (stream, ")");
}

1412 | |

1413 | /* Print to STREAM the fractional sequence of sqrt chains |

1414 | applied to ARG, described by INFO. Used for the dump file. */ |

1415 | |

1416 | static void |

1417 | dump_fractional_sqrt_sequence (FILE *stream, const char *arg, |

1418 | struct pow_synth_sqrt_info *info) |

1419 | { |

1420 | for (unsigned int i = 0; i < info->deepest; i++) |

1421 | { |

1422 | bool is_set = info->factors[i]; |

1423 | if (is_set) |

1424 | { |

1425 | print_nested_fn (stream, "sqrt", arg, i + 1); |

1426 | if (i != info->deepest - 1) |

1427 | fprintf (stream, " * "); |

1428 | } |

1429 | } |

1430 | } |

1431 | |

1432 | /* Print to STREAM a representation of raising ARG to an integer |

1433 | power N. Used for the dump file. */ |

1434 | |

1435 | static void |

1436 | dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n) |

1437 | { |

1438 | if (n > 1) |

1439 | fprintf (stream, "powi (%s, "HOST_WIDE_INT_PRINT_DEC ")", arg, n); |

1440 | else if (n == 1) |

1441 | fprintf (stream, "%s", arg); |

1442 | } |

1443 | |

1444 | /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of |

1445 | square roots. Place at GSI and LOC. Limit the maximum depth |

1446 | of the sqrt chains to MAX_DEPTH. Return the tree holding the |

1447 | result of the expanded sequence or NULL_TREE if the expansion failed. |

1448 | |

1449 | This routine assumes that ARG1 is a real number with a fractional part |

1450 | (the integer exponent case will have been handled earlier in |

1451 | gimple_expand_builtin_pow). |

1452 | |

1453 | For ARG1 > 0.0: |

1454 | * For ARG1 composed of a whole part WHOLE_PART and a fractional part |

1455 | FRAC_PART i.e. WHOLE_PART == floor (ARG1) and |

1456 | FRAC_PART == ARG1 - WHOLE_PART: |

1457 | Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where |

1458 | POW (ARG0, FRAC_PART) is expanded as a product of square root chains |

1459 | if it can be expressed as such, that is if FRAC_PART satisfies: |

1460 | FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i)) |

1461 | where integer a[i] is either 0 or 1. |

1462 | |

1463 | Example: |

1464 | POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625) |

1465 | --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x))) |

1466 | |

1467 | For ARG1 < 0.0 there are two approaches: |

1468 | * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1) |

1469 | is calculated as above. |

1470 | |

1471 | Example: |

1472 | POW (x, -5.625) == 1.0 / POW (x, 5.625) |

1473 | --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x)))) |

1474 | |

1475 | * (B) : WHOLE_PART := - ceil (abs (ARG1)) |

1476 | FRAC_PART := ARG1 - WHOLE_PART |

1477 | and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART). |

1478 | Example: |

1479 | POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6) |

1480 | --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6)) |

1481 | |

1482 | For ARG1 < 0.0 we choose between (A) and (B) depending on |

1483 | how many multiplications we'd have to do. |

1484 | So, for the example in (B): POW (x, -5.875), if we were to |

1485 | follow algorithm (A) we would produce: |

1486 | 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X))) |

1487 | which contains more multiplications than approach (B). |

1488 | |

1489 | Hopefully, this approach will eliminate potentially expensive POW library |

1490 | calls when unsafe floating point math is enabled and allow the compiler to |

1491 | further optimise the multiplies, square roots and divides produced by this |

1492 | function. */ |

1493 | |

/* Synthesize POW (ARG0, ARG1) as square-root chains times an integer
   power; see the block comment above for the full algorithm.  Returns
   the SSA name holding the result, or NULL_TREE on failure.  */

static tree
expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
		     tree arg0, tree arg1, HOST_WIDE_INT max_depth)
{
  tree type = TREE_TYPE (arg0);
  machine_mode mode = TYPE_MODE (type);
  tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
  bool one_over = true;

  /* Without a sqrt builtin for TYPE there is nothing to expand to.  */
  if (!sqrtfn)
    return NULL_TREE;

  /* Only constant exponents can be decomposed at compile time.  */
  if (TREE_CODE (arg1) != REAL_CST)
    return NULL_TREE;

  REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);

  gcc_assert (max_depth > 0);
  tree *cache = XALLOCAVEC (tree, max_depth + 1);

  struct pow_synth_sqrt_info synth_info;
  synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
  synth_info.deepest = 0;
  synth_info.num_mults = 0;

  /* Work on |exponent|; NEG_EXP decides the final reciprocal form.  */
  bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
  REAL_VALUE_TYPE exp = real_value_abs (&exp_init);

  /* The whole and fractional parts of exp.  */
  REAL_VALUE_TYPE whole_part;
  REAL_VALUE_TYPE frac_part;

  real_floor (&whole_part, mode, &exp);
  real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);

  /* Alternative decomposition using ceil, for approach (B) of the
     block comment (negative exponents only).  */
  REAL_VALUE_TYPE ceil_whole = dconst0;
  REAL_VALUE_TYPE ceil_fract = dconst0;

  if (neg_exp)
    {
      real_ceil (&ceil_whole, mode, &exp);
      real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
    }

  if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
    return NULL_TREE;

  /* Check whether it's more profitable to not use 1.0 / ...  */
  if (neg_exp)
    {
      struct pow_synth_sqrt_info alt_synth_info;
      alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
      alt_synth_info.deepest = 0;
      alt_synth_info.num_mults = 0;

      /* Prefer the ceil-based form only when it is representable, is
	 no deeper, and strictly saves multiplications.  */
      if (representable_as_half_series_p (ceil_fract, max_depth,
					  &alt_synth_info)
	  && alt_synth_info.deepest <= synth_info.deepest
	  && alt_synth_info.num_mults < synth_info.num_mults)
	{
	  whole_part = ceil_whole;
	  frac_part = ceil_fract;
	  synth_info.deepest = alt_synth_info.deepest;
	  synth_info.num_mults = alt_synth_info.num_mults;
	  memcpy (synth_info.factors, alt_synth_info.factors,
		  (max_depth + 1) * sizeof (bool));
	  one_over = false;
	}
    }

  /* The integer part must round-trip through HOST_WIDE_INT exactly.  */
  HOST_WIDE_INT n = real_to_integer (&whole_part);
  REAL_VALUE_TYPE cint;
  real_from_integer (&cint, VOIDmode, n, SIGNED);

  if (!real_identical (&whole_part, &cint))
    return NULL_TREE;

  /* Total multiply budget: integer powi plus the sqrt-chain combines.  */
  if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
    return NULL_TREE;

  memset (cache, 0, (max_depth + 1) * sizeof (tree));

  /* N == 0 contributes a factor of 1.0; N == 1 is just ARG0.  */
  tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;

  /* Calculate the integer part of the exponent.  */
  if (n > 1)
    {
      integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
      if (!integer_res)
	return NULL_TREE;
    }

  if (dump_file)
    {
      char string[64];

      real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
      fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);

      if (neg_exp)
	{
	  if (one_over)
	    {
	      fprintf (dump_file, "1.0 / (");
	      dump_integer_part (dump_file, "x", n);
	      if (n > 0)
		fprintf (dump_file, " * ");
	      dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
	      fprintf (dump_file, ")");
	    }
	  else
	    {
	      dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
	      fprintf (dump_file, " / (");
	      dump_integer_part (dump_file, "x", n);
	      fprintf (dump_file, ")");
	    }
	}
      else
	{
	  dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
	  if (n > 0)
	    fprintf (dump_file, " * ");
	  dump_integer_part (dump_file, "x", n);
	}

      /* NOTE(review): deepest is unsigned int but is printed with %d;
	 harmless for sane depths but %u would be the exact match.  */
      fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
    }

  tree fract_res = NULL_TREE;
  cache[0] = arg0;

  /* Calculate the fractional part of the exponent.  */
  for (unsigned i = 0; i < synth_info.deepest; i++)
    {
      if (synth_info.factors[i])
	{
	  /* Chain of i + 1 sqrts computes ARG0 ** (0.5 ** (i + 1));
	     shared prefixes are reused through CACHE.  */
	  tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);

	  if (!fract_res)
	    fract_res = sqrt_chain;

	  else
	    fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
						fract_res, sqrt_chain);
	}
    }

  tree res = NULL_TREE;

  if (neg_exp)
    {
      if (one_over)
	{
	  /* Approach (A): 1.0 / (powi * sqrt chains).  */
	  if (n > 0)
	    res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
					  fract_res, integer_res);
	  else
	    res = fract_res;

	  res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
					build_real (type, dconst1), res);
	}
      else
	{
	  /* Approach (B): sqrt chains / powi.  */
	  res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
					fract_res, integer_res);
	}
    }
  else
    res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
				  fract_res, integer_res);
  return res;
}

1670 | |

1671 | /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI |

1672 | with location info LOC. If possible, create an equivalent and |

1673 | less expensive sequence of statements prior to GSI, and return an |

1674 | expression holding the result. */ |

1675 | |

1676 | static tree |

1677 | gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc, |

1678 | tree arg0, tree arg1) |

1679 | { |

1680 | REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6; |

1681 | REAL_VALUE_TYPE c2, dconst3; |

1682 | HOST_WIDE_INT n; |

1683 | tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x; |

1684 | machine_mode mode; |

1685 | bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi)); |

1686 | bool hw_sqrt_exists, c_is_int, c2_is_int; |

1687 | |

1688 | dconst1_4 = dconst1; |

1689 | SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2); |

1690 | |

1691 | /* If the exponent isn't a constant, there's nothing of interest |

1692 | to be done. */ |

1693 | if (TREE_CODE (arg1) != REAL_CST) |

1694 | return NULL_TREE; |

1695 | |

1696 | /* Don't perform the operation if flag_signaling_nans is on |

1697 | and the operand is a signaling NaN. */ |

1698 | if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1))) |

1699 | && ((TREE_CODE (arg0) == REAL_CST |

1700 | && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0))) |

1701 | || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1)))) |

1702 | return NULL_TREE; |

1703 | |

1704 | /* If the exponent is equivalent to an integer, expand to an optimal |

1705 | multiplication sequence when profitable. */ |

1706 | c = TREE_REAL_CST (arg1); |

1707 | n = real_to_integer (&c); |

1708 | real_from_integer (&cint, VOIDmode, n, SIGNED); |

1709 | c_is_int = real_identical (&c, &cint); |

1710 | |

1711 | if (c_is_int |

1712 | && ((n >= -1 && n <= 2) |

1713 | || (flag_unsafe_math_optimizations |

1714 | && speed_p |

1715 | && powi_cost (n) <= POWI_MAX_MULTS))) |

1716 | return gimple_expand_builtin_powi (gsi, loc, arg0, n); |

1717 | |

1718 | /* Attempt various optimizations using sqrt and cbrt. */ |

1719 | type = TREE_TYPE (arg0); |

1720 | mode = TYPE_MODE (type); |

1721 | sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT); |

1722 | |

1723 | /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe |

1724 | unless signed zeros must be maintained. pow(-0,0.5) = +0, while |

1725 | sqrt(-0) = -0. */ |

1726 | if (sqrtfn |

1727 | && real_equal (&c, &dconsthalf) |

1728 | && !HONOR_SIGNED_ZEROS (mode)) |

1729 | return build_and_insert_call (gsi, loc, sqrtfn, arg0); |

1730 | |

1731 | hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing; |

1732 | |

1733 | /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math |

1734 | optimizations since 1./3. is not exactly representable. If x |

1735 | is negative and finite, the correct value of pow(x,1./3.) is |

1736 | a NaN with the "invalid" exception raised, because the value |

1737 | of 1./3. actually has an even denominator. The correct value |

1738 | of cbrt(x) is a negative real value. */ |

1739 | cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT); |

1740 | dconst1_3 = real_value_truncate (mode, dconst_third ()); |

1741 | |

1742 | if (flag_unsafe_math_optimizations |

1743 | && cbrtfn |

1744 | && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0)) |

1745 | && real_equal (&c, &dconst1_3)) |

1746 | return build_and_insert_call (gsi, loc, cbrtfn, arg0); |

1747 | |

1748 | /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization |

1749 | if we don't have a hardware sqrt insn. */ |

1750 | dconst1_6 = dconst1_3; |

1751 | SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1); |

1752 | |

1753 | if (flag_unsafe_math_optimizations |

1754 | && sqrtfn |

1755 | && cbrtfn |

1756 | && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0)) |

1757 | && speed_p |

1758 | && hw_sqrt_exists |

1759 | && real_equal (&c, &dconst1_6)) |

1760 | { |

1761 | /* sqrt(x) */ |

1762 | sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0); |

1763 | |

1764 | /* cbrt(sqrt(x)) */ |

1765 | return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0); |

1766 | } |

1767 | |

1768 | |

1769 | /* Attempt to expand the POW as a product of square root chains. |

1770 | Expand the 0.25 case even when optimising for size. */ |

1771 | if (flag_unsafe_math_optimizations |

1772 | && sqrtfn |

1773 | && hw_sqrt_exists |

1774 | && (speed_p || real_equal (&c, &dconst1_4)) |

1775 | && !HONOR_SIGNED_ZEROS (mode)) |

1776 | { |

1777 | unsigned int max_depth = speed_p |

1778 | ? PARAM_VALUE (PARAM_MAX_POW_SQRT_DEPTH) |

1779 | : 2; |

1780 | |

1781 | tree expand_with_sqrts |

1782 | = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth); |

1783 | |

1784 | if (expand_with_sqrts) |

1785 | return expand_with_sqrts; |

1786 | } |

1787 | |

1788 | real_arithmetic (&c2, MULT_EXPR, &c, &dconst2); |

1789 | n = real_to_integer (&c2); |

1790 | real_from_integer (&cint, VOIDmode, n, SIGNED); |

1791 | c2_is_int = real_identical (&c2, &cint); |

1792 | |

1793 | /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into |

1794 | |

1795 | powi(x, n/3) * powi(cbrt(x), n%3), n > 0; |

1796 | 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0. |

1797 | |

1798 | Do not calculate the first factor when n/3 = 0. As cbrt(x) is |

1799 | different from pow(x, 1./3.) due to rounding and behavior with |

1800 | negative x, we need to constrain this transformation to unsafe |

1801 | math and positive x or finite math. */ |

1802 | real_from_integer (&dconst3, VOIDmode, 3, SIGNED); |

1803 | real_arithmetic (&c2, MULT_EXPR, &c, &dconst3); |

1804 | real_round (&c2, mode, &c2); |

1805 | n = real_to_integer (&c2); |

1806 | real_from_integer (&cint, VOIDmode, n, SIGNED); |

1807 | real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3); |

1808 | real_convert (&c2, mode, &c2); |

1809 | |

1810 | if (flag_unsafe_math_optimizations |

1811 | && cbrtfn |

1812 | && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0)) |

1813 | && real_identical (&c2, &c) |

1814 | && !c2_is_int |

1815 | && optimize_function_for_speed_p (cfun) |

1816 | && powi_cost (n / 3) <= POWI_MAX_MULTS) |

1817 | { |

1818 | tree powi_x_ndiv3 = NULL_TREE; |

1819 | |

1820 | /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not |

1821 | possible or profitable, give up. Skip the degenerate case when |

1822 | abs(n) < 3, where the result is always 1. */ |

1823 | if (absu_hwi (n) >= 3) |

1824 | { |

1825 | powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0, |

1826 | abs_hwi (n / 3)); |

1827 | if (!powi_x_ndiv3) |

1828 | return NULL_TREE; |

1829 | } |

1830 | |

1831 | /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi |

1832 | as that creates an unnecessary variable. Instead, just produce |

1833 | either cbrt(x) or cbrt(x) * cbrt(x). */ |

1834 | cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0); |

1835 | |

1836 | if (absu_hwi (n) % 3 == 1) |

1837 | powi_cbrt_x = cbrt_x; |

1838 | else |

1839 | powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR, |

1840 | cbrt_x, cbrt_x); |

1841 | |

1842 | /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */ |

1843 | if (absu_hwi (n) < 3) |

1844 | result = powi_cbrt_x; |

1845 | else |

1846 | result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR, |

1847 | powi_x_ndiv3, powi_cbrt_x); |

1848 | |

1849 | /* If n is negative, reciprocate the result. */ |

1850 | if (n < 0) |

1851 | result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR, |

1852 | build_real (type, dconst1), result); |

1853 | |

1854 | return result; |

1855 | } |

1856 | |

1857 | /* No optimizations succeeded. */ |

1858 | return NULL_TREE; |

1859 | } |

1860 | |

1861 | /* ARG is the argument to a cabs builtin call in GSI with location info |

1862 | LOC. Create a sequence of statements prior to GSI that calculates |

1863 | sqrt(R*R + I*I), where R and I are the real and imaginary components |

1864 | of ARG, respectively. Return an expression holding the result. */ |

1865 | |

1866 | static tree |

1867 | gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg) |

1868 | { |

1869 | tree real_part, imag_part, addend1, addend2, sum, result; |

1870 | tree type = TREE_TYPE (TREE_TYPE (arg)); |

1871 | tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT); |

1872 | machine_mode mode = TYPE_MODE (type); |

1873 | |

1874 | if (!flag_unsafe_math_optimizations |

1875 | || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi))) |

1876 | || !sqrtfn |

1877 | || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing) |

1878 | return NULL_TREE; |

1879 | |

1880 | real_part = build_and_insert_ref (gsi, loc, type, "cabs", |

1881 | REALPART_EXPR, arg); |

1882 | addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR, |

1883 | real_part, real_part); |

1884 | imag_part = build_and_insert_ref (gsi, loc, type, "cabs", |

1885 | IMAGPART_EXPR, arg); |

1886 | addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR, |

1887 | imag_part, imag_part); |

1888 | sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2); |

1889 | result = build_and_insert_call (gsi, loc, sqrtfn, sum); |

1890 | |

1891 | return result; |

1892 | } |

1893 | |

1894 | /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1 |

1895 | on the SSA_NAME argument of each of them. Also expand powi(x,n) into |

1896 | an optimal number of multiplies, when n is a constant. */ |

1897 | |

1898 | namespace { |

1899 | |

/* Static metadata for the "sincos" pass: requires SSA form, provides the
   PROP_gimple_opt_math property, and asks for an SSA update afterwards
   because the transformations insert new statements.  */
const pass_data pass_data_cse_sincos =
{
  GIMPLE_PASS, /* type */
  "sincos", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_SINCOS, /* tv_id */
  PROP_ssa, /* properties_required */
  PROP_gimple_opt_math, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

1912 | |

/* Pass wrapper; the actual work is in the out-of-line execute method.  */
class pass_cse_sincos : public gimple_opt_pass
{
public:
  pass_cse_sincos (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cse_sincos, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
  {
    /* We no longer require either sincos or cexp, since powi expansion
       piggybacks on this pass.  */
    return optimize;
  }

  virtual unsigned int execute (function *);

}; // class pass_cse_sincos

1931 | |

/* Main driver: walk every statement of FUN looking for calls to sin/cos/
   cexpi (CSEd via execute_cse_sincos_1), pow, powi and cabs (expanded
   inline when profitable).  Returns TODO_cleanup_cfg if any expansion
   changed the CFG.  */
unsigned int
pass_cse_sincos::execute (function *fun)
{
  basic_block bb;
  bool cfg_changed = false;

  calculate_dominance_info (CDI_DOMINATORS);
  memset (&sincos_stats, 0, sizeof (sincos_stats));

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      bool cleanup_eh = false;

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  /* Only the last stmt in a bb could throw, no need to call
	     gimple_purge_dead_eh_edges if we change something in the middle
	     of a basic block.  */
	  cleanup_eh = false;

	  if (is_gimple_call (stmt)
	      && gimple_call_lhs (stmt))
	    {
	      tree arg, arg0, arg1, result;
	      HOST_WIDE_INT n;
	      location_t loc;

	      switch (gimple_call_combined_fn (stmt))
		{
		CASE_CFN_COS:
		CASE_CFN_SIN:
		CASE_CFN_CEXPI:
		  /* Make sure we have either sincos or cexp.  */
		  if (!targetm.libc_has_function (function_c99_math_complex)
		      && !targetm.libc_has_function (function_sincos))
		    break;

		  arg = gimple_call_arg (stmt, 0);
		  if (TREE_CODE (arg) == SSA_NAME)
		    cfg_changed |= execute_cse_sincos_1 (arg);
		  break;

		CASE_CFN_POW:
		  arg0 = gimple_call_arg (stmt, 0);
		  arg1 = gimple_call_arg (stmt, 1);

		  loc = gimple_location (stmt);
		  result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);

		  if (result)
		    {
		      /* Replace the pow call with an assignment of the
			 expanded RESULT to its lhs, dropping the call's
			 virtual operand first.  */
		      tree lhs = gimple_get_lhs (stmt);
		      gassign *new_stmt = gimple_build_assign (lhs, result);
		      gimple_set_location (new_stmt, loc);
		      unlink_stmt_vdef (stmt);
		      gsi_replace (&gsi, new_stmt, true);
		      cleanup_eh = true;
		      if (gimple_vdef (stmt))
			release_ssa_name (gimple_vdef (stmt));
		    }
		  break;

		CASE_CFN_POWI:
		  arg0 = gimple_call_arg (stmt, 0);
		  arg1 = gimple_call_arg (stmt, 1);
		  loc = gimple_location (stmt);

		  if (real_minus_onep (arg0))
		    {
		      /* powi(-1, n) is 1 for even n and -1 for odd n;
			 select on the low bit of the exponent.  Note the
			 inner STMT deliberately shadows the outer one and
			 goes out of scope at the closing brace below.  */
		      tree t0, t1, cond, one, minus_one;
		      gassign *stmt;

		      t0 = TREE_TYPE (arg0);
		      t1 = TREE_TYPE (arg1);
		      one = build_real (t0, dconst1);
		      minus_one = build_real (t0, dconstm1);

		      cond = make_temp_ssa_name (t1, NULL, "powi_cond");
		      stmt = gimple_build_assign (cond, BIT_AND_EXPR,
						  arg1, build_int_cst (t1, 1));
		      gimple_set_location (stmt, loc);
		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);

		      result = make_temp_ssa_name (t0, NULL, "powi");
		      stmt = gimple_build_assign (result, COND_EXPR, cond,
						  minus_one, one);
		      gimple_set_location (stmt, loc);
		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
		    }
		  else
		    {
		      /* Constant exponents only; anything else stays a
			 call.  */
		      if (!tree_fits_shwi_p (arg1))
			break;

		      n = tree_to_shwi (arg1);
		      result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
		    }

		  if (result)
		    {
		      tree lhs = gimple_get_lhs (stmt);
		      gassign *new_stmt = gimple_build_assign (lhs, result);
		      gimple_set_location (new_stmt, loc);
		      unlink_stmt_vdef (stmt);
		      gsi_replace (&gsi, new_stmt, true);
		      cleanup_eh = true;
		      if (gimple_vdef (stmt))
			release_ssa_name (gimple_vdef (stmt));
		    }
		  break;

		CASE_CFN_CABS:
		  arg0 = gimple_call_arg (stmt, 0);
		  loc = gimple_location (stmt);
		  result = gimple_expand_builtin_cabs (&gsi, loc, arg0);

		  if (result)
		    {
		      tree lhs = gimple_get_lhs (stmt);
		      gassign *new_stmt = gimple_build_assign (lhs, result);
		      gimple_set_location (new_stmt, loc);
		      unlink_stmt_vdef (stmt);
		      gsi_replace (&gsi, new_stmt, true);
		      cleanup_eh = true;
		      if (gimple_vdef (stmt))
			release_ssa_name (gimple_vdef (stmt));
		    }
		  break;

		default:;
		}
	    }
	}
      /* CLEANUP_EH can only still be set if the replaced call was the
	 last statement of BB, the one that could have thrown.  */
      if (cleanup_eh)
	cfg_changed |= gimple_purge_dead_eh_edges (bb);
    }

  statistics_counter_event (fun, "sincos statements inserted",
			    sincos_stats.inserted);

  return cfg_changed ? TODO_cleanup_cfg : 0;
}

2077 | |

2078 | } // anon namespace |

2079 | |

/* Factory used by the pass manager to create an instance of the pass.  */
gimple_opt_pass *
make_pass_cse_sincos (gcc::context *ctxt)
{
  return new pass_cse_sincos (ctxt);
}

2085 | |

2086 | /* Return true if stmt is a type conversion operation that can be stripped |

2087 | when used in a widening multiply operation. */ |

2088 | static bool |

2089 | widening_mult_conversion_strippable_p (tree result_type, gimple *stmt) |

2090 | { |

2091 | enum tree_code rhs_code = gimple_assign_rhs_code (stmt); |

2092 | |

2093 | if (TREE_CODE (result_type) == INTEGER_TYPE) |

2094 | { |

2095 | tree op_type; |

2096 | tree inner_op_type; |

2097 | |

2098 | if (!CONVERT_EXPR_CODE_P (rhs_code)) |

2099 | return false; |

2100 | |

2101 | op_type = TREE_TYPE (gimple_assign_lhs (stmt)); |

2102 | |

2103 | /* If the type of OP has the same precision as the result, then |

2104 | we can strip this conversion. The multiply operation will be |

2105 | selected to create the correct extension as a by-product. */ |

2106 | if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type)) |

2107 | return true; |

2108 | |

2109 | /* We can also strip a conversion if it preserves the signed-ness of |

2110 | the operation and doesn't narrow the range. */ |

2111 | inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt)); |

2112 | |

2113 | /* If the inner-most type is unsigned, then we can strip any |

2114 | intermediate widening operation. If it's signed, then the |

2115 | intermediate widening operation must also be signed. */ |

2116 | if ((TYPE_UNSIGNED (inner_op_type) |

2117 | || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type)) |

2118 | && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type)) |

2119 | return true; |

2120 | |

2121 | return false; |

2122 | } |

2123 | |

2124 | return rhs_code == FIXED_CONVERT_EXPR; |

2125 | } |

2126 | |

2127 | /* Return true if RHS is a suitable operand for a widening multiplication, |

2128 | assuming a target type of TYPE. |

2129 | There are two cases: |

2130 | |

2131 | - RHS makes some value at least twice as wide. Store that value |

2132 | in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT. |

2133 | |

2134 | - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so, |

2135 | but leave *TYPE_OUT untouched. */ |

2136 | |

2137 | static bool |

2138 | is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out, |

2139 | tree *new_rhs_out) |

2140 | { |

2141 | gimple *stmt; |

2142 | tree type1, rhs1; |

2143 | |

2144 | if (TREE_CODE (rhs) == SSA_NAME) |

2145 | { |

2146 | stmt = SSA_NAME_DEF_STMT (rhs); |

2147 | if (is_gimple_assign (stmt)) |

2148 | { |

2149 | if (! widening_mult_conversion_strippable_p (type, stmt)) |

2150 | rhs1 = rhs; |

2151 | else |

2152 | { |

2153 | rhs1 = gimple_assign_rhs1 (stmt); |

2154 | |

2155 | if (TREE_CODE (rhs1) == INTEGER_CST) |

2156 | { |

2157 | *new_rhs_out = rhs1; |

2158 | *type_out = NULL; |

2159 | return true; |

2160 | } |

2161 | } |

2162 | } |

2163 | else |

2164 | rhs1 = rhs; |

2165 | |

2166 | type1 = TREE_TYPE (rhs1); |

2167 | |

2168 | if (TREE_CODE (type1) != TREE_CODE (type) |

2169 | || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type)) |

2170 | return false; |

2171 | |

2172 | *new_rhs_out = rhs1; |

2173 | *type_out = type1; |

2174 | return true; |

2175 | } |

2176 | |

2177 | if (TREE_CODE (rhs) == INTEGER_CST) |

2178 | { |

2179 | *new_rhs_out = rhs; |

2180 | *type_out = NULL; |

2181 | return true; |

2182 | } |

2183 | |

2184 | return false; |

2185 | } |

2186 | |

2187 | /* Return true if STMT performs a widening multiplication, assuming the |

2188 | output type is TYPE. If so, store the unwidened types of the operands |

2189 | in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and |

2190 | *RHS2_OUT such that converting those operands to types *TYPE1_OUT |

2191 | and *TYPE2_OUT would give the operands of the multiplication. */ |

2192 | |

2193 | static bool |

2194 | is_widening_mult_p (gimple *stmt, |

2195 | tree *type1_out, tree *rhs1_out, |

2196 | tree *type2_out, tree *rhs2_out) |

2197 | { |

2198 | tree type = TREE_TYPE (gimple_assign_lhs (stmt)); |

2199 | |

2200 | if (TREE_CODE (type) != INTEGER_TYPE |

2201 | && TREE_CODE (type) != FIXED_POINT_TYPE) |

2202 | return false; |

2203 | |

2204 | if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out, |

2205 | rhs1_out)) |

2206 | return false; |

2207 | |

2208 | if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out, |

2209 | rhs2_out)) |

2210 | return false; |

2211 | |

2212 | if (*type1_out == NULL) |

2213 | { |

2214 | if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out)) |

2215 | return false; |

2216 | *type1_out = *type2_out; |

2217 | } |

2218 | |

2219 | if (*type2_out == NULL) |

2220 | { |

2221 | if (!int_fits_type_p (*rhs2_out, *type1_out)) |

2222 | return false; |

2223 | *type2_out = *type1_out; |

2224 | } |

2225 | |

2226 | /* Ensure that the larger of the two operands comes first. */ |

2227 | if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out)) |

2228 | { |

2229 | std::swap (*type1_out, *type2_out); |

2230 | std::swap (*rhs1_out, *rhs2_out); |

2231 | } |

2232 | |

2233 | return true; |

2234 | } |

2235 | |

2236 | /* Check to see if the CALL statement is an invocation of copysign |

2237 | with 1. being the first argument. */ |

2238 | static bool |

2239 | is_copysign_call_with_1 (gimple *call) |

2240 | { |

2241 | gcall *c = dyn_cast <gcall *> (call); |

2242 | if (! c) |

2243 | return false; |

2244 | |

2245 | enum combined_fn code = gimple_call_combined_fn (c); |

2246 | |

2247 | if (code == CFN_LAST) |

2248 | return false; |

2249 | |

2250 | if (builtin_fn_p (code)) |

2251 | { |

2252 | switch (as_builtin_fn (code)) |

2253 | { |

2254 | CASE_FLT_FN (BUILT_IN_COPYSIGN): |

2255 | CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN): |

2256 | return real_onep (gimple_call_arg (c, 0)); |

2257 | default: |

2258 | return false; |

2259 | } |

2260 | } |

2261 | |

2262 | if (internal_fn_p (code)) |

2263 | { |

2264 | switch (as_internal_fn (code)) |

2265 | { |

2266 | case IFN_COPYSIGN: |

2267 | return real_onep (gimple_call_arg (c, 0)); |

2268 | default: |

2269 | return false; |

2270 | } |

2271 | } |

2272 | |

2273 | return false; |

2274 | } |

2275 | |

2276 | /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y). |

2277 | This only happens when the xorsign optab is defined, if the |

2278 | pattern is not a xorsign pattern or if expansion fails FALSE is |

2279 | returned, otherwise TRUE is returned. */ |

static bool
convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
{
  tree treeop0, treeop1, lhs, type;
  location_t loc = gimple_location (stmt);
  lhs = gimple_assign_lhs (stmt);
  treeop0 = gimple_assign_rhs1 (stmt);
  treeop1 = gimple_assign_rhs2 (stmt);
  type = TREE_TYPE (lhs);
  machine_mode mode = TYPE_MODE (type);

  /* Bail out for signaling NaNs -- NOTE(review): presumably because
     xorsign does not evaluate the copysign operand arithmetically and
     so would lose the SNaN's "invalid" signal; confirm.  */
  if (HONOR_SNANS (type))
    return false;

  if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
    {
      gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
      if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
	{
	  /* The copysign call was not the first factor; retry with the
	     second.  On success, overwrite TREEOP1 with the other factor
	     so that below TREEOP1 is always the non-copysign operand and
	     CALL0 the copysign definition.  */
	  call0 = SSA_NAME_DEF_STMT (treeop1);
	  if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
	    return false;

	  treeop1 = treeop0;
	}
	if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
	  return false;

      /* Extract Y from copysign (1, Y) and emit XORSIGN (other, Y) in
	 place of the multiplication.  */
      gcall *c = as_a<gcall*> (call0);
      treeop0 = gimple_call_arg (c, 1);

      gcall *call_stmt
	= gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
      gimple_set_lhs (call_stmt, lhs);
      gimple_set_location (call_stmt, loc);
      gsi_replace (gsi, call_stmt, true);
      return true;
    }

  return false;
}

2321 | |

2322 | /* Process a single gimple statement STMT, which has a MULT_EXPR as |

2323 | its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return |

2324 | value is true iff we converted the statement. */ |

2325 | |

static bool
convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
{
  tree lhs, rhs1, rhs2, type, type1, type2;
  enum insn_code handler;
  scalar_int_mode to_mode, from_mode, actual_mode;
  optab op;
  int actual_precision;
  location_t loc = gimple_location (stmt);
  bool from_unsigned1, from_unsigned2;

  lhs = gimple_assign_lhs (stmt);
  type = TREE_TYPE (lhs);
  if (TREE_CODE (type) != INTEGER_TYPE)
    return false;

  if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
    return false;

  to_mode = SCALAR_INT_TYPE_MODE (type);
  from_mode = SCALAR_INT_TYPE_MODE (type1);
  if (to_mode == from_mode)
    return false;

  from_unsigned1 = TYPE_UNSIGNED (type1);
  from_unsigned2 = TYPE_UNSIGNED (type2);

  /* Pick the optab matching the operands' signedness: unsigned*unsigned,
     signed*signed, or the mixed-sign usmul form.  */
  if (from_unsigned1 && from_unsigned2)
    op = umul_widen_optab;
  else if (!from_unsigned1 && !from_unsigned2)
    op = smul_widen_optab;
  else
    op = usmul_widen_optab;

  handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
						  &actual_mode);

  if (handler == CODE_FOR_nothing)
    {
      /* Fall back to a signed widening multiply where legal.  */
      if (op != smul_widen_optab)
	{
	  /* We can use a signed multiply with unsigned types as long as
	     there is a wider mode to use, or it is the smaller of the two
	     types that is unsigned.  Note that type1 >= type2, always.  */
	  if ((TYPE_UNSIGNED (type1)
	       && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
	      || (TYPE_UNSIGNED (type2)
		  && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
	    {
	      if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
		  || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
		return false;
	    }

	  op = smul_widen_optab;
	  handler = find_widening_optab_handler_and_mode (op, to_mode,
							  from_mode,
							  &actual_mode);

	  if (handler == CODE_FOR_nothing)
	    return false;

	  from_unsigned1 = from_unsigned2 = false;
	}
      else
	return false;
    }

  /* Ensure that the inputs to the handler are in the correct precision
     for the opcode.  This will be the full mode size.  */
  actual_precision = GET_MODE_PRECISION (actual_mode);
  if (2 * actual_precision > TYPE_PRECISION (type))
    return false;
  if (actual_precision != TYPE_PRECISION (type1)
      || from_unsigned1 != TYPE_UNSIGNED (type1))
    rhs1 = build_and_insert_cast (gsi, loc,
				  build_nonstandard_integer_type
				  (actual_precision, from_unsigned1), rhs1);
  if (actual_precision != TYPE_PRECISION (type2)
      || from_unsigned2 != TYPE_UNSIGNED (type2))
    rhs2 = build_and_insert_cast (gsi, loc,
				  build_nonstandard_integer_type
				  (actual_precision, from_unsigned2), rhs2);

  /* Handle constants.  */
  if (TREE_CODE (rhs1) == INTEGER_CST)
    rhs1 = fold_convert (type1, rhs1);
  if (TREE_CODE (rhs2) == INTEGER_CST)
    rhs2 = fold_convert (type2, rhs2);

  /* Rewrite STMT in place into a WIDEN_MULT_EXPR on the (possibly cast)
     narrow operands.  */
  gimple_assign_set_rhs1 (stmt, rhs1);
  gimple_assign_set_rhs2 (stmt, rhs2);
  gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
  update_stmt (stmt);
  widen_mul_stats.widen_mults_inserted++;
  return true;
}

2423 | |

2424 | /* Process a single gimple statement STMT, which is found at the |

2425 | iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its |

2426 | rhs (given by CODE), and try to convert it into a |

2427 | WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value |

2428 | is true iff we converted the statement. */ |

2429 | |

static bool
convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
			    enum tree_code code)
{
  gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
  gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
  tree type, type1, type2, optype;
  tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
  enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
  optab this_optab;
  enum tree_code wmult_code;
  enum insn_code handler;
  scalar_mode to_mode, from_mode, actual_mode;
  location_t loc = gimple_location (stmt);
  int actual_precision;
  bool from_unsigned1, from_unsigned2;

  lhs = gimple_assign_lhs (stmt);
  type = TREE_TYPE (lhs);
  if (TREE_CODE (type) != INTEGER_TYPE
      && TREE_CODE (type) != FIXED_POINT_TYPE)
    return false;

  if (code == MINUS_EXPR)
    wmult_code = WIDEN_MULT_MINUS_EXPR;
  else
    wmult_code = WIDEN_MULT_PLUS_EXPR;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);

  /* Record the defining statement and rhs code of each addend.  */
  if (TREE_CODE (rhs1) == SSA_NAME)
    {
      rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
      if (is_gimple_assign (rhs1_stmt))
	rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
    }

  if (TREE_CODE (rhs2) == SSA_NAME)
    {
      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
      if (is_gimple_assign (rhs2_stmt))
	rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
    }

  /* Allow for one conversion statement between the multiply
     and addition/subtraction statement.  If there are more than
     one conversions then we assume they would invalidate this
     transformation.  If that's not the case then they should have
     been folded before now.  */
  if (CONVERT_EXPR_CODE_P (rhs1_code))
    {
      conv1_stmt = rhs1_stmt;
      rhs1 = gimple_assign_rhs1 (rhs1_stmt);
      if (TREE_CODE (rhs1) == SSA_NAME)
	{
	  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
	  if (is_gimple_assign (rhs1_stmt))
	    rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
	}
      else
	return false;
    }
  if (CONVERT_EXPR_CODE_P (rhs2_code))
    {
      conv2_stmt = rhs2_stmt;
      rhs2 = gimple_assign_rhs1 (rhs2_stmt);
      if (TREE_CODE (rhs2) == SSA_NAME)
	{
	  rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
	  if (is_gimple_assign (rhs2_stmt))
	    rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
	}
      else
	return false;
    }

  /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
     is_widening_mult_p, but we still need the rhs returns.

     It might also appear that it would be sufficient to use the existing
     operands of the widening multiply, but that would limit the choice of
     multiply-and-accumulate instructions.

     If the widened-multiplication result has more than one uses, it is
     probably wiser not to do the conversion.  Note the first branch is
     only taken for PLUS_EXPR: for MINUS_EXPR the multiply must be the
     second (subtrahend) operand.  */
  if (code == PLUS_EXPR
      && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
    {
      if (!has_single_use (rhs1)
	  || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
				  &type2, &mult_rhs2))
	return false;
      add_rhs = rhs2;
      conv_stmt = conv1_stmt;
    }
  else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
    {
      if (!has_single_use (rhs2)
	  || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
				  &type2, &mult_rhs2))
	return false;
      add_rhs = rhs1;
      conv_stmt = conv2_stmt;
    }
  else
    return false;

  to_mode = SCALAR_TYPE_MODE (type);
  from_mode = SCALAR_TYPE_MODE (type1);
  if (to_mode == from_mode)
    return false;

  from_unsigned1 = TYPE_UNSIGNED (type1);
  from_unsigned2 = TYPE_UNSIGNED (type2);
  optype = type1;

  /* There's no such thing as a mixed sign madd yet, so use a wider mode.  */
  if (from_unsigned1 != from_unsigned2)
    {
      if (!INTEGRAL_TYPE_P (type))
	return false;
      /* We can use a signed multiply with unsigned types as long as
	 there is a wider mode to use, or it is the smaller of the two
	 types that is unsigned.  Note that type1 >= type2, always.  */
      if ((from_unsigned1
	   && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
	  || (from_unsigned2
	      && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
	{
	  if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
	      || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
	    return false;
	}

      from_unsigned1 = from_unsigned2 = false;
      optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
					       false);
    }

  /* If there was a conversion between the multiply and addition
     then we need to make sure it fits a multiply-and-accumulate.
     There should be a single mode change which does not change the
     value.  */
  if (conv_stmt)
    {
      /* We use the original, unmodified data types for this.  */
      tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
      tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
      int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
      bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);

      if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
	{
	  /* Conversion is a truncate.  */
	  if (TYPE_PRECISION (to_type) < data_size)
	    return false;
	}
      else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
	{
	  /* Conversion is an extend.  Check it's the right sort.  */
	  if (TYPE_UNSIGNED (from_type) != is_unsigned
	      && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
	    return false;
	}
      /* else convert is a no-op for our purposes.  */
    }

  /* Verify that the machine can perform a widening multiply
     accumulate in this mode/signedness combination, otherwise
     this transformation is likely to pessimize code.  */
  this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
  handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
						  from_mode, &actual_mode);

  if (handler == CODE_FOR_nothing)
    return false;

  /* Ensure that the inputs to the handler are in the correct precision
     for the opcode.  This will be the full mode size.  */
  actual_precision = GET_MODE_PRECISION (actual_mode);
  if (actual_precision != TYPE_PRECISION (type1)
      || from_unsigned1 != TYPE_UNSIGNED (type1))
    mult_rhs1 = build_and_insert_cast (gsi, loc,
				       build_nonstandard_integer_type
				       (actual_precision, from_unsigned1),
				       mult_rhs1);
  if (actual_precision != TYPE_PRECISION (type2)
      || from_unsigned2 != TYPE_UNSIGNED (type2))
    mult_rhs2 = build_and_insert_cast (gsi, loc,
				       build_nonstandard_integer_type
				       (actual_precision, from_unsigned2),
				       mult_rhs2);

  if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
    add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);

  /* Handle constants.  */
  if (TREE_CODE (mult_rhs1) == INTEGER_CST)
    mult_rhs1 = fold_convert (type1, mult_rhs1);
  if (TREE_CODE (mult_rhs2) == INTEGER_CST)
    mult_rhs2 = fold_convert (type2, mult_rhs2);

  /* Rewrite STMT in place as WIDEN_MULT_{PLUS,MINUS}_EXPR.  */
  gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
				  add_rhs);
  update_stmt (gsi_stmt (*gsi));
  widen_mul_stats.maccs_inserted++;
  return true;
}

2639 | |

/* Combine the multiplication at MUL_STMT with operands OP1 and OP2
   with uses in additions and subtractions to form fused multiply-add
   operations.  Returns true if successful and MUL_STMT should be removed.  */

static bool
convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2)
{
  tree mul_result = gimple_get_lhs (mul_stmt);
  tree type = TREE_TYPE (mul_result);
  gimple *use_stmt, *neguse_stmt;
  gassign *fma_stmt;
  use_operand_p use_p;
  imm_use_iterator imm_iter;

  /* Contracting a float multiply into an FMA is only permitted when
     -ffp-contract allows it.  */
  if (FLOAT_TYPE_P (type)
      && flag_fp_contract_mode == FP_CONTRACT_OFF)
    return false;

  /* We don't want to do bitfield reduction ops.  */
  if (INTEGRAL_TYPE_P (type)
      && !type_has_mode_precision_p (type))
    return false;

  /* If the target doesn't support it, don't generate it.  We assume that
     if fma isn't available then fms, fnma or fnms are not either.  */
  if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
    return false;

  /* If the multiplication has zero uses, it is kept around probably because
     of -fnon-call-exceptions.  Don't optimize it away in that case,
     it is DCE job.  */
  if (has_zero_uses (mul_result))
    return false;

  /* First pass (read-only): make sure that the multiplication statement
     becomes dead after the transformation, thus that all uses are
     transformed to FMAs.  This means we assume that an FMA operation
     has the same cost as an addition.  */
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
    {
      enum tree_code use_code;
      tree result = mul_result;
      bool negate_p = false;

      use_stmt = USE_STMT (use_p);

      /* Debug statements never block the transformation.  */
      if (is_gimple_debug (use_stmt))
	continue;

      /* For now restrict this operation to single basic blocks.  In theory
	 we would want to support sinking the multiplication in
	 m = a*b;
	 if ()
	   ma = m + c;
	 else
	   d = m;
	 to form a fma in the then block and sink the multiplication to the
	 else block.  */
      if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
	return false;

      if (!is_gimple_assign (use_stmt))
	return false;

      use_code = gimple_assign_rhs_code (use_stmt);

      /* A negate on the multiplication leads to FNMA.  */
      if (use_code == NEGATE_EXPR)
	{
	  ssa_op_iter iter;
	  use_operand_p usep;

	  result = gimple_assign_lhs (use_stmt);

	  /* Make sure the negate statement becomes dead with this
	     single transformation.  */
	  if (!single_imm_use (gimple_assign_lhs (use_stmt),
			       &use_p, &neguse_stmt))
	    return false;

	  /* Make sure the multiplication isn't also used on that stmt.  */
	  FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
	    if (USE_FROM_PTR (usep) == mul_result)
	      return false;

	  /* Re-validate: look through the negate and examine the add/sub
	     consuming it, under the same constraints as above.  */
	  use_stmt = neguse_stmt;
	  if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
	    return false;
	  if (!is_gimple_assign (use_stmt))
	    return false;

	  use_code = gimple_assign_rhs_code (use_stmt);
	  negate_p = true;
	}

      switch (use_code)
	{
	case MINUS_EXPR:
	  /* Being the subtrahend flips the effective sign.  */
	  if (gimple_assign_rhs2 (use_stmt) == result)
	    negate_p = !negate_p;
	  break;
	case PLUS_EXPR:
	  break;
	default:
	  /* FMA can only be formed from PLUS and MINUS.  */
	  return false;
	}

      /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed
	 by a MULT_EXPR that we'll visit later, we might be able to
	 get a more profitable match with fnma.
	 OTOH, if we don't, a negate / fma pair has likely lower latency
	 than a mult / subtract pair.  */
      if (use_code == MINUS_EXPR && !negate_p
	  && gimple_assign_rhs1 (use_stmt) == result
	  && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing
	  && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing)
	{
	  tree rhs2 = gimple_assign_rhs2 (use_stmt);

	  if (TREE_CODE (rhs2) == SSA_NAME)
	    {
	      gimple *stmt2 = SSA_NAME_DEF_STMT (rhs2);
	      if (has_single_use (rhs2)
		  && is_gimple_assign (stmt2)
		  && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
		return false;
	    }
	}

      /* We can't handle a * b + a * b.  */
      if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
	return false;

      /* While it is possible to validate whether or not the exact form
	 that we've recognized is available in the backend, the assumption
	 is that the transformation is never a loss.  For instance, suppose
	 the target only has the plain FMA pattern available.  Consider
	 a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which
	 is still two operations.  Consider -(a*b)-c -> fma(-a,b,-c): we
	 still have 3 operations, but in the FMA form the two NEGs are
	 independent and could be run in parallel.  */
    }

  /* Second pass (mutating): every use qualified above, so rewrite each
     one into an FMA_EXPR in place.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      enum tree_code use_code;
      tree addop, mulop1 = op1, result = mul_result;
      bool negate_p = false;

      if (is_gimple_debug (use_stmt))
	continue;

      use_code = gimple_assign_rhs_code (use_stmt);
      if (use_code == NEGATE_EXPR)
	{
	  /* Fold the negation into the FMA operands and delete the
	     now-dead negate statement.  */
	  result = gimple_assign_lhs (use_stmt);
	  single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
	  gsi_remove (&gsi, true);
	  release_defs (use_stmt);

	  use_stmt = neguse_stmt;
	  gsi = gsi_for_stmt (use_stmt);
	  use_code = gimple_assign_rhs_code (use_stmt);
	  negate_p = true;
	}

      if (gimple_assign_rhs1 (use_stmt) == result)
	{
	  addop = gimple_assign_rhs2 (use_stmt);
	  /* a * b - c -> a * b + (-c)  */
	  if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
	    addop = force_gimple_operand_gsi (&gsi,
					      build1 (NEGATE_EXPR,
						      type, addop),
					      true, NULL_TREE, true,
					      GSI_SAME_STMT);
	}
      else
	{
	  addop = gimple_assign_rhs1 (use_stmt);
	  /* a - b * c -> (-b) * c + a */
	  if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
	    negate_p = !negate_p;
	}

      if (negate_p)
	mulop1 = force_gimple_operand_gsi (&gsi,
					   build1 (NEGATE_EXPR,
						   type, mulop1),
					   true, NULL_TREE, true,
					   GSI_SAME_STMT);

      fma_stmt = gimple_build_assign (gimple_assign_lhs (use_stmt),
				      FMA_EXPR, mulop1, op2, addop);
      gsi_replace (&gsi, fma_stmt, true);
      widen_mul_stats.fmas_inserted++;
    }

  return true;
}

2843 | |

2844 | |

2845 | /* Helper function of match_uaddsub_overflow. Return 1 |

2846 | if USE_STMT is unsigned overflow check ovf != 0 for |

2847 | STMT, -1 if USE_STMT is unsigned overflow check ovf == 0 |

2848 | and 0 otherwise. */ |

2849 | |

2850 | static int |

2851 | uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt) |

2852 | { |

2853 | enum tree_code ccode = ERROR_MARK; |

2854 | tree crhs1 = NULL_TREE, crhs2 = NULL_TREE; |

2855 | if (gimple_code (use_stmt) == GIMPLE_COND) |

2856 | { |

2857 | ccode = gimple_cond_code (use_stmt); |

2858 | crhs1 = gimple_cond_lhs (use_stmt); |

2859 | crhs2 = gimple_cond_rhs (use_stmt); |

2860 | } |

2861 | else if (is_gimple_assign (use_stmt)) |

2862 | { |

2863 | if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS) |

2864 | { |

2865 | ccode = gimple_assign_rhs_code (use_stmt); |

2866 | crhs1 = gimple_assign_rhs1 (use_stmt); |

2867 | crhs2 = gimple_assign_rhs2 (use_stmt); |

2868 | } |

2869 | else if (gimple_assign_rhs_code (use_stmt) == COND_EXPR) |

2870 | { |

2871 | tree cond = gimple_assign_rhs1 (use_stmt); |

2872 | if (COMPARISON_CLASS_P (cond)) |

2873 | { |

2874 | ccode = TREE_CODE (cond); |

2875 | crhs1 = TREE_OPERAND (cond, 0); |

2876 | crhs2 = TREE_OPERAND (cond, 1); |

2877 | } |

2878 | else |

2879 | return 0; |

2880 | } |

2881 | else |

2882 | return 0; |

2883 | } |

2884 | else |

2885 | return 0; |

2886 | |

2887 | if (TREE_CODE_CLASS (ccode) != tcc_comparison) |

2888 | return 0; |

2889 | |

2890 | enum tree_code code = gimple_assign_rhs_code (stmt); |

2891 | tree lhs = gimple_assign_lhs (stmt); |

2892 | tree rhs1 = gimple_assign_rhs1 (stmt); |

2893 | tree rhs2 = gimple_assign_rhs2 (stmt); |

2894 | |

2895 | switch (ccode) |

2896 | { |

2897 | case GT_EXPR: |

2898 | case LE_EXPR: |

2899 | /* r = a - b; r > a or r <= a |

2900 | r = a + b; a > r or a <= r or b > r or b <= r. */ |

2901 | if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1) |

2902 | || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2) |

2903 | && crhs2 == lhs)) |

2904 | return ccode == GT_EXPR ? 1 : -1; |

2905 | break; |

2906 | case LT_EXPR: |

2907 | case GE_EXPR: |

2908 | /* r = a - b; a < r or a >= r |

2909 | r = a + b; r < a or r >= a or r < b or r >= b. */ |

2910 | if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs) |

2911 | || (code == PLUS_EXPR && crhs1 == lhs |

2912 | && (crhs2 == rhs1 || crhs2 == rhs2))) |

2913 | return ccode == LT_EXPR ? 1 : -1; |

2914 | break; |

2915 | default: |

2916 | break; |

2917 | } |

2918 | return 0; |

2919 | } |

2920 | |

/* Recognize for unsigned x
   x = y - z;
   if (x > y)
   where there are other uses of x and replace it with
   _7 = SUB_OVERFLOW (y, z);
   x = REALPART_EXPR <_7>;
   _8 = IMAGPART_EXPR <_7>;
   if (_8)
   and similarly for addition.  */

static bool
match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
			enum tree_code code)
{
  tree lhs = gimple_assign_lhs (stmt);
  tree type = TREE_TYPE (lhs);
  use_operand_p use_p;
  imm_use_iterator iter;
  bool use_seen = false;
  bool ovf_use_seen = false;
  gimple *use_stmt;

  gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR);
  /* Only unsigned integer types with a usable [us]{add,sub}v4 pattern
     qualify, and the result must have more than one use (see the
     header comment).  */
  if (!INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type)
      || has_zero_uses (lhs)
      || has_single_use (lhs)
      || optab_handler (code == PLUS_EXPR ? uaddv4_optab : usubv4_optab,
			TYPE_MODE (type)) == CODE_FOR_nothing)
    return false;

  /* Require at least one overflow-check use and at least one other use
     of LHS before committing to the rewrite.  */
  FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
    {
      use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))
	continue;

      if (uaddsub_overflow_check_p (stmt, use_stmt))
	ovf_use_seen = true;
      else
	use_seen = true;
      /* Both kinds found; no need to scan further.  */
      if (ovf_use_seen && use_seen)
	break;
    }

  if (!ovf_use_seen || !use_seen)
    return false;

  /* Build _7 = {ADD,SUB}_OVERFLOW (rhs1, rhs2) before STMT, replace
     STMT itself with lhs = REALPART_EXPR <_7>, and materialize the
     overflow flag as ovf = IMAGPART_EXPR <_7> right after it.  */
  tree ctype = build_complex_type (type);
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  gcall *g = gimple_build_call_internal (code == PLUS_EXPR
					 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
					 2, rhs1, rhs2);
  tree ctmp = make_ssa_name (ctype);
  gimple_call_set_lhs (g, ctmp);
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
  gassign *g2 = gimple_build_assign (lhs, REALPART_EXPR,
				     build1 (REALPART_EXPR, type, ctmp));
  gsi_replace (gsi, g2, true);
  tree ovf = make_ssa_name (type);
  g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
			    build1 (IMAGPART_EXPR, type, ctmp));
  gsi_insert_after (gsi, g2, GSI_NEW_STMT);

  /* Rewrite every overflow-check use of LHS into a test of OVF against
     zero; non-check uses keep consuming the REALPART result.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
    {
      if (is_gimple_debug (use_stmt))
	continue;

      int ovf_use = uaddsub_overflow_check_p (stmt, use_stmt);
      if (ovf_use == 0)
	continue;
      if (gimple_code (use_stmt) == GIMPLE_COND)
	{
	  gcond *cond_stmt = as_a <gcond *> (use_stmt);
	  gimple_cond_set_lhs (cond_stmt, ovf);
	  gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
	  gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
	}
      else
	{
	  gcc_checking_assert (is_gimple_assign (use_stmt));
	  if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
	    {
	      gimple_assign_set_rhs1 (use_stmt, ovf);
	      gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
	      gimple_assign_set_rhs_code (use_stmt,
					  ovf_use == 1 ? NE_EXPR : EQ_EXPR);
	    }
	  else
	    {
	      /* uaddsub_overflow_check_p only matches binary RHSes,
		 GIMPLE_CONDs and COND_EXPR conditions.  */
	      gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
				   == COND_EXPR);
	      tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
				  boolean_type_node, ovf,
				  build_int_cst (type, 0));
	      gimple_assign_set_rhs1 (use_stmt, cond);
	    }
	}
      update_stmt (use_stmt);
    }
  return true;
}

3025 | |

3026 | /* Return true if target has support for divmod. */ |

3027 | |

3028 | static bool |

3029 | target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode) |

3030 | { |

3031 | /* If target supports hardware divmod insn, use it for divmod. */ |

3032 | if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing) |

3033 | return true; |

3034 | |

3035 | /* Check if libfunc for divmod is available. */ |

3036 | rtx libfunc = optab_libfunc (divmod_optab, mode); |

3037 | if (libfunc != NULL_RTX) |

3038 | { |

3039 | /* If optab_handler exists for div_optab, perhaps in a wider mode, |

3040 | we don't want to use the libfunc even if it exists for given mode. */ |

3041 | machine_mode div_mode; |

3042 | FOR_EACH_MODE_FROM (div_mode, mode) |

3043 | if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing) |

3044 | return false; |

3045 | |

3046 | return targetm.expand_divmod_libfunc != NULL; |

3047 | } |

3048 | |

3049 | return false; |

3050 | } |

3051 | |

3052 | /* Check if stmt is candidate for divmod transform. */ |

3053 | |

3054 | static bool |

3055 | divmod_candidate_p (gassign *stmt) |

3056 | { |

3057 | tree type = TREE_TYPE (gimple_assign_lhs (stmt)); |

3058 | machine_mode mode = TYPE_MODE (type); |

3059 | optab divmod_optab, div_optab; |

3060 | |

3061 | if (TYPE_UNSIGNED (type)) |

3062 | { |

3063 | divmod_optab = udivmod_optab; |

3064 | div_optab = udiv_optab; |

3065 | } |

3066 | else |

3067 | { |

3068 | divmod_optab = sdivmod_optab; |

3069 | div_optab = sdiv_optab; |

3070 | } |

3071 | |

3072 | tree op1 = gimple_assign_rhs1 (stmt); |

3073 | tree op2 = gimple_assign_rhs2 (stmt); |

3074 | |

3075 | /* Disable the transform if either is a constant, since division-by-constant |

3076 | may have specialized expansion. */ |

3077 | if (CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2)) |

3078 | return false; |

3079 | |

3080 | /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should |

3081 | expand using the [su]divv optabs. */ |

3082 | if (TYPE_OVERFLOW_TRAPS (type)) |

3083 | return false; |

3084 | |

3085 | if (!target_supports_divmod_p (divmod_optab, div_optab, mode)) |

3086 | return false; |

3087 | |

3088 | return true; |

3089 | } |

3090 | |

/* This function looks for:
   t1 = a TRUNC_DIV_EXPR b;
   t2 = a TRUNC_MOD_EXPR b;
   and transforms it to the following sequence:
   complex_tmp = DIVMOD (a, b);
   t1 = REALPART_EXPR(complex_tmp);
   t2 = IMAGPART_EXPR(complex_tmp);
   For conditions enabling the transform see divmod_candidate_p().

   The pass has three parts:
   1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
      other trunc_div_expr and trunc_mod_expr stmts.
   2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
      to stmts vector.
   3) Insert DIVMOD call just before top_stmt and update entries in
      stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
      IMAGPART_EXPR for mod).  */

static bool
convert_to_divmod (gassign *stmt)
{
  /* Throwing statements cannot be rewritten into a nothrow DIVMOD call.  */
  if (stmt_can_throw_internal (stmt)
      || !divmod_candidate_p (stmt))
    return false;

  tree op1 = gimple_assign_rhs1 (stmt);
  tree op2 = gimple_assign_rhs2 (stmt);

  imm_use_iterator use_iter;
  gimple *use_stmt;
  auto_vec<gimple *> stmts;

  gimple *top_stmt = stmt;
  basic_block top_bb = gimple_bb (stmt);

  /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
     at-least stmt and possibly other trunc_div/trunc_mod stmts
     having same operands as stmt.  */

  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
    {
      if (is_gimple_assign (use_stmt)
	  && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
	      || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
	  && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
	  && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
	{
	  if (stmt_can_throw_internal (use_stmt))
	    continue;

	  basic_block bb = gimple_bb (use_stmt);

	  if (bb == top_bb)
	    {
	      /* Within one block, statement uids give the ordering.  */
	      if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
		top_stmt = use_stmt;
	    }
	  else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
	    {
	      top_bb = bb;
	      top_stmt = use_stmt;
	    }
	}
    }

  tree top_op1 = gimple_assign_rhs1 (top_stmt);
  tree top_op2 = gimple_assign_rhs2 (top_stmt);

  stmts.safe_push (top_stmt);
  bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);

  /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
     to stmts vector.  The 2nd loop will always add stmt to stmts vector, since
     gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
     2nd loop ends up adding at-least single trunc_mod_expr stmt.  */

  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
    {
      if (is_gimple_assign (use_stmt)
	  && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
	      || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
	  && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
	  && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
	{
	  if (use_stmt == top_stmt
	      || stmt_can_throw_internal (use_stmt)
	      || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
	    continue;

	  stmts.safe_push (use_stmt);
	  if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
	    div_seen = true;
	}
    }

  /* Without at least one division, replacing a lone mod does not pay.  */
  if (!div_seen)
    return false;

  /* Part 3: Create libcall to internal fn DIVMOD:
     divmod_tmp = DIVMOD (op1, op2).  */

  gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
  tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
				 call_stmt, "divmod_tmp");
  gimple_call_set_lhs (call_stmt, res);
  /* We rejected throwing statements above.  */
  gimple_call_set_nothrow (call_stmt, true);

  /* Insert the call before top_stmt.  */
  gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
  gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);

  widen_mul_stats.divmod_calls_inserted++;

  /* Update all statements in stmts vector:
     lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
     lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>.  */

  for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
    {
      tree new_rhs;

      switch (gimple_assign_rhs_code (use_stmt))
	{
	case TRUNC_DIV_EXPR:
	  new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
	  break;

	case TRUNC_MOD_EXPR:
	  new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
	  break;

	default:
	  gcc_unreachable ();
	}

      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
      update_stmt (use_stmt);
    }

  return true;
}

3234 | |

3235 | /* Find integer multiplications where the operands are extended from |

3236 | smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR |

3237 | where appropriate. */ |

3238 | |

3239 | namespace { |

3240 | |

/* Pass descriptor for the widening_mul pass.  */
const pass_data pass_data_optimize_widening_mul =
{
  GIMPLE_PASS, /* type */
  "widening_mul", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_WIDEN_MUL, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

3253 | |

/* The widening_mul pass object; gated on optimization being enabled
   together with -fexpensive-optimizations.  */
class pass_optimize_widening_mul : public gimple_opt_pass
{
public:
  pass_optimize_widening_mul (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_expensive_optimizations && optimize;
    }

  virtual unsigned int execute (function *);

}; // class pass_optimize_widening_mul

3270 | |

/* Walk every statement of FUN, attempting to strength-reduce multiplies
   into widening multiplies or FMAs, plus/minus into widening
   multiply-accumulates or overflow-check patterns, trunc-mod into DIVMOD
   calls, and pow (x, 2.0) builtin calls into FMAs.  Returns
   TODO_cleanup_cfg when statement removal changed the CFG.  */

unsigned int
pass_optimize_widening_mul::execute (function *fun)
{
  basic_block bb;
  bool cfg_changed = false;

  memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
  /* Dominators and fresh stmt uids are used when ordering equivalent
     div/mod statements (see convert_to_divmod).  */
  calculate_dominance_info (CDI_DOMINATORS);
  renumber_gimple_stmt_uids ();

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
	{
	  gimple *stmt = gsi_stmt (gsi);
	  enum tree_code code;

	  if (is_gimple_assign (stmt))
	    {
	      code = gimple_assign_rhs_code (stmt);
	      switch (code)
		{
		case MULT_EXPR:
		  /* Try widening mult and copysign expansion first;
		     if FMA formation succeeds instead, the original
		     multiply is dead and is removed here.  */
		  if (!convert_mult_to_widen (stmt, &gsi)
		      && !convert_expand_mult_copysign (stmt, &gsi)
		      && convert_mult_to_fma (stmt,
					      gimple_assign_rhs1 (stmt),
					      gimple_assign_rhs2 (stmt)))
		    {
		      gsi_remove (&gsi, true);
		      release_defs (stmt);
		      /* Iterator already advanced by gsi_remove.  */
		      continue;
		    }
		  break;

		case PLUS_EXPR:
		case MINUS_EXPR:
		  if (!convert_plusminus_to_widen (&gsi, stmt, code))
		    match_uaddsub_overflow (&gsi, stmt, code);
		  break;

		case TRUNC_MOD_EXPR:
		  convert_to_divmod (as_a<gassign *> (stmt));
		  break;

		default:;
		}
	    }
	  else if (is_gimple_call (stmt)
		   && gimple_call_lhs (stmt))
	    {
	      tree fndecl = gimple_call_fndecl (stmt);
	      if (fndecl
		  && gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
		{
		  switch (DECL_FUNCTION_CODE (fndecl))
		    {
		    case BUILT_IN_POWF:
		    case BUILT_IN_POW:
		    case BUILT_IN_POWL:
		      /* pow (x, 2.0) is x * x; try to fuse that square
			 into an FMA and delete the call on success.  */
		      if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
			  && real_equal
			       (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
				&dconst2)
			  && convert_mult_to_fma (stmt,
						  gimple_call_arg (stmt, 0),
						  gimple_call_arg (stmt, 0)))
			{
			  unlink_stmt_vdef (stmt);
			  /* Removing a potentially-throwing call may
			     leave dead EH edges behind.  */
			  if (gsi_remove (&gsi, true)
			      && gimple_purge_dead_eh_edges (bb))
			    cfg_changed = true;
			  release_defs (stmt);
			  continue;
			}
		      break;

		    default:;
		    }
		}
	    }
	  gsi_next (&gsi);
	}
    }

  statistics_counter_event (fun, "widening multiplications inserted",
			    widen_mul_stats.widen_mults_inserted);
  statistics_counter_event (fun, "widening maccs inserted",
			    widen_mul_stats.maccs_inserted);
  statistics_counter_event (fun, "fused multiply-adds inserted",
			    widen_mul_stats.fmas_inserted);
  statistics_counter_event (fun, "divmod calls inserted",
			    widen_mul_stats.divmod_calls_inserted);

  return cfg_changed ? TODO_cleanup_cfg : 0;
}

3369 | |

3370 | } // anon namespace |

3371 | |

3372 | gimple_opt_pass * |

3373 | make_pass_optimize_widening_mul (gcc::context *ctxt) |

3374 | { |

3375 | return new pass_optimize_widening_mul (ctxt); |

3376 | } |

3377 |