1 | /* Global, SSA-based optimizations using mathematical identities. |
2 | Copyright (C) 2005-2017 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify it |
7 | under the terms of the GNU General Public License as published by the |
8 | Free Software Foundation; either version 3, or (at your option) any |
9 | later version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, but WITHOUT |
12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
/* Currently, this file contains several mini-passes; the first one tries
to CSE reciprocal operations. These are common in sequences such as
this one:
22 | |
23 | modulus = sqrt(x*x + y*y + z*z); |
24 | x = x / modulus; |
25 | y = y / modulus; |
26 | z = z / modulus; |
27 | |
28 | that can be optimized to |
29 | |
30 | modulus = sqrt(x*x + y*y + z*z); |
31 | rmodulus = 1.0 / modulus; |
32 | x = x * rmodulus; |
33 | y = y * rmodulus; |
34 | z = z * rmodulus; |
35 | |
We do this for loop-invariant divisors, and with this pass whenever
we notice that a division has the same divisor multiple times.
38 | |
39 | Of course, like in PRE, we don't insert a division if a dominator |
40 | already has one. However, this cannot be done as an extension of |
41 | PRE for several reasons. |
42 | |
First of all, experiments showed that the transformation is not
always useful if there are only two divisions by the same divisor.
This is probably because modern processors
46 | can pipeline the divisions; on older, in-order processors it should |
47 | still be effective to optimize two divisions by the same number. |
48 | We make this a param, and it shall be called N in the remainder of |
49 | this comment. |
50 | |
51 | Second, if trapping math is active, we have less freedom on where |
52 | to insert divisions: we can only do so in basic blocks that already |
contain one. (If divisions don't trap, on the other hand, we can
insert divisions elsewhere, namely in blocks that are common dominators
of those that contain a division.)
56 | |
57 | We really don't want to compute the reciprocal unless a division will |
58 | be found. To do this, we won't insert the division in a basic block |
59 | that has less than N divisions *post-dominating* it. |
60 | |
The algorithm constructs a subset of the dominator tree, holding the
blocks containing the divisions and their common dominators,
and walks it twice. The first walk is in post-order, and it annotates
64 | each block with the number of divisions that post-dominate it: this |
65 | gives information on where divisions can be inserted profitably. |
66 | The second walk is in pre-order, and it inserts divisions as explained |
67 | above, and replaces divisions by multiplications. |
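
As an illustrative sketch (not part of the original description),
suppose blocks B2 and B3 each contain one division by X and their
nearest common dominator is B1. The subset then consists of B1 with
children B2 and B3. The post-order walk credits B1 with both
divisions provided they post-dominate it, and the pre-order walk
inserts 1.0/X in B1 (or, when trapping math forbids that, in B2 and
B3 themselves) before rewriting each division into a multiplication.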
68 | |
69 | In the best case, the cost of the pass is O(n_statements). In the |
70 | worst-case, the cost is due to creating the dominator tree subset, |
71 | with a cost of O(n_basic_blocks ^ 2); however this can only happen |
72 | for n_statements / n_basic_blocks statements. So, the amortized cost |
73 | of creating the dominator tree subset is O(n_basic_blocks) and the |
74 | worst-case cost of the pass is O(n_statements * n_basic_blocks). |
75 | |
76 | More practically, the cost will be small because there are few |
77 | divisions, and they tend to be in the same basic block, so insert_bb |
78 | is called very few times. |
79 | |
80 | If we did this using domwalk.c, an efficient implementation would have |
81 | to work on all the variables in a single pass, because we could not |
82 | work on just a subset of the dominator tree, as we do now, and the |
83 | cost would also be something like O(n_statements * n_basic_blocks). |
84 | The data structures would be more complex in order to work on all the |
85 | variables in a single pass. */ |
86 | |
87 | #include "config.h" |
88 | #include "system.h" |
89 | #include "coretypes.h" |
90 | #include "backend.h" |
91 | #include "target.h" |
92 | #include "rtl.h" |
93 | #include "tree.h" |
94 | #include "gimple.h" |
95 | #include "predict.h" |
96 | #include "alloc-pool.h" |
97 | #include "tree-pass.h" |
98 | #include "ssa.h" |
99 | #include "optabs-tree.h" |
100 | #include "gimple-pretty-print.h" |
101 | #include "alias.h" |
102 | #include "fold-const.h" |
103 | #include "gimple-fold.h" |
104 | #include "gimple-iterator.h" |
105 | #include "gimplify.h" |
106 | #include "gimplify-me.h" |
107 | #include "stor-layout.h" |
108 | #include "tree-cfg.h" |
109 | #include "tree-dfa.h" |
110 | #include "tree-ssa.h" |
111 | #include "builtins.h" |
112 | #include "params.h" |
113 | #include "internal-fn.h" |
114 | #include "case-cfn-macros.h" |
115 | #include "optabs-libfuncs.h" |
116 | #include "tree-eh.h" |
117 | #include "targhooks.h" |
118 | |
/* This structure represents one basic block that either computes a
division, or is a common dominator for basic blocks that compute a
division. */
122 | struct occurrence { |
123 | /* The basic block represented by this structure. */ |
124 | basic_block bb; |
125 | |
126 | /* If non-NULL, the SSA_NAME holding the definition for a reciprocal |
127 | inserted in BB. */ |
128 | tree recip_def; |
129 | |
130 | /* If non-NULL, the SSA_NAME holding the definition for a squared |
131 | reciprocal inserted in BB. */ |
132 | tree square_recip_def; |
133 | |
134 | /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that |
135 | was inserted in BB. */ |
136 | gimple *recip_def_stmt; |
137 | |
138 | /* Pointer to a list of "struct occurrence"s for blocks dominated |
139 | by BB. */ |
140 | struct occurrence *children; |
141 | |
/* Pointer to the next "struct occurrence" in the list of blocks
sharing a common dominator. */
144 | struct occurrence *next; |
145 | |
146 | /* The number of divisions that are in BB before compute_merit. The |
147 | number of divisions that are in BB or post-dominate it after |
148 | compute_merit. */ |
149 | int num_divisions; |
150 | |
151 | /* True if the basic block has a division, false if it is a common |
152 | dominator for basic blocks that do. If it is false and trapping |
153 | math is active, BB is not a candidate for inserting a reciprocal. */ |
154 | bool bb_has_division; |
155 | }; |
156 | |
157 | static struct |
158 | { |
159 | /* Number of 1.0/X ops inserted. */ |
160 | int rdivs_inserted; |
161 | |
162 | /* Number of 1.0/FUNC ops inserted. */ |
163 | int rfuncs_inserted; |
164 | } reciprocal_stats; |
165 | |
166 | static struct |
167 | { |
168 | /* Number of cexpi calls inserted. */ |
169 | int inserted; |
170 | } sincos_stats; |
171 | |
172 | static struct |
173 | { |
174 | /* Number of widening multiplication ops inserted. */ |
175 | int widen_mults_inserted; |
176 | |
177 | /* Number of integer multiply-and-accumulate ops inserted. */ |
178 | int maccs_inserted; |
179 | |
180 | /* Number of fp fused multiply-add ops inserted. */ |
181 | int fmas_inserted; |
182 | |
183 | /* Number of divmod calls inserted. */ |
184 | int divmod_calls_inserted; |
185 | } widen_mul_stats; |
186 | |
187 | /* The instance of "struct occurrence" representing the highest |
188 | interesting block in the dominator tree. */ |
189 | static struct occurrence *occ_head; |
190 | |
191 | /* Allocation pool for getting instances of "struct occurrence". */ |
192 | static object_allocator<occurrence> *occ_pool; |
193 | |
194 | |
195 | |
/* Allocate and return a new struct occurrence for basic block BB,
whose children list is headed by CHILDREN. */
198 | static struct occurrence * |
199 | occ_new (basic_block bb, struct occurrence *children) |
200 | { |
201 | struct occurrence *occ; |
202 | |
203 | bb->aux = occ = occ_pool->allocate (); |
204 | memset (occ, 0, sizeof (struct occurrence)); |
205 | |
206 | occ->bb = bb; |
207 | occ->children = children; |
208 | return occ; |
209 | } |
210 | |
211 | |
212 | /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a |
213 | list of "struct occurrence"s, one per basic block, having IDOM as |
214 | their common dominator. |
215 | |
216 | We try to insert NEW_OCC as deep as possible in the tree, and we also |
217 | insert any other block that is a common dominator for BB and one |
218 | block already in the tree. */ |
219 | |
220 | static void |
221 | insert_bb (struct occurrence *new_occ, basic_block idom, |
222 | struct occurrence **p_head) |
223 | { |
224 | struct occurrence *occ, **p_occ; |
225 | |
226 | for (p_occ = p_head; (occ = *p_occ) != NULL; ) |
227 | { |
228 | basic_block bb = new_occ->bb, occ_bb = occ->bb; |
229 | basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb); |
230 | if (dom == bb) |
231 | { |
232 | /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC |
233 | from its list. */ |
234 | *p_occ = occ->next; |
235 | occ->next = new_occ->children; |
236 | new_occ->children = occ; |
237 | |
238 | /* Try the next block (it may as well be dominated by BB). */ |
239 | } |
240 | |
241 | else if (dom == occ_bb) |
242 | { |
243 | /* OCC_BB dominates BB. Tail recurse to look deeper. */ |
244 | insert_bb (new_occ, dom, &occ->children); |
245 | return; |
246 | } |
247 | |
248 | else if (dom != idom) |
249 | { |
250 | gcc_assert (!dom->aux); |
251 | |
252 | /* There is a dominator between IDOM and BB, add it and make |
253 | two children out of NEW_OCC and OCC. First, remove OCC from |
254 | its list. */ |
255 | *p_occ = occ->next; |
256 | new_occ->next = occ; |
257 | occ->next = NULL; |
258 | |
259 | /* None of the previous blocks has DOM as a dominator: if we tail |
260 | recursed, we would reexamine them uselessly. Just switch BB with |
261 | DOM, and go on looking for blocks dominated by DOM. */ |
262 | new_occ = occ_new (dom, new_occ); |
263 | } |
264 | |
265 | else |
266 | { |
267 | /* Nothing special, go on with the next element. */ |
268 | p_occ = &occ->next; |
269 | } |
270 | } |
271 | |
272 | /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */ |
273 | new_occ->next = *p_head; |
274 | *p_head = new_occ; |
275 | } |
276 | |
277 | /* Register that we found a division in BB. |
278 | IMPORTANCE is a measure of how much weighting to give |
279 | that division. Use IMPORTANCE = 2 to register a single |
280 | division. If the division is going to be found multiple |
281 | times use 1 (as it is with squares). */ |
282 | |
283 | static inline void |
284 | register_division_in (basic_block bb, int importance) |
285 | { |
286 | struct occurrence *occ; |
287 | |
288 | occ = (struct occurrence *) bb->aux; |
289 | if (!occ) |
290 | { |
291 | occ = occ_new (bb, NULL); |
292 | insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head); |
293 | } |
294 | |
295 | occ->bb_has_division = true; |
296 | occ->num_divisions += importance; |
297 | } |
298 | |
299 | |
300 | /* Compute the number of divisions that postdominate each block in OCC and |
301 | its children. */ |
302 | |
303 | static void |
304 | compute_merit (struct occurrence *occ) |
305 | { |
306 | struct occurrence *occ_child; |
307 | basic_block dom = occ->bb; |
308 | |
309 | for (occ_child = occ->children; occ_child; occ_child = occ_child->next) |
310 | { |
311 | basic_block bb; |
312 | if (occ_child->children) |
313 | compute_merit (occ_child); |
314 | |
315 | if (flag_exceptions) |
316 | bb = single_noncomplex_succ (dom); |
317 | else |
318 | bb = dom; |
319 | |
320 | if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb)) |
321 | occ->num_divisions += occ_child->num_divisions; |
322 | } |
323 | } |
324 | |
325 | |
326 | /* Return whether USE_STMT is a floating-point division by DEF. */ |
327 | static inline bool |
328 | is_division_by (gimple *use_stmt, tree def) |
329 | { |
330 | return is_gimple_assign (use_stmt) |
331 | && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR |
332 | && gimple_assign_rhs2 (use_stmt) == def |
/* Do not recognize x / x as a valid division, as we would get
confused later by replacing all immediate uses of x in such
a stmt. */
336 | && gimple_assign_rhs1 (use_stmt) != def; |
337 | } |
338 | |
339 | /* Return whether USE_STMT is DEF * DEF. */ |
340 | static inline bool |
341 | is_square_of (gimple *use_stmt, tree def) |
342 | { |
343 | if (gimple_code (use_stmt) == GIMPLE_ASSIGN |
344 | && gimple_assign_rhs_code (use_stmt) == MULT_EXPR) |
345 | { |
346 | tree op0 = gimple_assign_rhs1 (use_stmt); |
347 | tree op1 = gimple_assign_rhs2 (use_stmt); |
348 | |
349 | return op0 == op1 && op0 == def; |
350 | } |
return false;
352 | } |
353 | |
354 | /* Return whether USE_STMT is a floating-point division by |
355 | DEF * DEF. */ |
356 | static inline bool |
357 | is_division_by_square (gimple *use_stmt, tree def) |
358 | { |
359 | if (gimple_code (use_stmt) == GIMPLE_ASSIGN |
360 | && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR |
361 | && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)) |
362 | { |
363 | tree denominator = gimple_assign_rhs2 (use_stmt); |
364 | if (TREE_CODE (denominator) == SSA_NAME) |
365 | { |
366 | return is_square_of (SSA_NAME_DEF_STMT (denominator), def); |
367 | } |
368 | } |
return false;
370 | } |
371 | |
372 | /* Walk the subset of the dominator tree rooted at OCC, setting the |
373 | RECIP_DEF field to a definition of 1.0 / DEF that can be used in |
374 | the given basic block. The field may be left NULL, of course, |
375 | if it is not possible or profitable to do the optimization. |
376 | |
DEF_GSI is an iterator pointing at the statement defining DEF.
If RECIP_DEF is set, a dominator already has a computation that can
be used.

If SHOULD_INSERT_SQUARE_RECIP is set, then this also inserts
the square of the reciprocal immediately after the definition
of the reciprocal. */
384 | |
385 | static void |
386 | insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ, |
387 | tree def, tree recip_def, tree square_recip_def, |
388 | int should_insert_square_recip, int threshold) |
389 | { |
390 | tree type; |
391 | gassign *new_stmt, *new_square_stmt; |
392 | gimple_stmt_iterator gsi; |
393 | struct occurrence *occ_child; |
394 | |
395 | if (!recip_def |
396 | && (occ->bb_has_division || !flag_trapping_math) |
397 | /* Divide by two as all divisions are counted twice in |
398 | the costing loop. */ |
399 | && occ->num_divisions / 2 >= threshold) |
400 | { |
401 | /* Make a variable with the replacement and substitute it. */ |
402 | type = TREE_TYPE (def); |
recip_def = create_tmp_reg (type, "reciptmp");
404 | new_stmt = gimple_build_assign (recip_def, RDIV_EXPR, |
405 | build_one_cst (type), def); |
406 | |
407 | if (should_insert_square_recip) |
408 | { |
square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
410 | new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR, |
411 | recip_def, recip_def); |
412 | } |
413 | |
if (occ->bb_has_division)
{
/* Case 1: insert before an existing division. */
gsi = gsi_after_labels (occ->bb);
while (!gsi_end_p (gsi)
&& (!is_division_by (gsi_stmt (gsi), def))
&& (!is_division_by_square (gsi_stmt (gsi), def)))
gsi_next (&gsi);

gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
if (should_insert_square_recip)
gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
}
else if (def_gsi && occ->bb == def_gsi->bb)
{
/* Case 2: insert right after the definition. Note that this will
never happen if the definition statement can throw, because in
that case the sole successor of the statement's basic block will
dominate all the uses as well. */
gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
if (should_insert_square_recip)
gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
}
else
{
/* Case 3: insert in a basic block not containing defs/uses. */
gsi = gsi_after_labels (occ->bb);
gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
if (should_insert_square_recip)
gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
}

/* Regardless of which case applied, the square was inserted
immediately after the reciprocal it depends on. */
445 | |
446 | reciprocal_stats.rdivs_inserted++; |
447 | |
448 | occ->recip_def_stmt = new_stmt; |
449 | } |
450 | |
451 | occ->recip_def = recip_def; |
452 | occ->square_recip_def = square_recip_def; |
453 | for (occ_child = occ->children; occ_child; occ_child = occ_child->next) |
454 | insert_reciprocals (def_gsi, occ_child, def, recip_def, |
455 | square_recip_def, should_insert_square_recip, |
456 | threshold); |
457 | } |
458 | |
459 | /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)). |
460 | Take as argument the use for (x * x). */ |
461 | static inline void |
462 | replace_reciprocal_squares (use_operand_p use_p) |
463 | { |
464 | gimple *use_stmt = USE_STMT (use_p); |
465 | basic_block bb = gimple_bb (use_stmt); |
466 | struct occurrence *occ = (struct occurrence *) bb->aux; |
467 | |
468 | if (optimize_bb_for_speed_p (bb) && occ->square_recip_def |
469 | && occ->recip_def) |
470 | { |
471 | gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); |
472 | gimple_assign_set_rhs_code (use_stmt, MULT_EXPR); |
473 | gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def); |
474 | SET_USE (use_p, occ->square_recip_def); |
475 | fold_stmt_inplace (&gsi); |
476 | update_stmt (use_stmt); |
477 | } |
478 | } |
479 | |
480 | |
481 | /* Replace the division at USE_P with a multiplication by the reciprocal, if |
482 | possible. */ |
483 | |
484 | static inline void |
485 | replace_reciprocal (use_operand_p use_p) |
486 | { |
487 | gimple *use_stmt = USE_STMT (use_p); |
488 | basic_block bb = gimple_bb (use_stmt); |
489 | struct occurrence *occ = (struct occurrence *) bb->aux; |
490 | |
491 | if (optimize_bb_for_speed_p (bb) |
492 | && occ->recip_def && use_stmt != occ->recip_def_stmt) |
493 | { |
494 | gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); |
495 | gimple_assign_set_rhs_code (use_stmt, MULT_EXPR); |
496 | SET_USE (use_p, occ->recip_def); |
497 | fold_stmt_inplace (&gsi); |
498 | update_stmt (use_stmt); |
499 | } |
500 | } |
501 | |
502 | |
503 | /* Free OCC and return one more "struct occurrence" to be freed. */ |
504 | |
505 | static struct occurrence * |
506 | free_bb (struct occurrence *occ) |
507 | { |
508 | struct occurrence *child, *next; |
509 | |
510 | /* First get the two pointers hanging off OCC. */ |
511 | next = occ->next; |
512 | child = occ->children; |
513 | occ->bb->aux = NULL; |
514 | occ_pool->remove (occ); |
515 | |
516 | /* Now ensure that we don't recurse unless it is necessary. */ |
517 | if (!child) |
518 | return next; |
519 | else |
520 | { |
521 | while (next) |
522 | next = free_bb (next); |
523 | |
524 | return child; |
525 | } |
526 | } |
527 | |
528 | |
529 | /* Look for floating-point divisions among DEF's uses, and try to |
530 | replace them by multiplications with the reciprocal. Add |
531 | as many statements computing the reciprocal as needed. |
532 | |
533 | DEF must be a GIMPLE register of a floating-point type. */ |
534 | |
535 | static void |
536 | execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def) |
537 | { |
538 | use_operand_p use_p, square_use_p; |
539 | imm_use_iterator use_iter, square_use_iter; |
540 | tree square_def; |
541 | struct occurrence *occ; |
542 | int count = 0; |
543 | int threshold; |
544 | int square_recip_count = 0; |
545 | int sqrt_recip_count = 0; |
546 | |
547 | gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def)); |
548 | threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def))); |
549 | |
/* If this is a square (x * x), we should check whether there are
enough divisions by x itself to warrant waiting for that pass. */
552 | if (TREE_CODE (def) == SSA_NAME) |
553 | { |
554 | gimple *def_stmt = SSA_NAME_DEF_STMT (def); |
555 | |
556 | if (is_gimple_assign (def_stmt) |
557 | && gimple_assign_rhs_code (def_stmt) == MULT_EXPR |
558 | && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt)) |
559 | { |
560 | /* This statement is a square of something. We should take this |
into account, as it may be more profitable not to extract
562 | the reciprocal here. */ |
563 | tree op0 = gimple_assign_rhs1 (def_stmt); |
564 | FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0) |
565 | { |
566 | gimple *use_stmt = USE_STMT (use_p); |
567 | if (is_division_by (use_stmt, op0)) |
sqrt_recip_count++;
569 | } |
570 | } |
571 | } |
572 | |
573 | FOR_EACH_IMM_USE_FAST (use_p, use_iter, def) |
574 | { |
575 | gimple *use_stmt = USE_STMT (use_p); |
576 | if (is_division_by (use_stmt, def)) |
577 | { |
578 | register_division_in (gimple_bb (use_stmt), 2); |
579 | count++; |
580 | } |
581 | |
582 | if (is_square_of (use_stmt, def)) |
583 | { |
584 | square_def = gimple_assign_lhs (use_stmt); |
585 | FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def) |
586 | { |
587 | gimple *square_use_stmt = USE_STMT (square_use_p); |
588 | if (is_division_by (square_use_stmt, square_def)) |
589 | { |
590 | /* Halve the relative importance as this is called twice |
591 | for each division by a square. */ |
592 | register_division_in (gimple_bb (square_use_stmt), 1); |
square_recip_count++;
594 | } |
595 | } |
596 | } |
597 | } |
598 | |
599 | /* Square reciprocals will have been counted twice. */ |
600 | square_recip_count /= 2; |
601 | |
602 | if (sqrt_recip_count > square_recip_count) |
603 | /* It will be more profitable to extract a 1 / x expression later, |
604 | so it is not worth attempting to extract 1 / (x * x) now. */ |
605 | return; |
606 | |
607 | /* Do the expensive part only if we can hope to optimize something. */ |
608 | if (count + square_recip_count >= threshold |
609 | && count >= 1) |
610 | { |
611 | gimple *use_stmt; |
612 | for (occ = occ_head; occ; occ = occ->next) |
613 | { |
614 | compute_merit (occ); |
615 | insert_reciprocals (def_gsi, occ, def, NULL, NULL, |
616 | square_recip_count, threshold); |
617 | } |
618 | |
619 | FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def) |
620 | { |
621 | if (is_division_by (use_stmt, def)) |
622 | { |
623 | FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter) |
624 | replace_reciprocal (use_p); |
625 | } |
626 | else if (square_recip_count > 0 |
627 | && is_square_of (use_stmt, def)) |
628 | { |
629 | FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter) |
630 | { |
/* Find all uses of the square that are divisions and
replace them by multiplications with the inverse. */
633 | imm_use_iterator square_iterator; |
634 | gimple *powmult_use_stmt = USE_STMT (use_p); |
635 | tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt); |
636 | |
637 | FOR_EACH_IMM_USE_STMT (powmult_use_stmt, |
638 | square_iterator, powmult_def_name) |
639 | FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator) |
640 | { |
641 | gimple *powmult_use_stmt = USE_STMT (square_use_p); |
642 | if (is_division_by (powmult_use_stmt, powmult_def_name)) |
643 | replace_reciprocal_squares (square_use_p); |
644 | } |
645 | } |
646 | } |
647 | } |
648 | } |
649 | |
650 | for (occ = occ_head; occ; ) |
651 | occ = free_bb (occ); |
652 | |
653 | occ_head = NULL; |
654 | } |
655 | |
656 | /* Return an internal function that implements the reciprocal of CALL, |
657 | or IFN_LAST if there is no such function that the target supports. */ |
658 | |
659 | internal_fn |
660 | internal_fn_reciprocal (gcall *call) |
661 | { |
662 | internal_fn ifn; |
663 | |
664 | switch (gimple_call_combined_fn (call)) |
665 | { |
666 | CASE_CFN_SQRT: |
667 | CASE_CFN_SQRT_FN: |
668 | ifn = IFN_RSQRT; |
669 | break; |
670 | |
671 | default: |
672 | return IFN_LAST; |
673 | } |
674 | |
675 | tree_pair types = direct_internal_fn_types (ifn, call); |
676 | if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED)) |
677 | return IFN_LAST; |
678 | |
679 | return ifn; |
680 | } |
681 | |
682 | /* Go through all the floating-point SSA_NAMEs, and call |
683 | execute_cse_reciprocals_1 on each of them. */ |
684 | namespace { |
685 | |
686 | const pass_data pass_data_cse_reciprocals = |
687 | { |
688 | GIMPLE_PASS, /* type */ |
689 | "recip" , /* name */ |
690 | OPTGROUP_NONE, /* optinfo_flags */ |
691 | TV_TREE_RECIP, /* tv_id */ |
692 | PROP_ssa, /* properties_required */ |
693 | 0, /* properties_provided */ |
694 | 0, /* properties_destroyed */ |
695 | 0, /* todo_flags_start */ |
696 | TODO_update_ssa, /* todo_flags_finish */ |
697 | }; |
698 | |
699 | class pass_cse_reciprocals : public gimple_opt_pass |
700 | { |
701 | public: |
702 | pass_cse_reciprocals (gcc::context *ctxt) |
703 | : gimple_opt_pass (pass_data_cse_reciprocals, ctxt) |
704 | {} |
705 | |
706 | /* opt_pass methods: */ |
707 | virtual bool gate (function *) { return optimize && flag_reciprocal_math; } |
708 | virtual unsigned int execute (function *); |
709 | |
710 | }; // class pass_cse_reciprocals |
711 | |
712 | unsigned int |
713 | pass_cse_reciprocals::execute (function *fun) |
714 | { |
715 | basic_block bb; |
716 | tree arg; |
717 | |
occ_pool = new object_allocator<occurrence> ("dominators for recip");
719 | |
720 | memset (&reciprocal_stats, 0, sizeof (reciprocal_stats)); |
721 | calculate_dominance_info (CDI_DOMINATORS); |
722 | calculate_dominance_info (CDI_POST_DOMINATORS); |
723 | |
724 | if (flag_checking) |
725 | FOR_EACH_BB_FN (bb, fun) |
726 | gcc_assert (!bb->aux); |
727 | |
728 | for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg)) |
729 | if (FLOAT_TYPE_P (TREE_TYPE (arg)) |
730 | && is_gimple_reg (arg)) |
731 | { |
732 | tree name = ssa_default_def (fun, arg); |
733 | if (name) |
734 | execute_cse_reciprocals_1 (NULL, name); |
735 | } |
736 | |
737 | FOR_EACH_BB_FN (bb, fun) |
738 | { |
739 | tree def; |
740 | |
741 | for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); |
742 | gsi_next (&gsi)) |
743 | { |
744 | gphi *phi = gsi.phi (); |
745 | def = PHI_RESULT (phi); |
746 | if (! virtual_operand_p (def) |
747 | && FLOAT_TYPE_P (TREE_TYPE (def))) |
748 | execute_cse_reciprocals_1 (NULL, def); |
749 | } |
750 | |
751 | for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi); |
752 | gsi_next (&gsi)) |
753 | { |
754 | gimple *stmt = gsi_stmt (gsi); |
755 | |
756 | if (gimple_has_lhs (stmt) |
757 | && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL |
758 | && FLOAT_TYPE_P (TREE_TYPE (def)) |
759 | && TREE_CODE (def) == SSA_NAME) |
760 | execute_cse_reciprocals_1 (&gsi, def); |
761 | } |
762 | |
763 | if (optimize_bb_for_size_p (bb)) |
764 | continue; |
765 | |
766 | /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */ |
767 | for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi); |
768 | gsi_next (&gsi)) |
769 | { |
770 | gimple *stmt = gsi_stmt (gsi); |
771 | |
772 | if (is_gimple_assign (stmt) |
773 | && gimple_assign_rhs_code (stmt) == RDIV_EXPR) |
774 | { |
775 | tree arg1 = gimple_assign_rhs2 (stmt); |
776 | gimple *stmt1; |
777 | |
778 | if (TREE_CODE (arg1) != SSA_NAME) |
779 | continue; |
780 | |
781 | stmt1 = SSA_NAME_DEF_STMT (arg1); |
782 | |
783 | if (is_gimple_call (stmt1) |
784 | && gimple_call_lhs (stmt1)) |
785 | { |
786 | bool fail; |
787 | imm_use_iterator ui; |
788 | use_operand_p use_p; |
789 | tree fndecl = NULL_TREE; |
790 | |
791 | gcall *call = as_a <gcall *> (stmt1); |
792 | internal_fn ifn = internal_fn_reciprocal (call); |
793 | if (ifn == IFN_LAST) |
794 | { |
795 | fndecl = gimple_call_fndecl (call); |
796 | if (!fndecl |
797 | || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_MD) |
798 | continue; |
799 | fndecl = targetm.builtin_reciprocal (fndecl); |
800 | if (!fndecl) |
801 | continue; |
802 | } |
803 | |
804 | /* Check that all uses of the SSA name are divisions, |
805 | otherwise replacing the defining statement will do |
806 | the wrong thing. */ |
807 | fail = false; |
808 | FOR_EACH_IMM_USE_FAST (use_p, ui, arg1) |
809 | { |
810 | gimple *stmt2 = USE_STMT (use_p); |
811 | if (is_gimple_debug (stmt2)) |
812 | continue; |
813 | if (!is_gimple_assign (stmt2) |
814 | || gimple_assign_rhs_code (stmt2) != RDIV_EXPR |
815 | || gimple_assign_rhs1 (stmt2) == arg1 |
816 | || gimple_assign_rhs2 (stmt2) != arg1) |
817 | { |
818 | fail = true; |
819 | break; |
820 | } |
821 | } |
822 | if (fail) |
823 | continue; |
824 | |
825 | gimple_replace_ssa_lhs (call, arg1); |
826 | if (gimple_call_internal_p (call) != (ifn != IFN_LAST)) |
827 | { |
828 | auto_vec<tree, 4> args; |
829 | for (unsigned int i = 0; |
830 | i < gimple_call_num_args (call); i++) |
831 | args.safe_push (gimple_call_arg (call, i)); |
832 | gcall *stmt2; |
833 | if (ifn == IFN_LAST) |
834 | stmt2 = gimple_build_call_vec (fndecl, args); |
835 | else |
836 | stmt2 = gimple_build_call_internal_vec (ifn, args); |
837 | gimple_call_set_lhs (stmt2, arg1); |
838 | if (gimple_vdef (call)) |
839 | { |
840 | gimple_set_vdef (stmt2, gimple_vdef (call)); |
841 | SSA_NAME_DEF_STMT (gimple_vdef (stmt2)) = stmt2; |
842 | } |
843 | gimple_call_set_nothrow (stmt2, |
844 | gimple_call_nothrow_p (call)); |
845 | gimple_set_vuse (stmt2, gimple_vuse (call)); |
846 | gimple_stmt_iterator gsi2 = gsi_for_stmt (call); |
847 | gsi_replace (&gsi2, stmt2, true); |
848 | } |
849 | else |
850 | { |
851 | if (ifn == IFN_LAST) |
852 | gimple_call_set_fndecl (call, fndecl); |
853 | else |
854 | gimple_call_set_internal_fn (call, ifn); |
855 | update_stmt (call); |
856 | } |
857 | reciprocal_stats.rfuncs_inserted++; |
858 | |
859 | FOR_EACH_IMM_USE_STMT (stmt, ui, arg1) |
860 | { |
861 | gimple_stmt_iterator gsi = gsi_for_stmt (stmt); |
862 | gimple_assign_set_rhs_code (stmt, MULT_EXPR); |
863 | fold_stmt_inplace (&gsi); |
864 | update_stmt (stmt); |
865 | } |
866 | } |
867 | } |
868 | } |
869 | } |
870 | |
statistics_counter_event (fun, "reciprocal divs inserted",
reciprocal_stats.rdivs_inserted);
statistics_counter_event (fun, "reciprocal functions inserted",
reciprocal_stats.rfuncs_inserted);
875 | |
876 | free_dominance_info (CDI_DOMINATORS); |
877 | free_dominance_info (CDI_POST_DOMINATORS); |
878 | delete occ_pool; |
879 | return 0; |
880 | } |
881 | |
882 | } // anon namespace |
883 | |
884 | gimple_opt_pass * |
885 | make_pass_cse_reciprocals (gcc::context *ctxt) |
886 | { |
887 | return new pass_cse_reciprocals (ctxt); |
888 | } |
889 | |
/* Records an occurrence at statement USE_STMT in the vector of
statements STMTS if it is dominated by *TOP_BB, dominates it, or
*TOP_BB is not yet initialized. Returns true if the occurrence was
pushed on the vector. Adjusts *TOP_BB to be the basic block
dominating all statements in the vector. */
895 | |
896 | static bool |
897 | maybe_record_sincos (vec<gimple *> *stmts, |
898 | basic_block *top_bb, gimple *use_stmt) |
899 | { |
900 | basic_block use_bb = gimple_bb (use_stmt); |
901 | if (*top_bb |
902 | && (*top_bb == use_bb |
903 | || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb))) |
904 | stmts->safe_push (use_stmt); |
905 | else if (!*top_bb |
906 | || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb)) |
907 | { |
908 | stmts->safe_push (use_stmt); |
909 | *top_bb = use_bb; |
910 | } |
911 | else |
912 | return false; |
913 | |
914 | return true; |
915 | } |
916 | |
917 | /* Look for sin, cos and cexpi calls with the same argument NAME and |
918 | create a single call to cexpi CSEing the result in this case. |
919 | We first walk over all immediate uses of the argument collecting |
920 | statements that we can CSE in a vector and in a second pass replace |
921 | the statement rhs with a REALPART or IMAGPART expression on the |
922 | result of the cexpi call we insert before the use statement that |
923 | dominates all other candidates. */ |
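
/* As a sketch of the intended transform (the SSA names are
illustrative):

s_1 = sin (x_2);
c_3 = cos (x_2);

becomes

sincostmp_4 = cexpi (x_2);
s_1 = IMAGPART_EXPR <sincostmp_4>;
c_3 = REALPART_EXPR <sincostmp_4>;  */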
924 | |
925 | static bool |
926 | execute_cse_sincos_1 (tree name) |
927 | { |
928 | gimple_stmt_iterator gsi; |
929 | imm_use_iterator use_iter; |
930 | tree fndecl, res, type; |
931 | gimple *def_stmt, *use_stmt, *stmt; |
932 | int seen_cos = 0, seen_sin = 0, seen_cexpi = 0; |
933 | auto_vec<gimple *> stmts; |
934 | basic_block top_bb = NULL; |
935 | int i; |
936 | bool cfg_changed = false; |
937 | |
938 | type = TREE_TYPE (name); |
939 | FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name) |
940 | { |
941 | if (gimple_code (use_stmt) != GIMPLE_CALL |
942 | || !gimple_call_lhs (use_stmt)) |
943 | continue; |
944 | |
945 | switch (gimple_call_combined_fn (use_stmt)) |
946 | { |
947 | CASE_CFN_COS: |
948 | seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0; |
949 | break; |
950 | |
951 | CASE_CFN_SIN: |
952 | seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0; |
953 | break; |
954 | |
955 | CASE_CFN_CEXPI: |
956 | seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0; |
957 | break; |
958 | |
959 | default:; |
960 | } |
961 | } |
962 | |
963 | if (seen_cos + seen_sin + seen_cexpi <= 1) |
964 | return false; |
965 | |
966 | /* Simply insert cexpi at the beginning of top_bb but not earlier than |
967 | the name def statement. */ |
968 | fndecl = mathfn_built_in (type, BUILT_IN_CEXPI); |
969 | if (!fndecl) |
970 | return false; |
971 | stmt = gimple_build_call (fndecl, 1, name); |
res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
973 | gimple_call_set_lhs (stmt, res); |
974 | |
975 | def_stmt = SSA_NAME_DEF_STMT (name); |
976 | if (!SSA_NAME_IS_DEFAULT_DEF (name) |
977 | && gimple_code (def_stmt) != GIMPLE_PHI |
978 | && gimple_bb (def_stmt) == top_bb) |
979 | { |
980 | gsi = gsi_for_stmt (def_stmt); |
981 | gsi_insert_after (&gsi, stmt, GSI_SAME_STMT); |
982 | } |
983 | else |
984 | { |
985 | gsi = gsi_after_labels (top_bb); |
986 | gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); |
987 | } |
988 | sincos_stats.inserted++; |
989 | |
990 | /* And adjust the recorded old call sites. */ |
991 | for (i = 0; stmts.iterate (i, &use_stmt); ++i) |
992 | { |
993 | tree rhs = NULL; |
994 | |
995 | switch (gimple_call_combined_fn (use_stmt)) |
996 | { |
997 | CASE_CFN_COS: |
998 | rhs = fold_build1 (REALPART_EXPR, type, res); |
999 | break; |
1000 | |
1001 | CASE_CFN_SIN: |
1002 | rhs = fold_build1 (IMAGPART_EXPR, type, res); |
1003 | break; |
1004 | |
1005 | CASE_CFN_CEXPI: |
1006 | rhs = res; |
1007 | break; |
1008 | |
1009 | default:; |
1010 | gcc_unreachable (); |
1011 | } |
1012 | |
1013 | /* Replace call with a copy. */ |
1014 | stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs); |
1015 | |
1016 | gsi = gsi_for_stmt (use_stmt); |
1017 | gsi_replace (&gsi, stmt, true); |
1018 | if (gimple_purge_dead_eh_edges (gimple_bb (stmt))) |
1019 | cfg_changed = true; |
1020 | } |
1021 | |
1022 | return cfg_changed; |
1023 | } |
1024 | |
1025 | /* To evaluate powi(x,n), the floating point value x raised to the |
1026 | constant integer exponent n, we use a hybrid algorithm that |
1027 | combines the "window method" with look-up tables. For an |
1028 | introduction to exponentiation algorithms and "addition chains", |
1029 | see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth, |
1030 | "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming", |
1031 | 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation |
1032 | Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */ |
1033 | |
1034 | /* Provide a default value for POWI_MAX_MULTS, the maximum number of |
1035 | multiplications to inline before calling the system library's pow |
1036 | function. powi(x,n) requires at worst 2*bits(n)-2 multiplications, |
1037 | so this default never requires calling pow, powf or powl. */ |
1038 | |
1039 | #ifndef POWI_MAX_MULTS |
1040 | #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2) |
1041 | #endif |
1042 | |
1043 | /* The size of the "optimal power tree" lookup table. All |
1044 | exponents less than this value are simply looked up in the |
1045 | powi_table below. This threshold is also used to size the |
1046 | cache of pseudo registers that hold intermediate results. */ |
1047 | #define POWI_TABLE_SIZE 256 |
1048 | |
1049 | /* The size, in bits of the window, used in the "window method" |
1050 | exponentiation algorithm. This is equivalent to a radix of |
1051 | (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */ |
1052 | #define POWI_WINDOW_SIZE 3 |
1053 | |
1054 | /* The following table is an efficient representation of an |
1055 | "optimal power tree". For each value, i, the corresponding |
value, j, in the table states that an optimal evaluation
1057 | sequence for calculating pow(x,i) can be found by evaluating |
1058 | pow(x,j)*pow(x,i-j). An optimal power tree for the first |
1059 | 100 integers is given in Knuth's "Seminumerical algorithms". */ |
1060 | |
1061 | static const unsigned char powi_table[POWI_TABLE_SIZE] = |
1062 | { |
1063 | 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */ |
1064 | 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */ |
1065 | 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */ |
1066 | 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */ |
1067 | 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */ |
1068 | 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */ |
1069 | 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */ |
1070 | 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */ |
1071 | 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */ |
1072 | 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */ |
1073 | 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */ |
1074 | 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */ |
1075 | 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */ |
1076 | 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */ |
1077 | 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */ |
1078 | 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */ |
1079 | 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */ |
1080 | 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */ |
1081 | 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */ |
1082 | 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */ |
1083 | 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */ |
1084 | 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */ |
1085 | 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */ |
1086 | 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */ |
1087 | 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */ |
1088 | 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */ |
1089 | 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */ |
1090 | 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */ |
1091 | 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */ |
1092 | 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */ |
1093 | 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */ |
1094 | 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */ |
1095 | }; |
1096 | |
1097 | |
1098 | /* Return the number of multiplications required to calculate |
1099 | powi(x,n) where n is less than POWI_TABLE_SIZE. This is a |
1100 | subroutine of powi_cost. CACHE is an array indicating |
1101 | which exponents have already been calculated. */ |
1102 | |
1103 | static int |
1104 | powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache) |
1105 | { |
1106 | /* If we've already calculated this exponent, then this evaluation |
1107 | doesn't require any additional multiplications. */ |
1108 | if (cache[n]) |
1109 | return 0; |
1110 | |
1111 | cache[n] = true; |
1112 | return powi_lookup_cost (n - powi_table[n], cache) |
1113 | + powi_lookup_cost (powi_table[n], cache) + 1; |
1114 | } |
1115 | |
1116 | /* Return the number of multiplications required to calculate |
1117 | powi(x,n) for an arbitrary x, given the exponent N. This |
1118 | function needs to be kept in sync with powi_as_mults below. */ |
1119 | |
1120 | static int |
1121 | powi_cost (HOST_WIDE_INT n) |
1122 | { |
1123 | bool cache[POWI_TABLE_SIZE]; |
1124 | unsigned HOST_WIDE_INT digit; |
1125 | unsigned HOST_WIDE_INT val; |
1126 | int result; |
1127 | |
1128 | if (n == 0) |
1129 | return 0; |
1130 | |
1131 | /* Ignore the reciprocal when calculating the cost. */ |
1132 | val = (n < 0) ? -n : n; |
1133 | |
1134 | /* Initialize the exponent cache. */ |
1135 | memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool)); |
1136 | cache[1] = true; |
1137 | |
1138 | result = 0; |
1139 | |
1140 | while (val >= POWI_TABLE_SIZE) |
1141 | { |
1142 | if (val & 1) |
1143 | { |
1144 | digit = val & ((1 << POWI_WINDOW_SIZE) - 1); |
1145 | result += powi_lookup_cost (digit, cache) |
1146 | + POWI_WINDOW_SIZE + 1; |
1147 | val >>= POWI_WINDOW_SIZE; |
1148 | } |
1149 | else |
1150 | { |
1151 | val >>= 1; |
1152 | result++; |
1153 | } |
1154 | } |
1155 | |
1156 | return result + powi_lookup_cost (val, cache); |
1157 | } |
1158 | |
1159 | /* Recursive subroutine of powi_as_mults. This function takes the |
1160 | array, CACHE, of already calculated exponents and an exponent N and |
1161 | returns a tree that corresponds to CACHE[1]**N, with type TYPE. */ |
1162 | |
1163 | static tree |
1164 | powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type, |
1165 | HOST_WIDE_INT n, tree *cache) |
1166 | { |
1167 | tree op0, op1, ssa_target; |
1168 | unsigned HOST_WIDE_INT digit; |
1169 | gassign *mult_stmt; |
1170 | |
1171 | if (n < POWI_TABLE_SIZE && cache[n]) |
1172 | return cache[n]; |
1173 | |
ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1175 | |
1176 | if (n < POWI_TABLE_SIZE) |
1177 | { |
1178 | cache[n] = ssa_target; |
1179 | op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache); |
1180 | op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache); |
1181 | } |
1182 | else if (n & 1) |
1183 | { |
1184 | digit = n & ((1 << POWI_WINDOW_SIZE) - 1); |
1185 | op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache); |
1186 | op1 = powi_as_mults_1 (gsi, loc, type, digit, cache); |
1187 | } |
1188 | else |
1189 | { |
1190 | op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache); |
1191 | op1 = op0; |
1192 | } |
1193 | |
1194 | mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1); |
1195 | gimple_set_location (mult_stmt, loc); |
1196 | gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT); |
1197 | |
1198 | return ssa_target; |
1199 | } |
1200 | |
1201 | /* Convert ARG0**N to a tree of multiplications of ARG0 with itself. |
1202 | This function needs to be kept in sync with powi_cost above. */ |
1203 | |
1204 | static tree |
1205 | powi_as_mults (gimple_stmt_iterator *gsi, location_t loc, |
1206 | tree arg0, HOST_WIDE_INT n) |
1207 | { |
1208 | tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0); |
1209 | gassign *div_stmt; |
1210 | tree target; |
1211 | |
1212 | if (n == 0) |
1213 | return build_real (type, dconst1); |
1214 | |
1215 | memset (cache, 0, sizeof (cache)); |
1216 | cache[1] = arg0; |
1217 | |
1218 | result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache); |
1219 | if (n >= 0) |
1220 | return result; |
1221 | |
1222 | /* If the original exponent was negative, reciprocate the result. */ |
target = make_temp_ssa_name (type, NULL, "powmult");
1224 | div_stmt = gimple_build_assign (target, RDIV_EXPR, |
1225 | build_real (type, dconst1), result); |
1226 | gimple_set_location (div_stmt, loc); |
1227 | gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT); |
1228 | |
1229 | return target; |
1230 | } |
1231 | |
1232 | /* ARG0 and N are the two arguments to a powi builtin in GSI with |
1233 | location info LOC. If the arguments are appropriate, create an |
1234 | equivalent sequence of statements prior to GSI using an optimal |
number of multiplications, and return an expression holding the
1236 | result. */ |
1237 | |
1238 | static tree |
1239 | gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc, |
1240 | tree arg0, HOST_WIDE_INT n) |
1241 | { |
1242 | /* Avoid largest negative number. */ |
1243 | if (n != -n |
1244 | && ((n >= -1 && n <= 2) |
1245 | || (optimize_function_for_speed_p (cfun) |
1246 | && powi_cost (n) <= POWI_MAX_MULTS))) |
1247 | return powi_as_mults (gsi, loc, arg0, n); |
1248 | |
1249 | return NULL_TREE; |
1250 | } |
1251 | |
1252 | /* Build a gimple call statement that calls FN with argument ARG. |
1253 | Set the lhs of the call statement to a fresh SSA name. Insert the |
1254 | statement prior to GSI's current position, and return the fresh |
1255 | SSA name. */ |
1256 | |
1257 | static tree |
1258 | build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc, |
1259 | tree fn, tree arg) |
1260 | { |
1261 | gcall *call_stmt; |
1262 | tree ssa_target; |
1263 | |
1264 | call_stmt = gimple_build_call (fn, 1, arg); |
ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1266 | gimple_set_lhs (call_stmt, ssa_target); |
1267 | gimple_set_location (call_stmt, loc); |
1268 | gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT); |
1269 | |
1270 | return ssa_target; |
1271 | } |
1272 | |
/* Build a gimple binary operation with the given CODE and arguments
ARG0, ARG1, assigning the result to a new SSA name based on NAME.
Insert the statement prior to GSI's current position, and
return the fresh SSA name. */
1277 | |
1278 | static tree |
1279 | build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc, |
1280 | const char *name, enum tree_code code, |
1281 | tree arg0, tree arg1) |
1282 | { |
1283 | tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name); |
1284 | gassign *stmt = gimple_build_assign (result, code, arg0, arg1); |
1285 | gimple_set_location (stmt, loc); |
1286 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); |
1287 | return result; |
1288 | } |
1289 | |
/* Build a gimple reference operation with the given CODE and argument
ARG0, assigning the result to a new SSA name of TYPE based on NAME.
Insert the statement prior to GSI's current position, and return
the fresh SSA name. */
1294 | |
1295 | static inline tree |
1296 | build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type, |
1297 | const char *name, enum tree_code code, tree arg0) |
1298 | { |
1299 | tree result = make_temp_ssa_name (type, NULL, name); |
1300 | gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0)); |
1301 | gimple_set_location (stmt, loc); |
1302 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); |
1303 | return result; |
1304 | } |
1305 | |
1306 | /* Build a gimple assignment to cast VAL to TYPE. Insert the statement |
1307 | prior to GSI's current position, and return the fresh SSA name. */ |
1308 | |
1309 | static tree |
1310 | build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc, |
1311 | tree type, tree val) |
1312 | { |
1313 | tree result = make_ssa_name (type); |
1314 | gassign *stmt = gimple_build_assign (result, NOP_EXPR, val); |
1315 | gimple_set_location (stmt, loc); |
1316 | gsi_insert_before (gsi, stmt, GSI_SAME_STMT); |
1317 | return result; |
1318 | } |
1319 | |
1320 | struct pow_synth_sqrt_info |
1321 | { |
1322 | bool *factors; |
1323 | unsigned int deepest; |
1324 | unsigned int num_mults; |
1325 | }; |
1326 | |
1327 | /* Return true iff the real value C can be represented as a |
1328 | sum of powers of 0.5 up to N. That is: |
1329 | C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1. |
1330 | Record in INFO the various parameters of the synthesis algorithm such |
1331 | as the factors a[i], the maximum 0.5 power and the number of |
1332 | multiplications that will be required. */ |
1333 | |
1334 | bool |
1335 | representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n, |
1336 | struct pow_synth_sqrt_info *info) |
1337 | { |
1338 | REAL_VALUE_TYPE factor = dconsthalf; |
1339 | REAL_VALUE_TYPE remainder = c; |
1340 | |
1341 | info->deepest = 0; |
1342 | info->num_mults = 0; |
1343 | memset (info->factors, 0, n * sizeof (bool)); |
1344 | |
1345 | for (unsigned i = 0; i < n; i++) |
1346 | { |
1347 | REAL_VALUE_TYPE res; |
1348 | |
/* If something inexact happened, bail out now. */
1350 | if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor)) |
1351 | return false; |
1352 | |
1353 | /* We have hit zero. The number is representable as a sum |
1354 | of powers of 0.5. */ |
1355 | if (real_equal (&res, &dconst0)) |
1356 | { |
1357 | info->factors[i] = true; |
1358 | info->deepest = i + 1; |
1359 | return true; |
1360 | } |
1361 | else if (!REAL_VALUE_NEGATIVE (res)) |
1362 | { |
1363 | remainder = res; |
1364 | info->factors[i] = true; |
1365 | info->num_mults++; |
1366 | } |
1367 | else |
1368 | info->factors[i] = false; |
1369 | |
1370 | real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf); |
1371 | } |
1372 | return false; |
1373 | } |
1374 | |
1375 | /* Return the tree corresponding to FN being applied |
1376 | to ARG N times at GSI and LOC. |
1377 | Look up previous results from CACHE if need be. |
CACHE[0] should contain just plain ARG, i.e. FN applied to ARG 0 times. */
1379 | |
1380 | static tree |
1381 | get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi, |
1382 | tree fn, location_t loc, tree *cache) |
1383 | { |
1384 | tree res = cache[n]; |
1385 | if (!res) |
1386 | { |
1387 | tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache); |
1388 | res = build_and_insert_call (gsi, loc, fn, prev); |
1389 | cache[n] = res; |
1390 | } |
1391 | |
1392 | return res; |
1393 | } |
1394 | |
1395 | /* Print to STREAM the repeated application of function FNAME to ARG |
1396 | N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print: |
1397 | "foo (foo (x))". */ |
1398 | |
1399 | static void |
1400 | print_nested_fn (FILE* stream, const char *fname, const char* arg, |
1401 | unsigned int n) |
1402 | { |
1403 | if (n == 0) |
fprintf (stream, "%s", arg);
else
{
fprintf (stream, "%s (", fname);
print_nested_fn (stream, fname, arg, n - 1);
fprintf (stream, ")");
1410 | } |
1411 | } |
1412 | |
1413 | /* Print to STREAM the fractional sequence of sqrt chains |
1414 | applied to ARG, described by INFO. Used for the dump file. */ |
1415 | |
1416 | static void |
1417 | dump_fractional_sqrt_sequence (FILE *stream, const char *arg, |
1418 | struct pow_synth_sqrt_info *info) |
1419 | { |
1420 | for (unsigned int i = 0; i < info->deepest; i++) |
1421 | { |
1422 | bool is_set = info->factors[i]; |
1423 | if (is_set) |
1424 | { |
print_nested_fn (stream, "sqrt", arg, i + 1);
if (i != info->deepest - 1)
fprintf (stream, " * ");
1428 | } |
1429 | } |
1430 | } |
1431 | |
1432 | /* Print to STREAM a representation of raising ARG to an integer |
1433 | power N. Used for the dump file. */ |
1434 | |
1435 | static void |
1436 | dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n) |
1437 | { |
1438 | if (n > 1) |
fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1440 | else if (n == 1) |
fprintf (stream, "%s", arg);
1442 | } |
1443 | |
1444 | /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of |
1445 | square roots. Place at GSI and LOC. Limit the maximum depth |
1446 | of the sqrt chains to MAX_DEPTH. Return the tree holding the |
1447 | result of the expanded sequence or NULL_TREE if the expansion failed. |
1448 | |
1449 | This routine assumes that ARG1 is a real number with a fractional part |
1450 | (the integer exponent case will have been handled earlier in |
1451 | gimple_expand_builtin_pow). |
1452 | |
1453 | For ARG1 > 0.0: |
1454 | * For ARG1 composed of a whole part WHOLE_PART and a fractional part |
1455 | FRAC_PART i.e. WHOLE_PART == floor (ARG1) and |
1456 | FRAC_PART == ARG1 - WHOLE_PART: |
1457 | Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where |
1458 | POW (ARG0, FRAC_PART) is expanded as a product of square root chains |
1459 | if it can be expressed as such, that is if FRAC_PART satisfies: |
1460 | FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i)) |
1461 | where integer a[i] is either 0 or 1. |
1462 | |
1463 | Example: |
1464 | POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625) |
1465 | --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x))) |
1466 | |
1467 | For ARG1 < 0.0 there are two approaches: |
1468 | * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1) |
1469 | is calculated as above. |
1470 | |
1471 | Example: |
1472 | POW (x, -5.625) == 1.0 / POW (x, 5.625) |
1473 | --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x)))) |
1474 | |
1475 | * (B) : WHOLE_PART := - ceil (abs (ARG1)) |
1476 | FRAC_PART := ARG1 - WHOLE_PART |
1477 | and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART). |
1478 | Example: |
1479 | POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6) |
1480 | --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6)) |
1481 | |
1482 | For ARG1 < 0.0 we choose between (A) and (B) depending on |
1483 | how many multiplications we'd have to do. |
1484 | So, for the example in (B): POW (x, -5.875), if we were to |
1485 | follow algorithm (A) we would produce: |
1486 | 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X))) |
1487 | which contains more multiplications than approach (B). |
1488 | |
1489 | Hopefully, this approach will eliminate potentially expensive POW library |
1490 | calls when unsafe floating point math is enabled and allow the compiler to |
1491 | further optimise the multiplies, square roots and divides produced by this |
1492 | function. */ |
1493 | |
1494 | static tree |
1495 | expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc, |
1496 | tree arg0, tree arg1, HOST_WIDE_INT max_depth) |
1497 | { |
1498 | tree type = TREE_TYPE (arg0); |
1499 | machine_mode mode = TYPE_MODE (type); |
1500 | tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT); |
1501 | bool one_over = true; |
1502 | |
1503 | if (!sqrtfn) |
1504 | return NULL_TREE; |
1505 | |
1506 | if (TREE_CODE (arg1) != REAL_CST) |
1507 | return NULL_TREE; |
1508 | |
1509 | REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1); |
1510 | |
1511 | gcc_assert (max_depth > 0); |
1512 | tree *cache = XALLOCAVEC (tree, max_depth + 1); |
1513 | |
1514 | struct pow_synth_sqrt_info synth_info; |
1515 | synth_info.factors = XALLOCAVEC (bool, max_depth + 1); |
1516 | synth_info.deepest = 0; |
1517 | synth_info.num_mults = 0; |
1518 | |
1519 | bool neg_exp = REAL_VALUE_NEGATIVE (exp_init); |
1520 | REAL_VALUE_TYPE exp = real_value_abs (&exp_init); |
1521 | |
1522 | /* The whole and fractional parts of exp. */ |
1523 | REAL_VALUE_TYPE whole_part; |
1524 | REAL_VALUE_TYPE frac_part; |
1525 | |
1526 | real_floor (&whole_part, mode, &exp); |
1527 | real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part); |
1528 | |
1529 | |
1530 | REAL_VALUE_TYPE ceil_whole = dconst0; |
1531 | REAL_VALUE_TYPE ceil_fract = dconst0; |
1532 | |
1533 | if (neg_exp) |
1534 | { |
1535 | real_ceil (&ceil_whole, mode, &exp); |
1536 | real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp); |
1537 | } |
1538 | |
1539 | if (!representable_as_half_series_p (frac_part, max_depth, &synth_info)) |
1540 | return NULL_TREE; |
1541 | |
1542 | /* Check whether it's more profitable to not use 1.0 / ... */ |
1543 | if (neg_exp) |
1544 | { |
1545 | struct pow_synth_sqrt_info alt_synth_info; |
1546 | alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1); |
1547 | alt_synth_info.deepest = 0; |
1548 | alt_synth_info.num_mults = 0; |
1549 | |
1550 | if (representable_as_half_series_p (ceil_fract, max_depth, |
1551 | &alt_synth_info) |
1552 | && alt_synth_info.deepest <= synth_info.deepest |
1553 | && alt_synth_info.num_mults < synth_info.num_mults) |
1554 | { |
1555 | whole_part = ceil_whole; |
1556 | frac_part = ceil_fract; |
1557 | synth_info.deepest = alt_synth_info.deepest; |
1558 | synth_info.num_mults = alt_synth_info.num_mults; |
1559 | memcpy (synth_info.factors, alt_synth_info.factors, |
1560 | (max_depth + 1) * sizeof (bool)); |
1561 | one_over = false; |
1562 | } |
1563 | } |
1564 | |
1565 | HOST_WIDE_INT n = real_to_integer (&whole_part); |
1566 | REAL_VALUE_TYPE cint; |
1567 | real_from_integer (&cint, VOIDmode, n, SIGNED); |
1568 | |
1569 | if (!real_identical (&whole_part, &cint)) |
1570 | return NULL_TREE; |
1571 | |
1572 | if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS) |
1573 | return NULL_TREE; |
1574 | |
1575 | memset (cache, 0, (max_depth + 1) * sizeof (tree)); |
1576 | |
1577 | tree integer_res = n == 0 ? build_real (type, dconst1) : arg0; |
1578 | |
1579 | /* Calculate the integer part of the exponent. */ |
1580 | if (n > 1) |
1581 | { |
1582 | integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n); |
1583 | if (!integer_res) |
1584 | return NULL_TREE; |
1585 | } |
1586 | |
1587 | if (dump_file) |
1588 | { |
1589 | char string[64]; |
1590 | |
1591 | real_to_decimal (string, &exp_init, sizeof (string), 0, 1); |
fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1593 | |
1594 | if (neg_exp) |
1595 | { |
1596 | if (one_over) |
1597 | { |
fprintf (dump_file, "1.0 / (");
dump_integer_part (dump_file, "x", n);
if (n > 0)
fprintf (dump_file, " * ");
dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
fprintf (dump_file, ")");
1604 | } |
1605 | else |
1606 | { |
dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
fprintf (dump_file, " / (");
dump_integer_part (dump_file, "x", n);
fprintf (dump_file, ")");
1611 | } |
1612 | } |
1613 | else |
1614 | { |
dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
if (n > 0)
fprintf (dump_file, " * ");
dump_integer_part (dump_file, "x", n);
1619 | } |
1620 | |
fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1622 | } |
1623 | |
1624 | |
1625 | tree fract_res = NULL_TREE; |
1626 | cache[0] = arg0; |
1627 | |
1628 | /* Calculate the fractional part of the exponent. */ |
1629 | for (unsigned i = 0; i < synth_info.deepest; i++) |
1630 | { |
1631 | if (synth_info.factors[i]) |
1632 | { |
1633 | tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache); |
1634 | |
1635 | if (!fract_res) |
1636 | fract_res = sqrt_chain; |
1637 | |
1638 | else |
fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1640 | fract_res, sqrt_chain); |
1641 | } |
1642 | } |
1643 | |
1644 | tree res = NULL_TREE; |
1645 | |
1646 | if (neg_exp) |
1647 | { |
1648 | if (one_over) |
1649 | { |
1650 | if (n > 0) |
res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1652 | fract_res, integer_res); |
1653 | else |
1654 | res = fract_res; |
1655 | |
res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1657 | build_real (type, dconst1), res); |
1658 | } |
1659 | else |
1660 | { |
res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1662 | fract_res, integer_res); |
1663 | } |
1664 | } |
1665 | else |
res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1667 | fract_res, integer_res); |
1668 | return res; |
1669 | } |
1670 | |
1671 | /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI |
1672 | with location info LOC. If possible, create an equivalent and |
1673 | less expensive sequence of statements prior to GSI, and return an |
expression holding the result.  */
1675 | |
1676 | static tree |
1677 | gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc, |
1678 | tree arg0, tree arg1) |
1679 | { |
1680 | REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6; |
1681 | REAL_VALUE_TYPE c2, dconst3; |
1682 | HOST_WIDE_INT n; |
1683 | tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x; |
1684 | machine_mode mode; |
1685 | bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi)); |
1686 | bool hw_sqrt_exists, c_is_int, c2_is_int; |
1687 | |
1688 | dconst1_4 = dconst1; |
1689 | SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2); |
1690 | |
1691 | /* If the exponent isn't a constant, there's nothing of interest |
1692 | to be done. */ |
1693 | if (TREE_CODE (arg1) != REAL_CST) |
1694 | return NULL_TREE; |
1695 | |
1696 | /* Don't perform the operation if flag_signaling_nans is on |
1697 | and the operand is a signaling NaN. */ |
1698 | if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1))) |
1699 | && ((TREE_CODE (arg0) == REAL_CST |
1700 | && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0))) |
1701 | || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1)))) |
1702 | return NULL_TREE; |
1703 | |
1704 | /* If the exponent is equivalent to an integer, expand to an optimal |
1705 | multiplication sequence when profitable. */ |
1706 | c = TREE_REAL_CST (arg1); |
1707 | n = real_to_integer (&c); |
1708 | real_from_integer (&cint, VOIDmode, n, SIGNED); |
1709 | c_is_int = real_identical (&c, &cint); |
1710 | |
1711 | if (c_is_int |
1712 | && ((n >= -1 && n <= 2) |
1713 | || (flag_unsafe_math_optimizations |
1714 | && speed_p |
1715 | && powi_cost (n) <= POWI_MAX_MULTS))) |
1716 | return gimple_expand_builtin_powi (gsi, loc, arg0, n); |
1717 | |
1718 | /* Attempt various optimizations using sqrt and cbrt. */ |
1719 | type = TREE_TYPE (arg0); |
1720 | mode = TYPE_MODE (type); |
1721 | sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT); |
1722 | |
1723 | /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe |
1724 | unless signed zeros must be maintained. pow(-0,0.5) = +0, while |
1725 | sqrt(-0) = -0. */ |
1726 | if (sqrtfn |
1727 | && real_equal (&c, &dconsthalf) |
1728 | && !HONOR_SIGNED_ZEROS (mode)) |
1729 | return build_and_insert_call (gsi, loc, sqrtfn, arg0); |
1730 | |
1731 | hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing; |
1732 | |
1733 | /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math |
1734 | optimizations since 1./3. is not exactly representable. If x |
1735 | is negative and finite, the correct value of pow(x,1./3.) is |
1736 | a NaN with the "invalid" exception raised, because the value |
1737 | of 1./3. actually has an even denominator. The correct value |
1738 | of cbrt(x) is a negative real value. */ |
1739 | cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT); |
1740 | dconst1_3 = real_value_truncate (mode, dconst_third ()); |
1741 | |
1742 | if (flag_unsafe_math_optimizations |
1743 | && cbrtfn |
1744 | && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0)) |
1745 | && real_equal (&c, &dconst1_3)) |
1746 | return build_and_insert_call (gsi, loc, cbrtfn, arg0); |
1747 | |
1748 | /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization |
1749 | if we don't have a hardware sqrt insn. */ |
1750 | dconst1_6 = dconst1_3; |
1751 | SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1); |
1752 | |
1753 | if (flag_unsafe_math_optimizations |
1754 | && sqrtfn |
1755 | && cbrtfn |
1756 | && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0)) |
1757 | && speed_p |
1758 | && hw_sqrt_exists |
1759 | && real_equal (&c, &dconst1_6)) |
1760 | { |
1761 | /* sqrt(x) */ |
1762 | sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0); |
1763 | |
1764 | /* cbrt(sqrt(x)) */ |
1765 | return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0); |
1766 | } |
1767 | |
1768 | |
1769 | /* Attempt to expand the POW as a product of square root chains. |
Expand the 0.25 case even when optimizing for size.  */
1771 | if (flag_unsafe_math_optimizations |
1772 | && sqrtfn |
1773 | && hw_sqrt_exists |
1774 | && (speed_p || real_equal (&c, &dconst1_4)) |
1775 | && !HONOR_SIGNED_ZEROS (mode)) |
1776 | { |
1777 | unsigned int max_depth = speed_p |
1778 | ? PARAM_VALUE (PARAM_MAX_POW_SQRT_DEPTH) |
1779 | : 2; |
1780 | |
1781 | tree expand_with_sqrts |
1782 | = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth); |
1783 | |
1784 | if (expand_with_sqrts) |
1785 | return expand_with_sqrts; |
1786 | } |
1787 | |
1788 | real_arithmetic (&c2, MULT_EXPR, &c, &dconst2); |
1789 | n = real_to_integer (&c2); |
1790 | real_from_integer (&cint, VOIDmode, n, SIGNED); |
1791 | c2_is_int = real_identical (&c2, &cint); |
1792 | |
1793 | /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into |
1794 | |
1795 | powi(x, n/3) * powi(cbrt(x), n%3), n > 0; |
1796 | 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0. |
1797 | |
1798 | Do not calculate the first factor when n/3 = 0. As cbrt(x) is |
1799 | different from pow(x, 1./3.) due to rounding and behavior with |
1800 | negative x, we need to constrain this transformation to unsafe |
1801 | math and positive x or finite math. */ |
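/* Illustration: pow (x, 5./3.) gives n = 5, and is expanded as
powi (x, 5/3) * powi (cbrt (x), 5%3), i.e. x * cbrt (x) * cbrt (x).  */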
1802 | real_from_integer (&dconst3, VOIDmode, 3, SIGNED); |
1803 | real_arithmetic (&c2, MULT_EXPR, &c, &dconst3); |
1804 | real_round (&c2, mode, &c2); |
1805 | n = real_to_integer (&c2); |
1806 | real_from_integer (&cint, VOIDmode, n, SIGNED); |
1807 | real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3); |
1808 | real_convert (&c2, mode, &c2); |
1809 | |
1810 | if (flag_unsafe_math_optimizations |
1811 | && cbrtfn |
1812 | && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0)) |
1813 | && real_identical (&c2, &c) |
1814 | && !c2_is_int |
1815 | && optimize_function_for_speed_p (cfun) |
1816 | && powi_cost (n / 3) <= POWI_MAX_MULTS) |
1817 | { |
1818 | tree powi_x_ndiv3 = NULL_TREE; |
1819 | |
1820 | /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not |
1821 | possible or profitable, give up. Skip the degenerate case when |
1822 | abs(n) < 3, where the result is always 1. */ |
1823 | if (absu_hwi (n) >= 3) |
1824 | { |
1825 | powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0, |
1826 | abs_hwi (n / 3)); |
1827 | if (!powi_x_ndiv3) |
1828 | return NULL_TREE; |
1829 | } |
1830 | |
1831 | /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi |
1832 | as that creates an unnecessary variable. Instead, just produce |
1833 | either cbrt(x) or cbrt(x) * cbrt(x). */ |
1834 | cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0); |
1835 | |
1836 | if (absu_hwi (n) % 3 == 1) |
1837 | powi_cbrt_x = cbrt_x; |
1838 | else |
powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1840 | cbrt_x, cbrt_x); |
1841 | |
1842 | /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */ |
1843 | if (absu_hwi (n) < 3) |
1844 | result = powi_cbrt_x; |
1845 | else |
result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1847 | powi_x_ndiv3, powi_cbrt_x); |
1848 | |
1849 | /* If n is negative, reciprocate the result. */ |
1850 | if (n < 0) |
result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1852 | build_real (type, dconst1), result); |
1853 | |
1854 | return result; |
1855 | } |
1856 | |
1857 | /* No optimizations succeeded. */ |
1858 | return NULL_TREE; |
1859 | } |
1860 | |
1861 | /* ARG is the argument to a cabs builtin call in GSI with location info |
1862 | LOC. Create a sequence of statements prior to GSI that calculates |
1863 | sqrt(R*R + I*I), where R and I are the real and imaginary components |
1864 | of ARG, respectively. Return an expression holding the result. */ |
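/* E.g. for ARG = 3.0 + 4.0i this computes
sqrt (3.0*3.0 + 4.0*4.0) = 5.0.  */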
1865 | |
1866 | static tree |
1867 | gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg) |
1868 | { |
1869 | tree real_part, imag_part, addend1, addend2, sum, result; |
1870 | tree type = TREE_TYPE (TREE_TYPE (arg)); |
1871 | tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT); |
1872 | machine_mode mode = TYPE_MODE (type); |
1873 | |
1874 | if (!flag_unsafe_math_optimizations |
1875 | || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi))) |
1876 | || !sqrtfn |
1877 | || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing) |
1878 | return NULL_TREE; |
1879 | |
real_part = build_and_insert_ref (gsi, loc, type, "cabs",
REALPART_EXPR, arg);
addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
real_part, real_part);
imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
IMAGPART_EXPR, arg);
addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
imag_part, imag_part);
sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
1889 | result = build_and_insert_call (gsi, loc, sqrtfn, sum); |
1890 | |
1891 | return result; |
1892 | } |
1893 | |
1894 | /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1 |
1895 | on the SSA_NAME argument of each of them. Also expand powi(x,n) into |
1896 | an optimal number of multiplies, when n is a constant. */ |
1897 | |
1898 | namespace { |
1899 | |
1900 | const pass_data pass_data_cse_sincos = |
1901 | { |
1902 | GIMPLE_PASS, /* type */ |
"sincos", /* name */
1904 | OPTGROUP_NONE, /* optinfo_flags */ |
1905 | TV_TREE_SINCOS, /* tv_id */ |
1906 | PROP_ssa, /* properties_required */ |
1907 | PROP_gimple_opt_math, /* properties_provided */ |
1908 | 0, /* properties_destroyed */ |
1909 | 0, /* todo_flags_start */ |
1910 | TODO_update_ssa, /* todo_flags_finish */ |
1911 | }; |
1912 | |
1913 | class pass_cse_sincos : public gimple_opt_pass |
1914 | { |
1915 | public: |
1916 | pass_cse_sincos (gcc::context *ctxt) |
1917 | : gimple_opt_pass (pass_data_cse_sincos, ctxt) |
1918 | {} |
1919 | |
1920 | /* opt_pass methods: */ |
1921 | virtual bool gate (function *) |
1922 | { |
1923 | /* We no longer require either sincos or cexp, since powi expansion |
1924 | piggybacks on this pass. */ |
1925 | return optimize; |
1926 | } |
1927 | |
1928 | virtual unsigned int execute (function *); |
1929 | |
1930 | }; // class pass_cse_sincos |
1931 | |
1932 | unsigned int |
1933 | pass_cse_sincos::execute (function *fun) |
1934 | { |
1935 | basic_block bb; |
1936 | bool cfg_changed = false; |
1937 | |
1938 | calculate_dominance_info (CDI_DOMINATORS); |
1939 | memset (&sincos_stats, 0, sizeof (sincos_stats)); |
1940 | |
1941 | FOR_EACH_BB_FN (bb, fun) |
1942 | { |
1943 | gimple_stmt_iterator gsi; |
1944 | bool cleanup_eh = false; |
1945 | |
1946 | for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
1947 | { |
1948 | gimple *stmt = gsi_stmt (gsi); |
1949 | |
1950 | /* Only the last stmt in a bb could throw, no need to call |
1951 | gimple_purge_dead_eh_edges if we change something in the middle |
1952 | of a basic block. */ |
1953 | cleanup_eh = false; |
1954 | |
1955 | if (is_gimple_call (stmt) |
1956 | && gimple_call_lhs (stmt)) |
1957 | { |
1958 | tree arg, arg0, arg1, result; |
1959 | HOST_WIDE_INT n; |
1960 | location_t loc; |
1961 | |
1962 | switch (gimple_call_combined_fn (stmt)) |
1963 | { |
1964 | CASE_CFN_COS: |
1965 | CASE_CFN_SIN: |
1966 | CASE_CFN_CEXPI: |
1967 | /* Make sure we have either sincos or cexp. */ |
1968 | if (!targetm.libc_has_function (function_c99_math_complex) |
1969 | && !targetm.libc_has_function (function_sincos)) |
1970 | break; |
1971 | |
1972 | arg = gimple_call_arg (stmt, 0); |
1973 | if (TREE_CODE (arg) == SSA_NAME) |
1974 | cfg_changed |= execute_cse_sincos_1 (arg); |
1975 | break; |
1976 | |
1977 | CASE_CFN_POW: |
1978 | arg0 = gimple_call_arg (stmt, 0); |
1979 | arg1 = gimple_call_arg (stmt, 1); |
1980 | |
1981 | loc = gimple_location (stmt); |
1982 | result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1); |
1983 | |
1984 | if (result) |
1985 | { |
1986 | tree lhs = gimple_get_lhs (stmt); |
1987 | gassign *new_stmt = gimple_build_assign (lhs, result); |
1988 | gimple_set_location (new_stmt, loc); |
1989 | unlink_stmt_vdef (stmt); |
1990 | gsi_replace (&gsi, new_stmt, true); |
1991 | cleanup_eh = true; |
1992 | if (gimple_vdef (stmt)) |
1993 | release_ssa_name (gimple_vdef (stmt)); |
1994 | } |
1995 | break; |
1996 | |
1997 | CASE_CFN_POWI: |
1998 | arg0 = gimple_call_arg (stmt, 0); |
1999 | arg1 = gimple_call_arg (stmt, 1); |
2000 | loc = gimple_location (stmt); |
2001 | |
2002 | if (real_minus_onep (arg0)) |
2003 | { |
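/* powi (-1, n) is -1 when N is odd and 1 when N is even, so the
expansion below just tests the low bit of the exponent.  */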
2004 | tree t0, t1, cond, one, minus_one; |
2005 | gassign *stmt; |
2006 | |
2007 | t0 = TREE_TYPE (arg0); |
2008 | t1 = TREE_TYPE (arg1); |
2009 | one = build_real (t0, dconst1); |
2010 | minus_one = build_real (t0, dconstm1); |
2011 | |
cond = make_temp_ssa_name (t1, NULL, "powi_cond");
2013 | stmt = gimple_build_assign (cond, BIT_AND_EXPR, |
2014 | arg1, build_int_cst (t1, 1)); |
2015 | gimple_set_location (stmt, loc); |
2016 | gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); |
2017 | |
result = make_temp_ssa_name (t0, NULL, "powi");
2019 | stmt = gimple_build_assign (result, COND_EXPR, cond, |
2020 | minus_one, one); |
2021 | gimple_set_location (stmt, loc); |
2022 | gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); |
2023 | } |
2024 | else |
2025 | { |
2026 | if (!tree_fits_shwi_p (arg1)) |
2027 | break; |
2028 | |
2029 | n = tree_to_shwi (arg1); |
2030 | result = gimple_expand_builtin_powi (&gsi, loc, arg0, n); |
2031 | } |
2032 | |
2033 | if (result) |
2034 | { |
2035 | tree lhs = gimple_get_lhs (stmt); |
2036 | gassign *new_stmt = gimple_build_assign (lhs, result); |
2037 | gimple_set_location (new_stmt, loc); |
2038 | unlink_stmt_vdef (stmt); |
2039 | gsi_replace (&gsi, new_stmt, true); |
2040 | cleanup_eh = true; |
2041 | if (gimple_vdef (stmt)) |
2042 | release_ssa_name (gimple_vdef (stmt)); |
2043 | } |
2044 | break; |
2045 | |
2046 | CASE_CFN_CABS: |
2047 | arg0 = gimple_call_arg (stmt, 0); |
2048 | loc = gimple_location (stmt); |
2049 | result = gimple_expand_builtin_cabs (&gsi, loc, arg0); |
2050 | |
2051 | if (result) |
2052 | { |
2053 | tree lhs = gimple_get_lhs (stmt); |
2054 | gassign *new_stmt = gimple_build_assign (lhs, result); |
2055 | gimple_set_location (new_stmt, loc); |
2056 | unlink_stmt_vdef (stmt); |
2057 | gsi_replace (&gsi, new_stmt, true); |
2058 | cleanup_eh = true; |
2059 | if (gimple_vdef (stmt)) |
2060 | release_ssa_name (gimple_vdef (stmt)); |
2061 | } |
2062 | break; |
2063 | |
2064 | default:; |
2065 | } |
2066 | } |
2067 | } |
2068 | if (cleanup_eh) |
2069 | cfg_changed |= gimple_purge_dead_eh_edges (bb); |
2070 | } |
2071 | |
statistics_counter_event (fun, "sincos statements inserted",
2073 | sincos_stats.inserted); |
2074 | |
2075 | return cfg_changed ? TODO_cleanup_cfg : 0; |
2076 | } |
2077 | |
2078 | } // anon namespace |
2079 | |
2080 | gimple_opt_pass * |
2081 | make_pass_cse_sincos (gcc::context *ctxt) |
2082 | { |
2083 | return new pass_cse_sincos (ctxt); |
2084 | } |
2085 | |
2086 | /* Return true if stmt is a type conversion operation that can be stripped |
2087 | when used in a widening multiply operation. */ |
2088 | static bool |
2089 | widening_mult_conversion_strippable_p (tree result_type, gimple *stmt) |
2090 | { |
2091 | enum tree_code rhs_code = gimple_assign_rhs_code (stmt); |
2092 | |
2093 | if (TREE_CODE (result_type) == INTEGER_TYPE) |
2094 | { |
2095 | tree op_type; |
2096 | tree inner_op_type; |
2097 | |
2098 | if (!CONVERT_EXPR_CODE_P (rhs_code)) |
2099 | return false; |
2100 | |
2101 | op_type = TREE_TYPE (gimple_assign_lhs (stmt)); |
2102 | |
2103 | /* If the type of OP has the same precision as the result, then |
2104 | we can strip this conversion. The multiply operation will be |
2105 | selected to create the correct extension as a by-product. */ |
2106 | if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type)) |
2107 | return true; |
2108 | |
2109 | /* We can also strip a conversion if it preserves the signed-ness of |
2110 | the operation and doesn't narrow the range. */ |
2111 | inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt)); |
2112 | |
2113 | /* If the inner-most type is unsigned, then we can strip any |
2114 | intermediate widening operation. If it's signed, then the |
2115 | intermediate widening operation must also be signed. */ |
2116 | if ((TYPE_UNSIGNED (inner_op_type) |
2117 | || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type)) |
2118 | && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type)) |
2119 | return true; |
2120 | |
2121 | return false; |
2122 | } |
2123 | |
2124 | return rhs_code == FIXED_CONVERT_EXPR; |
2125 | } |
2126 | |
2127 | /* Return true if RHS is a suitable operand for a widening multiplication, |
2128 | assuming a target type of TYPE. |
2129 | There are two cases: |
2130 | |
2131 | - RHS makes some value at least twice as wide. Store that value |
2132 | in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT. |
2133 | |
2134 | - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so, |
2135 | but leave *TYPE_OUT untouched. */ |
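/* E.g. (a hypothetical case) for a 32-bit TYPE and an RHS defined by
rhs = (int) s with S a 16-bit short, *NEW_RHS_OUT is set to S and
*TYPE_OUT to the 16-bit type.  */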
2136 | |
2137 | static bool |
2138 | is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out, |
2139 | tree *new_rhs_out) |
2140 | { |
2141 | gimple *stmt; |
2142 | tree type1, rhs1; |
2143 | |
2144 | if (TREE_CODE (rhs) == SSA_NAME) |
2145 | { |
2146 | stmt = SSA_NAME_DEF_STMT (rhs); |
2147 | if (is_gimple_assign (stmt)) |
2148 | { |
2149 | if (! widening_mult_conversion_strippable_p (type, stmt)) |
2150 | rhs1 = rhs; |
2151 | else |
2152 | { |
2153 | rhs1 = gimple_assign_rhs1 (stmt); |
2154 | |
2155 | if (TREE_CODE (rhs1) == INTEGER_CST) |
2156 | { |
2157 | *new_rhs_out = rhs1; |
2158 | *type_out = NULL; |
2159 | return true; |
2160 | } |
2161 | } |
2162 | } |
2163 | else |
2164 | rhs1 = rhs; |
2165 | |
2166 | type1 = TREE_TYPE (rhs1); |
2167 | |
2168 | if (TREE_CODE (type1) != TREE_CODE (type) |
2169 | || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type)) |
2170 | return false; |
2171 | |
2172 | *new_rhs_out = rhs1; |
2173 | *type_out = type1; |
2174 | return true; |
2175 | } |
2176 | |
2177 | if (TREE_CODE (rhs) == INTEGER_CST) |
2178 | { |
2179 | *new_rhs_out = rhs; |
2180 | *type_out = NULL; |
2181 | return true; |
2182 | } |
2183 | |
2184 | return false; |
2185 | } |
2186 | |
2187 | /* Return true if STMT performs a widening multiplication, assuming the |
2188 | output type is TYPE. If so, store the unwidened types of the operands |
2189 | in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and |
2190 | *RHS2_OUT such that converting those operands to types *TYPE1_OUT |
2191 | and *TYPE2_OUT would give the operands of the multiplication. */ |
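/* E.g. (illustrative only) the multiplication in

int a, b;
long long r = (long long) a * (long long) b;

is a widening multiplication: both unwidened types are the 32-bit
type and *RHS1_OUT/*RHS2_OUT are A and B.  */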
2192 | |
2193 | static bool |
2194 | is_widening_mult_p (gimple *stmt, |
2195 | tree *type1_out, tree *rhs1_out, |
2196 | tree *type2_out, tree *rhs2_out) |
2197 | { |
2198 | tree type = TREE_TYPE (gimple_assign_lhs (stmt)); |
2199 | |
2200 | if (TREE_CODE (type) != INTEGER_TYPE |
2201 | && TREE_CODE (type) != FIXED_POINT_TYPE) |
2202 | return false; |
2203 | |
2204 | if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out, |
2205 | rhs1_out)) |
2206 | return false; |
2207 | |
2208 | if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out, |
2209 | rhs2_out)) |
2210 | return false; |
2211 | |
2212 | if (*type1_out == NULL) |
2213 | { |
2214 | if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out)) |
2215 | return false; |
2216 | *type1_out = *type2_out; |
2217 | } |
2218 | |
2219 | if (*type2_out == NULL) |
2220 | { |
2221 | if (!int_fits_type_p (*rhs2_out, *type1_out)) |
2222 | return false; |
2223 | *type2_out = *type1_out; |
2224 | } |
2225 | |
2226 | /* Ensure that the larger of the two operands comes first. */ |
2227 | if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out)) |
2228 | { |
2229 | std::swap (*type1_out, *type2_out); |
2230 | std::swap (*rhs1_out, *rhs2_out); |
2231 | } |
2232 | |
2233 | return true; |
2234 | } |
2235 | |
/* Check to see if the CALL statement is an invocation of copysign
with 1.0 as the first argument.  */
2238 | static bool |
2239 | is_copysign_call_with_1 (gimple *call) |
2240 | { |
2241 | gcall *c = dyn_cast <gcall *> (call); |
2242 | if (! c) |
2243 | return false; |
2244 | |
2245 | enum combined_fn code = gimple_call_combined_fn (c); |
2246 | |
2247 | if (code == CFN_LAST) |
2248 | return false; |
2249 | |
2250 | if (builtin_fn_p (code)) |
2251 | { |
2252 | switch (as_builtin_fn (code)) |
2253 | { |
2254 | CASE_FLT_FN (BUILT_IN_COPYSIGN): |
2255 | CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN): |
2256 | return real_onep (gimple_call_arg (c, 0)); |
2257 | default: |
2258 | return false; |
2259 | } |
2260 | } |
2261 | |
2262 | if (internal_fn_p (code)) |
2263 | { |
2264 | switch (as_internal_fn (code)) |
2265 | { |
2266 | case IFN_COPYSIGN: |
2267 | return real_onep (gimple_call_arg (c, 0)); |
2268 | default: |
2269 | return false; |
2270 | } |
2271 | } |
2272 | |
2273 | return false; |
2274 | } |
2275 | |
2276 | /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y). |
This is only done when the xorsign optab is defined.  If the
pattern is not an xorsign pattern or if expansion fails, FALSE is
returned; otherwise TRUE is returned.  */
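/* x * copysign (1.0, y) only transfers the sign bit of Y onto X, so
(assuming the target implements the xorsign optab) it can be done
as an XOR of that sign bit, via IFN_XORSIGN below.  */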
2280 | static bool |
2281 | convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi) |
2282 | { |
2283 | tree treeop0, treeop1, lhs, type; |
2284 | location_t loc = gimple_location (stmt); |
2285 | lhs = gimple_assign_lhs (stmt); |
2286 | treeop0 = gimple_assign_rhs1 (stmt); |
2287 | treeop1 = gimple_assign_rhs2 (stmt); |
2288 | type = TREE_TYPE (lhs); |
2289 | machine_mode mode = TYPE_MODE (type); |
2290 | |
2291 | if (HONOR_SNANS (type)) |
2292 | return false; |
2293 | |
2294 | if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME) |
2295 | { |
2296 | gimple *call0 = SSA_NAME_DEF_STMT (treeop0); |
2297 | if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0)) |
2298 | { |
2299 | call0 = SSA_NAME_DEF_STMT (treeop1); |
2300 | if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0)) |
2301 | return false; |
2302 | |
2303 | treeop1 = treeop0; |
2304 | } |
2305 | if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing) |
2306 | return false; |
2307 | |
2308 | gcall *c = as_a<gcall*> (call0); |
2309 | treeop0 = gimple_call_arg (c, 1); |
2310 | |
2311 | gcall *call_stmt |
2312 | = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0); |
2313 | gimple_set_lhs (call_stmt, lhs); |
2314 | gimple_set_location (call_stmt, loc); |
2315 | gsi_replace (gsi, call_stmt, true); |
2316 | return true; |
2317 | } |
2318 | |
2319 | return false; |
2320 | } |
2321 | |
2322 | /* Process a single gimple statement STMT, which has a MULT_EXPR as |
2323 | its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return |
2324 | value is true iff we converted the statement. */ |
2325 | |
2326 | static bool |
2327 | convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi) |
2328 | { |
2329 | tree lhs, rhs1, rhs2, type, type1, type2; |
2330 | enum insn_code handler; |
2331 | scalar_int_mode to_mode, from_mode, actual_mode; |
2332 | optab op; |
2333 | int actual_precision; |
2334 | location_t loc = gimple_location (stmt); |
2335 | bool from_unsigned1, from_unsigned2; |
2336 | |
2337 | lhs = gimple_assign_lhs (stmt); |
2338 | type = TREE_TYPE (lhs); |
2339 | if (TREE_CODE (type) != INTEGER_TYPE) |
2340 | return false; |
2341 | |
2342 | if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2)) |
2343 | return false; |
2344 | |
2345 | to_mode = SCALAR_INT_TYPE_MODE (type); |
2346 | from_mode = SCALAR_INT_TYPE_MODE (type1); |
2347 | if (to_mode == from_mode) |
2348 | return false; |
2349 | |
2350 | from_unsigned1 = TYPE_UNSIGNED (type1); |
2351 | from_unsigned2 = TYPE_UNSIGNED (type2); |
2352 | |
2353 | if (from_unsigned1 && from_unsigned2) |
2354 | op = umul_widen_optab; |
2355 | else if (!from_unsigned1 && !from_unsigned2) |
2356 | op = smul_widen_optab; |
2357 | else |
2358 | op = usmul_widen_optab; |
2359 | |
2360 | handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode, |
2361 | &actual_mode); |
2362 | |
2363 | if (handler == CODE_FOR_nothing) |
2364 | { |
2365 | if (op != smul_widen_optab) |
2366 | { |
2367 | /* We can use a signed multiply with unsigned types as long as |
2368 | there is a wider mode to use, or it is the smaller of the two |
2369 | types that is unsigned. Note that type1 >= type2, always. */ |
2370 | if ((TYPE_UNSIGNED (type1) |
2371 | && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode)) |
2372 | || (TYPE_UNSIGNED (type2) |
2373 | && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode))) |
2374 | { |
2375 | if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode) |
2376 | || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode)) |
2377 | return false; |
2378 | } |
2379 | |
2380 | op = smul_widen_optab; |
2381 | handler = find_widening_optab_handler_and_mode (op, to_mode, |
2382 | from_mode, |
2383 | &actual_mode); |
2384 | |
2385 | if (handler == CODE_FOR_nothing) |
2386 | return false; |
2387 | |
2388 | from_unsigned1 = from_unsigned2 = false; |
2389 | } |
2390 | else |
2391 | return false; |
2392 | } |
2393 | |
/* Ensure that the inputs to the handler are in the correct precision
2395 | for the opcode. This will be the full mode size. */ |
2396 | actual_precision = GET_MODE_PRECISION (actual_mode); |
2397 | if (2 * actual_precision > TYPE_PRECISION (type)) |
2398 | return false; |
2399 | if (actual_precision != TYPE_PRECISION (type1) |
2400 | || from_unsigned1 != TYPE_UNSIGNED (type1)) |
2401 | rhs1 = build_and_insert_cast (gsi, loc, |
2402 | build_nonstandard_integer_type |
2403 | (actual_precision, from_unsigned1), rhs1); |
2404 | if (actual_precision != TYPE_PRECISION (type2) |
2405 | || from_unsigned2 != TYPE_UNSIGNED (type2)) |
2406 | rhs2 = build_and_insert_cast (gsi, loc, |
2407 | build_nonstandard_integer_type |
2408 | (actual_precision, from_unsigned2), rhs2); |
2409 | |
2410 | /* Handle constants. */ |
2411 | if (TREE_CODE (rhs1) == INTEGER_CST) |
2412 | rhs1 = fold_convert (type1, rhs1); |
2413 | if (TREE_CODE (rhs2) == INTEGER_CST) |
2414 | rhs2 = fold_convert (type2, rhs2); |
2415 | |
2416 | gimple_assign_set_rhs1 (stmt, rhs1); |
2417 | gimple_assign_set_rhs2 (stmt, rhs2); |
2418 | gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR); |
2419 | update_stmt (stmt); |
2420 | widen_mul_stats.widen_mults_inserted++; |
2421 | return true; |
2422 | } |
2423 | |
2424 | /* Process a single gimple statement STMT, which is found at the |
iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
2426 | rhs (given by CODE), and try to convert it into a |
2427 | WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value |
2428 | is true iff we converted the statement. */ |
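/* E.g. (an illustrative sketch)

r = (long long) a * (long long) b + c;

with 32-bit A and B can become r = WIDEN_MULT_PLUS_EXPR <a, b, c>
when the target provides a suitable widening multiply-accumulate
pattern.  */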
2429 | |
2430 | static bool |
2431 | convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt, |
2432 | enum tree_code code) |
2433 | { |
2434 | gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL; |
2435 | gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt; |
2436 | tree type, type1, type2, optype; |
2437 | tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs; |
2438 | enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK; |
2439 | optab this_optab; |
2440 | enum tree_code wmult_code; |
2441 | enum insn_code handler; |
2442 | scalar_mode to_mode, from_mode, actual_mode; |
2443 | location_t loc = gimple_location (stmt); |
2444 | int actual_precision; |
2445 | bool from_unsigned1, from_unsigned2; |
2446 | |
2447 | lhs = gimple_assign_lhs (stmt); |
2448 | type = TREE_TYPE (lhs); |
2449 | if (TREE_CODE (type) != INTEGER_TYPE |
2450 | && TREE_CODE (type) != FIXED_POINT_TYPE) |
2451 | return false; |
2452 | |
2453 | if (code == MINUS_EXPR) |
2454 | wmult_code = WIDEN_MULT_MINUS_EXPR; |
2455 | else |
2456 | wmult_code = WIDEN_MULT_PLUS_EXPR; |
2457 | |
2458 | rhs1 = gimple_assign_rhs1 (stmt); |
2459 | rhs2 = gimple_assign_rhs2 (stmt); |
2460 | |
2461 | if (TREE_CODE (rhs1) == SSA_NAME) |
2462 | { |
2463 | rhs1_stmt = SSA_NAME_DEF_STMT (rhs1); |
2464 | if (is_gimple_assign (rhs1_stmt)) |
2465 | rhs1_code = gimple_assign_rhs_code (rhs1_stmt); |
2466 | } |
2467 | |
2468 | if (TREE_CODE (rhs2) == SSA_NAME) |
2469 | { |
2470 | rhs2_stmt = SSA_NAME_DEF_STMT (rhs2); |
2471 | if (is_gimple_assign (rhs2_stmt)) |
2472 | rhs2_code = gimple_assign_rhs_code (rhs2_stmt); |
2473 | } |
2474 | |
2475 | /* Allow for one conversion statement between the multiply |
and addition/subtraction statement.  If there is more than
one conversion then we assume they would invalidate this
2478 | transformation. If that's not the case then they should have |
2479 | been folded before now. */ |
2480 | if (CONVERT_EXPR_CODE_P (rhs1_code)) |
2481 | { |
2482 | conv1_stmt = rhs1_stmt; |
2483 | rhs1 = gimple_assign_rhs1 (rhs1_stmt); |
2484 | if (TREE_CODE (rhs1) == SSA_NAME) |
2485 | { |
2486 | rhs1_stmt = SSA_NAME_DEF_STMT (rhs1); |
2487 | if (is_gimple_assign (rhs1_stmt)) |
2488 | rhs1_code = gimple_assign_rhs_code (rhs1_stmt); |
2489 | } |
2490 | else |
2491 | return false; |
2492 | } |
2493 | if (CONVERT_EXPR_CODE_P (rhs2_code)) |
2494 | { |
2495 | conv2_stmt = rhs2_stmt; |
2496 | rhs2 = gimple_assign_rhs1 (rhs2_stmt); |
2497 | if (TREE_CODE (rhs2) == SSA_NAME) |
2498 | { |
2499 | rhs2_stmt = SSA_NAME_DEF_STMT (rhs2); |
2500 | if (is_gimple_assign (rhs2_stmt)) |
2501 | rhs2_code = gimple_assign_rhs_code (rhs2_stmt); |
2502 | } |
2503 | else |
2504 | return false; |
2505 | } |
2506 | |
2507 | /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call |
is_widening_mult_p, but we still need the rhs values it returns.
2509 | |
2510 | It might also appear that it would be sufficient to use the existing |
2511 | operands of the widening multiply, but that would limit the choice of |
2512 | multiply-and-accumulate instructions. |
2513 | |
If the widened-multiplication result has more than one use, it is
2515 | probably wiser not to do the conversion. */ |
2516 | if (code == PLUS_EXPR |
2517 | && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR)) |
2518 | { |
2519 | if (!has_single_use (rhs1) |
2520 | || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1, |
2521 | &type2, &mult_rhs2)) |
2522 | return false; |
2523 | add_rhs = rhs2; |
2524 | conv_stmt = conv1_stmt; |
2525 | } |
2526 | else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR) |
2527 | { |
2528 | if (!has_single_use (rhs2) |
2529 | || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1, |
2530 | &type2, &mult_rhs2)) |
2531 | return false; |
2532 | add_rhs = rhs1; |
2533 | conv_stmt = conv2_stmt; |
2534 | } |
2535 | else |
2536 | return false; |
2537 | |
2538 | to_mode = SCALAR_TYPE_MODE (type); |
2539 | from_mode = SCALAR_TYPE_MODE (type1); |
2540 | if (to_mode == from_mode) |
2541 | return false; |
2542 | |
2543 | from_unsigned1 = TYPE_UNSIGNED (type1); |
2544 | from_unsigned2 = TYPE_UNSIGNED (type2); |
2545 | optype = type1; |
2546 | |
2547 | /* There's no such thing as a mixed sign madd yet, so use a wider mode. */ |
2548 | if (from_unsigned1 != from_unsigned2) |
2549 | { |
2550 | if (!INTEGRAL_TYPE_P (type)) |
2551 | return false; |
2552 | /* We can use a signed multiply with unsigned types as long as |
2553 | there is a wider mode to use, or it is the smaller of the two |
2554 | types that is unsigned. Note that type1 >= type2, always. */ |
2555 | if ((from_unsigned1 |
2556 | && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode)) |
2557 | || (from_unsigned2 |
2558 | && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode))) |
2559 | { |
2560 | if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode) |
2561 | || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode)) |
2562 | return false; |
2563 | } |
2564 | |
2565 | from_unsigned1 = from_unsigned2 = false; |
2566 | optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode), |
2567 | false); |
2568 | } |
2569 | |
2570 | /* If there was a conversion between the multiply and addition |
2571 | then we need to make sure it fits a multiply-and-accumulate. |
There should be a single mode change which does not change the
2573 | value. */ |
2574 | if (conv_stmt) |
2575 | { |
2576 | /* We use the original, unmodified data types for this. */ |
2577 | tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt)); |
2578 | tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt)); |
2579 | int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2); |
2580 | bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2); |
2581 | |
2582 | if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type)) |
2583 | { |
2584 | /* Conversion is a truncate. */ |
2585 | if (TYPE_PRECISION (to_type) < data_size) |
2586 | return false; |
2587 | } |
2588 | else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)) |
2589 | { |
2590 | /* Conversion is an extend. Check it's the right sort. */ |
2591 | if (TYPE_UNSIGNED (from_type) != is_unsigned |
2592 | && !(is_unsigned && TYPE_PRECISION (from_type) > data_size)) |
2593 | return false; |
2594 | } |
2595 | /* else convert is a no-op for our purposes. */ |
2596 | } |
2597 | |
2598 | /* Verify that the machine can perform a widening multiply |
2599 | accumulate in this mode/signedness combination, otherwise |
2600 | this transformation is likely to pessimize code. */ |
2601 | this_optab = optab_for_tree_code (wmult_code, optype, optab_default); |
2602 | handler = find_widening_optab_handler_and_mode (this_optab, to_mode, |
2603 | from_mode, &actual_mode); |
2604 | |
2605 | if (handler == CODE_FOR_nothing) |
2606 | return false; |
2607 | |
/* Ensure that the inputs to the handler are in the correct precision
2609 | for the opcode. This will be the full mode size. */ |
2610 | actual_precision = GET_MODE_PRECISION (actual_mode); |
2611 | if (actual_precision != TYPE_PRECISION (type1) |
2612 | || from_unsigned1 != TYPE_UNSIGNED (type1)) |
2613 | mult_rhs1 = build_and_insert_cast (gsi, loc, |
2614 | build_nonstandard_integer_type |
2615 | (actual_precision, from_unsigned1), |
2616 | mult_rhs1); |
2617 | if (actual_precision != TYPE_PRECISION (type2) |
2618 | || from_unsigned2 != TYPE_UNSIGNED (type2)) |
2619 | mult_rhs2 = build_and_insert_cast (gsi, loc, |
2620 | build_nonstandard_integer_type |
2621 | (actual_precision, from_unsigned2), |
2622 | mult_rhs2); |
2623 | |
2624 | if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs))) |
2625 | add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs); |
2626 | |
2627 | /* Handle constants. */ |
2628 | if (TREE_CODE (mult_rhs1) == INTEGER_CST) |
2629 | mult_rhs1 = fold_convert (type1, mult_rhs1); |
2630 | if (TREE_CODE (mult_rhs2) == INTEGER_CST) |
2631 | mult_rhs2 = fold_convert (type2, mult_rhs2); |
2632 | |
2633 | gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2, |
2634 | add_rhs); |
2635 | update_stmt (gsi_stmt (*gsi)); |
2636 | widen_mul_stats.maccs_inserted++; |
2637 | return true; |
2638 | } |
2639 | |
2640 | /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2 |
2641 | with uses in additions and subtractions to form fused multiply-add |
2642 | operations. Returns true if successful and MUL_STMT should be removed. */ |
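/* E.g. (illustrative)  tmp = a * b;  r = tmp + c;  becomes
r = FMA_EXPR <a, b, c> when the fma optab is available and TMP
has no other uses.  */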
2643 | |
2644 | static bool |
2645 | convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2) |
2646 | { |
2647 | tree mul_result = gimple_get_lhs (mul_stmt); |
2648 | tree type = TREE_TYPE (mul_result); |
2649 | gimple *use_stmt, *neguse_stmt; |
2650 | gassign *fma_stmt; |
2651 | use_operand_p use_p; |
2652 | imm_use_iterator imm_iter; |
2653 | |
2654 | if (FLOAT_TYPE_P (type) |
2655 | && flag_fp_contract_mode == FP_CONTRACT_OFF) |
2656 | return false; |
2657 | |
2658 | /* We don't want to do bitfield reduction ops. */ |
2659 | if (INTEGRAL_TYPE_P (type) |
2660 | && !type_has_mode_precision_p (type)) |
2661 | return false; |
2662 | |
2663 | /* If the target doesn't support it, don't generate it. We assume that |
2664 | if fma isn't available then fms, fnma or fnms are not either. */ |
2665 | if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing) |
2666 | return false; |
2667 | |
2668 | /* If the multiplication has zero uses, it is kept around probably because |
2669 | of -fnon-call-exceptions. Don't optimize it away in that case, |
it is DCE's job.  */
2671 | if (has_zero_uses (mul_result)) |
2672 | return false; |
2673 | |
2674 | /* Make sure that the multiplication statement becomes dead after |
2675 | the transformation, thus that all uses are transformed to FMAs. |
2676 | This means we assume that an FMA operation has the same cost |
2677 | as an addition. */ |
2678 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result) |
2679 | { |
2680 | enum tree_code use_code; |
2681 | tree result = mul_result; |
2682 | bool negate_p = false; |
2683 | |
2684 | use_stmt = USE_STMT (use_p); |
2685 | |
2686 | if (is_gimple_debug (use_stmt)) |
2687 | continue; |
2688 | |
/* For now restrict this operation to single basic blocks.  In theory
2690 | we would want to support sinking the multiplication in |
2691 | m = a*b; |
2692 | if () |
2693 | ma = m + c; |
2694 | else |
2695 | d = m; |
2696 | to form a fma in the then block and sink the multiplication to the |
2697 | else block. */ |
2698 | if (gimple_bb (use_stmt) != gimple_bb (mul_stmt)) |
2699 | return false; |
2700 | |
2701 | if (!is_gimple_assign (use_stmt)) |
2702 | return false; |
2703 | |
2704 | use_code = gimple_assign_rhs_code (use_stmt); |
2705 | |
2706 | /* A negate on the multiplication leads to FNMA. */ |
2707 | if (use_code == NEGATE_EXPR) |
2708 | { |
2709 | ssa_op_iter iter; |
2710 | use_operand_p usep; |
2711 | |
2712 | result = gimple_assign_lhs (use_stmt); |
2713 | |
2714 | /* Make sure the negate statement becomes dead with this |
2715 | single transformation. */ |
2716 | if (!single_imm_use (gimple_assign_lhs (use_stmt), |
2717 | &use_p, &neguse_stmt)) |
2718 | return false; |
2719 | |
2720 | /* Make sure the multiplication isn't also used on that stmt. */ |
2721 | FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE) |
2722 | if (USE_FROM_PTR (usep) == mul_result) |
2723 | return false; |
2724 | |
2725 | /* Re-validate. */ |
2726 | use_stmt = neguse_stmt; |
2727 | if (gimple_bb (use_stmt) != gimple_bb (mul_stmt)) |
2728 | return false; |
2729 | if (!is_gimple_assign (use_stmt)) |
2730 | return false; |
2731 | |
2732 | use_code = gimple_assign_rhs_code (use_stmt); |
2733 | negate_p = true; |
2734 | } |
2735 | |
2736 | switch (use_code) |
2737 | { |
2738 | case MINUS_EXPR: |
2739 | if (gimple_assign_rhs2 (use_stmt) == result) |
2740 | negate_p = !negate_p; |
2741 | break; |
2742 | case PLUS_EXPR: |
2743 | break; |
2744 | default: |
2745 | /* FMA can only be formed from PLUS and MINUS. */ |
2746 | return false; |
2747 | } |
2748 | |
2749 | /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed |
2750 | by a MULT_EXPR that we'll visit later, we might be able to |
2751 | get a more profitable match with fnma. |
2752 | OTOH, if we don't, a negate / fma pair has likely lower latency |
than a mult / subtract pair.  */
2754 | if (use_code == MINUS_EXPR && !negate_p |
2755 | && gimple_assign_rhs1 (use_stmt) == result |
2756 | && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing |
2757 | && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing) |
2758 | { |
2759 | tree rhs2 = gimple_assign_rhs2 (use_stmt); |
2760 | |
2761 | if (TREE_CODE (rhs2) == SSA_NAME) |
2762 | { |
2763 | gimple *stmt2 = SSA_NAME_DEF_STMT (rhs2); |
2764 | if (has_single_use (rhs2) |
2765 | && is_gimple_assign (stmt2) |
2766 | && gimple_assign_rhs_code (stmt2) == MULT_EXPR) |
2767 | return false; |
2768 | } |
2769 | } |
2770 | |
2771 | /* We can't handle a * b + a * b. */ |
2772 | if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt)) |
2773 | return false; |
2774 | |
2775 | /* While it is possible to validate whether or not the exact form |
2776 | that we've recognized is available in the backend, the assumption |
2777 | is that the transformation is never a loss. For instance, suppose |
2778 | the target only has the plain FMA pattern available. Consider |
2779 | a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which |
2780 | is still two operations. Consider -(a*b)-c -> fma(-a,b,-c): we |
2781 | still have 3 operations, but in the FMA form the two NEGs are |
2782 | independent and could be run in parallel. */ |
2783 | } |
2784 | |
2785 | FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result) |
2786 | { |
2787 | gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); |
2788 | enum tree_code use_code; |
2789 | tree addop, mulop1 = op1, result = mul_result; |
2790 | bool negate_p = false; |
2791 | |
2792 | if (is_gimple_debug (use_stmt)) |
2793 | continue; |
2794 | |
2795 | use_code = gimple_assign_rhs_code (use_stmt); |
2796 | if (use_code == NEGATE_EXPR) |
2797 | { |
2798 | result = gimple_assign_lhs (use_stmt); |
2799 | single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt); |
2800 | gsi_remove (&gsi, true); |
2801 | release_defs (use_stmt); |
2802 | |
2803 | use_stmt = neguse_stmt; |
2804 | gsi = gsi_for_stmt (use_stmt); |
2805 | use_code = gimple_assign_rhs_code (use_stmt); |
2806 | negate_p = true; |
2807 | } |
2808 | |
2809 | if (gimple_assign_rhs1 (use_stmt) == result) |
2810 | { |
2811 | addop = gimple_assign_rhs2 (use_stmt); |
2812 | /* a * b - c -> a * b + (-c) */ |
2813 | if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR) |
2814 | addop = force_gimple_operand_gsi (&gsi, |
2815 | build1 (NEGATE_EXPR, |
2816 | type, addop), |
2817 | true, NULL_TREE, true, |
2818 | GSI_SAME_STMT); |
2819 | } |
2820 | else |
2821 | { |
2822 | addop = gimple_assign_rhs1 (use_stmt); |
2823 | /* a - b * c -> (-b) * c + a */ |
2824 | if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR) |
2825 | negate_p = !negate_p; |
2826 | } |
2827 | |
2828 | if (negate_p) |
2829 | mulop1 = force_gimple_operand_gsi (&gsi, |
2830 | build1 (NEGATE_EXPR, |
2831 | type, mulop1), |
2832 | true, NULL_TREE, true, |
2833 | GSI_SAME_STMT); |
2834 | |
2835 | fma_stmt = gimple_build_assign (gimple_assign_lhs (use_stmt), |
2836 | FMA_EXPR, mulop1, op2, addop); |
2837 | gsi_replace (&gsi, fma_stmt, true); |
2838 | widen_mul_stats.fmas_inserted++; |
2839 | } |
2840 | |
2841 | return true; |
2842 | } |
2843 | |
2844 | |
2845 | /* Helper function of match_uaddsub_overflow. Return 1 |
if USE_STMT is an unsigned overflow check ovf != 0 for
STMT, -1 if USE_STMT is an unsigned overflow check ovf == 0
2848 | and 0 otherwise. */ |
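/* E.g. for unsigned r = a + b, overflow occurred iff r < a
(equivalently a > r), which is one of the patterns matched below.  */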
2849 | |
2850 | static int |
2851 | uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt) |
2852 | { |
2853 | enum tree_code ccode = ERROR_MARK; |
2854 | tree crhs1 = NULL_TREE, crhs2 = NULL_TREE; |
2855 | if (gimple_code (use_stmt) == GIMPLE_COND) |
2856 | { |
2857 | ccode = gimple_cond_code (use_stmt); |
2858 | crhs1 = gimple_cond_lhs (use_stmt); |
2859 | crhs2 = gimple_cond_rhs (use_stmt); |
2860 | } |
2861 | else if (is_gimple_assign (use_stmt)) |
2862 | { |
2863 | if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS) |
2864 | { |
2865 | ccode = gimple_assign_rhs_code (use_stmt); |
2866 | crhs1 = gimple_assign_rhs1 (use_stmt); |
2867 | crhs2 = gimple_assign_rhs2 (use_stmt); |
2868 | } |
2869 | else if (gimple_assign_rhs_code (use_stmt) == COND_EXPR) |
2870 | { |
2871 | tree cond = gimple_assign_rhs1 (use_stmt); |
2872 | if (COMPARISON_CLASS_P (cond)) |
2873 | { |
2874 | ccode = TREE_CODE (cond); |
2875 | crhs1 = TREE_OPERAND (cond, 0); |
2876 | crhs2 = TREE_OPERAND (cond, 1); |
2877 | } |
2878 | else |
2879 | return 0; |
2880 | } |
2881 | else |
2882 | return 0; |
2883 | } |
2884 | else |
2885 | return 0; |
2886 | |
2887 | if (TREE_CODE_CLASS (ccode) != tcc_comparison) |
2888 | return 0; |
2889 | |
2890 | enum tree_code code = gimple_assign_rhs_code (stmt); |
2891 | tree lhs = gimple_assign_lhs (stmt); |
2892 | tree rhs1 = gimple_assign_rhs1 (stmt); |
2893 | tree rhs2 = gimple_assign_rhs2 (stmt); |
2894 | |
2895 | switch (ccode) |
2896 | { |
2897 | case GT_EXPR: |
2898 | case LE_EXPR: |
2899 | /* r = a - b; r > a or r <= a |
2900 | r = a + b; a > r or a <= r or b > r or b <= r. */ |
2901 | if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1) |
2902 | || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2) |
2903 | && crhs2 == lhs)) |
2904 | return ccode == GT_EXPR ? 1 : -1; |
2905 | break; |
2906 | case LT_EXPR: |
2907 | case GE_EXPR: |
2908 | /* r = a - b; a < r or a >= r |
2909 | r = a + b; r < a or r >= a or r < b or r >= b. */ |
2910 | if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs) |
2911 | || (code == PLUS_EXPR && crhs1 == lhs |
2912 | && (crhs2 == rhs1 || crhs2 == rhs2))) |
2913 | return ccode == LT_EXPR ? 1 : -1; |
2914 | break; |
2915 | default: |
2916 | break; |
2917 | } |
2918 | return 0; |
2919 | } |
2920 | |
2921 | /* Recognize for unsigned x |
2922 | x = y - z; |
2923 | if (x > y) |
2924 | where there are other uses of x and replace it with |
2925 | _7 = SUB_OVERFLOW (y, z); |
2926 | x = REALPART_EXPR <_7>; |
2927 | _8 = IMAGPART_EXPR <_7>; |
2928 | if (_8) |
2929 | and similarly for addition. */ |
2930 | |
2931 | static bool |
2932 | match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt, |
2933 | enum tree_code code) |
2934 | { |
2935 | tree lhs = gimple_assign_lhs (stmt); |
2936 | tree type = TREE_TYPE (lhs); |
2937 | use_operand_p use_p; |
2938 | imm_use_iterator iter; |
2939 | bool use_seen = false; |
2940 | bool ovf_use_seen = false; |
2941 | gimple *use_stmt; |
2942 | |
2943 | gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR); |
2944 | if (!INTEGRAL_TYPE_P (type) |
2945 | || !TYPE_UNSIGNED (type) |
2946 | || has_zero_uses (lhs) |
2947 | || has_single_use (lhs) |
2948 | || optab_handler (code == PLUS_EXPR ? uaddv4_optab : usubv4_optab, |
2949 | TYPE_MODE (type)) == CODE_FOR_nothing) |
2950 | return false; |
2951 | |
2952 | FOR_EACH_IMM_USE_FAST (use_p, iter, lhs) |
2953 | { |
2954 | use_stmt = USE_STMT (use_p); |
2955 | if (is_gimple_debug (use_stmt)) |
2956 | continue; |
2957 | |
2958 | if (uaddsub_overflow_check_p (stmt, use_stmt)) |
2959 | ovf_use_seen = true; |
2960 | else |
2961 | use_seen = true; |
2962 | if (ovf_use_seen && use_seen) |
2963 | break; |
2964 | } |
2965 | |
2966 | if (!ovf_use_seen || !use_seen) |
2967 | return false; |
2968 | |
2969 | tree ctype = build_complex_type (type); |
2970 | tree rhs1 = gimple_assign_rhs1 (stmt); |
2971 | tree rhs2 = gimple_assign_rhs2 (stmt); |
2972 | gcall *g = gimple_build_call_internal (code == PLUS_EXPR |
2973 | ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW, |
2974 | 2, rhs1, rhs2); |
2975 | tree ctmp = make_ssa_name (ctype); |
2976 | gimple_call_set_lhs (g, ctmp); |
2977 | gsi_insert_before (gsi, g, GSI_SAME_STMT); |
2978 | gassign *g2 = gimple_build_assign (lhs, REALPART_EXPR, |
2979 | build1 (REALPART_EXPR, type, ctmp)); |
2980 | gsi_replace (gsi, g2, true); |
2981 | tree ovf = make_ssa_name (type); |
2982 | g2 = gimple_build_assign (ovf, IMAGPART_EXPR, |
2983 | build1 (IMAGPART_EXPR, type, ctmp)); |
2984 | gsi_insert_after (gsi, g2, GSI_NEW_STMT); |
2985 | |
2986 | FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs) |
2987 | { |
2988 | if (is_gimple_debug (use_stmt)) |
2989 | continue; |
2990 | |
2991 | int ovf_use = uaddsub_overflow_check_p (stmt, use_stmt); |
2992 | if (ovf_use == 0) |
2993 | continue; |
2994 | if (gimple_code (use_stmt) == GIMPLE_COND) |
2995 | { |
2996 | gcond *cond_stmt = as_a <gcond *> (use_stmt); |
2997 | gimple_cond_set_lhs (cond_stmt, ovf); |
2998 | gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0)); |
2999 | gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR); |
3000 | } |
3001 | else |
3002 | { |
3003 | gcc_checking_assert (is_gimple_assign (use_stmt)); |
3004 | if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS) |
3005 | { |
3006 | gimple_assign_set_rhs1 (use_stmt, ovf); |
3007 | gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0)); |
3008 | gimple_assign_set_rhs_code (use_stmt, |
3009 | ovf_use == 1 ? NE_EXPR : EQ_EXPR); |
3010 | } |
3011 | else |
3012 | { |
3013 | gcc_checking_assert (gimple_assign_rhs_code (use_stmt) |
3014 | == COND_EXPR); |
3015 | tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR, |
3016 | boolean_type_node, ovf, |
3017 | build_int_cst (type, 0)); |
3018 | gimple_assign_set_rhs1 (use_stmt, cond); |
3019 | } |
3020 | } |
3021 | update_stmt (use_stmt); |
3022 | } |
3023 | return true; |
3024 | } |
3025 | |
/* Return true if the target has support for divmod.  */
3027 | |
3028 | static bool |
3029 | target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode) |
3030 | { |
3031 | /* If target supports hardware divmod insn, use it for divmod. */ |
3032 | if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing) |
3033 | return true; |
3034 | |
3035 | /* Check if libfunc for divmod is available. */ |
3036 | rtx libfunc = optab_libfunc (divmod_optab, mode); |
3037 | if (libfunc != NULL_RTX) |
3038 | { |
3039 | /* If optab_handler exists for div_optab, perhaps in a wider mode, |
we don't want to use the libfunc even if it exists for the given mode.  */
3041 | machine_mode div_mode; |
3042 | FOR_EACH_MODE_FROM (div_mode, mode) |
3043 | if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing) |
3044 | return false; |
3045 | |
3046 | return targetm.expand_divmod_libfunc != NULL; |
3047 | } |
3048 | |
3049 | return false; |
3050 | } |
3051 | |
/* Check if stmt is a candidate for the divmod transform.  */
3053 | |
3054 | static bool |
3055 | divmod_candidate_p (gassign *stmt) |
3056 | { |
3057 | tree type = TREE_TYPE (gimple_assign_lhs (stmt)); |
3058 | machine_mode mode = TYPE_MODE (type); |
3059 | optab divmod_optab, div_optab; |
3060 | |
3061 | if (TYPE_UNSIGNED (type)) |
3062 | { |
3063 | divmod_optab = udivmod_optab; |
3064 | div_optab = udiv_optab; |
3065 | } |
3066 | else |
3067 | { |
3068 | divmod_optab = sdivmod_optab; |
3069 | div_optab = sdiv_optab; |
3070 | } |
3071 | |
3072 | tree op1 = gimple_assign_rhs1 (stmt); |
3073 | tree op2 = gimple_assign_rhs2 (stmt); |
3074 | |
/* Disable the transform if either operand is a constant, since division-by-constant
3076 | may have specialized expansion. */ |
3077 | if (CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2)) |
3078 | return false; |
3079 | |
3080 | /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should |
3081 | expand using the [su]divv optabs. */ |
3082 | if (TYPE_OVERFLOW_TRAPS (type)) |
3083 | return false; |
3084 | |
3085 | if (!target_supports_divmod_p (divmod_optab, div_optab, mode)) |
3086 | return false; |
3087 | |
3088 | return true; |
3089 | } |
3090 | |
3091 | /* This function looks for: |
3092 | t1 = a TRUNC_DIV_EXPR b; |
3093 | t2 = a TRUNC_MOD_EXPR b; |
3094 | and transforms it to the following sequence: |
3095 | complex_tmp = DIVMOD (a, b); |
t1 = REALPART_EXPR (complex_tmp);
t2 = IMAGPART_EXPR (complex_tmp);
3098 | For conditions enabling the transform see divmod_candidate_p(). |
3099 | |
3100 | The pass has three parts: |
1) Find top_stmt, a trunc_div or trunc_mod stmt that dominates all
3102 | other trunc_div_expr and trunc_mod_expr stmts. |
3103 | 2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt |
3104 | to stmts vector. |
3105 | 3) Insert DIVMOD call just before top_stmt and update entries in |
stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
3107 | IMAGPART_EXPR for mod). */ |
3108 | |
3109 | static bool |
3110 | convert_to_divmod (gassign *stmt) |
3111 | { |
3112 | if (stmt_can_throw_internal (stmt) |
3113 | || !divmod_candidate_p (stmt)) |
3114 | return false; |
3115 | |
3116 | tree op1 = gimple_assign_rhs1 (stmt); |
3117 | tree op2 = gimple_assign_rhs2 (stmt); |
3118 | |
3119 | imm_use_iterator use_iter; |
3120 | gimple *use_stmt; |
3121 | auto_vec<gimple *> stmts; |
3122 | |
3123 | gimple *top_stmt = stmt; |
3124 | basic_block top_bb = gimple_bb (stmt); |
3125 | |
3126 | /* Part 1: Try to set top_stmt to "topmost" stmt that dominates |
at least stmt and possibly other trunc_div/trunc_mod stmts
3128 | having same operands as stmt. */ |
3129 | |
3130 | FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1) |
3131 | { |
3132 | if (is_gimple_assign (use_stmt) |
3133 | && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR |
3134 | || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR) |
3135 | && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0) |
3136 | && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0)) |
3137 | { |
3138 | if (stmt_can_throw_internal (use_stmt)) |
3139 | continue; |
3140 | |
3141 | basic_block bb = gimple_bb (use_stmt); |
3142 | |
3143 | if (bb == top_bb) |
3144 | { |
3145 | if (gimple_uid (use_stmt) < gimple_uid (top_stmt)) |
3146 | top_stmt = use_stmt; |
3147 | } |
3148 | else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb)) |
3149 | { |
3150 | top_bb = bb; |
3151 | top_stmt = use_stmt; |
3152 | } |
3153 | } |
3154 | } |
3155 | |
3156 | tree top_op1 = gimple_assign_rhs1 (top_stmt); |
3157 | tree top_op2 = gimple_assign_rhs2 (top_stmt); |
3158 | |
3159 | stmts.safe_push (top_stmt); |
3160 | bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR); |
3161 | |
/* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
3163 | to stmts vector. The 2nd loop will always add stmt to stmts vector, since |
3164 | gimple_bb (top_stmt) dominates gimple_bb (stmt), so the |
2nd loop ends up adding at least a single trunc_mod_expr stmt.  */
3166 | |
3167 | FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1) |
3168 | { |
3169 | if (is_gimple_assign (use_stmt) |
3170 | && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR |
3171 | || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR) |
3172 | && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0) |
3173 | && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0)) |
3174 | { |
3175 | if (use_stmt == top_stmt |
3176 | || stmt_can_throw_internal (use_stmt) |
3177 | || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb)) |
3178 | continue; |
3179 | |
3180 | stmts.safe_push (use_stmt); |
3181 | if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR) |
3182 | div_seen = true; |
3183 | } |
3184 | } |
3185 | |
  /* Without at least one division in the group there is nothing to
     share, so leave the plain TRUNC_MOD_EXPR stmts alone.  */
  if (!div_seen)
    return false;
3188 | |
  /* Part 3: Create a call to the internal function DIVMOD:
     divmod_tmp = DIVMOD (op1, op2).  */
3191 | |
3192 | gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2); |
3193 | tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)), |
				 call_stmt, "divmod_tmp");
3195 | gimple_call_set_lhs (call_stmt, res); |
3196 | /* We rejected throwing statements above. */ |
3197 | gimple_call_set_nothrow (call_stmt, true); |
3198 | |
3199 | /* Insert the call before top_stmt. */ |
3200 | gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt); |
3201 | gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT); |
3202 | |
3203 | widen_mul_stats.divmod_calls_inserted++; |
3204 | |
3205 | /* Update all statements in stmts vector: |
3206 | lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp> |
3207 | lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */ |
3208 | |
3209 | for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i) |
3210 | { |
3211 | tree new_rhs; |
3212 | |
3213 | switch (gimple_assign_rhs_code (use_stmt)) |
3214 | { |
3215 | case TRUNC_DIV_EXPR: |
3216 | new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res); |
3217 | break; |
3218 | |
3219 | case TRUNC_MOD_EXPR: |
3220 | new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res); |
3221 | break; |
3222 | |
3223 | default: |
3224 | gcc_unreachable (); |
3225 | } |
3226 | |
3227 | gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); |
3228 | gimple_assign_set_rhs_from_tree (&gsi, new_rhs); |
3229 | update_stmt (use_stmt); |
3230 | } |
3231 | |
3232 | return true; |
3233 | } |
3234 | |
3235 | /* Find integer multiplications where the operands are extended from |
3236 | smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR |
3237 | where appropriate. */ |
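
/* For example (a hedged source-level sketch, not code from this pass;
   the function name is made up): on a target with a 32x32->64 bit
   multiply instruction,

     long long
     widen (int a, int b)
     {
       return (long long) a * b;
     }

   contains a MULT_EXPR of two values sign-extended from int; rewriting
   it as a WIDEN_MULT_EXPR on the int operands avoids a full 64x64
   multiplication.  */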
3238 | |
3239 | namespace { |
3240 | |
3241 | const pass_data pass_data_optimize_widening_mul = |
3242 | { |
  GIMPLE_PASS, /* type */
  "widening_mul", /* name */
3245 | OPTGROUP_NONE, /* optinfo_flags */ |
3246 | TV_TREE_WIDEN_MUL, /* tv_id */ |
3247 | PROP_ssa, /* properties_required */ |
3248 | 0, /* properties_provided */ |
3249 | 0, /* properties_destroyed */ |
3250 | 0, /* todo_flags_start */ |
3251 | TODO_update_ssa, /* todo_flags_finish */ |
3252 | }; |
3253 | |
3254 | class pass_optimize_widening_mul : public gimple_opt_pass |
3255 | { |
3256 | public: |
3257 | pass_optimize_widening_mul (gcc::context *ctxt) |
3258 | : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt) |
3259 | {} |
3260 | |
3261 | /* opt_pass methods: */ |
3262 | virtual bool gate (function *) |
3263 | { |
3264 | return flag_expensive_optimizations && optimize; |
3265 | } |
3266 | |
3267 | virtual unsigned int execute (function *); |
3268 | |
3269 | }; // class pass_optimize_widening_mul |
3270 | |
3271 | unsigned int |
3272 | pass_optimize_widening_mul::execute (function *fun) |
3273 | { |
3274 | basic_block bb; |
3275 | bool cfg_changed = false; |
3276 | |
3277 | memset (&widen_mul_stats, 0, sizeof (widen_mul_stats)); |
3278 | calculate_dominance_info (CDI_DOMINATORS); |
3279 | renumber_gimple_stmt_uids (); |
3280 | |
3281 | FOR_EACH_BB_FN (bb, fun) |
3282 | { |
3283 | gimple_stmt_iterator gsi; |
3284 | |
3285 | for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);) |
3286 | { |
3287 | gimple *stmt = gsi_stmt (gsi); |
3288 | enum tree_code code; |
3289 | |
3290 | if (is_gimple_assign (stmt)) |
3291 | { |
3292 | code = gimple_assign_rhs_code (stmt); |
3293 | switch (code) |
3294 | { |
3295 | case MULT_EXPR: |
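		  /* Try a widening multiply first, then the copysign
		     expansion, then FMA formation.  If an FMA was
		     formed, the original multiplication is dead and
		     can be removed.  */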
3296 | if (!convert_mult_to_widen (stmt, &gsi) |
3297 | && !convert_expand_mult_copysign (stmt, &gsi) |
3298 | && convert_mult_to_fma (stmt, |
3299 | gimple_assign_rhs1 (stmt), |
3300 | gimple_assign_rhs2 (stmt))) |
3301 | { |
3302 | gsi_remove (&gsi, true); |
3303 | release_defs (stmt); |
3304 | continue; |
3305 | } |
3306 | break; |
3307 | |
3308 | case PLUS_EXPR: |
3309 | case MINUS_EXPR: |
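		  /* Prefer a widening multiply-accumulate; failing
		     that, try to match an unsigned overflow check
		     such as x + y < x against the overflow internal
		     functions.  */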
3310 | if (!convert_plusminus_to_widen (&gsi, stmt, code)) |
3311 | match_uaddsub_overflow (&gsi, stmt, code); |
3312 | break; |
3313 | |
3314 | case TRUNC_MOD_EXPR: |
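		  /* Try to pair this modulus with a matching division
		     into a single DIVMOD internal call.  */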
3315 | convert_to_divmod (as_a<gassign *> (stmt)); |
3316 | break; |
3317 | |
3318 | default:; |
3319 | } |
3320 | } |
3321 | else if (is_gimple_call (stmt) |
3322 | && gimple_call_lhs (stmt)) |
3323 | { |
3324 | tree fndecl = gimple_call_fndecl (stmt); |
3325 | if (fndecl |
3326 | && gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)) |
3327 | { |
3328 | switch (DECL_FUNCTION_CODE (fndecl)) |
3329 | { |
3330 | case BUILT_IN_POWF: |
3331 | case BUILT_IN_POW: |
3332 | case BUILT_IN_POWL: |
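		      /* pow (x, 2.0) is just x * x, so it can feed an
			 FMA exactly like an explicit multiplication.  */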
3333 | if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST |
3334 | && real_equal |
3335 | (&TREE_REAL_CST (gimple_call_arg (stmt, 1)), |
3336 | &dconst2) |
3337 | && convert_mult_to_fma (stmt, |
3338 | gimple_call_arg (stmt, 0), |
3339 | gimple_call_arg (stmt, 0))) |
3340 | { |
3341 | unlink_stmt_vdef (stmt); |
3342 | if (gsi_remove (&gsi, true) |
3343 | && gimple_purge_dead_eh_edges (bb)) |
3344 | cfg_changed = true; |
3345 | release_defs (stmt); |
3346 | continue; |
3347 | } |
3348 | break; |
3349 | |
3350 | default:; |
3351 | } |
3352 | } |
3353 | } |
3354 | gsi_next (&gsi); |
3355 | } |
3356 | } |
3357 | |
  statistics_counter_event (fun, "widening multiplications inserted",
			    widen_mul_stats.widen_mults_inserted);
  statistics_counter_event (fun, "widening maccs inserted",
			    widen_mul_stats.maccs_inserted);
  statistics_counter_event (fun, "fused multiply-adds inserted",
			    widen_mul_stats.fmas_inserted);
  statistics_counter_event (fun, "divmod calls inserted",
			    widen_mul_stats.divmod_calls_inserted);
3366 | |
3367 | return cfg_changed ? TODO_cleanup_cfg : 0; |
3368 | } |
3369 | |
3370 | } // anon namespace |
3371 | |
3372 | gimple_opt_pass * |
3373 | make_pass_optimize_widening_mul (gcc::context *ctxt) |
3374 | { |
3375 | return new pass_optimize_widening_mul (ctxt); |
3376 | } |
3377 | |