/* Global, SSA-based optimizations using mathematical identities.
   Copyright (C) 2005-2024 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Currently, the only mini-pass in this file tries to CSE reciprocal
   operations.  These are common in sequences such as this one:

	modulus = sqrt(x*x + y*y + z*z);
	x = x / modulus;
	y = y / modulus;
	z = z / modulus;

   that can be optimized to

	modulus = sqrt(x*x + y*y + z*z);
	rmodulus = 1.0 / modulus;
	x = x * rmodulus;
	y = y * rmodulus;
	z = z * rmodulus;

   We do this for loop-invariant divisors, and with this pass whenever
   we notice that a division has the same divisor multiple times.

   Of course, like in PRE, we don't insert a division if a dominator
   already has one.  However, this cannot be done as an extension of
   PRE for several reasons.

   First of all, experiments showed that the transformation is not
   always useful if there are only two divisions by the same divisor.
   This is probably because modern processors can pipeline the
   divisions; on older, in-order processors it should still be
   worthwhile to optimize two divisions by the same number.  We make
   this a parameter, and it shall be called N in the remainder of this
   comment.

   Second, if trapping math is active, we have less freedom about where
   to insert divisions: we can only do so in basic blocks that already
   contain one.  (If divisions don't trap, we can instead insert them
   elsewhere, namely in blocks that are common dominators of those that
   contain the division.)

   We really don't want to compute the reciprocal unless a division will
   be found.  To do this, we won't insert the division in a basic block
   that has fewer than N divisions *post-dominating* it.

   The algorithm constructs a subset of the dominator tree, holding the
   blocks containing the divisions and the common dominators of them,
   and walks it twice.  The first walk is in post-order, and it annotates
   each block with the number of divisions that post-dominate it: this
   gives information on where divisions can be inserted profitably.
   The second walk is in pre-order, and it inserts divisions as explained
   above, and replaces divisions by multiplications.

   In the best case, the cost of the pass is O(n_statements).  In the
   worst case, the cost is due to creating the dominator tree subset,
   with a cost of O(n_basic_blocks ^ 2); however, this can only happen
   for n_statements / n_basic_blocks statements.  So, the amortized cost
   of creating the dominator tree subset is O(n_basic_blocks) and the
   worst-case cost of the pass is O(n_statements * n_basic_blocks).

   More practically, the cost will be small because there are few
   divisions, and they tend to be in the same basic block, so insert_bb
   is called very few times.

   If we did this using domwalk.cc, an efficient implementation would have
   to work on all the variables in a single pass, because we could not
   work on just a subset of the dominator tree, as we do now, and the
   cost would also be something like O(n_statements * n_basic_blocks).
   The data structures would be more complex in order to work on all the
   variables in a single pass.  */
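
/* As a schematic illustration of the two walks (a made-up example, with
   the target's min_divisions_for_recip_mul threshold assumed to be 2),
   consider a CFG where bb2 post-dominates bb1 and both blocks divide by
   the same SSA name:

	bb1:  a = b / d;
	bb2:  c = e / d;

   register_division_in records an occurrence for each block, insert_bb
   links bb2 as a child of bb1 in the dominator-tree subset, and the
   post-order walk (compute_merit) accumulates bb2's count into bb1, so
   bb1 is annotated with 4 (each division counts twice).  The pre-order
   walk (insert_reciprocals) then emits recip = 1.0 / d in bb1, before
   its division, and both divisions become a = b * recip and
   c = e * recip.  */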

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "gimplify.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "tree-cfg.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "builtins.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
#include "optabs-libfuncs.h"
#include "tree-eh.h"
#include "targhooks.h"
#include "domwalk.h"
#include "tree-ssa-math-opts.h"
#include "dbgcnt.h"
120
/* This structure represents one basic block that either computes a
   division, or is a common dominator for basic blocks that compute a
   division.  */
124struct occurrence {
125 /* The basic block represented by this structure. */
126 basic_block bb = basic_block();
127
128 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
129 inserted in BB. */
130 tree recip_def = tree();
131
132 /* If non-NULL, the SSA_NAME holding the definition for a squared
133 reciprocal inserted in BB. */
134 tree square_recip_def = tree();
135
136 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
137 was inserted in BB. */
138 gimple *recip_def_stmt = nullptr;
139
140 /* Pointer to a list of "struct occurrence"s for blocks dominated
141 by BB. */
142 struct occurrence *children = nullptr;
143
  /* Pointer to the next "struct occurrence" in the list of blocks
     sharing a common dominator.  */
146 struct occurrence *next = nullptr;
147
148 /* The number of divisions that are in BB before compute_merit. The
149 number of divisions that are in BB or post-dominate it after
150 compute_merit. */
151 int num_divisions = 0;
152
153 /* True if the basic block has a division, false if it is a common
154 dominator for basic blocks that do. If it is false and trapping
155 math is active, BB is not a candidate for inserting a reciprocal. */
156 bool bb_has_division = false;
157
158 /* Construct a struct occurrence for basic block BB, and whose
159 children list is headed by CHILDREN. */
160 occurrence (basic_block bb, struct occurrence *children)
161 : bb (bb), children (children)
162 {
163 bb->aux = this;
164 }
165
166 /* Destroy a struct occurrence and remove it from its basic block. */
167 ~occurrence ()
168 {
169 bb->aux = nullptr;
170 }
171
172 /* Allocate memory for a struct occurrence from OCC_POOL. */
173 static void* operator new (size_t);
174
175 /* Return memory for a struct occurrence to OCC_POOL. */
176 static void operator delete (void*, size_t);
177};
178
179static struct
180{
181 /* Number of 1.0/X ops inserted. */
182 int rdivs_inserted;
183
184 /* Number of 1.0/FUNC ops inserted. */
185 int rfuncs_inserted;
186} reciprocal_stats;
187
188static struct
189{
190 /* Number of cexpi calls inserted. */
191 int inserted;
192
193 /* Number of conversions removed. */
194 int conv_removed;
195
196} sincos_stats;
197
198static struct
199{
200 /* Number of widening multiplication ops inserted. */
201 int widen_mults_inserted;
202
203 /* Number of integer multiply-and-accumulate ops inserted. */
204 int maccs_inserted;
205
206 /* Number of fp fused multiply-add ops inserted. */
207 int fmas_inserted;
208
209 /* Number of divmod calls inserted. */
210 int divmod_calls_inserted;
211
212 /* Number of highpart multiplication ops inserted. */
213 int highpart_mults_inserted;
214} widen_mul_stats;
215
216/* The instance of "struct occurrence" representing the highest
217 interesting block in the dominator tree. */
218static struct occurrence *occ_head;
219
220/* Allocation pool for getting instances of "struct occurrence". */
221static object_allocator<occurrence> *occ_pool;
222
223void* occurrence::operator new (size_t n)
224{
225 gcc_assert (n == sizeof(occurrence));
226 return occ_pool->allocate_raw ();
227}
228
229void occurrence::operator delete (void *occ, size_t n)
230{
231 gcc_assert (n == sizeof(occurrence));
232 occ_pool->remove_raw (object: occ);
233}
234
235/* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
236 list of "struct occurrence"s, one per basic block, having IDOM as
237 their common dominator.
238
239 We try to insert NEW_OCC as deep as possible in the tree, and we also
240 insert any other block that is a common dominator for BB and one
241 block already in the tree. */
242
243static void
244insert_bb (struct occurrence *new_occ, basic_block idom,
245 struct occurrence **p_head)
246{
247 struct occurrence *occ, **p_occ;
248
249 for (p_occ = p_head; (occ = *p_occ) != NULL; )
250 {
251 basic_block bb = new_occ->bb, occ_bb = occ->bb;
252 basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
253 if (dom == bb)
254 {
255 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
256 from its list. */
257 *p_occ = occ->next;
258 occ->next = new_occ->children;
259 new_occ->children = occ;
260
261 /* Try the next block (it may as well be dominated by BB). */
262 }
263
264 else if (dom == occ_bb)
265 {
266 /* OCC_BB dominates BB. Tail recurse to look deeper. */
267 insert_bb (new_occ, idom: dom, p_head: &occ->children);
268 return;
269 }
270
271 else if (dom != idom)
272 {
273 gcc_assert (!dom->aux);
274
275 /* There is a dominator between IDOM and BB, add it and make
276 two children out of NEW_OCC and OCC. First, remove OCC from
277 its list. */
278 *p_occ = occ->next;
279 new_occ->next = occ;
280 occ->next = NULL;
281
282 /* None of the previous blocks has DOM as a dominator: if we tail
283 recursed, we would reexamine them uselessly. Just switch BB with
284 DOM, and go on looking for blocks dominated by DOM. */
285 new_occ = new occurrence (dom, new_occ);
286 }
287
288 else
289 {
290 /* Nothing special, go on with the next element. */
291 p_occ = &occ->next;
292 }
293 }
294
295 /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
296 new_occ->next = *p_head;
297 *p_head = new_occ;
298}
299
300/* Register that we found a division in BB.
301 IMPORTANCE is a measure of how much weighting to give
302 that division. Use IMPORTANCE = 2 to register a single
303 division. If the division is going to be found multiple
304 times use 1 (as it is with squares). */
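
/* For instance (made-up SSA names), given

	d_4 = x_3 * x_3;
	a_1 = b_2 / d_4;

   the multiplication uses x_3 in both operands, so the walk over the
   immediate uses of x_3 in execute_cse_reciprocals_1 reaches it twice
   and the division ends up registered twice with IMPORTANCE = 1; the
   two registrations together carry the same weight as one ordinary
   division registered once with IMPORTANCE = 2.  */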
305
306static inline void
307register_division_in (basic_block bb, int importance)
308{
309 struct occurrence *occ;
310
311 occ = (struct occurrence *) bb->aux;
312 if (!occ)
313 {
314 occ = new occurrence (bb, NULL);
315 insert_bb (new_occ: occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), p_head: &occ_head);
316 }
317
318 occ->bb_has_division = true;
319 occ->num_divisions += importance;
320}
321
322
323/* Compute the number of divisions that postdominate each block in OCC and
324 its children. */
325
326static void
327compute_merit (struct occurrence *occ)
328{
329 struct occurrence *occ_child;
330 basic_block dom = occ->bb;
331
332 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
333 {
334 basic_block bb;
335 if (occ_child->children)
336 compute_merit (occ: occ_child);
337
338 if (flag_exceptions)
339 bb = single_noncomplex_succ (bb: dom);
340 else
341 bb = dom;
342
343 if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
344 occ->num_divisions += occ_child->num_divisions;
345 }
346}
347
348
349/* Return whether USE_STMT is a floating-point division by DEF. */
350static inline bool
351is_division_by (gimple *use_stmt, tree def)
352{
353 return is_gimple_assign (gs: use_stmt)
354 && gimple_assign_rhs_code (gs: use_stmt) == RDIV_EXPR
355 && gimple_assign_rhs2 (gs: use_stmt) == def
356 /* Do not recognize x / x as valid division, as we are getting
357 confused later by replacing all immediate uses x in such
358 a stmt. */
359 && gimple_assign_rhs1 (gs: use_stmt) != def
360 && !stmt_can_throw_internal (cfun, use_stmt);
361}
362
363/* Return TRUE if USE_STMT is a multiplication of DEF by A. */
364static inline bool
365is_mult_by (gimple *use_stmt, tree def, tree a)
366{
367 if (gimple_code (g: use_stmt) == GIMPLE_ASSIGN
368 && gimple_assign_rhs_code (gs: use_stmt) == MULT_EXPR)
369 {
370 tree op0 = gimple_assign_rhs1 (gs: use_stmt);
371 tree op1 = gimple_assign_rhs2 (gs: use_stmt);
372
373 return (op0 == def && op1 == a)
374 || (op0 == a && op1 == def);
375 }
  return false;
377}
378
379/* Return whether USE_STMT is DEF * DEF. */
380static inline bool
381is_square_of (gimple *use_stmt, tree def)
382{
383 return is_mult_by (use_stmt, def, a: def);
384}
385
386/* Return whether USE_STMT is a floating-point division by
387 DEF * DEF. */
388static inline bool
389is_division_by_square (gimple *use_stmt, tree def)
390{
391 if (gimple_code (g: use_stmt) == GIMPLE_ASSIGN
392 && gimple_assign_rhs_code (gs: use_stmt) == RDIV_EXPR
393 && gimple_assign_rhs1 (gs: use_stmt) != gimple_assign_rhs2 (gs: use_stmt)
394 && !stmt_can_throw_internal (cfun, use_stmt))
395 {
396 tree denominator = gimple_assign_rhs2 (gs: use_stmt);
397 if (TREE_CODE (denominator) == SSA_NAME)
398 return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
399 }
  return false;
401}
402
403/* Walk the subset of the dominator tree rooted at OCC, setting the
404 RECIP_DEF field to a definition of 1.0 / DEF that can be used in
405 the given basic block. The field may be left NULL, of course,
406 if it is not possible or profitable to do the optimization.
407
408 DEF_BSI is an iterator pointing at the statement defining DEF.
409 If RECIP_DEF is set, a dominator already has a computation that can
410 be used.
411
   If SHOULD_INSERT_SQUARE_RECIP is set, then this also inserts
   the square of the reciprocal immediately after the definition
   of the reciprocal.  */
415
416static void
417insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
418 tree def, tree recip_def, tree square_recip_def,
419 int should_insert_square_recip, int threshold)
420{
421 tree type;
422 gassign *new_stmt, *new_square_stmt;
423 gimple_stmt_iterator gsi;
424 struct occurrence *occ_child;
425
426 if (!recip_def
427 && (occ->bb_has_division || !flag_trapping_math)
428 /* Divide by two as all divisions are counted twice in
429 the costing loop. */
430 && occ->num_divisions / 2 >= threshold)
431 {
432 /* Make a variable with the replacement and substitute it. */
433 type = TREE_TYPE (def);
434 recip_def = create_tmp_reg (type, "reciptmp");
435 new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
436 build_one_cst (type), def);
437
438 if (should_insert_square_recip)
439 {
440 square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
441 new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
442 recip_def, recip_def);
443 }
444
445 if (occ->bb_has_division)
446 {
447 /* Case 1: insert before an existing division. */
448 gsi = gsi_after_labels (bb: occ->bb);
449 while (!gsi_end_p (i: gsi)
450 && (!is_division_by (use_stmt: gsi_stmt (i: gsi), def))
451 && (!is_division_by_square (use_stmt: gsi_stmt (i: gsi), def)))
452 gsi_next (i: &gsi);
453
454 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
455 if (should_insert_square_recip)
456 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
457 }
458 else if (def_gsi && occ->bb == gsi_bb (i: *def_gsi))
459 {
460 /* Case 2: insert right after the definition. Note that this will
461 never happen if the definition statement can throw, because in
462 that case the sole successor of the statement's basic block will
463 dominate all the uses as well. */
464 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
465 if (should_insert_square_recip)
466 gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
467 }
468 else
469 {
470 /* Case 3: insert in a basic block not containing defs/uses. */
471 gsi = gsi_after_labels (bb: occ->bb);
472 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
473 if (should_insert_square_recip)
474 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
475 }
476
477 reciprocal_stats.rdivs_inserted++;
478
479 occ->recip_def_stmt = new_stmt;
480 }
481
482 occ->recip_def = recip_def;
483 occ->square_recip_def = square_recip_def;
484 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
485 insert_reciprocals (def_gsi, occ: occ_child, def, recip_def,
486 square_recip_def, should_insert_square_recip,
487 threshold);
488}
489
490/* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
491 Take as argument the use for (x * x). */
492static inline void
493replace_reciprocal_squares (use_operand_p use_p)
494{
495 gimple *use_stmt = USE_STMT (use_p);
496 basic_block bb = gimple_bb (g: use_stmt);
497 struct occurrence *occ = (struct occurrence *) bb->aux;
498
499 if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
500 && occ->recip_def)
501 {
502 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
503 gimple_assign_set_rhs_code (s: use_stmt, code: MULT_EXPR);
504 gimple_assign_set_rhs2 (gs: use_stmt, rhs: occ->square_recip_def);
505 SET_USE (use_p, occ->square_recip_def);
506 fold_stmt_inplace (&gsi);
507 update_stmt (s: use_stmt);
508 }
509}
510
511
512/* Replace the division at USE_P with a multiplication by the reciprocal, if
513 possible. */
514
515static inline void
516replace_reciprocal (use_operand_p use_p)
517{
518 gimple *use_stmt = USE_STMT (use_p);
519 basic_block bb = gimple_bb (g: use_stmt);
520 struct occurrence *occ = (struct occurrence *) bb->aux;
521
522 if (optimize_bb_for_speed_p (bb)
523 && occ->recip_def && use_stmt != occ->recip_def_stmt)
524 {
525 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
526 gimple_assign_set_rhs_code (s: use_stmt, code: MULT_EXPR);
527 SET_USE (use_p, occ->recip_def);
528 fold_stmt_inplace (&gsi);
529 update_stmt (s: use_stmt);
530 }
531}
532
533
534/* Free OCC and return one more "struct occurrence" to be freed. */
535
536static struct occurrence *
537free_bb (struct occurrence *occ)
538{
539 struct occurrence *child, *next;
540
541 /* First get the two pointers hanging off OCC. */
542 next = occ->next;
543 child = occ->children;
544 delete occ;
545
546 /* Now ensure that we don't recurse unless it is necessary. */
547 if (!child)
548 return next;
549 else
550 {
551 while (next)
552 next = free_bb (occ: next);
553
554 return child;
555 }
556}
557
558/* Transform sequences like
559 t = sqrt (a)
560 x = 1.0 / t;
561 r1 = x * x;
562 r2 = a * x;
563 into:
564 t = sqrt (a)
565 r1 = 1.0 / a;
566 r2 = t;
567 x = r1 * r2;
568 depending on the uses of x, r1, r2. This removes one multiplication and
569 allows the sqrt and division operations to execute in parallel.
570 DEF_GSI is the gsi of the initial division by sqrt that defines
571 DEF (x in the example above). */
572
573static void
574optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
575{
576 gimple *use_stmt;
577 imm_use_iterator use_iter;
578 gimple *stmt = gsi_stmt (i: *def_gsi);
579 tree x = def;
580 tree orig_sqrt_ssa_name = gimple_assign_rhs2 (gs: stmt);
581 tree div_rhs1 = gimple_assign_rhs1 (gs: stmt);
582
583 if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
584 || TREE_CODE (div_rhs1) != REAL_CST
585 || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
586 return;
587
588 gcall *sqrt_stmt
589 = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));
590
591 if (!sqrt_stmt || !gimple_call_lhs (gs: sqrt_stmt))
592 return;
593
594 switch (gimple_call_combined_fn (sqrt_stmt))
595 {
596 CASE_CFN_SQRT:
597 CASE_CFN_SQRT_FN:
598 break;
599
600 default:
601 return;
602 }
603 tree a = gimple_call_arg (gs: sqrt_stmt, index: 0);
604
605 /* We have 'a' and 'x'. Now analyze the uses of 'x'. */
606
607 /* Statements that use x in x * x. */
608 auto_vec<gimple *> sqr_stmts;
609 /* Statements that use x in a * x. */
610 auto_vec<gimple *> mult_stmts;
611 bool has_other_use = false;
612 bool mult_on_main_path = false;
613
614 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
615 {
616 if (is_gimple_debug (gs: use_stmt))
617 continue;
618 if (is_square_of (use_stmt, def: x))
619 {
620 sqr_stmts.safe_push (obj: use_stmt);
621 if (gimple_bb (g: use_stmt) == gimple_bb (g: stmt))
622 mult_on_main_path = true;
623 }
624 else if (is_mult_by (use_stmt, def: x, a))
625 {
626 mult_stmts.safe_push (obj: use_stmt);
627 if (gimple_bb (g: use_stmt) == gimple_bb (g: stmt))
628 mult_on_main_path = true;
629 }
630 else
631 has_other_use = true;
632 }
633
634 /* In the x * x and a * x cases we just rewire stmt operands or
635 remove multiplications. In the has_other_use case we introduce
636 a multiplication so make sure we don't introduce a multiplication
637 on a path where there was none. */
638 if (has_other_use && !mult_on_main_path)
639 return;
640
641 if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
642 return;
643
644 /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
645 to be able to compose it from the sqr and mult cases. */
646 if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
647 return;
648
649 if (dump_file)
650 {
651 fprintf (stream: dump_file, format: "Optimizing reciprocal sqrt multiplications of\n");
652 print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
653 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
654 fprintf (stream: dump_file, format: "\n");
655 }
656
657 bool delete_div = !has_other_use;
658 tree sqr_ssa_name = NULL_TREE;
659 if (!sqr_stmts.is_empty ())
660 {
661 /* r1 = x * x. Transform the original
662 x = 1.0 / t
663 into
664 tmp1 = 1.0 / a
665 r1 = tmp1. */
666
667 sqr_ssa_name
668 = make_temp_ssa_name (TREE_TYPE (a), NULL, name: "recip_sqrt_sqr");
669
670 if (dump_file)
671 {
672 fprintf (stream: dump_file, format: "Replacing original division\n");
673 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
674 fprintf (stream: dump_file, format: "with new division\n");
675 }
676 stmt
677 = gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (gs: stmt),
678 gimple_assign_rhs1 (gs: stmt), a);
679 gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
680 gsi_remove (def_gsi, true);
681 *def_gsi = gsi_for_stmt (stmt);
682 fold_stmt_inplace (def_gsi);
683 update_stmt (s: stmt);
684
685 if (dump_file)
686 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
687
688 delete_div = false;
689 gimple *sqr_stmt;
690 unsigned int i;
691 FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
692 {
693 gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
694 gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
695 update_stmt (s: sqr_stmt);
696 }
697 }
698 if (!mult_stmts.is_empty ())
699 {
700 /* r2 = a * x. Transform this into:
701 r2 = t (The original sqrt (a)). */
702 unsigned int i;
703 gimple *mult_stmt = NULL;
704 FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
705 {
706 gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);
707
708 if (dump_file)
709 {
710 fprintf (stream: dump_file, format: "Replacing squaring multiplication\n");
711 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
712 fprintf (stream: dump_file, format: "with assignment\n");
713 }
714 gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
715 fold_stmt_inplace (&gsi2);
716 update_stmt (s: mult_stmt);
717 if (dump_file)
718 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
719 }
720 }
721
722 if (has_other_use)
723 {
724 /* Using the two temporaries tmp1, tmp2 from above
725 the original x is now:
726 x = tmp1 * tmp2. */
727 gcc_assert (orig_sqrt_ssa_name);
728 gcc_assert (sqr_ssa_name);
729
730 gimple *new_stmt
731 = gimple_build_assign (x, MULT_EXPR,
732 orig_sqrt_ssa_name, sqr_ssa_name);
733 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
734 update_stmt (s: stmt);
735 }
736 else if (delete_div)
737 {
738 /* Remove the original division. */
739 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
740 gsi_remove (&gsi2, true);
741 release_defs (stmt);
742 }
743 else
744 release_ssa_name (name: x);
745}
746
747/* Look for floating-point divisions among DEF's uses, and try to
748 replace them by multiplications with the reciprocal. Add
749 as many statements computing the reciprocal as needed.
750
751 DEF must be a GIMPLE register of a floating-point type. */
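
/* As a worked example (made-up SSA names; whether the transformation
   fires depends on the target's min_divisions_for_recip_mul threshold),
   when DEF is x_1 in

	d_2 = x_1 * x_1;
	t0_3 = a_4 / d_2;
	t1_5 = b_6 / d_2;
	t2_7 = c_8 / x_1;

   the pass inserts reciptmp = 1.0 / x_1 and
   powmult_reciptmp = reciptmp * reciptmp, and rewrites the uses into
   t0_3 = a_4 * powmult_reciptmp, t1_5 = b_6 * powmult_reciptmp and
   t2_7 = c_8 * reciptmp, leaving one division instead of three.  */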
752
753static void
754execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
755{
756 use_operand_p use_p, square_use_p;
757 imm_use_iterator use_iter, square_use_iter;
758 tree square_def;
759 struct occurrence *occ;
760 int count = 0;
761 int threshold;
762 int square_recip_count = 0;
763 int sqrt_recip_count = 0;
764
765 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
766 threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
767
768 /* If DEF is a square (x * x), count the number of divisions by x.
769 If there are more divisions by x than by (DEF * DEF), prefer to optimize
770 the reciprocal of x instead of DEF. This improves cases like:
771 def = x * x
772 t0 = a / def
773 t1 = b / def
774 t2 = c / x
775 Reciprocal optimization of x results in 1 division rather than 2 or 3. */
776 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
777
778 if (is_gimple_assign (gs: def_stmt)
779 && gimple_assign_rhs_code (gs: def_stmt) == MULT_EXPR
780 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
781 && gimple_assign_rhs1 (gs: def_stmt) == gimple_assign_rhs2 (gs: def_stmt))
782 {
783 tree op0 = gimple_assign_rhs1 (gs: def_stmt);
784
785 FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
786 {
787 gimple *use_stmt = USE_STMT (use_p);
788 if (is_division_by (use_stmt, def: op0))
789 sqrt_recip_count++;
790 }
791 }
792
793 FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
794 {
795 gimple *use_stmt = USE_STMT (use_p);
796 if (is_division_by (use_stmt, def))
797 {
798 register_division_in (bb: gimple_bb (g: use_stmt), importance: 2);
799 count++;
800 }
801
802 if (is_square_of (use_stmt, def))
803 {
804 square_def = gimple_assign_lhs (gs: use_stmt);
805 FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
806 {
807 gimple *square_use_stmt = USE_STMT (square_use_p);
808 if (is_division_by (use_stmt: square_use_stmt, def: square_def))
809 {
810 /* This is executed twice for each division by a square. */
811 register_division_in (bb: gimple_bb (g: square_use_stmt), importance: 1);
812 square_recip_count++;
813 }
814 }
815 }
816 }
817
818 /* Square reciprocals were counted twice above. */
819 square_recip_count /= 2;
820
821 /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x). */
822 if (sqrt_recip_count > square_recip_count)
823 goto out;
824
825 /* Do the expensive part only if we can hope to optimize something. */
826 if (count + square_recip_count >= threshold && count >= 1)
827 {
828 gimple *use_stmt;
829 for (occ = occ_head; occ; occ = occ->next)
830 {
831 compute_merit (occ);
832 insert_reciprocals (def_gsi, occ, def, NULL, NULL,
833 should_insert_square_recip: square_recip_count, threshold);
834 }
835
836 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
837 {
838 if (is_division_by (use_stmt, def))
839 {
840 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
841 replace_reciprocal (use_p);
842 }
843 else if (square_recip_count > 0 && is_square_of (use_stmt, def))
844 {
845 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
846 {
847 /* Find all uses of the square that are divisions and
848 * replace them by multiplications with the inverse. */
849 imm_use_iterator square_iterator;
850 gimple *powmult_use_stmt = USE_STMT (use_p);
851 tree powmult_def_name = gimple_assign_lhs (gs: powmult_use_stmt);
852
853 FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
854 square_iterator, powmult_def_name)
855 FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
856 {
857 gimple *powmult_use_stmt = USE_STMT (square_use_p);
858 if (is_division_by (use_stmt: powmult_use_stmt, def: powmult_def_name))
859 replace_reciprocal_squares (use_p: square_use_p);
860 }
861 }
862 }
863 }
864 }
865
866out:
867 for (occ = occ_head; occ; )
868 occ = free_bb (occ);
869
870 occ_head = NULL;
871}
872
873/* Return an internal function that implements the reciprocal of CALL,
874 or IFN_LAST if there is no such function that the target supports. */
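
/* For example, for a call to sqrt this returns IFN_RSQRT when the
   target supports that internal function directly for the call's type,
   which lets the caller later turn x / sqrt (y) into x * rsqrt (y).  */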
875
876internal_fn
877internal_fn_reciprocal (gcall *call)
878{
879 internal_fn ifn;
880
881 switch (gimple_call_combined_fn (call))
882 {
883 CASE_CFN_SQRT:
884 CASE_CFN_SQRT_FN:
885 ifn = IFN_RSQRT;
886 break;
887
888 default:
889 return IFN_LAST;
890 }
891
892 tree_pair types = direct_internal_fn_types (ifn, call);
893 if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
894 return IFN_LAST;
895
896 return ifn;
897}
898
899/* Go through all the floating-point SSA_NAMEs, and call
900 execute_cse_reciprocals_1 on each of them. */
901namespace {
902
const pass_data pass_data_cse_reciprocals =
{
  GIMPLE_PASS, /* type */
  "recip", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_RECIP, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
915
916class pass_cse_reciprocals : public gimple_opt_pass
917{
918public:
919 pass_cse_reciprocals (gcc::context *ctxt)
920 : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
921 {}
922
923 /* opt_pass methods: */
924 bool gate (function *) final override
925 {
926 return optimize && flag_reciprocal_math;
927 }
928 unsigned int execute (function *) final override;
929
930}; // class pass_cse_reciprocals
931
932unsigned int
933pass_cse_reciprocals::execute (function *fun)
934{
935 basic_block bb;
936 tree arg;
937
938 occ_pool = new object_allocator<occurrence> ("dominators for recip");
939
940 memset (s: &reciprocal_stats, c: 0, n: sizeof (reciprocal_stats));
941 calculate_dominance_info (CDI_DOMINATORS);
942 calculate_dominance_info (CDI_POST_DOMINATORS);
943
944 if (flag_checking)
945 FOR_EACH_BB_FN (bb, fun)
946 gcc_assert (!bb->aux);
947
948 for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
949 if (FLOAT_TYPE_P (TREE_TYPE (arg))
950 && is_gimple_reg (arg))
951 {
952 tree name = ssa_default_def (fun, arg);
953 if (name)
954 execute_cse_reciprocals_1 (NULL, def: name);
955 }
956
957 FOR_EACH_BB_FN (bb, fun)
958 {
959 tree def;
960
961 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (i: gsi);
962 gsi_next (i: &gsi))
963 {
964 gphi *phi = gsi.phi ();
965 def = PHI_RESULT (phi);
966 if (! virtual_operand_p (op: def)
967 && FLOAT_TYPE_P (TREE_TYPE (def)))
968 execute_cse_reciprocals_1 (NULL, def);
969 }
970
971 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (i: gsi);
972 gsi_next (i: &gsi))
973 {
974 gimple *stmt = gsi_stmt (i: gsi);
975
976 if (gimple_has_lhs (stmt)
977 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
978 && FLOAT_TYPE_P (TREE_TYPE (def))
979 && TREE_CODE (def) == SSA_NAME)
980 {
981 execute_cse_reciprocals_1 (def_gsi: &gsi, def);
982 stmt = gsi_stmt (i: gsi);
983 if (flag_unsafe_math_optimizations
984 && is_gimple_assign (gs: stmt)
985 && gimple_assign_lhs (gs: stmt) == def
986 && !stmt_can_throw_internal (cfun, stmt)
987 && gimple_assign_rhs_code (gs: stmt) == RDIV_EXPR)
988 optimize_recip_sqrt (def_gsi: &gsi, def);
989 }
990 }
991
992 if (optimize_bb_for_size_p (bb))
993 continue;
994
995 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
996 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (i: gsi);
997 gsi_next (i: &gsi))
998 {
999 gimple *stmt = gsi_stmt (i: gsi);
1000
1001 if (is_gimple_assign (gs: stmt)
1002 && gimple_assign_rhs_code (gs: stmt) == RDIV_EXPR)
1003 {
1004 tree arg1 = gimple_assign_rhs2 (gs: stmt);
1005 gimple *stmt1;
1006
1007 if (TREE_CODE (arg1) != SSA_NAME)
1008 continue;
1009
1010 stmt1 = SSA_NAME_DEF_STMT (arg1);
1011
1012 if (is_gimple_call (gs: stmt1)
1013 && gimple_call_lhs (gs: stmt1))
1014 {
1015 bool fail;
1016 imm_use_iterator ui;
1017 use_operand_p use_p;
1018 tree fndecl = NULL_TREE;
1019
1020 gcall *call = as_a <gcall *> (p: stmt1);
1021 internal_fn ifn = internal_fn_reciprocal (call);
1022 if (ifn == IFN_LAST)
1023 {
1024 fndecl = gimple_call_fndecl (gs: call);
1025 if (!fndecl
1026 || !fndecl_built_in_p (node: fndecl, klass: BUILT_IN_MD))
1027 continue;
1028 fndecl = targetm.builtin_reciprocal (fndecl);
1029 if (!fndecl)
1030 continue;
1031 }
1032
1033 /* Check that all uses of the SSA name are divisions,
1034 otherwise replacing the defining statement will do
1035 the wrong thing. */
1036 fail = false;
1037 FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
1038 {
1039 gimple *stmt2 = USE_STMT (use_p);
1040 if (is_gimple_debug (gs: stmt2))
1041 continue;
1042 if (!is_gimple_assign (gs: stmt2)
1043 || gimple_assign_rhs_code (gs: stmt2) != RDIV_EXPR
1044 || gimple_assign_rhs1 (gs: stmt2) == arg1
1045 || gimple_assign_rhs2 (gs: stmt2) != arg1)
1046 {
1047 fail = true;
1048 break;
1049 }
1050 }
1051 if (fail)
1052 continue;
1053
1054 gimple_replace_ssa_lhs (call, arg1);
1055 if (gimple_call_internal_p (gs: call) != (ifn != IFN_LAST))
1056 {
1057 auto_vec<tree, 4> args;
1058 for (unsigned int i = 0;
1059 i < gimple_call_num_args (gs: call); i++)
1060 args.safe_push (obj: gimple_call_arg (gs: call, index: i));
1061 gcall *stmt2;
1062 if (ifn == IFN_LAST)
1063 stmt2 = gimple_build_call_vec (fndecl, args);
1064 else
1065 stmt2 = gimple_build_call_internal_vec (ifn, args);
1066 gimple_call_set_lhs (gs: stmt2, lhs: arg1);
1067 gimple_move_vops (stmt2, call);
1068 gimple_call_set_nothrow (s: stmt2,
1069 nothrow_p: gimple_call_nothrow_p (s: call));
1070 gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
1071 gsi_replace (&gsi2, stmt2, true);
1072 }
1073 else
1074 {
1075 if (ifn == IFN_LAST)
1076 gimple_call_set_fndecl (gs: call, decl: fndecl);
1077 else
1078 gimple_call_set_internal_fn (call_stmt: call, fn: ifn);
1079 update_stmt (s: call);
1080 }
1081 reciprocal_stats.rfuncs_inserted++;
1082
1083 FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
1084 {
1085 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1086 gimple_assign_set_rhs_code (s: stmt, code: MULT_EXPR);
1087 fold_stmt_inplace (&gsi);
1088 update_stmt (s: stmt);
1089 }
1090 }
1091 }
1092 }
1093 }
1094
1095 statistics_counter_event (fun, "reciprocal divs inserted",
1096 reciprocal_stats.rdivs_inserted);
1097 statistics_counter_event (fun, "reciprocal functions inserted",
1098 reciprocal_stats.rfuncs_inserted);
1099
1100 free_dominance_info (CDI_DOMINATORS);
1101 free_dominance_info (CDI_POST_DOMINATORS);
1102 delete occ_pool;
1103 return 0;
1104}
1105
1106} // anon namespace
1107
1108gimple_opt_pass *
1109make_pass_cse_reciprocals (gcc::context *ctxt)
1110{
1111 return new pass_cse_reciprocals (ctxt);
1112}
1113
1114/* If NAME is the result of a type conversion, look for other
1115 equivalent dominating or dominated conversions, and replace all
1116 uses with the earliest dominating name, removing the redundant
1117 conversions. Return the prevailing name. */
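
/* For instance (schematic GIMPLE, made-up names), given

	_1 = (double) x_2;
	...
	_3 = (double) x_2;
	_4 = _3 * _1;

   where the definition of _1 dominates that of _3, the second
   conversion is removed and its use is rewritten to

	_4 = _1 * _1;

   with _1 returned as the prevailing name.  */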
1118
1119static tree
1120execute_cse_conv_1 (tree name, bool *cfg_changed)
1121{
1122 if (SSA_NAME_IS_DEFAULT_DEF (name)
1123 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
1124 return name;
1125
1126 gimple *def_stmt = SSA_NAME_DEF_STMT (name);
1127
1128 if (!gimple_assign_cast_p (s: def_stmt))
1129 return name;
1130
1131 tree src = gimple_assign_rhs1 (gs: def_stmt);
1132
1133 if (TREE_CODE (src) != SSA_NAME)
1134 return name;
1135
1136 imm_use_iterator use_iter;
1137 gimple *use_stmt;
1138
1139 /* Find the earliest dominating def. */
1140 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1141 {
1142 if (use_stmt == def_stmt
1143 || !gimple_assign_cast_p (s: use_stmt))
1144 continue;
1145
1146 tree lhs = gimple_assign_lhs (gs: use_stmt);
1147
1148 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1149 || (gimple_assign_rhs1 (gs: use_stmt)
1150 != gimple_assign_rhs1 (gs: def_stmt))
1151 || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1152 continue;
1153
1154 bool use_dominates;
1155 if (gimple_bb (g: def_stmt) == gimple_bb (g: use_stmt))
1156 {
1157 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1158 while (!gsi_end_p (i: gsi) && gsi_stmt (i: gsi) != def_stmt)
1159 gsi_next (i: &gsi);
1160 use_dominates = !gsi_end_p (i: gsi);
1161 }
1162 else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (g: use_stmt),
1163 gimple_bb (g: def_stmt)))
1164 use_dominates = false;
1165 else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (g: def_stmt),
1166 gimple_bb (g: use_stmt)))
1167 use_dominates = true;
1168 else
1169 continue;
1170
1171 if (use_dominates)
1172 {
1173 std::swap (a&: name, b&: lhs);
1174 std::swap (a&: def_stmt, b&: use_stmt);
1175 }
1176 }
1177
1178 /* Now go through all uses of SRC again, replacing the equivalent
1179 dominated conversions. We may replace defs that were not
1180 dominated by the then-prevailing defs when we first visited
1181 them. */
1182 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1183 {
1184 if (use_stmt == def_stmt
1185 || !gimple_assign_cast_p (s: use_stmt))
1186 continue;
1187
1188 tree lhs = gimple_assign_lhs (gs: use_stmt);
1189
1190 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1191 || (gimple_assign_rhs1 (gs: use_stmt)
1192 != gimple_assign_rhs1 (gs: def_stmt))
1193 || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1194 continue;
1195
1196 basic_block use_bb = gimple_bb (g: use_stmt);
1197 if (gimple_bb (g: def_stmt) == use_bb
1198 || dominated_by_p (CDI_DOMINATORS, use_bb, gimple_bb (g: def_stmt)))
1199 {
1200 sincos_stats.conv_removed++;
1201
1202 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1203 replace_uses_by (lhs, name);
1204 if (gsi_remove (&gsi, true)
1205 && gimple_purge_dead_eh_edges (use_bb))
1206 *cfg_changed = true;
1207 release_defs (use_stmt);
1208 }
1209 }
1210
1211 return name;
1212}
1213
/* Records an occurrence at statement USE_STMT in the vector STMTS if
   it is dominated by *TOP_BB, dominates it, or *TOP_BB is not yet
   initialized.  Returns true if the occurrence was pushed on the
   vector.  Adjusts *TOP_BB to be the basic block dominating all
   statements in the vector.  */
1219
1220static bool
1221maybe_record_sincos (vec<gimple *> *stmts,
1222 basic_block *top_bb, gimple *use_stmt)
1223{
1224 basic_block use_bb = gimple_bb (g: use_stmt);
1225 if (*top_bb
1226 && (*top_bb == use_bb
1227 || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
1228 stmts->safe_push (obj: use_stmt);
1229 else if (!*top_bb
1230 || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
1231 {
1232 stmts->safe_push (obj: use_stmt);
1233 *top_bb = use_bb;
1234 }
1235 else
1236 return false;
1237
1238 return true;
1239}
1240
1241/* Look for sin, cos and cexpi calls with the same argument NAME and
1242 create a single call to cexpi CSEing the result in this case.
1243 We first walk over all immediate uses of the argument collecting
1244 statements that we can CSE in a vector and in a second pass replace
1245 the statement rhs with a REALPART or IMAGPART expression on the
1246 result of the cexpi call we insert before the use statement that
1247 dominates all other candidates. */
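
/* For example (made-up SSA names), assuming a cexpi built-in exists for
   the type,

	s_1 = sin (x_2);
	c_3 = cos (x_2);

   becomes

	t_4 = cexpi (x_2);
	s_1 = IMAGPART_EXPR <t_4>;
	c_3 = REALPART_EXPR <t_4>;

   so that only one library call remains.  */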
1248
1249static bool
1250execute_cse_sincos_1 (tree name)
1251{
1252 gimple_stmt_iterator gsi;
1253 imm_use_iterator use_iter;
1254 tree fndecl, res, type = NULL_TREE;
1255 gimple *def_stmt, *use_stmt, *stmt;
1256 int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
1257 auto_vec<gimple *> stmts;
1258 basic_block top_bb = NULL;
1259 int i;
1260 bool cfg_changed = false;
1261
1262 name = execute_cse_conv_1 (name, cfg_changed: &cfg_changed);
1263
1264 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
1265 {
1266 if (gimple_code (g: use_stmt) != GIMPLE_CALL
1267 || !gimple_call_lhs (gs: use_stmt))
1268 continue;
1269
1270 switch (gimple_call_combined_fn (use_stmt))
1271 {
1272 CASE_CFN_COS:
1273 seen_cos |= maybe_record_sincos (stmts: &stmts, top_bb: &top_bb, use_stmt) ? 1 : 0;
1274 break;
1275
1276 CASE_CFN_SIN:
1277 seen_sin |= maybe_record_sincos (stmts: &stmts, top_bb: &top_bb, use_stmt) ? 1 : 0;
1278 break;
1279
1280 CASE_CFN_CEXPI:
1281 seen_cexpi |= maybe_record_sincos (stmts: &stmts, top_bb: &top_bb, use_stmt) ? 1 : 0;
1282 break;
1283
1284 default:;
1285 continue;
1286 }
1287
1288 tree t = mathfn_built_in_type (gimple_call_combined_fn (use_stmt));
1289 if (!type)
1290 {
1291 type = t;
1292 t = TREE_TYPE (name);
1293 }
1294 /* This checks that NAME has the right type in the first round,
1295 and, in subsequent rounds, that the built_in type is the same
1296 type, or a compatible type. */
1297 if (type != t && !types_compatible_p (type1: type, type2: t))
1298 return false;
1299 }
1300 if (seen_cos + seen_sin + seen_cexpi <= 1)
1301 return false;
1302
1303 /* Simply insert cexpi at the beginning of top_bb but not earlier than
1304 the name def statement. */
1305 fndecl = mathfn_built_in (type, fn: BUILT_IN_CEXPI);
1306 if (!fndecl)
1307 return false;
1308 stmt = gimple_build_call (fndecl, 1, name);
1309 res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, name: "sincostmp");
1310 gimple_call_set_lhs (gs: stmt, lhs: res);
1311
1312 def_stmt = SSA_NAME_DEF_STMT (name);
1313 if (!SSA_NAME_IS_DEFAULT_DEF (name)
1314 && gimple_code (g: def_stmt) != GIMPLE_PHI
1315 && gimple_bb (g: def_stmt) == top_bb)
1316 {
1317 gsi = gsi_for_stmt (def_stmt);
1318 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1319 }
1320 else
1321 {
1322 gsi = gsi_after_labels (bb: top_bb);
1323 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1324 }
1325 sincos_stats.inserted++;
1326
1327 /* And adjust the recorded old call sites. */
1328 for (i = 0; stmts.iterate (ix: i, ptr: &use_stmt); ++i)
1329 {
1330 tree rhs = NULL;
1331
1332 switch (gimple_call_combined_fn (use_stmt))
1333 {
1334 CASE_CFN_COS:
1335 rhs = fold_build1 (REALPART_EXPR, type, res);
1336 break;
1337
1338 CASE_CFN_SIN:
1339 rhs = fold_build1 (IMAGPART_EXPR, type, res);
1340 break;
1341
1342 CASE_CFN_CEXPI:
1343 rhs = res;
1344 break;
1345
1346 default:;
1347 gcc_unreachable ();
1348 }
1349
1350 /* Replace call with a copy. */
1351 stmt = gimple_build_assign (gimple_call_lhs (gs: use_stmt), rhs);
1352
1353 gsi = gsi_for_stmt (use_stmt);
1354 gsi_replace (&gsi, stmt, true);
1355 if (gimple_purge_dead_eh_edges (gimple_bb (g: stmt)))
1356 cfg_changed = true;
1357 }
1358
1359 return cfg_changed;
1360}
1361
1362/* To evaluate powi(x,n), the floating point value x raised to the
1363 constant integer exponent n, we use a hybrid algorithm that
1364 combines the "window method" with look-up tables. For an
1365 introduction to exponentiation algorithms and "addition chains",
1366 see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1367 "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1368 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1369 Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
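
/* Roughly speaking (an informal summary of the routines below): for
   exponents smaller than POWI_TABLE_SIZE the whole multiplication
   sequence is read off powi_table; for larger exponents the value is
   consumed from the least significant end, with one squaring per even
   step and one POWI_WINDOW_SIZE-bit table-driven digit whenever the
   remaining exponent is odd, until what is left fits in the table.
   For example, powi (x, 259) becomes powi (x, 256) * powi (x, 3),
   where powi (x, 256) is obtained by squaring powi (x, 128) and
   powi (x, 128) itself comes straight from the table.  */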
1370
1371/* Provide a default value for POWI_MAX_MULTS, the maximum number of
1372 multiplications to inline before calling the system library's pow
1373 function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1374 so this default never requires calling pow, powf or powl. */
1375
1376#ifndef POWI_MAX_MULTS
1377#define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
1378#endif
1379
1380/* The size of the "optimal power tree" lookup table. All
1381 exponents less than this value are simply looked up in the
1382 powi_table below. This threshold is also used to size the
1383 cache of pseudo registers that hold intermediate results. */
1384#define POWI_TABLE_SIZE 256
1385
1386/* The size, in bits of the window, used in the "window method"
1387 exponentiation algorithm. This is equivalent to a radix of
1388 (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
1389#define POWI_WINDOW_SIZE 3
1390
/* The following table is an efficient representation of an
   "optimal power tree".  For each value, i, the corresponding
   value, j, in the table states that an optimal evaluation
   sequence for calculating pow(x,i) can be found by evaluating
   pow(x,j)*pow(x,i-j).  An optimal power tree for the first
   100 integers is given in Knuth's "Seminumerical algorithms".  */
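
/* For example, powi_table[15] == 9, so x**15 is evaluated as
   x**9 * x**6; unwinding the table gives the chain

	x2 = x * x;  x3 = x2 * x;  x6 = x3 * x3;  x9 = x6 * x3;
	x15 = x9 * x6;

   i.e. five multiplications, one fewer than binary square-and-multiply
   needs for the same exponent.  */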
1397
1398static const unsigned char powi_table[POWI_TABLE_SIZE] =
1399 {
1400 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
1401 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
1402 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
1403 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
1404 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
1405 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
1406 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
1407 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
1408 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
1409 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
1410 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
1411 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
1412 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
1413 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
1414 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
1415 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
1416 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
1417 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
1418 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
1419 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
1420 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
1421 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
1422 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
1423 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
1424 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
1425 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
1426 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
1427 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
1428 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
1429 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
1430 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
1431 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
1432 };
1433
1434
1435/* Return the number of multiplications required to calculate
1436 powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
1437 subroutine of powi_cost. CACHE is an array indicating
1438 which exponents have already been calculated. */
1439
1440static int
1441powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
1442{
1443 /* If we've already calculated this exponent, then this evaluation
1444 doesn't require any additional multiplications. */
1445 if (cache[n])
1446 return 0;
1447
1448 cache[n] = true;
1449 return powi_lookup_cost (n: n - powi_table[n], cache)
1450 + powi_lookup_cost (n: powi_table[n], cache) + 1;
1451}
1452
1453/* Return the number of multiplications required to calculate
1454 powi(x,n) for an arbitrary x, given the exponent N. This
1455 function needs to be kept in sync with powi_as_mults below. */
1456
1457static int
1458powi_cost (HOST_WIDE_INT n)
1459{
1460 bool cache[POWI_TABLE_SIZE];
1461 unsigned HOST_WIDE_INT digit;
1462 unsigned HOST_WIDE_INT val;
1463 int result;
1464
1465 if (n == 0)
1466 return 0;
1467
1468 /* Ignore the reciprocal when calculating the cost. */
1469 val = absu_hwi (x: n);
1470
1471 /* Initialize the exponent cache. */
1472 memset (s: cache, c: 0, POWI_TABLE_SIZE * sizeof (bool));
1473 cache[1] = true;
1474
1475 result = 0;
1476
1477 while (val >= POWI_TABLE_SIZE)
1478 {
1479 if (val & 1)
1480 {
1481 digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
1482 result += powi_lookup_cost (n: digit, cache)
1483 + POWI_WINDOW_SIZE + 1;
1484 val >>= POWI_WINDOW_SIZE;
1485 }
1486 else
1487 {
1488 val >>= 1;
1489 result++;
1490 }
1491 }
1492
1493 return result + powi_lookup_cost (n: val, cache);
1494}
1495
1496/* Recursive subroutine of powi_as_mults. This function takes the
1497 array, CACHE, of already calculated exponents and an exponent N and
1498 returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
1499
1500static tree
1501powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1502 unsigned HOST_WIDE_INT n, tree *cache)
1503{
1504 tree op0, op1, ssa_target;
1505 unsigned HOST_WIDE_INT digit;
1506 gassign *mult_stmt;
1507
1508 if (n < POWI_TABLE_SIZE && cache[n])
1509 return cache[n];
1510
1511 ssa_target = make_temp_ssa_name (type, NULL, name: "powmult");
1512
1513 if (n < POWI_TABLE_SIZE)
1514 {
1515 cache[n] = ssa_target;
1516 op0 = powi_as_mults_1 (gsi, loc, type, n: n - powi_table[n], cache);
1517 op1 = powi_as_mults_1 (gsi, loc, type, n: powi_table[n], cache);
1518 }
1519 else if (n & 1)
1520 {
1521 digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1522 op0 = powi_as_mults_1 (gsi, loc, type, n: n - digit, cache);
1523 op1 = powi_as_mults_1 (gsi, loc, type, n: digit, cache);
1524 }
1525 else
1526 {
1527 op0 = powi_as_mults_1 (gsi, loc, type, n: n >> 1, cache);
1528 op1 = op0;
1529 }
1530
1531 mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1532 gimple_set_location (g: mult_stmt, location: loc);
1533 gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1534
1535 return ssa_target;
1536}
1537
1538/* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1539 This function needs to be kept in sync with powi_cost above. */
1540
1541tree
1542powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1543 tree arg0, HOST_WIDE_INT n)
1544{
1545 tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1546 gassign *div_stmt;
1547 tree target;
1548
1549 if (n == 0)
1550 return build_one_cst (type);
1551
1552 memset (s: cache, c: 0, n: sizeof (cache));
1553 cache[1] = arg0;
1554
1555 result = powi_as_mults_1 (gsi, loc, type, n: absu_hwi (x: n), cache);
1556 if (n >= 0)
1557 return result;
1558
1559 /* If the original exponent was negative, reciprocate the result. */
1560 target = make_temp_ssa_name (type, NULL, name: "powmult");
1561 div_stmt = gimple_build_assign (target, RDIV_EXPR,
1562 build_real (type, dconst1), result);
1563 gimple_set_location (g: div_stmt, location: loc);
1564 gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1565
1566 return target;
1567}
1568
/* ARG0 and N are the two arguments to a powi builtin in GSI with
   location info LOC.  If the arguments are appropriate, create an
   equivalent sequence of statements prior to GSI using an optimal
   number of multiplications, and return an expression holding the
   result.  */
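
/* For example, powi (x, 5) expands to the three multiplications
   x2 = x * x; x3 = x2 * x; x5 = x3 * x2, and powi (x, -2) expands to
   x2 = x * x followed by 1.0 / x2.  Exponents outside [-1, 2] are only
   expanded when optimizing for speed and the multiplication count does
   not exceed POWI_MAX_MULTS.  */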
1574
1575static tree
1576gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1577 tree arg0, HOST_WIDE_INT n)
1578{
1579 if ((n >= -1 && n <= 2)
1580 || (optimize_function_for_speed_p (cfun)
1581 && powi_cost (n) <= POWI_MAX_MULTS))
1582 return powi_as_mults (gsi, loc, arg0, n);
1583
1584 return NULL_TREE;
1585}
1586
1587/* Build a gimple call statement that calls FN with argument ARG.
1588 Set the lhs of the call statement to a fresh SSA name. Insert the
1589 statement prior to GSI's current position, and return the fresh
1590 SSA name. */
1591
1592static tree
1593build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1594 tree fn, tree arg)
1595{
1596 gcall *call_stmt;
1597 tree ssa_target;
1598
1599 call_stmt = gimple_build_call (fn, 1, arg);
1600 ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, name: "powroot");
1601 gimple_set_lhs (call_stmt, ssa_target);
1602 gimple_set_location (g: call_stmt, location: loc);
1603 gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1604
1605 return ssa_target;
1606}
1607
1608/* Build a gimple binary operation with the given CODE and arguments
1609 ARG0, ARG1, assigning the result to a new SSA name for variable
1610 TARGET. Insert the statement prior to GSI's current position, and
1611 return the fresh SSA name.*/
1612
1613static tree
1614build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1615 const char *name, enum tree_code code,
1616 tree arg0, tree arg1)
1617{
1618 tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1619 gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1620 gimple_set_location (g: stmt, location: loc);
1621 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1622 return result;
1623}
1624
1625/* Build a gimple reference operation with the given CODE and argument
1626 ARG, assigning the result to a new SSA name of TYPE with NAME.
1627 Insert the statement prior to GSI's current position, and return
1628 the fresh SSA name. */
1629
1630static inline tree
1631build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1632 const char *name, enum tree_code code, tree arg0)
1633{
1634 tree result = make_temp_ssa_name (type, NULL, name);
1635 gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
1636 gimple_set_location (g: stmt, location: loc);
1637 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1638 return result;
1639}
1640
1641/* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1642 prior to GSI's current position, and return the fresh SSA name. */
1643
1644static tree
1645build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1646 tree type, tree val)
1647{
1648 tree result = make_ssa_name (var: type);
1649 gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1650 gimple_set_location (g: stmt, location: loc);
1651 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1652 return result;
1653}
1654
1655struct pow_synth_sqrt_info
1656{
1657 bool *factors;
1658 unsigned int deepest;
1659 unsigned int num_mults;
1660};
1661
1662/* Return true iff the real value C can be represented as a
1663 sum of powers of 0.5 up to N. That is:
1664 C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1665 Record in INFO the various parameters of the synthesis algorithm such
1666 as the factors a[i], the maximum 0.5 power and the number of
1667 multiplications that will be required. */
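
/* For example, 0.625 == 0.5**1 + 0.5**3, so with N >= 3 the recorded
   factors are {1, 0, 1}, DEEPEST becomes 3 and NUM_MULTS is 1: the
   value x**0.625 can then be synthesized as
   sqrt (x) * sqrt (sqrt (sqrt (x))).  */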
1668
1669bool
1670representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1671 struct pow_synth_sqrt_info *info)
1672{
1673 REAL_VALUE_TYPE factor = dconsthalf;
1674 REAL_VALUE_TYPE remainder = c;
1675
1676 info->deepest = 0;
1677 info->num_mults = 0;
1678 memset (s: info->factors, c: 0, n: n * sizeof (bool));
1679
1680 for (unsigned i = 0; i < n; i++)
1681 {
1682 REAL_VALUE_TYPE res;
1683
1684 /* If something inexact happened bail out now. */
1685 if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1686 return false;
1687
1688 /* We have hit zero. The number is representable as a sum
1689 of powers of 0.5. */
1690 if (real_equal (&res, &dconst0))
1691 {
1692 info->factors[i] = true;
1693 info->deepest = i + 1;
1694 return true;
1695 }
1696 else if (!REAL_VALUE_NEGATIVE (res))
1697 {
1698 remainder = res;
1699 info->factors[i] = true;
1700 info->num_mults++;
1701 }
1702 else
1703 info->factors[i] = false;
1704
1705 real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1706 }
1707 return false;
1708}
1709
1710/* Return the tree corresponding to FN being applied
1711 to ARG N times at GSI and LOC.
1712 Look up previous results from CACHE if need be.
1713 cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1714
1715static tree
1716get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1717 tree fn, location_t loc, tree *cache)
1718{
1719 tree res = cache[n];
1720 if (!res)
1721 {
1722 tree prev = get_fn_chain (arg, n: n - 1, gsi, fn, loc, cache);
1723 res = build_and_insert_call (gsi, loc, fn, arg: prev);
1724 cache[n] = res;
1725 }
1726
1727 return res;
1728}
1729
1730/* Print to STREAM the repeated application of function FNAME to ARG
1731 N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1732 "foo (foo (x))". */
1733
1734static void
1735print_nested_fn (FILE* stream, const char *fname, const char* arg,
1736 unsigned int n)
1737{
1738 if (n == 0)
1739 fprintf (stream: stream, format: "%s", arg);
1740 else
1741 {
1742 fprintf (stream: stream, format: "%s (", fname);
1743 print_nested_fn (stream, fname, arg, n: n - 1);
1744 fprintf (stream: stream, format: ")");
1745 }
1746}
1747
1748/* Print to STREAM the fractional sequence of sqrt chains
1749 applied to ARG, described by INFO. Used for the dump file. */
1750
1751static void
1752dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1753 struct pow_synth_sqrt_info *info)
1754{
1755 for (unsigned int i = 0; i < info->deepest; i++)
1756 {
1757 bool is_set = info->factors[i];
1758 if (is_set)
1759 {
1760 print_nested_fn (stream, fname: "sqrt", arg, n: i + 1);
1761 if (i != info->deepest - 1)
1762 fprintf (stream: stream, format: " * ");
1763 }
1764 }
1765}
1766
1767/* Print to STREAM a representation of raising ARG to an integer
1768 power N. Used for the dump file. */
1769
1770static void
1771dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1772{
1773 if (n > 1)
1774 fprintf (stream: stream, format: "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1775 else if (n == 1)
1776 fprintf (stream: stream, format: "%s", arg);
1777}
1778
1779/* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1780 square roots. Place at GSI and LOC. Limit the maximum depth
1781 of the sqrt chains to MAX_DEPTH. Return the tree holding the
1782 result of the expanded sequence or NULL_TREE if the expansion failed.
1783
1784 This routine assumes that ARG1 is a real number with a fractional part
1785 (the integer exponent case will have been handled earlier in
1786 gimple_expand_builtin_pow).
1787
1788 For ARG1 > 0.0:
1789 * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1790 FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1791 FRAC_PART == ARG1 - WHOLE_PART:
1792 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1793 POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1794 if it can be expressed as such, that is if FRAC_PART satisfies:
1795 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1796 where integer a[i] is either 0 or 1.
1797
1798 Example:
1799 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1800 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1801
1802 For ARG1 < 0.0 there are two approaches:
1803 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1804 is calculated as above.
1805
1806 Example:
1807 POW (x, -5.625) == 1.0 / POW (x, 5.625)
1808 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1809
1810 * (B) : WHOLE_PART := - ceil (abs (ARG1))
1811 FRAC_PART := ARG1 - WHOLE_PART
1812 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1813 Example:
1814 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1815 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1816
1817 For ARG1 < 0.0 we choose between (A) and (B) depending on
1818 how many multiplications we'd have to do.
1819 So, for the example in (B): POW (x, -5.875), if we were to
1820 follow algorithm (A) we would produce:
1821 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1822 which contains more multiplications than approach (B).
1823
1824 Hopefully, this approach will eliminate potentially expensive POW library
1825 calls when unsafe floating point math is enabled and allow the compiler to
1826 further optimise the multiplies, square roots and divides produced by this
1827 function. */
1828
1829static tree
1830expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1831 tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1832{
1833 tree type = TREE_TYPE (arg0);
1834 machine_mode mode = TYPE_MODE (type);
1835 tree sqrtfn = mathfn_built_in (type, fn: BUILT_IN_SQRT);
1836 bool one_over = true;
1837
1838 if (!sqrtfn)
1839 return NULL_TREE;
1840
1841 if (TREE_CODE (arg1) != REAL_CST)
1842 return NULL_TREE;
1843
1844 REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1845
1846 gcc_assert (max_depth > 0);
1847 tree *cache = XALLOCAVEC (tree, max_depth + 1);
1848
1849 struct pow_synth_sqrt_info synth_info;
1850 synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1851 synth_info.deepest = 0;
1852 synth_info.num_mults = 0;
1853
1854 bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1855 REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1856
1857 /* The whole and fractional parts of exp. */
1858 REAL_VALUE_TYPE whole_part;
1859 REAL_VALUE_TYPE frac_part;
1860
1861 real_floor (&whole_part, mode, &exp);
1862 real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1863
1864
1865 REAL_VALUE_TYPE ceil_whole = dconst0;
1866 REAL_VALUE_TYPE ceil_fract = dconst0;
1867
1868 if (neg_exp)
1869 {
1870 real_ceil (&ceil_whole, mode, &exp);
1871 real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1872 }
1873
1874 if (!representable_as_half_series_p (c: frac_part, n: max_depth, info: &synth_info))
1875 return NULL_TREE;
1876
1877 /* Check whether it's more profitable to not use 1.0 / ... */
1878 if (neg_exp)
1879 {
1880 struct pow_synth_sqrt_info alt_synth_info;
1881 alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1882 alt_synth_info.deepest = 0;
1883 alt_synth_info.num_mults = 0;
1884
1885 if (representable_as_half_series_p (c: ceil_fract, n: max_depth,
1886 info: &alt_synth_info)
1887 && alt_synth_info.deepest <= synth_info.deepest
1888 && alt_synth_info.num_mults < synth_info.num_mults)
1889 {
1890 whole_part = ceil_whole;
1891 frac_part = ceil_fract;
1892 synth_info.deepest = alt_synth_info.deepest;
1893 synth_info.num_mults = alt_synth_info.num_mults;
1894 memcpy (dest: synth_info.factors, src: alt_synth_info.factors,
1895 n: (max_depth + 1) * sizeof (bool));
1896 one_over = false;
1897 }
1898 }
1899
1900 HOST_WIDE_INT n = real_to_integer (&whole_part);
1901 REAL_VALUE_TYPE cint;
1902 real_from_integer (&cint, VOIDmode, n, SIGNED);
1903
1904 if (!real_identical (&whole_part, &cint))
1905 return NULL_TREE;
1906
1907 if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1908 return NULL_TREE;
1909
1910 memset (s: cache, c: 0, n: (max_depth + 1) * sizeof (tree));
1911
1912 tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1913
1914 /* Calculate the integer part of the exponent. */
1915 if (n > 1)
1916 {
1917 integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1918 if (!integer_res)
1919 return NULL_TREE;
1920 }
1921
1922 if (dump_file)
1923 {
1924 char string[64];
1925
1926 real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1927 fprintf (stream: dump_file, format: "synthesizing pow (x, %s) as:\n", string);
1928
1929 if (neg_exp)
1930 {
1931 if (one_over)
1932 {
1933 fprintf (stream: dump_file, format: "1.0 / (");
1934 dump_integer_part (stream: dump_file, arg: "x", n);
1935 if (n > 0)
1936 fprintf (stream: dump_file, format: " * ");
1937 dump_fractional_sqrt_sequence (stream: dump_file, arg: "x", info: &synth_info);
1938 fprintf (stream: dump_file, format: ")");
1939 }
1940 else
1941 {
1942 dump_fractional_sqrt_sequence (stream: dump_file, arg: "x", info: &synth_info);
1943 fprintf (stream: dump_file, format: " / (");
1944 dump_integer_part (stream: dump_file, arg: "x", n);
1945 fprintf (stream: dump_file, format: ")");
1946 }
1947 }
1948 else
1949 {
1950 dump_fractional_sqrt_sequence (stream: dump_file, arg: "x", info: &synth_info);
1951 if (n > 0)
1952 fprintf (stream: dump_file, format: " * ");
1953 dump_integer_part (stream: dump_file, arg: "x", n);
1954 }
1955
1956 fprintf (stream: dump_file, format: "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1957 }
1958
1959
1960 tree fract_res = NULL_TREE;
1961 cache[0] = arg0;
1962
1963 /* Calculate the fractional part of the exponent. */
1964 for (unsigned i = 0; i < synth_info.deepest; i++)
1965 {
1966 if (synth_info.factors[i])
1967 {
1968 tree sqrt_chain = get_fn_chain (arg: arg0, n: i + 1, gsi, fn: sqrtfn, loc, cache);
1969
1970 if (!fract_res)
1971 fract_res = sqrt_chain;
1972
1973 else
1974 fract_res = build_and_insert_binop (gsi, loc, name: "powroot", code: MULT_EXPR,
1975 arg0: fract_res, arg1: sqrt_chain);
1976 }
1977 }
1978
1979 tree res = NULL_TREE;
1980
1981 if (neg_exp)
1982 {
1983 if (one_over)
1984 {
1985 if (n > 0)
1986 res = build_and_insert_binop (gsi, loc, name: "powroot", code: MULT_EXPR,
1987 arg0: fract_res, arg1: integer_res);
1988 else
1989 res = fract_res;
1990
1991 res = build_and_insert_binop (gsi, loc, name: "powrootrecip", code: RDIV_EXPR,
1992 arg0: build_real (type, dconst1), arg1: res);
1993 }
1994 else
1995 {
1996 res = build_and_insert_binop (gsi, loc, name: "powroot", code: RDIV_EXPR,
1997 arg0: fract_res, arg1: integer_res);
1998 }
1999 }
2000 else
2001 res = build_and_insert_binop (gsi, loc, name: "powroot", code: MULT_EXPR,
2002 arg0: fract_res, arg1: integer_res);
2003 return res;
2004}
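
/* A concrete picture of the two strategies for a negative exponent,
   e.g. pow (x, -5.875); approach (B) wins here because its fractional
   part 0.125 needs no chain-combining multiplies, while (A)'s 0.875
   part needs two.  The powi products are written out longhand only for
   illustration; the real code uses the optimal powi sequence.  */
#if 0
#include <math.h>

static double
approach_a (double x)   /* 1.0 / (powi (x, 5) * x**0.875) */
{
  double x5 = x * x * x * x * x;
  return 1.0 / (x5 * sqrt (x) * sqrt (sqrt (x)) * sqrt (sqrt (sqrt (x))));
}

static double
approach_b (double x)   /* x**0.125 / powi (x, 6) */
{
  double x6 = x * x * x * x * x * x;
  return sqrt (sqrt (sqrt (x))) / x6;
}
#endif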
2005
2006/* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
2007 with location info LOC. If possible, create an equivalent and
2008 less expensive sequence of statements prior to GSI, and return an
2009 expression holding the result. */
2010
2011static tree
2012gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
2013 tree arg0, tree arg1)
2014{
2015 REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
2016 REAL_VALUE_TYPE c2, dconst3;
2017 HOST_WIDE_INT n;
2018 tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
2019 machine_mode mode;
2020 bool speed_p = optimize_bb_for_speed_p (gsi_bb (i: *gsi));
2021 bool hw_sqrt_exists, c_is_int, c2_is_int;
2022
2023 dconst1_4 = dconst1;
2024 SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
2025
2026 /* If the exponent isn't a constant, there's nothing of interest
2027 to be done. */
2028 if (TREE_CODE (arg1) != REAL_CST)
2029 return NULL_TREE;
2030
2031 /* Don't perform the operation if flag_signaling_nans is on
2032 and the operand is a signaling NaN. */
2033 if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
2034 && ((TREE_CODE (arg0) == REAL_CST
2035 && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
2036 || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
2037 return NULL_TREE;
2038
2039 /* If the exponent is equivalent to an integer, expand to an optimal
2040 multiplication sequence when profitable. */
2041 c = TREE_REAL_CST (arg1);
2042 n = real_to_integer (&c);
2043 real_from_integer (&cint, VOIDmode, n, SIGNED);
2044 c_is_int = real_identical (&c, &cint);
2045
2046 if (c_is_int
2047 && ((n >= -1 && n <= 2)
2048 || (flag_unsafe_math_optimizations
2049 && speed_p
2050 && powi_cost (n) <= POWI_MAX_MULTS)))
2051 return gimple_expand_builtin_powi (gsi, loc, arg0, n);
2052
2053 /* Attempt various optimizations using sqrt and cbrt. */
2054 type = TREE_TYPE (arg0);
2055 mode = TYPE_MODE (type);
2056 sqrtfn = mathfn_built_in (type, fn: BUILT_IN_SQRT);
2057
2058 /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
2059 unless signed zeros must be maintained. pow(-0,0.5) = +0, while
2060 sqrt(-0) = -0. */
2061 if (sqrtfn
2062 && real_equal (&c, &dconsthalf)
2063 && !HONOR_SIGNED_ZEROS (mode))
2064 return build_and_insert_call (gsi, loc, fn: sqrtfn, arg: arg0);
2065
2066 hw_sqrt_exists = optab_handler (op: sqrt_optab, mode) != CODE_FOR_nothing;
2067
2068 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
2069 optimizations since 1./3. is not exactly representable. If x
2070 is negative and finite, the correct value of pow(x,1./3.) is
2071 a NaN with the "invalid" exception raised, because the value
2072 of 1./3. actually has an even denominator. The correct value
2073 of cbrt(x) is a negative real value. */
2074 cbrtfn = mathfn_built_in (type, fn: BUILT_IN_CBRT);
2075 dconst1_3 = real_value_truncate (mode, dconst_third ());
2076
2077 if (flag_unsafe_math_optimizations
2078 && cbrtfn
2079 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2080 && real_equal (&c, &dconst1_3))
2081 return build_and_insert_call (gsi, loc, fn: cbrtfn, arg: arg0);
2082
2083 /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
2084 if we don't have a hardware sqrt insn. */
2085 dconst1_6 = dconst1_3;
2086 SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
2087
2088 if (flag_unsafe_math_optimizations
2089 && sqrtfn
2090 && cbrtfn
2091 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2092 && speed_p
2093 && hw_sqrt_exists
2094 && real_equal (&c, &dconst1_6))
2095 {
2096 /* sqrt(x) */
2097 sqrt_arg0 = build_and_insert_call (gsi, loc, fn: sqrtfn, arg: arg0);
2098
2099 /* cbrt(sqrt(x)) */
2100 return build_and_insert_call (gsi, loc, fn: cbrtfn, arg: sqrt_arg0);
2101 }
2102
2103
2104 /* Attempt to expand the POW as a product of square root chains.
2105 Expand the 0.25 case even when optimising for size. */
2106 if (flag_unsafe_math_optimizations
2107 && sqrtfn
2108 && hw_sqrt_exists
2109 && (speed_p || real_equal (&c, &dconst1_4))
2110 && !HONOR_SIGNED_ZEROS (mode))
2111 {
2112 unsigned int max_depth = speed_p
2113 ? param_max_pow_sqrt_depth
2114 : 2;
2115
2116 tree expand_with_sqrts
2117 = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
2118
2119 if (expand_with_sqrts)
2120 return expand_with_sqrts;
2121 }
2122
2123 real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
2124 n = real_to_integer (&c2);
2125 real_from_integer (&cint, VOIDmode, n, SIGNED);
2126 c2_is_int = real_identical (&c2, &cint);
2127
2128 /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
2129
2130 powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
2131 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
2132
2133 Do not calculate the first factor when n/3 = 0. As cbrt(x) is
2134 different from pow(x, 1./3.) due to rounding and behavior with
2135 negative x, we need to constrain this transformation to unsafe
2136 math and positive x or finite math. */
2137 real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
2138 real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
2139 real_round (&c2, mode, &c2);
2140 n = real_to_integer (&c2);
2141 real_from_integer (&cint, VOIDmode, n, SIGNED);
2142 real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
2143 real_convert (&c2, mode, &c2);
2144
2145 if (flag_unsafe_math_optimizations
2146 && cbrtfn
2147 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2148 && real_identical (&c2, &c)
2149 && !c2_is_int
2150 && optimize_function_for_speed_p (cfun)
2151 && powi_cost (n: n / 3) <= POWI_MAX_MULTS)
2152 {
2153 tree powi_x_ndiv3 = NULL_TREE;
2154
2155 /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
2156 possible or profitable, give up. Skip the degenerate case when
2157 abs(n) < 3, where the result is always 1. */
2158 if (absu_hwi (x: n) >= 3)
2159 {
2160 powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
2161 n: abs_hwi (x: n / 3));
2162 if (!powi_x_ndiv3)
2163 return NULL_TREE;
2164 }
2165
2166 /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
2167 as that creates an unnecessary variable. Instead, just produce
2168 either cbrt(x) or cbrt(x) * cbrt(x). */
2169 cbrt_x = build_and_insert_call (gsi, loc, fn: cbrtfn, arg: arg0);
2170
2171 if (absu_hwi (x: n) % 3 == 1)
2172 powi_cbrt_x = cbrt_x;
2173 else
2174 powi_cbrt_x = build_and_insert_binop (gsi, loc, name: "powroot", code: MULT_EXPR,
2175 arg0: cbrt_x, arg1: cbrt_x);
2176
2177 /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
2178 if (absu_hwi (x: n) < 3)
2179 result = powi_cbrt_x;
2180 else
2181 result = build_and_insert_binop (gsi, loc, name: "powroot", code: MULT_EXPR,
2182 arg0: powi_x_ndiv3, arg1: powi_cbrt_x);
2183
2184 /* If n is negative, reciprocate the result. */
2185 if (n < 0)
2186 result = build_and_insert_binop (gsi, loc, name: "powroot", code: RDIV_EXPR,
2187 arg0: build_real (type, dconst1), arg1: result);
2188
2189 return result;
2190 }
2191
2192 /* No optimizations succeeded. */
2193 return NULL_TREE;
2194}
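
/* A rough source-level picture of the rewrites above, assuming
   -funsafe-math-optimizations and a hardware sqrt; the pass emits GIMPLE
   rather than libm calls, and the exact choice also depends on speed
   versus size tuning.  */
#if 0
#include <math.h>

double p_half (double x)  { return sqrt (x); }          /* pow (x, 0.5)   */
double p_third (double x) { return cbrt (x); }          /* pow (x, 1./3.) */
double p_sixth (double x) { return cbrt (sqrt (x)); }   /* pow (x, 1./6.) */
double p_2_75 (double x)                                /* pow (x, 2.75)  */
{ return (x * x) * sqrt (x) * sqrt (sqrt (x)); }
#endif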
2195
2196/* ARG is the argument to a cabs builtin call in GSI with location info
2197 LOC. Create a sequence of statements prior to GSI that calculates
2198 sqrt(R*R + I*I), where R and I are the real and imaginary components
2199 of ARG, respectively. Return an expression holding the result. */
2200
2201static tree
2202gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
2203{
2204 tree real_part, imag_part, addend1, addend2, sum, result;
2205 tree type = TREE_TYPE (TREE_TYPE (arg));
2206 tree sqrtfn = mathfn_built_in (type, fn: BUILT_IN_SQRT);
2207 machine_mode mode = TYPE_MODE (type);
2208
2209 if (!flag_unsafe_math_optimizations
2210 || !optimize_bb_for_speed_p (gimple_bb (g: gsi_stmt (i: *gsi)))
2211 || !sqrtfn
2212 || optab_handler (op: sqrt_optab, mode) == CODE_FOR_nothing)
2213 return NULL_TREE;
2214
2215 real_part = build_and_insert_ref (gsi, loc, type, name: "cabs",
2216 code: REALPART_EXPR, arg0: arg);
2217 addend1 = build_and_insert_binop (gsi, loc, name: "cabs", code: MULT_EXPR,
2218 arg0: real_part, arg1: real_part);
2219 imag_part = build_and_insert_ref (gsi, loc, type, name: "cabs",
2220 code: IMAGPART_EXPR, arg0: arg);
2221 addend2 = build_and_insert_binop (gsi, loc, name: "cabs", code: MULT_EXPR,
2222 arg0: imag_part, arg1: imag_part);
2223 sum = build_and_insert_binop (gsi, loc, name: "cabs", code: PLUS_EXPR, arg0: addend1, arg1: addend2);
2224 result = build_and_insert_call (gsi, loc, fn: sqrtfn, arg: sum);
2225
2226 return result;
2227}
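
/* What the cabs expansion amounts to at the source level.  Unlike a
   correctly rounded cabs, this naive form can overflow or underflow for
   extreme components, which is why the transformation is guarded by
   flag_unsafe_math_optimizations.  */
#if 0
#include <complex.h>
#include <math.h>

static double
cabs_expanded (double complex z)
{
  double r = creal (z), i = cimag (z);
  return sqrt (r * r + i * i);
}
#endif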
2228
2229/* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2230 on the SSA_NAME argument of each of them. */
2231
2232namespace {
2233
2234const pass_data pass_data_cse_sincos =
2235{
2236 .type: GIMPLE_PASS, /* type */
2237 .name: "sincos", /* name */
2238 .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */
2239 .tv_id: TV_TREE_SINCOS, /* tv_id */
2240 PROP_ssa, /* properties_required */
2241 .properties_provided: 0, /* properties_provided */
2242 .properties_destroyed: 0, /* properties_destroyed */
2243 .todo_flags_start: 0, /* todo_flags_start */
2244 TODO_update_ssa, /* todo_flags_finish */
2245};
2246
2247class pass_cse_sincos : public gimple_opt_pass
2248{
2249public:
2250 pass_cse_sincos (gcc::context *ctxt)
2251 : gimple_opt_pass (pass_data_cse_sincos, ctxt)
2252 {}
2253
2254 /* opt_pass methods: */
2255 bool gate (function *) final override
2256 {
2257 return optimize;
2258 }
2259
2260 unsigned int execute (function *) final override;
2261
2262}; // class pass_cse_sincos
2263
2264unsigned int
2265pass_cse_sincos::execute (function *fun)
2266{
2267 basic_block bb;
2268 bool cfg_changed = false;
2269
2270 calculate_dominance_info (CDI_DOMINATORS);
2271 memset (s: &sincos_stats, c: 0, n: sizeof (sincos_stats));
2272
2273 FOR_EACH_BB_FN (bb, fun)
2274 {
2275 gimple_stmt_iterator gsi;
2276
2277 for (gsi = gsi_after_labels (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
2278 {
2279 gimple *stmt = gsi_stmt (i: gsi);
2280
2281 if (is_gimple_call (gs: stmt)
2282 && gimple_call_lhs (gs: stmt))
2283 {
2284 tree arg;
2285 switch (gimple_call_combined_fn (stmt))
2286 {
2287 CASE_CFN_COS:
2288 CASE_CFN_SIN:
2289 CASE_CFN_CEXPI:
2290 arg = gimple_call_arg (gs: stmt, index: 0);
2291 /* Make sure we have either sincos or cexp. */
2292 if (!targetm.libc_has_function (function_c99_math_complex,
2293 TREE_TYPE (arg))
2294 && !targetm.libc_has_function (function_sincos,
2295 TREE_TYPE (arg)))
2296 break;
2297
2298 if (TREE_CODE (arg) == SSA_NAME)
2299 cfg_changed |= execute_cse_sincos_1 (name: arg);
2300 break;
2301 default:
2302 break;
2303 }
2304 }
2305 }
2306 }
2307
2308 statistics_counter_event (fun, "sincos statements inserted",
2309 sincos_stats.inserted);
2310 statistics_counter_event (fun, "conv statements removed",
2311 sincos_stats.conv_removed);
2312
2313 return cfg_changed ? TODO_cleanup_cfg : 0;
2314}
2315
2316} // anon namespace
2317
2318gimple_opt_pass *
2319make_pass_cse_sincos (gcc::context *ctxt)
2320{
2321 return new pass_cse_sincos (ctxt);
2322}
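
/* A sketch of the rewrite that execute_cse_sincos_1 performs on the calls
   collected above: sin and cos of the same SSA name become a single
   combined computation.  sincos here is the GNU libc entry point; the
   pass may equally use __builtin_cexpi, depending on what the target's
   libc advertises.  */
#if 0
#include <math.h>

static double before (double x) { return sin (x) + cos (x); }

static double after (double x)
{
  double s, c;
  sincos (x, &s, &c);   /* One call computes both results.  */
  return s + c;
}
#endif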
2323
2324/* Expand powi(x,n) into an optimal number of multiplies, when n is a constant.
2325 Also expand CABS. */
2326namespace {
2327
2328const pass_data pass_data_expand_powcabs =
2329{
2330 .type: GIMPLE_PASS, /* type */
2331 .name: "powcabs", /* name */
2332 .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */
2333 .tv_id: TV_TREE_POWCABS, /* tv_id */
2334 PROP_ssa, /* properties_required */
2335 PROP_gimple_opt_math, /* properties_provided */
2336 .properties_destroyed: 0, /* properties_destroyed */
2337 .todo_flags_start: 0, /* todo_flags_start */
2338 TODO_update_ssa, /* todo_flags_finish */
2339};
2340
2341class pass_expand_powcabs : public gimple_opt_pass
2342{
2343public:
2344 pass_expand_powcabs (gcc::context *ctxt)
2345 : gimple_opt_pass (pass_data_expand_powcabs, ctxt)
2346 {}
2347
2348 /* opt_pass methods: */
2349 bool gate (function *) final override
2350 {
2351 return optimize;
2352 }
2353
2354 unsigned int execute (function *) final override;
2355
2356}; // class pass_expand_powcabs
2357
2358unsigned int
2359pass_expand_powcabs::execute (function *fun)
2360{
2361 basic_block bb;
2362 bool cfg_changed = false;
2363
2364 calculate_dominance_info (CDI_DOMINATORS);
2365
2366 FOR_EACH_BB_FN (bb, fun)
2367 {
2368 gimple_stmt_iterator gsi;
2369 bool cleanup_eh = false;
2370
2371 for (gsi = gsi_after_labels (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
2372 {
2373 gimple *stmt = gsi_stmt (i: gsi);
2374
2375 /* Only the last stmt in a bb could throw; no need to call
2376 gimple_purge_dead_eh_edges if we change something in the middle
2377 of a basic block. */
2378 cleanup_eh = false;
2379
2380 if (is_gimple_call (gs: stmt)
2381 && gimple_call_lhs (gs: stmt))
2382 {
2383 tree arg0, arg1, result;
2384 HOST_WIDE_INT n;
2385 location_t loc;
2386
2387 switch (gimple_call_combined_fn (stmt))
2388 {
2389 CASE_CFN_POW:
2390 arg0 = gimple_call_arg (gs: stmt, index: 0);
2391 arg1 = gimple_call_arg (gs: stmt, index: 1);
2392
2393 loc = gimple_location (g: stmt);
2394 result = gimple_expand_builtin_pow (gsi: &gsi, loc, arg0, arg1);
2395
2396 if (result)
2397 {
2398 tree lhs = gimple_get_lhs (stmt);
2399 gassign *new_stmt = gimple_build_assign (lhs, result);
2400 gimple_set_location (g: new_stmt, location: loc);
2401 unlink_stmt_vdef (stmt);
2402 gsi_replace (&gsi, new_stmt, true);
2403 cleanup_eh = true;
2404 if (gimple_vdef (g: stmt))
2405 release_ssa_name (name: gimple_vdef (g: stmt));
2406 }
2407 break;
2408
2409 CASE_CFN_POWI:
2410 arg0 = gimple_call_arg (gs: stmt, index: 0);
2411 arg1 = gimple_call_arg (gs: stmt, index: 1);
2412 loc = gimple_location (g: stmt);
2413
2414 if (real_minus_onep (arg0))
2415 {
2416 tree t0, t1, cond, one, minus_one;
2417 gassign *stmt;
2418
2419 t0 = TREE_TYPE (arg0);
2420 t1 = TREE_TYPE (arg1);
2421 one = build_real (t0, dconst1);
2422 minus_one = build_real (t0, dconstm1);
2423
2424 cond = make_temp_ssa_name (type: t1, NULL, name: "powi_cond");
2425 stmt = gimple_build_assign (cond, BIT_AND_EXPR,
2426 arg1, build_int_cst (t1, 1));
2427 gimple_set_location (g: stmt, location: loc);
2428 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2429
2430 result = make_temp_ssa_name (type: t0, NULL, name: "powi");
2431 stmt = gimple_build_assign (result, COND_EXPR, cond,
2432 minus_one, one);
2433 gimple_set_location (g: stmt, location: loc);
2434 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2435 }
2436 else
2437 {
2438 if (!tree_fits_shwi_p (arg1))
2439 break;
2440
2441 n = tree_to_shwi (arg1);
2442 result = gimple_expand_builtin_powi (gsi: &gsi, loc, arg0, n);
2443 }
2444
2445 if (result)
2446 {
2447 tree lhs = gimple_get_lhs (stmt);
2448 gassign *new_stmt = gimple_build_assign (lhs, result);
2449 gimple_set_location (g: new_stmt, location: loc);
2450 unlink_stmt_vdef (stmt);
2451 gsi_replace (&gsi, new_stmt, true);
2452 cleanup_eh = true;
2453 if (gimple_vdef (g: stmt))
2454 release_ssa_name (name: gimple_vdef (g: stmt));
2455 }
2456 break;
2457
2458 CASE_CFN_CABS:
2459 arg0 = gimple_call_arg (gs: stmt, index: 0);
2460 loc = gimple_location (g: stmt);
2461 result = gimple_expand_builtin_cabs (gsi: &gsi, loc, arg: arg0);
2462
2463 if (result)
2464 {
2465 tree lhs = gimple_get_lhs (stmt);
2466 gassign *new_stmt = gimple_build_assign (lhs, result);
2467 gimple_set_location (g: new_stmt, location: loc);
2468 unlink_stmt_vdef (stmt);
2469 gsi_replace (&gsi, new_stmt, true);
2470 cleanup_eh = true;
2471 if (gimple_vdef (g: stmt))
2472 release_ssa_name (name: gimple_vdef (g: stmt));
2473 }
2474 break;
2475
2476 default:;
2477 }
2478 }
2479 }
2480 if (cleanup_eh)
2481 cfg_changed |= gimple_purge_dead_eh_edges (bb);
2482 }
2483
2484 return cfg_changed ? TODO_cleanup_cfg : 0;
2485}
2486
2487} // anon namespace
2488
2489gimple_opt_pass *
2490make_pass_expand_powcabs (gcc::context *ctxt)
2491{
2492 return new pass_expand_powcabs (ctxt);
2493}
2494
2495/* Return true if stmt is a type conversion operation that can be stripped
2496 when used in a widening multiply operation. */
2497static bool
2498widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2499{
2500 enum tree_code rhs_code = gimple_assign_rhs_code (gs: stmt);
2501
2502 if (TREE_CODE (result_type) == INTEGER_TYPE)
2503 {
2504 tree op_type;
2505 tree inner_op_type;
2506
2507 if (!CONVERT_EXPR_CODE_P (rhs_code))
2508 return false;
2509
2510 op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2511
2512 /* If the type of OP has the same precision as the result, then
2513 we can strip this conversion. The multiply operation will be
2514 selected to create the correct extension as a by-product. */
2515 if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2516 return true;
2517
2518 /* We can also strip a conversion if it preserves the signed-ness of
2519 the operation and doesn't narrow the range. */
2520 inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2521
2522 /* If the inner-most type is unsigned, then we can strip any
2523 intermediate widening operation. If it's signed, then the
2524 intermediate widening operation must also be signed. */
2525 if ((TYPE_UNSIGNED (inner_op_type)
2526 || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2527 && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2528 return true;
2529
2530 return false;
2531 }
2532
2533 return rhs_code == FIXED_CONVERT_EXPR;
2534}
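
/* An example of a strippable conversion: with a 16x16->32 widening
   multiply pattern, the intermediate (int) casts below can be dropped and
   the extension produced as a by-product of WIDEN_MULT_EXPR on the short
   operands.  */
#if 0
int
widen_ok (short a, short b)
{
  return (int) a * (int) b;
}
#endif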
2535
2536/* Return true if RHS is a suitable operand for a widening multiplication,
2537 assuming a target type of TYPE.
2538 There are two cases:
2539
2540 - RHS makes some value at least twice as wide. Store that value
2541 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2542
2543 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
2544 but leave *TYPE_OUT untouched. */
2545
2546static bool
2547is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2548 tree *new_rhs_out)
2549{
2550 gimple *stmt;
2551 tree type1, rhs1;
2552
2553 if (TREE_CODE (rhs) == SSA_NAME)
2554 {
2555 /* Use tree_nonzero_bits to see if this operand is zero extended
2556 for unsigned widening multiplications or non-negative for
2557 signed widening multiplications. */
2558 if (TREE_CODE (type) == INTEGER_TYPE
2559 && (TYPE_PRECISION (type) & 1) == 0
2560 && int_mode_for_size (TYPE_PRECISION (type) / 2, limit: 1).exists ())
2561 {
2562 unsigned int prec = TYPE_PRECISION (type);
2563 unsigned int hprec = prec / 2;
2564 wide_int bits = wide_int::from (x: tree_nonzero_bits (rhs), precision: prec,
2565 TYPE_SIGN (TREE_TYPE (rhs)));
2566 if (TYPE_UNSIGNED (type)
2567 && wi::bit_and (x: bits, y: wi::mask (width: hprec, negate_p: true, precision: prec)) == 0)
2568 {
2569 *type_out = build_nonstandard_integer_type (hprec, true);
2570 /* X & MODE_MASK can be simplified to (T)X. */
2571 stmt = SSA_NAME_DEF_STMT (rhs);
2572 if (is_gimple_assign (gs: stmt)
2573 && gimple_assign_rhs_code (gs: stmt) == BIT_AND_EXPR
2574 && TREE_CODE (gimple_assign_rhs2 (stmt)) == INTEGER_CST
2575 && wide_int::from (x: wi::to_wide (t: gimple_assign_rhs2 (gs: stmt)),
2576 precision: prec, TYPE_SIGN (TREE_TYPE (rhs)))
2577 == wi::mask (width: hprec, negate_p: false, precision: prec))
2578 *new_rhs_out = gimple_assign_rhs1 (gs: stmt);
2579 else
2580 *new_rhs_out = rhs;
2581 return true;
2582 }
2583 else if (!TYPE_UNSIGNED (type)
2584 && wi::bit_and (x: bits, y: wi::mask (width: hprec - 1, negate_p: true, precision: prec)) == 0)
2585 {
2586 *type_out = build_nonstandard_integer_type (hprec, false);
2587 *new_rhs_out = rhs;
2588 return true;
2589 }
2590 }
2591
2592 stmt = SSA_NAME_DEF_STMT (rhs);
2593 if (is_gimple_assign (gs: stmt))
2594 {
2595
2596 if (widening_mult_conversion_strippable_p (result_type: type, stmt))
2597 {
2598 rhs1 = gimple_assign_rhs1 (gs: stmt);
2599
2600 if (TREE_CODE (rhs1) == INTEGER_CST)
2601 {
2602 *new_rhs_out = rhs1;
2603 *type_out = NULL;
2604 return true;
2605 }
2606 }
2607 else
2608 rhs1 = rhs;
2609 }
2610 else
2611 rhs1 = rhs;
2612
2613 type1 = TREE_TYPE (rhs1);
2614
2615 if (TREE_CODE (type1) != TREE_CODE (type)
2616 || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2617 return false;
2618
2619 *new_rhs_out = rhs1;
2620 *type_out = type1;
2621 return true;
2622 }
2623
2624 if (TREE_CODE (rhs) == INTEGER_CST)
2625 {
2626 *new_rhs_out = rhs;
2627 *type_out = NULL;
2628 return true;
2629 }
2630
2631 return false;
2632}
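
/* The nonzero-bits path above also lets an operand qualify without an
   explicit narrowing conversion, e.g. when a mask already clears the high
   half; the BIT_AND itself is looked through because X & 0xffffffff is
   just (uint32_t) X.  */
#if 0
unsigned long long
masked_widen (unsigned long long x, unsigned long long y)
{
  return (x & 0xffffffffULL) * (y & 0xffffffffULL);
}
#endif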
2633
2634/* Return true if STMT performs a widening multiplication, assuming the
2635 output type is TYPE. If so, store the unwidened types of the operands
2636 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2637 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2638 and *TYPE2_OUT would give the operands of the multiplication. */
2639
2640static bool
2641is_widening_mult_p (gimple *stmt,
2642 tree *type1_out, tree *rhs1_out,
2643 tree *type2_out, tree *rhs2_out)
2644{
2645 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2646
2647 if (TREE_CODE (type) == INTEGER_TYPE)
2648 {
2649 if (TYPE_OVERFLOW_TRAPS (type))
2650 return false;
2651 }
2652 else if (TREE_CODE (type) != FIXED_POINT_TYPE)
2653 return false;
2654
2655 if (!is_widening_mult_rhs_p (type, rhs: gimple_assign_rhs1 (gs: stmt), type_out: type1_out,
2656 new_rhs_out: rhs1_out))
2657 return false;
2658
2659 if (!is_widening_mult_rhs_p (type, rhs: gimple_assign_rhs2 (gs: stmt), type_out: type2_out,
2660 new_rhs_out: rhs2_out))
2661 return false;
2662
2663 if (*type1_out == NULL)
2664 {
2665 if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2666 return false;
2667 *type1_out = *type2_out;
2668 }
2669
2670 if (*type2_out == NULL)
2671 {
2672 if (!int_fits_type_p (*rhs2_out, *type1_out))
2673 return false;
2674 *type2_out = *type1_out;
2675 }
2676
2677 /* Ensure that the larger of the two operands comes first. */
2678 if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2679 {
2680 std::swap (a&: *type1_out, b&: *type2_out);
2681 std::swap (a&: *rhs1_out, b&: *rhs2_out);
2682 }
2683
2684 return true;
2685}
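
/* Multiplications the predicate accepts, assuming a 64-bit result type
   and the corresponding target patterns: operands may have different
   widths (the wider one is swapped to come first) and an INTEGER_CST
   operand is allowed as long as it fits the type of the other operand.  */
#if 0
long long
mixed_widths (int a, short b)           /* type1 = int, type2 = short.  */
{
  return (long long) a * b;
}

long long
with_constant (int a)                   /* 100 fits the int operand's type.  */
{
  return (long long) a * 100;
}
#endif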
2686
2687/* Check to see if the CALL statement is an invocation of copysign
2688 with 1.0 as its first argument. */
2689static bool
2690is_copysign_call_with_1 (gimple *call)
2691{
2692 gcall *c = dyn_cast <gcall *> (p: call);
2693 if (! c)
2694 return false;
2695
2696 enum combined_fn code = gimple_call_combined_fn (c);
2697
2698 if (code == CFN_LAST)
2699 return false;
2700
2701 if (builtin_fn_p (code))
2702 {
2703 switch (as_builtin_fn (code))
2704 {
2705 CASE_FLT_FN (BUILT_IN_COPYSIGN):
2706 CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2707 return real_onep (gimple_call_arg (gs: c, index: 0));
2708 default:
2709 return false;
2710 }
2711 }
2712
2713 if (internal_fn_p (code))
2714 {
2715 switch (as_internal_fn (code))
2716 {
2717 case IFN_COPYSIGN:
2718 return real_onep (gimple_call_arg (gs: c, index: 0));
2719 default:
2720 return false;
2721 }
2722 }
2723
2724 return false;
2725}
2726
2727/* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2728 This only happens when the xorsign optab is defined. If the
2729 pattern is not a xorsign pattern or if expansion fails, FALSE is
2730 returned, otherwise TRUE is returned. */
2731static bool
2732convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2733{
2734 tree treeop0, treeop1, lhs, type;
2735 location_t loc = gimple_location (g: stmt);
2736 lhs = gimple_assign_lhs (gs: stmt);
2737 treeop0 = gimple_assign_rhs1 (gs: stmt);
2738 treeop1 = gimple_assign_rhs2 (gs: stmt);
2739 type = TREE_TYPE (lhs);
2740 machine_mode mode = TYPE_MODE (type);
2741
2742 if (HONOR_SNANS (type))
2743 return false;
2744
2745 if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2746 {
2747 gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
2748 if (!has_single_use (var: treeop0) || !is_copysign_call_with_1 (call: call0))
2749 {
2750 call0 = SSA_NAME_DEF_STMT (treeop1);
2751 if (!has_single_use (var: treeop1) || !is_copysign_call_with_1 (call: call0))
2752 return false;
2753
2754 treeop1 = treeop0;
2755 }
2756 if (optab_handler (op: xorsign_optab, mode) == CODE_FOR_nothing)
2757 return false;
2758
2759 gcall *c = as_a<gcall*> (p: call0);
2760 treeop0 = gimple_call_arg (gs: c, index: 1);
2761
2762 gcall *call_stmt
2763 = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2764 gimple_set_lhs (call_stmt, lhs);
2765 gimple_set_location (g: call_stmt, location: loc);
2766 gsi_replace (gsi, call_stmt, true);
2767 return true;
2768 }
2769
2770 return false;
2771}
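
/* At the bit level, xorsign (x, y) just XORs y's sign bit into x, which
   is exactly what x * copysign (1.0, y) computes for non-NaN values.  A
   scalar sketch for IEEE double; real targets use a single FP/vector XOR
   instruction.  */
#if 0
#include <stdint.h>
#include <string.h>

static double
xorsign_double (double x, double y)
{
  uint64_t ux, uy;
  memcpy (&ux, &x, sizeof ux);
  memcpy (&uy, &y, sizeof uy);
  ux ^= uy & 0x8000000000000000ULL;     /* Flip X's sign iff Y is negative.  */
  memcpy (&x, &ux, sizeof x);
  return x;
}

/* The pattern being matched above.  */
static double
pattern (double x, double y)
{
  return x * __builtin_copysign (1.0, y);
}
#endif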
2772
2773/* Process a single gimple statement STMT, which has a MULT_EXPR as
2774 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
2775 value is true iff we converted the statement. */
2776
2777static bool
2778convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2779{
2780 tree lhs, rhs1, rhs2, type, type1, type2;
2781 enum insn_code handler;
2782 scalar_int_mode to_mode, from_mode, actual_mode;
2783 optab op;
2784 int actual_precision;
2785 location_t loc = gimple_location (g: stmt);
2786 bool from_unsigned1, from_unsigned2;
2787
2788 lhs = gimple_assign_lhs (gs: stmt);
2789 type = TREE_TYPE (lhs);
2790 if (TREE_CODE (type) != INTEGER_TYPE)
2791 return false;
2792
2793 if (!is_widening_mult_p (stmt, type1_out: &type1, rhs1_out: &rhs1, type2_out: &type2, rhs2_out: &rhs2))
2794 return false;
2795
2796 /* If either rhs1 or rhs2 is subject to abnormal coalescing,
2797 avoid the transform. */
2798 if ((TREE_CODE (rhs1) == SSA_NAME
2799 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
2800 || (TREE_CODE (rhs2) == SSA_NAME
2801 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2)))
2802 return false;
2803
2804 to_mode = SCALAR_INT_TYPE_MODE (type);
2805 from_mode = SCALAR_INT_TYPE_MODE (type1);
2806 if (to_mode == from_mode)
2807 return false;
2808
2809 from_unsigned1 = TYPE_UNSIGNED (type1);
2810 from_unsigned2 = TYPE_UNSIGNED (type2);
2811
2812 if (from_unsigned1 && from_unsigned2)
2813 op = umul_widen_optab;
2814 else if (!from_unsigned1 && !from_unsigned2)
2815 op = smul_widen_optab;
2816 else
2817 op = usmul_widen_optab;
2818
2819 handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2820 found_mode: &actual_mode);
2821
2822 if (handler == CODE_FOR_nothing)
2823 {
2824 if (op != smul_widen_optab)
2825 {
2826 /* We can use a signed multiply with unsigned types as long as
2827 there is a wider mode to use, or it is the smaller of the two
2828 types that is unsigned. Note that type1 >= type2, always. */
2829 if ((TYPE_UNSIGNED (type1)
2830 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (mode: from_mode))
2831 || (TYPE_UNSIGNED (type2)
2832 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (mode: from_mode)))
2833 {
2834 if (!GET_MODE_WIDER_MODE (m: from_mode).exists (mode: &from_mode)
2835 || GET_MODE_SIZE (mode: to_mode) <= GET_MODE_SIZE (mode: from_mode))
2836 return false;
2837 }
2838
2839 op = smul_widen_optab;
2840 handler = find_widening_optab_handler_and_mode (op, to_mode,
2841 from_mode,
2842 found_mode: &actual_mode);
2843
2844 if (handler == CODE_FOR_nothing)
2845 return false;
2846
2847 from_unsigned1 = from_unsigned2 = false;
2848 }
2849 else
2850 {
2851 /* Expand can synthesize smul_widen_optab if the target
2852 supports umul_widen_optab. */
2853 op = umul_widen_optab;
2854 handler = find_widening_optab_handler_and_mode (op, to_mode,
2855 from_mode,
2856 found_mode: &actual_mode);
2857 if (handler == CODE_FOR_nothing)
2858 return false;
2859 }
2860 }
2861
2862 /* Ensure that the inputs to the handler are in the correct precision
2863 for the opcode. This will be the full mode size. */
2864 actual_precision = GET_MODE_PRECISION (mode: actual_mode);
2865 if (2 * actual_precision > TYPE_PRECISION (type))
2866 return false;
2867 if (actual_precision != TYPE_PRECISION (type1)
2868 || from_unsigned1 != TYPE_UNSIGNED (type1))
2869 type1 = build_nonstandard_integer_type (actual_precision, from_unsigned1);
2870 if (!useless_type_conversion_p (type1, TREE_TYPE (rhs1)))
2871 {
2872 if (TREE_CODE (rhs1) == INTEGER_CST)
2873 rhs1 = fold_convert (type1, rhs1);
2874 else
2875 rhs1 = build_and_insert_cast (gsi, loc, type: type1, val: rhs1);
2876 }
2877 if (actual_precision != TYPE_PRECISION (type2)
2878 || from_unsigned2 != TYPE_UNSIGNED (type2))
2879 type2 = build_nonstandard_integer_type (actual_precision, from_unsigned2);
2880 if (!useless_type_conversion_p (type2, TREE_TYPE (rhs2)))
2881 {
2882 if (TREE_CODE (rhs2) == INTEGER_CST)
2883 rhs2 = fold_convert (type2, rhs2);
2884 else
2885 rhs2 = build_and_insert_cast (gsi, loc, type: type2, val: rhs2);
2886 }
2887
2888 gimple_assign_set_rhs1 (gs: stmt, rhs: rhs1);
2889 gimple_assign_set_rhs2 (gs: stmt, rhs: rhs2);
2890 gimple_assign_set_rhs_code (s: stmt, code: WIDEN_MULT_EXPR);
2891 update_stmt (s: stmt);
2892 widen_mul_stats.widen_mults_inserted++;
2893 return true;
2894}
2895
2896/* Process a single gimple statement STMT, which is found at the
2897 iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
2898 rhs (given by CODE), and try to convert it into a
2899 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
2900 is true iff we converted the statement. */
2901
2902static bool
2903convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
2904 enum tree_code code)
2905{
2906 gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2907 gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
2908 tree type, type1, type2, optype;
2909 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2910 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2911 optab this_optab;
2912 enum tree_code wmult_code;
2913 enum insn_code handler;
2914 scalar_mode to_mode, from_mode, actual_mode;
2915 location_t loc = gimple_location (g: stmt);
2916 int actual_precision;
2917 bool from_unsigned1, from_unsigned2;
2918
2919 lhs = gimple_assign_lhs (gs: stmt);
2920 type = TREE_TYPE (lhs);
2921 if ((TREE_CODE (type) != INTEGER_TYPE
2922 && TREE_CODE (type) != FIXED_POINT_TYPE)
2923 || !type_has_mode_precision_p (t: type))
2924 return false;
2925
2926 if (code == MINUS_EXPR)
2927 wmult_code = WIDEN_MULT_MINUS_EXPR;
2928 else
2929 wmult_code = WIDEN_MULT_PLUS_EXPR;
2930
2931 rhs1 = gimple_assign_rhs1 (gs: stmt);
2932 rhs2 = gimple_assign_rhs2 (gs: stmt);
2933
2934 if (TREE_CODE (rhs1) == SSA_NAME)
2935 {
2936 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2937 if (is_gimple_assign (gs: rhs1_stmt))
2938 rhs1_code = gimple_assign_rhs_code (gs: rhs1_stmt);
2939 }
2940
2941 if (TREE_CODE (rhs2) == SSA_NAME)
2942 {
2943 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2944 if (is_gimple_assign (gs: rhs2_stmt))
2945 rhs2_code = gimple_assign_rhs_code (gs: rhs2_stmt);
2946 }
2947
2948 /* Allow for one conversion statement between the multiply
2949 and addition/subtraction statement. If there is more than
2950 one conversion then we assume it would invalidate this
2951 transformation. If that's not the case then it should have
2952 been folded before now. */
2953 if (CONVERT_EXPR_CODE_P (rhs1_code))
2954 {
2955 conv1_stmt = rhs1_stmt;
2956 rhs1 = gimple_assign_rhs1 (gs: rhs1_stmt);
2957 if (TREE_CODE (rhs1) == SSA_NAME)
2958 {
2959 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2960 if (is_gimple_assign (gs: rhs1_stmt))
2961 rhs1_code = gimple_assign_rhs_code (gs: rhs1_stmt);
2962 }
2963 else
2964 return false;
2965 }
2966 if (CONVERT_EXPR_CODE_P (rhs2_code))
2967 {
2968 conv2_stmt = rhs2_stmt;
2969 rhs2 = gimple_assign_rhs1 (gs: rhs2_stmt);
2970 if (TREE_CODE (rhs2) == SSA_NAME)
2971 {
2972 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2973 if (is_gimple_assign (gs: rhs2_stmt))
2974 rhs2_code = gimple_assign_rhs_code (gs: rhs2_stmt);
2975 }
2976 else
2977 return false;
2978 }
2979
2980 /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2981 is_widening_mult_p, but we still need the rhs returns.
2982
2983 It might also appear that it would be sufficient to use the existing
2984 operands of the widening multiply, but that would limit the choice of
2985 multiply-and-accumulate instructions.
2986
2987 If the widened-multiplication result has more than one use, it is
2988 probably wiser not to do the conversion. Also restrict this operation
2989 to single basic block to avoid moving the multiply to a different block
2990 with a higher execution frequency. */
2991 if (code == PLUS_EXPR
2992 && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2993 {
2994 if (!has_single_use (var: rhs1)
2995 || gimple_bb (g: rhs1_stmt) != gimple_bb (g: stmt)
2996 || !is_widening_mult_p (stmt: rhs1_stmt, type1_out: &type1, rhs1_out: &mult_rhs1,
2997 type2_out: &type2, rhs2_out: &mult_rhs2))
2998 return false;
2999 add_rhs = rhs2;
3000 conv_stmt = conv1_stmt;
3001 }
3002 else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
3003 {
3004 if (!has_single_use (var: rhs2)
3005 || gimple_bb (g: rhs2_stmt) != gimple_bb (g: stmt)
3006 || !is_widening_mult_p (stmt: rhs2_stmt, type1_out: &type1, rhs1_out: &mult_rhs1,
3007 type2_out: &type2, rhs2_out: &mult_rhs2))
3008 return false;
3009 add_rhs = rhs1;
3010 conv_stmt = conv2_stmt;
3011 }
3012 else
3013 return false;
3014
3015 to_mode = SCALAR_TYPE_MODE (type);
3016 from_mode = SCALAR_TYPE_MODE (type1);
3017 if (to_mode == from_mode)
3018 return false;
3019
3020 from_unsigned1 = TYPE_UNSIGNED (type1);
3021 from_unsigned2 = TYPE_UNSIGNED (type2);
3022 optype = type1;
3023
3024 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
3025 if (from_unsigned1 != from_unsigned2)
3026 {
3027 if (!INTEGRAL_TYPE_P (type))
3028 return false;
3029 /* We can use a signed multiply with unsigned types as long as
3030 there is a wider mode to use, or it is the smaller of the two
3031 types that is unsigned. Note that type1 >= type2, always. */
3032 if ((from_unsigned1
3033 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (mode: from_mode))
3034 || (from_unsigned2
3035 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (mode: from_mode)))
3036 {
3037 if (!GET_MODE_WIDER_MODE (m: from_mode).exists (mode: &from_mode)
3038 || GET_MODE_SIZE (mode: from_mode) >= GET_MODE_SIZE (mode: to_mode))
3039 return false;
3040 }
3041
3042 from_unsigned1 = from_unsigned2 = false;
3043 optype = build_nonstandard_integer_type (GET_MODE_PRECISION (mode: from_mode),
3044 false);
3045 }
3046
3047 /* If there was a conversion between the multiply and addition
3048 then we need to make sure it fits a multiply-and-accumulate.
3049 There should be a single mode change which does not change the
3050 value. */
3051 if (conv_stmt)
3052 {
3053 /* We use the original, unmodified data types for this. */
3054 tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
3055 tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
3056 int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
3057 bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
3058
3059 if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
3060 {
3061 /* Conversion is a truncate. */
3062 if (TYPE_PRECISION (to_type) < data_size)
3063 return false;
3064 }
3065 else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
3066 {
3067 /* Conversion is an extend. Check it's the right sort. */
3068 if (TYPE_UNSIGNED (from_type) != is_unsigned
3069 && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
3070 return false;
3071 }
3072 /* else convert is a no-op for our purposes. */
3073 }
3074
3075 /* Verify that the machine can perform a widening multiply
3076 accumulate in this mode/signedness combination, otherwise
3077 this transformation is likely to pessimize code. */
3078 this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
3079 handler = find_widening_optab_handler_and_mode (op: this_optab, to_mode,
3080 from_mode, found_mode: &actual_mode);
3081
3082 if (handler == CODE_FOR_nothing)
3083 return false;
3084
3085 /* Ensure that the inputs to the handler are in the correct precision
3086 for the opcode. This will be the full mode size. */
3087 actual_precision = GET_MODE_PRECISION (mode: actual_mode);
3088 if (actual_precision != TYPE_PRECISION (type1)
3089 || from_unsigned1 != TYPE_UNSIGNED (type1))
3090 type1 = build_nonstandard_integer_type (actual_precision, from_unsigned1);
3091 if (!useless_type_conversion_p (type1, TREE_TYPE (mult_rhs1)))
3092 {
3093 if (TREE_CODE (mult_rhs1) == INTEGER_CST)
3094 mult_rhs1 = fold_convert (type1, mult_rhs1);
3095 else
3096 mult_rhs1 = build_and_insert_cast (gsi, loc, type: type1, val: mult_rhs1);
3097 }
3098 if (actual_precision != TYPE_PRECISION (type2)
3099 || from_unsigned2 != TYPE_UNSIGNED (type2))
3100 type2 = build_nonstandard_integer_type (actual_precision, from_unsigned2);
3101 if (!useless_type_conversion_p (type2, TREE_TYPE (mult_rhs2)))
3102 {
3103 if (TREE_CODE (mult_rhs2) == INTEGER_CST)
3104 mult_rhs2 = fold_convert (type2, mult_rhs2);
3105 else
3106 mult_rhs2 = build_and_insert_cast (gsi, loc, type: type2, val: mult_rhs2);
3107 }
3108
3109 if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
3110 add_rhs = build_and_insert_cast (gsi, loc, type, val: add_rhs);
3111
3112 gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
3113 add_rhs);
3114 update_stmt (s: gsi_stmt (i: *gsi));
3115 widen_mul_stats.maccs_inserted++;
3116 return true;
3117}
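
/* The shape of source code that can become WIDEN_MULT_PLUS_EXPR: a
   widening multiply whose single use is the addend of an accumulation in
   the same basic block, on targets with a widening multiply-accumulate
   pattern (e.g. Arm's smlal).  */
#if 0
long long
dot (const int *a, const int *b, int n)
{
  long long acc = 0;
  for (int i = 0; i < n; i++)
    acc += (long long) a[i] * b[i];
  return acc;
}
#endif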
3118
3119/* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
3120 OP2 and which we know is used in statements that can be, together with the
3121 multiplication, converted to FMAs, perform the transformation. */
3122
3123static void
3124convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
3125{
3126 tree type = TREE_TYPE (mul_result);
3127 gimple *use_stmt;
3128 imm_use_iterator imm_iter;
3129 gcall *fma_stmt;
3130
3131 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
3132 {
3133 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3134 tree addop, mulop1 = op1, result = mul_result;
3135 bool negate_p = false;
3136 gimple_seq seq = NULL;
3137
3138 if (is_gimple_debug (gs: use_stmt))
3139 continue;
3140
3141 if (is_gimple_assign (gs: use_stmt)
3142 && gimple_assign_rhs_code (gs: use_stmt) == NEGATE_EXPR)
3143 {
3144 result = gimple_assign_lhs (gs: use_stmt);
3145 use_operand_p use_p;
3146 gimple *neguse_stmt;
3147 single_imm_use (var: gimple_assign_lhs (gs: use_stmt), use_p: &use_p, stmt: &neguse_stmt);
3148 gsi_remove (&gsi, true);
3149 release_defs (use_stmt);
3150
3151 use_stmt = neguse_stmt;
3152 gsi = gsi_for_stmt (use_stmt);
3153 negate_p = true;
3154 }
3155
3156 tree cond, else_value, ops[3], len, bias;
3157 tree_code code;
3158 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code,
3159 ops, &else_value,
3160 &len, &bias))
3161 gcc_unreachable ();
3162 addop = ops[0] == result ? ops[1] : ops[0];
3163
3164 if (code == MINUS_EXPR)
3165 {
3166 if (ops[0] == result)
3167 /* a * b - c -> a * b + (-c) */
3168 addop = gimple_build (seq: &seq, code: NEGATE_EXPR, type, ops: addop);
3169 else
3170 /* a - b * c -> (-b) * c + a */
3171 negate_p = !negate_p;
3172 }
3173
3174 if (negate_p)
3175 mulop1 = gimple_build (seq: &seq, code: NEGATE_EXPR, type, ops: mulop1);
3176
3177 if (seq)
3178 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
3179
3180 if (len)
3181 fma_stmt
3182 = gimple_build_call_internal (IFN_COND_LEN_FMA, 7, cond, mulop1, op2,
3183 addop, else_value, len, bias);
3184 else if (cond)
3185 fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
3186 op2, addop, else_value);
3187 else
3188 fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
3189 gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
3190 gimple_call_set_nothrow (s: fma_stmt, nothrow_p: !stmt_can_throw_internal (cfun,
3191 use_stmt));
3192 gsi_replace (&gsi, fma_stmt, true);
3193 /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
3194 regardless of where the negation occurs. */
3195 gimple *orig_stmt = gsi_stmt (i: gsi);
3196 if (fold_stmt (&gsi, follow_all_ssa_edges))
3197 {
3198 if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (i: gsi)))
3199 gcc_unreachable ();
3200 update_stmt (s: gsi_stmt (i: gsi));
3201 }
3202
3203 if (dump_file && (dump_flags & TDF_DETAILS))
3204 {
3205 fprintf (stream: dump_file, format: "Generated FMA ");
3206 print_gimple_stmt (dump_file, gsi_stmt (i: gsi), 0, TDF_NONE);
3207 fprintf (stream: dump_file, format: "\n");
3208 }
3209
3210 /* If the FMA result is negated in a single use, fold the negation
3211 too. */
3212 orig_stmt = gsi_stmt (i: gsi);
3213 use_operand_p use_p;
3214 gimple *neg_stmt;
3215 if (is_gimple_call (gs: orig_stmt)
3216 && gimple_call_internal_p (gs: orig_stmt)
3217 && gimple_call_lhs (gs: orig_stmt)
3218 && TREE_CODE (gimple_call_lhs (orig_stmt)) == SSA_NAME
3219 && single_imm_use (var: gimple_call_lhs (gs: orig_stmt), use_p: &use_p, stmt: &neg_stmt)
3220 && is_gimple_assign (gs: neg_stmt)
3221 && gimple_assign_rhs_code (gs: neg_stmt) == NEGATE_EXPR
3222 && !stmt_could_throw_p (cfun, neg_stmt))
3223 {
3224 gsi = gsi_for_stmt (neg_stmt);
3225 if (fold_stmt (&gsi, follow_all_ssa_edges))
3226 {
3227 if (maybe_clean_or_replace_eh_stmt (neg_stmt, gsi_stmt (i: gsi)))
3228 gcc_unreachable ();
3229 update_stmt (s: gsi_stmt (i: gsi));
3230 if (dump_file && (dump_flags & TDF_DETAILS))
3231 {
3232 fprintf (stream: dump_file, format: "Folded FMA negation ");
3233 print_gimple_stmt (dump_file, gsi_stmt (i: gsi), 0, TDF_NONE);
3234 fprintf (stream: dump_file, format: "\n");
3235 }
3236 }
3237 }
3238
3239 widen_mul_stats.fmas_inserted++;
3240 }
3241}
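
/* The four fused shapes that can result once fold_stmt has propagated the
   negations: with -ffp-contract=fast each body below can become a single
   FMA, FMS, FNMA or FNMS operation respectively.  */
#if 0
double f_fma  (double a, double b, double c) { return a * b + c; }
double f_fms  (double a, double b, double c) { return a * b - c; }
double f_fnma (double a, double b, double c) { return c - a * b; }
double f_fnms (double a, double b, double c) { return -(a * b) - c; }
#endif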
3242
3243/* Data necessary to perform the actual transformation from a multiplication
3244 and an addition to an FMA after decision is taken it should be done and to
3245 then delete the multiplication statement from the function IL. */
3246
3247struct fma_transformation_info
3248{
3249 gimple *mul_stmt;
3250 tree mul_result;
3251 tree op1;
3252 tree op2;
3253};
3254
3255/* Structure containing the current state of FMA deferring, i.e. whether we are
3256 deferring, whether to continue deferring, and all data necessary to come
3257 back and perform all deferred transformations. */
3258
3259class fma_deferring_state
3260{
3261public:
3262 /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually
3263 do any deferring. */
3264
3265 fma_deferring_state (bool perform_deferring)
3266 : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
3267 m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}
3268
3269 /* List of FMA candidates for which the transformation has been determined
3270 possible but which at this point in the BB analysis we do not consider
3271 beneficial. */
3272 auto_vec<fma_transformation_info, 8> m_candidates;
3273
3274 /* Set of results of multiplication that are part of an already deferred FMA
3275 candidates. */
3276 hash_set<tree> m_mul_result_set;
3277
3278 /* The PHI that supposedly feeds back result of a FMA to another over loop
3279 boundary. */
3280 gphi *m_initial_phi;
3281
3282 /* Result of the last produced FMA candidate or NULL if there has not been
3283 one. */
3284 tree m_last_result;
3285
3286 /* If true, deferring might still be profitable. If false, transform all
3287 candidates and no longer defer. */
3288 bool m_deferring_p;
3289};
3290
3291/* Transform all deferred FMA candidates and mark STATE as no longer
3292 deferring. */
3293
3294static void
3295cancel_fma_deferring (fma_deferring_state *state)
3296{
3297 if (!state->m_deferring_p)
3298 return;
3299
3300 for (unsigned i = 0; i < state->m_candidates.length (); i++)
3301 {
3302 if (dump_file && (dump_flags & TDF_DETAILS))
3303 fprintf (stream: dump_file, format: "Generating deferred FMA\n");
3304
3305 const fma_transformation_info &fti = state->m_candidates[i];
3306 convert_mult_to_fma_1 (mul_result: fti.mul_result, op1: fti.op1, op2: fti.op2);
3307
3308 gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
3309 gsi_remove (&gsi, true);
3310 release_defs (fti.mul_stmt);
3311 }
3312 state->m_deferring_p = false;
3313}
3314
3315/* If OP is an SSA name defined by a PHI node, return the PHI statement.
3316 Otherwise return NULL. */
3317
3318static gphi *
3319result_of_phi (tree op)
3320{
3321 if (TREE_CODE (op) != SSA_NAME)
3322 return NULL;
3323
3324 return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
3325}
3326
3327/* After processing statements of a BB and recording STATE, return true if the
3328 initial phi is fed by the last FMA candidate result or one such result from
3329 previously processed BBs marked in LAST_RESULT_SET. */
3330
3331static bool
3332last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
3333 hash_set<tree> *last_result_set)
3334{
3335 ssa_op_iter iter;
3336 use_operand_p use;
3337 FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
3338 {
3339 tree t = USE_FROM_PTR (use);
3340 if (t == state->m_last_result
3341 || last_result_set->contains (k: t))
3342 return true;
3343 }
3344
3345 return false;
3346}
3347
3348/* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3349 with uses in additions and subtractions to form fused multiply-add
3350 operations. Returns true if successful and MUL_STMT should be removed.
3351 If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3352 on MUL_COND, otherwise it is unconditional.
3353
3354 If STATE indicates that we are deferring FMA transformation, that means
3355 that we do not produce FMAs for basic blocks which look like:
3356
3357 <bb 6>
3358 # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3359 _65 = _14 * _16;
3360 accumulator_66 = _65 + accumulator_111;
3361
3362 or its unrolled version, i.e. with several FMA candidates that feed the result
3363 of one into the addend of another. Instead, we add them to a list in STATE
3364 and if we later discover an FMA candidate that is not part of such a chain,
3365 we go back and perform all deferred past candidates. */
3366
3367static bool
3368convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
3369 fma_deferring_state *state, tree mul_cond = NULL_TREE,
3370 tree mul_len = NULL_TREE, tree mul_bias = NULL_TREE)
3371{
3372 tree mul_result = gimple_get_lhs (mul_stmt);
3373 /* If there isn't a LHS then this can't be an FMA. There can be no LHS
3374 if the statement was left just for the side-effects. */
3375 if (!mul_result)
3376 return false;
3377 tree type = TREE_TYPE (mul_result);
3378 gimple *use_stmt, *neguse_stmt;
3379 use_operand_p use_p;
3380 imm_use_iterator imm_iter;
3381
3382 if (FLOAT_TYPE_P (type)
3383 && flag_fp_contract_mode != FP_CONTRACT_FAST)
3384 return false;
3385
3386 /* We don't want to do bitfield reduction ops. */
3387 if (INTEGRAL_TYPE_P (type)
3388 && (!type_has_mode_precision_p (t: type) || TYPE_OVERFLOW_TRAPS (type)))
3389 return false;
3390
3391 /* If the target doesn't support it, don't generate it. We assume that
3392 if fma isn't available then fms, fnma or fnms are not either. */
3393 optimization_type opt_type = bb_optimization_type (gimple_bb (g: mul_stmt));
3394 if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type))
3395 return false;
3396
3397 /* If the multiplication has zero uses, it is kept around probably because
3398 of -fnon-call-exceptions. Don't optimize it away in that case,
3399 it is DCE's job. */
3400 if (has_zero_uses (var: mul_result))
3401 return false;
3402
3403 bool check_defer
3404 = (state->m_deferring_p
3405 && maybe_le (a: tree_to_poly_int64 (TYPE_SIZE (type)),
3406 param_avoid_fma_max_bits));
3407 bool defer = check_defer;
3408 bool seen_negate_p = false;
3409
3410 /* There is no numerical difference between fused and unfused integer FMAs,
3411 and the assumption below that FMA is as cheap as addition is unlikely
3412 to be true, especially if the multiplication occurs multiple times on
3413 the same chain. E.g., for something like:
3414
3415 (((a * b) + c) >> 1) + (a * b)
3416
3417 we do not want to duplicate the a * b into two additions, not least
3418 because the result is not a natural FMA chain. */
3419 if (ANY_INTEGRAL_TYPE_P (type)
3420 && !has_single_use (var: mul_result))
3421 return false;
3422
3423 if (!dbg_cnt (index: form_fma))
3424 return false;
3425
3426 /* Make sure that the multiplication statement becomes dead after
3427 the transformation, thus that all uses are transformed to FMAs.
3428 This means we assume that an FMA operation has the same cost
3429 as an addition. */
3430 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3431 {
3432 tree result = mul_result;
3433 bool negate_p = false;
3434
3435 use_stmt = USE_STMT (use_p);
3436
3437 if (is_gimple_debug (gs: use_stmt))
3438 continue;
3439
3440 /* For now restrict this operation to single basic blocks. In theory
3441 we would want to support sinking the multiplication in
3442 m = a*b;
3443 if ()
3444 ma = m + c;
3445 else
3446 d = m;
3447 to form a fma in the then block and sink the multiplication to the
3448 else block. */
3449 if (gimple_bb (g: use_stmt) != gimple_bb (g: mul_stmt))
3450 return false;
3451
3452 /* A negate on the multiplication leads to FNMA. */
3453 if (is_gimple_assign (gs: use_stmt)
3454 && gimple_assign_rhs_code (gs: use_stmt) == NEGATE_EXPR)
3455 {
3456 ssa_op_iter iter;
3457 use_operand_p usep;
3458
3459 /* If (due to earlier missed optimizations) we have two
3460 negates of the same value, treat them as equivalent
3461 to a single negate with multiple uses. */
3462 if (seen_negate_p)
3463 return false;
3464
3465 result = gimple_assign_lhs (gs: use_stmt);
3466
3467 /* Make sure the negate statement becomes dead with this
3468 single transformation. */
3469 if (!single_imm_use (var: gimple_assign_lhs (gs: use_stmt),
3470 use_p: &use_p, stmt: &neguse_stmt))
3471 return false;
3472
3473 /* Make sure the multiplication isn't also used on that stmt. */
3474 FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3475 if (USE_FROM_PTR (usep) == mul_result)
3476 return false;
3477
3478 /* Re-validate. */
3479 use_stmt = neguse_stmt;
3480 if (gimple_bb (g: use_stmt) != gimple_bb (g: mul_stmt))
3481 return false;
3482
3483 negate_p = seen_negate_p = true;
3484 }
3485
3486 tree cond, else_value, ops[3], len, bias;
3487 tree_code code;
3488 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
3489 &else_value, &len, &bias))
3490 return false;
3491
3492 switch (code)
3493 {
3494 case MINUS_EXPR:
3495 if (ops[1] == result)
3496 negate_p = !negate_p;
3497 break;
3498 case PLUS_EXPR:
3499 break;
3500 default:
3501 /* FMA can only be formed from PLUS and MINUS. */
3502 return false;
3503 }
3504
3505 if (len)
3506 {
	  /* For COND_LEN_* operations, we may have a dummy mask which is
	     the all-true mask.  The trees may then differ (mul_cond != cond)
	     but we still consider them equal.  */
3510 if (mul_cond && cond != mul_cond
3511 && !(integer_truep (mul_cond) && integer_truep (cond)))
3512 return false;
3513
3514 if (else_value == result)
3515 return false;
3516
3517 if (!direct_internal_fn_supported_p (IFN_COND_LEN_FMA, type,
3518 opt_type))
3519 return false;
3520
3521 if (mul_len)
3522 {
3523 poly_int64 mul_value, value;
3524 if (poly_int_tree_p (t: mul_len, value: &mul_value)
3525 && poly_int_tree_p (t: len, value: &value)
3526 && maybe_ne (a: mul_value, b: value))
3527 return false;
3528 else if (mul_len != len)
3529 return false;
3530
3531 if (wi::to_widest (t: mul_bias) != wi::to_widest (t: bias))
3532 return false;
3533 }
3534 }
3535 else
3536 {
3537 if (mul_cond && cond != mul_cond)
3538 return false;
3539
3540 if (cond)
3541 {
3542 if (cond == result || else_value == result)
3543 return false;
3544 if (!direct_internal_fn_supported_p (IFN_COND_FMA, type,
3545 opt_type))
3546 return false;
3547 }
3548 }
3549
3550 /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3551 we'll visit later, we might be able to get a more profitable
3552 match with fnma.
	 OTOH, if we don't, a negate / fma pair likely has lower latency
	 than a mult / subtract pair.  */
3555 if (code == MINUS_EXPR
3556 && !negate_p
3557 && ops[0] == result
3558 && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
3559 && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
3560 && TREE_CODE (ops[1]) == SSA_NAME
3561 && has_single_use (var: ops[1]))
3562 {
3563 gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
3564 if (is_gimple_assign (gs: stmt2)
3565 && gimple_assign_rhs_code (gs: stmt2) == MULT_EXPR)
3566 return false;
3567 }
3568
3569 /* We can't handle a * b + a * b. */
3570 if (ops[0] == ops[1])
3571 return false;
3572 /* If deferring, make sure we are not looking at an instruction that
3573 wouldn't have existed if we were not. */
3574 if (state->m_deferring_p
3575 && (state->m_mul_result_set.contains (k: ops[0])
3576 || state->m_mul_result_set.contains (k: ops[1])))
3577 return false;
3578
3579 if (check_defer)
3580 {
3581 tree use_lhs = gimple_get_lhs (use_stmt);
3582 if (state->m_last_result)
3583 {
3584 if (ops[1] == state->m_last_result
3585 || ops[0] == state->m_last_result)
3586 defer = true;
3587 else
3588 defer = false;
3589 }
3590 else
3591 {
3592 gcc_checking_assert (!state->m_initial_phi);
3593 gphi *phi;
3594 if (ops[0] == result)
3595 phi = result_of_phi (op: ops[1]);
3596 else
3597 {
3598 gcc_assert (ops[1] == result);
3599 phi = result_of_phi (op: ops[0]);
3600 }
3601
3602 if (phi)
3603 {
3604 state->m_initial_phi = phi;
3605 defer = true;
3606 }
3607 else
3608 defer = false;
3609 }
3610
3611 state->m_last_result = use_lhs;
3612 check_defer = false;
3613 }
3614 else
3615 defer = false;
3616
3617 /* While it is possible to validate whether or not the exact form that
3618 we've recognized is available in the backend, the assumption is that
3619 if the deferring logic above did not trigger, the transformation is
3620 never a loss. For instance, suppose the target only has the plain FMA
3621 pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged
3622 MUL+SUB for FMA+NEG, which is still two operations. Consider
3623 -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3624 form the two NEGs are independent and could be run in parallel. */
3625 }
3626
3627 if (defer)
3628 {
3629 fma_transformation_info fti;
3630 fti.mul_stmt = mul_stmt;
3631 fti.mul_result = mul_result;
3632 fti.op1 = op1;
3633 fti.op2 = op2;
3634 state->m_candidates.safe_push (obj: fti);
3635 state->m_mul_result_set.add (k: mul_result);
3636
3637 if (dump_file && (dump_flags & TDF_DETAILS))
3638 {
3639 fprintf (stream: dump_file, format: "Deferred generating FMA for multiplication ");
3640 print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
3641 fprintf (stream: dump_file, format: "\n");
3642 }
3643
3644 return false;
3645 }
3646 else
3647 {
3648 if (state->m_deferring_p)
3649 cancel_fma_deferring (state);
3650 convert_mult_to_fma_1 (mul_result, op1, op2);
3651 return true;
3652 }
3653}
3654
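/* For illustration only (a source-level sketch, not part of the pass;
   a, b, c are arbitrary doubles):  with -ffp-contract=fast the sequence

     double d = a * b + c;	// MULT_EXPR feeding a PLUS_EXPR

   is rewritten by convert_mult_to_fma into the equivalent of

     double d = __builtin_fma (a, b, c);

   and a * b - c can similarly become fma (a, b, -c) (or a single fms when
   the target has one), trading MUL+SUB for FMA+NEG, which is still two
   operations, as argued in the cost comment above.  */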
3655
3656/* Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have
3657 a check for non-zero like:
3658 _1 = x_4(D) * y_5(D);
3659 *res_7(D) = _1;
3660 if (x_4(D) != 0)
3661 goto <bb 3>; [50.00%]
3662 else
3663 goto <bb 4>; [50.00%]
3664
3665 <bb 3> [local count: 536870913]:
3666 _2 = _1 / x_4(D);
3667 _9 = _2 != y_5(D);
3668 _10 = (int) _9;
3669
3670 <bb 4> [local count: 1073741824]:
3671 # iftmp.0_3 = PHI <_10(3), 0(2)>
3672 then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
3673 optimize the x_4(D) != 0 condition to 1. */
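
/* For illustration, an assumed user-level source of the pattern above
   (names are hypothetical):

     int mul_overflows (unsigned x, unsigned y, unsigned *res)
     {
       *res = x * y;
       return x != 0 && *res / x != y;
     }

   Once match_arith_overflow rewrites the multiplication to .MUL_OVERFLOW
   and replaces the division-based comparison with a test of the overflow
   flag, the guarding x != 0 condition becomes redundant; folding it to a
   constant is what the function below does.  */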
3674
3675static void
3676maybe_optimize_guarding_check (vec<gimple *> &mul_stmts, gimple *cond_stmt,
3677 gimple *div_stmt, bool *cfg_changed)
3678{
3679 basic_block bb = gimple_bb (g: cond_stmt);
3680 if (gimple_bb (g: div_stmt) != bb || !single_pred_p (bb))
3681 return;
3682 edge pred_edge = single_pred_edge (bb);
3683 basic_block pred_bb = pred_edge->src;
3684 if (EDGE_COUNT (pred_bb->succs) != 2)
3685 return;
3686 edge other_edge = EDGE_SUCC (pred_bb, EDGE_SUCC (pred_bb, 0) == pred_edge);
3687 edge other_succ_edge = NULL;
3688 if (gimple_code (g: cond_stmt) == GIMPLE_COND)
3689 {
3690 if (EDGE_COUNT (bb->succs) != 2)
3691 return;
3692 other_succ_edge = EDGE_SUCC (bb, 0);
3693 if (gimple_cond_code (gs: cond_stmt) == NE_EXPR)
3694 {
3695 if (other_succ_edge->flags & EDGE_TRUE_VALUE)
3696 other_succ_edge = EDGE_SUCC (bb, 1);
3697 }
3698 else if (other_succ_edge->flags & EDGE_FALSE_VALUE)
3699 other_succ_edge = EDGE_SUCC (bb, 0);
3700 if (other_edge->dest != other_succ_edge->dest)
3701 return;
3702 }
3703 else if (!single_succ_p (bb) || other_edge->dest != single_succ (bb))
3704 return;
3705 gcond *zero_cond = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb: pred_bb));
3706 if (zero_cond == NULL
3707 || (gimple_cond_code (gs: zero_cond)
3708 != ((pred_edge->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR))
3709 || !integer_zerop (gimple_cond_rhs (gs: zero_cond)))
3710 return;
3711 tree zero_cond_lhs = gimple_cond_lhs (gs: zero_cond);
3712 if (TREE_CODE (zero_cond_lhs) != SSA_NAME)
3713 return;
3714 if (gimple_assign_rhs2 (gs: div_stmt) != zero_cond_lhs)
3715 {
      /* Allow the divisor to be the result of a same-precision cast
3717 from zero_cond_lhs. */
3718 tree rhs2 = gimple_assign_rhs2 (gs: div_stmt);
3719 if (TREE_CODE (rhs2) != SSA_NAME)
3720 return;
3721 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3722 if (!gimple_assign_cast_p (s: g)
3723 || gimple_assign_rhs1 (gs: g) != gimple_cond_lhs (gs: zero_cond)
3724 || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs))
3725 || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs))
3726 != TYPE_PRECISION (TREE_TYPE (rhs2))))
3727 return;
3728 }
3729 gimple_stmt_iterator gsi = gsi_after_labels (bb);
3730 mul_stmts.quick_push (obj: div_stmt);
3731 if (is_gimple_debug (gs: gsi_stmt (i: gsi)))
3732 gsi_next_nondebug (i: &gsi);
3733 unsigned cast_count = 0;
3734 while (gsi_stmt (i: gsi) != cond_stmt)
3735 {
      /* If the original mul_stmt has a single use, allow it in the same bb;
	 in that case we are looking just at __builtin_mul_overflow_p.
3738 Though, in that case the original mul_stmt will be replaced
3739 by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts. */
3740 gimple *mul_stmt;
3741 unsigned int i;
3742 bool ok = false;
3743 FOR_EACH_VEC_ELT (mul_stmts, i, mul_stmt)
3744 {
3745 if (gsi_stmt (i: gsi) == mul_stmt)
3746 {
3747 ok = true;
3748 break;
3749 }
3750 }
3751 if (!ok && gimple_assign_cast_p (s: gsi_stmt (i: gsi)) && ++cast_count < 4)
3752 ok = true;
3753 if (!ok)
3754 return;
3755 gsi_next_nondebug (i: &gsi);
3756 }
3757 if (gimple_code (g: cond_stmt) == GIMPLE_COND)
3758 {
3759 basic_block succ_bb = other_edge->dest;
3760 for (gphi_iterator gpi = gsi_start_phis (succ_bb); !gsi_end_p (i: gpi);
3761 gsi_next (i: &gpi))
3762 {
3763 gphi *phi = gpi.phi ();
3764 tree v1 = gimple_phi_arg_def (gs: phi, index: other_edge->dest_idx);
3765 tree v2 = gimple_phi_arg_def (gs: phi, index: other_succ_edge->dest_idx);
3766 if (!operand_equal_p (v1, v2, flags: 0))
3767 return;
3768 }
3769 }
3770 else
3771 {
3772 tree lhs = gimple_assign_lhs (gs: cond_stmt);
3773 if (!lhs || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
3774 return;
3775 gsi_next_nondebug (i: &gsi);
3776 if (!gsi_end_p (i: gsi))
3777 {
3778 if (gimple_assign_rhs_code (gs: cond_stmt) == COND_EXPR)
3779 return;
3780 gimple *cast_stmt = gsi_stmt (i: gsi);
3781 if (!gimple_assign_cast_p (s: cast_stmt))
3782 return;
3783 tree new_lhs = gimple_assign_lhs (gs: cast_stmt);
3784 gsi_next_nondebug (i: &gsi);
3785 if (!gsi_end_p (i: gsi)
3786 || !new_lhs
3787 || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs))
3788 || TYPE_PRECISION (TREE_TYPE (new_lhs)) <= 1)
3789 return;
3790 lhs = new_lhs;
3791 }
3792 edge succ_edge = single_succ_edge (bb);
3793 basic_block succ_bb = succ_edge->dest;
3794 gsi = gsi_start_phis (succ_bb);
3795 if (gsi_end_p (i: gsi))
3796 return;
3797 gphi *phi = as_a <gphi *> (p: gsi_stmt (i: gsi));
3798 gsi_next (i: &gsi);
3799 if (!gsi_end_p (i: gsi))
3800 return;
3801 if (gimple_phi_arg_def (gs: phi, index: succ_edge->dest_idx) != lhs)
3802 return;
3803 tree other_val = gimple_phi_arg_def (gs: phi, index: other_edge->dest_idx);
3804 if (gimple_assign_rhs_code (gs: cond_stmt) == COND_EXPR)
3805 {
3806 tree cond = gimple_assign_rhs1 (gs: cond_stmt);
3807 if (TREE_CODE (cond) == NE_EXPR)
3808 {
3809 if (!operand_equal_p (other_val,
3810 gimple_assign_rhs3 (gs: cond_stmt), flags: 0))
3811 return;
3812 }
3813 else if (!operand_equal_p (other_val,
3814 gimple_assign_rhs2 (gs: cond_stmt), flags: 0))
3815 return;
3816 }
3817 else if (gimple_assign_rhs_code (gs: cond_stmt) == NE_EXPR)
3818 {
3819 if (!integer_zerop (other_val))
3820 return;
3821 }
3822 else if (!integer_onep (other_val))
3823 return;
3824 }
3825 if (pred_edge->flags & EDGE_TRUE_VALUE)
3826 gimple_cond_make_true (gs: zero_cond);
3827 else
3828 gimple_cond_make_false (gs: zero_cond);
3829 update_stmt (s: zero_cond);
3830 *cfg_changed = true;
3831}
3832
3833/* Helper function for arith_overflow_check_p. Return true
3834 if VAL1 is equal to VAL2 cast to corresponding integral type
3835 with other signedness or vice versa. */
3836
3837static bool
3838arith_cast_equal_p (tree val1, tree val2)
3839{
3840 if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
3841 return wi::eq_p (x: wi::to_wide (t: val1), y: wi::to_wide (t: val2));
3842 else if (TREE_CODE (val1) != SSA_NAME || TREE_CODE (val2) != SSA_NAME)
3843 return false;
3844 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1))
3845 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1)) == val2)
3846 return true;
3847 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2))
3848 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2)) == val1)
3849 return true;
3850 return false;
3851}
3852
/* Helper function of match_arith_overflow.  Return 1
   if USE_STMT is an unsigned overflow check ovf != 0 for
   STMT, -1 if USE_STMT is an unsigned overflow check ovf == 0,
   and 0 otherwise.  */
3857
3858static int
3859arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
3860 tree maxval, tree *other)
3861{
3862 enum tree_code ccode = ERROR_MARK;
3863 tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3864 enum tree_code code = gimple_assign_rhs_code (gs: stmt);
3865 tree lhs = gimple_assign_lhs (gs: cast_stmt ? cast_stmt : stmt);
3866 tree rhs1 = gimple_assign_rhs1 (gs: stmt);
3867 tree rhs2 = gimple_assign_rhs2 (gs: stmt);
3868 tree multop = NULL_TREE, divlhs = NULL_TREE;
3869 gimple *cur_use_stmt = use_stmt;
3870
3871 if (code == MULT_EXPR)
3872 {
3873 if (!is_gimple_assign (gs: use_stmt))
3874 return 0;
3875 if (gimple_assign_rhs_code (gs: use_stmt) != TRUNC_DIV_EXPR)
3876 return 0;
3877 if (gimple_assign_rhs1 (gs: use_stmt) != lhs)
3878 return 0;
3879 if (cast_stmt)
3880 {
3881 if (arith_cast_equal_p (val1: gimple_assign_rhs2 (gs: use_stmt), val2: rhs1))
3882 multop = rhs2;
3883 else if (arith_cast_equal_p (val1: gimple_assign_rhs2 (gs: use_stmt), val2: rhs2))
3884 multop = rhs1;
3885 else
3886 return 0;
3887 }
3888 else if (gimple_assign_rhs2 (gs: use_stmt) == rhs1)
3889 multop = rhs2;
3890 else if (operand_equal_p (gimple_assign_rhs2 (gs: use_stmt), rhs2, flags: 0))
3891 multop = rhs1;
3892 else
3893 return 0;
3894 if (stmt_ends_bb_p (use_stmt))
3895 return 0;
3896 divlhs = gimple_assign_lhs (gs: use_stmt);
3897 if (!divlhs)
3898 return 0;
3899 use_operand_p use;
3900 if (!single_imm_use (var: divlhs, use_p: &use, stmt: &cur_use_stmt))
3901 return 0;
3902 if (cast_stmt && gimple_assign_cast_p (s: cur_use_stmt))
3903 {
3904 tree cast_lhs = gimple_assign_lhs (gs: cur_use_stmt);
3905 if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
3906 && TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
3907 && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
3908 == TYPE_PRECISION (TREE_TYPE (divlhs)))
3909 && single_imm_use (var: cast_lhs, use_p: &use, stmt: &cur_use_stmt))
3910 {
3911 cast_stmt = NULL;
3912 divlhs = cast_lhs;
3913 }
3914 else
3915 return 0;
3916 }
3917 }
3918 if (gimple_code (g: cur_use_stmt) == GIMPLE_COND)
3919 {
3920 ccode = gimple_cond_code (gs: cur_use_stmt);
3921 crhs1 = gimple_cond_lhs (gs: cur_use_stmt);
3922 crhs2 = gimple_cond_rhs (gs: cur_use_stmt);
3923 }
3924 else if (is_gimple_assign (gs: cur_use_stmt))
3925 {
3926 if (gimple_assign_rhs_class (gs: cur_use_stmt) == GIMPLE_BINARY_RHS)
3927 {
3928 ccode = gimple_assign_rhs_code (gs: cur_use_stmt);
3929 crhs1 = gimple_assign_rhs1 (gs: cur_use_stmt);
3930 crhs2 = gimple_assign_rhs2 (gs: cur_use_stmt);
3931 }
3932 else if (gimple_assign_rhs_code (gs: cur_use_stmt) == COND_EXPR)
3933 {
3934 tree cond = gimple_assign_rhs1 (gs: cur_use_stmt);
3935 if (COMPARISON_CLASS_P (cond))
3936 {
3937 ccode = TREE_CODE (cond);
3938 crhs1 = TREE_OPERAND (cond, 0);
3939 crhs2 = TREE_OPERAND (cond, 1);
3940 }
3941 else
3942 return 0;
3943 }
3944 else
3945 return 0;
3946 }
3947 else
3948 return 0;
3949
3950 if (TREE_CODE_CLASS (ccode) != tcc_comparison)
3951 return 0;
3952
3953 switch (ccode)
3954 {
3955 case GT_EXPR:
3956 case LE_EXPR:
3957 if (maxval)
3958 {
3959 /* r = a + b; r > maxval or r <= maxval */
3960 if (crhs1 == lhs
3961 && TREE_CODE (crhs2) == INTEGER_CST
3962 && tree_int_cst_equal (crhs2, maxval))
3963 return ccode == GT_EXPR ? 1 : -1;
3964 break;
3965 }
3966 /* r = a - b; r > a or r <= a
3967 r = a + b; a > r or a <= r or b > r or b <= r. */
3968 if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
3969 || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
3970 && crhs2 == lhs))
3971 return ccode == GT_EXPR ? 1 : -1;
3972 /* r = ~a; b > r or b <= r. */
3973 if (code == BIT_NOT_EXPR && crhs2 == lhs)
3974 {
3975 if (other)
3976 *other = crhs1;
3977 return ccode == GT_EXPR ? 1 : -1;
3978 }
3979 break;
3980 case LT_EXPR:
3981 case GE_EXPR:
3982 if (maxval)
3983 break;
3984 /* r = a - b; a < r or a >= r
3985 r = a + b; r < a or r >= a or r < b or r >= b. */
3986 if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
3987 || (code == PLUS_EXPR && crhs1 == lhs
3988 && (crhs2 == rhs1 || crhs2 == rhs2)))
3989 return ccode == LT_EXPR ? 1 : -1;
3990 /* r = ~a; r < b or r >= b. */
3991 if (code == BIT_NOT_EXPR && crhs1 == lhs)
3992 {
3993 if (other)
3994 *other = crhs2;
3995 return ccode == LT_EXPR ? 1 : -1;
3996 }
3997 break;
3998 case EQ_EXPR:
3999 case NE_EXPR:
4000 /* r = a * b; _1 = r / a; _1 == b
4001 r = a * b; _1 = r / b; _1 == a
4002 r = a * b; _1 = r / a; _1 != b
4003 r = a * b; _1 = r / b; _1 != a. */
4004 if (code == MULT_EXPR)
4005 {
4006 if (cast_stmt)
4007 {
4008 if ((crhs1 == divlhs && arith_cast_equal_p (val1: crhs2, val2: multop))
4009 || (crhs2 == divlhs && arith_cast_equal_p (val1: crhs1, val2: multop)))
4010 {
4011 use_stmt = cur_use_stmt;
4012 return ccode == NE_EXPR ? 1 : -1;
4013 }
4014 }
4015 else if ((crhs1 == divlhs && operand_equal_p (crhs2, multop, flags: 0))
4016 || (crhs2 == divlhs && crhs1 == multop))
4017 {
4018 use_stmt = cur_use_stmt;
4019 return ccode == NE_EXPR ? 1 : -1;
4020 }
4021 }
4022 break;
4023 default:
4024 break;
4025 }
4026 return 0;
4027}
4028
4029/* Recognize for unsigned x
4030 x = y - z;
4031 if (x > y)
4032 where there are other uses of x and replace it with
4033 _7 = .SUB_OVERFLOW (y, z);
4034 x = REALPART_EXPR <_7>;
4035 _8 = IMAGPART_EXPR <_7>;
4036 if (_8)
4037 and similarly for addition.
4038
4039 Also recognize:
4040 yc = (type) y;
4041 zc = (type) z;
4042 x = yc + zc;
4043 if (x > max)
   where y and z have unsigned types with maximum value max,
   there are other uses of x, and all of those cast x
   back to that unsigned type; again replace it with
4047 _7 = .ADD_OVERFLOW (y, z);
4048 _9 = REALPART_EXPR <_7>;
4049 _8 = IMAGPART_EXPR <_7>;
4050 if (_8)
4051 and replace (utype) x with _9.
4052
4053 Also recognize:
4054 x = ~z;
4055 if (y > x)
4056 and replace it with
4057 _7 = .ADD_OVERFLOW (y, z);
4058 _8 = IMAGPART_EXPR <_7>;
4059 if (_8)
4060
4061 And also recognize:
4062 z = x * y;
4063 if (x != 0)
4064 goto <bb 3>; [50.00%]
4065 else
4066 goto <bb 4>; [50.00%]
4067
4068 <bb 3> [local count: 536870913]:
4069 _2 = z / x;
4070 _9 = _2 != y;
4071 _10 = (int) _9;
4072
4073 <bb 4> [local count: 1073741824]:
4074 # iftmp.0_3 = PHI <_10(3), 0(2)>
4075 and replace it with
4076 _7 = .MUL_OVERFLOW (x, y);
   z = REALPART_EXPR <_7>;
4078 _8 = IMAGPART_EXPR <_7>;
4079 _9 = _8 != 0;
4080 iftmp.0_3 = (int) _9; */
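
/* For illustration, assumed user-level sources of the first two patterns
   above (names are hypothetical):

     unsigned sub (unsigned y, unsigned z, int *ovf)
     {
       unsigned x = y - z;
       *ovf = x > y;		// becomes IMAGPART_EXPR of .SUB_OVERFLOW
       return x;
     }

     unsigned add (unsigned y, unsigned z, int *ovf)
     {
       unsigned x = y + z;
       *ovf = x < y;		// becomes IMAGPART_EXPR of .ADD_OVERFLOW
       return x;
     }

   The x = ~z form works because for unsigned z we have ~z == UINT_MAX - z,
   so y > ~z holds exactly when y + z would wrap around.  */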
4081
4082static bool
4083match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
4084 enum tree_code code, bool *cfg_changed)
4085{
4086 tree lhs = gimple_assign_lhs (gs: stmt);
4087 tree type = TREE_TYPE (lhs);
4088 use_operand_p use_p;
4089 imm_use_iterator iter;
4090 bool use_seen = false;
4091 bool ovf_use_seen = false;
4092 gimple *use_stmt;
4093 gimple *add_stmt = NULL;
4094 bool add_first = false;
4095 gimple *cond_stmt = NULL;
4096 gimple *cast_stmt = NULL;
4097 tree cast_lhs = NULL_TREE;
4098
4099 gcc_checking_assert (code == PLUS_EXPR
4100 || code == MINUS_EXPR
4101 || code == MULT_EXPR
4102 || code == BIT_NOT_EXPR);
4103 if (!INTEGRAL_TYPE_P (type)
4104 || !TYPE_UNSIGNED (type)
4105 || has_zero_uses (var: lhs)
4106 || (code != PLUS_EXPR
4107 && code != MULT_EXPR
4108 && optab_handler (op: code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
4109 TYPE_MODE (type)) == CODE_FOR_nothing))
4110 return false;
4111
4112 tree rhs1 = gimple_assign_rhs1 (gs: stmt);
4113 tree rhs2 = gimple_assign_rhs2 (gs: stmt);
4114 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4115 {
4116 use_stmt = USE_STMT (use_p);
4117 if (is_gimple_debug (gs: use_stmt))
4118 continue;
4119
4120 tree other = NULL_TREE;
4121 if (arith_overflow_check_p (stmt, NULL, use_stmt, NULL_TREE, other: &other))
4122 {
4123 if (code == BIT_NOT_EXPR)
4124 {
4125 gcc_assert (other);
4126 if (TREE_CODE (other) != SSA_NAME)
4127 return false;
4128 if (rhs2 == NULL)
4129 rhs2 = other;
4130 else
4131 return false;
4132 cond_stmt = use_stmt;
4133 }
4134 ovf_use_seen = true;
4135 }
4136 else
4137 {
4138 use_seen = true;
4139 if (code == MULT_EXPR
4140 && cast_stmt == NULL
4141 && gimple_assign_cast_p (s: use_stmt))
4142 {
4143 cast_lhs = gimple_assign_lhs (gs: use_stmt);
4144 if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
4145 && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
4146 && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
4147 == TYPE_PRECISION (TREE_TYPE (lhs))))
4148 cast_stmt = use_stmt;
4149 else
4150 cast_lhs = NULL_TREE;
4151 }
4152 }
4153 if (ovf_use_seen && use_seen)
4154 break;
4155 }
4156
4157 if (!ovf_use_seen
4158 && code == MULT_EXPR
4159 && cast_stmt)
4160 {
4161 if (TREE_CODE (rhs1) != SSA_NAME
4162 || (TREE_CODE (rhs2) != SSA_NAME && TREE_CODE (rhs2) != INTEGER_CST))
4163 return false;
4164 FOR_EACH_IMM_USE_FAST (use_p, iter, cast_lhs)
4165 {
4166 use_stmt = USE_STMT (use_p);
4167 if (is_gimple_debug (gs: use_stmt))
4168 continue;
4169
4170 if (arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4171 NULL_TREE, NULL))
4172 ovf_use_seen = true;
4173 }
4174 }
4175 else
4176 {
4177 cast_stmt = NULL;
4178 cast_lhs = NULL_TREE;
4179 }
4180
4181 tree maxval = NULL_TREE;
4182 if (!ovf_use_seen
4183 || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
4184 || (code == PLUS_EXPR
4185 && optab_handler (op: uaddv4_optab,
4186 TYPE_MODE (type)) == CODE_FOR_nothing)
4187 || (code == MULT_EXPR
4188 && optab_handler (op: cast_stmt ? mulv4_optab : umulv4_optab,
4189 TYPE_MODE (type)) == CODE_FOR_nothing
4190 && (use_seen
4191 || cast_stmt
4192 || !can_mult_highpart_p (TYPE_MODE (type), true))))
4193 {
4194 if (code != PLUS_EXPR)
4195 return false;
4196 if (TREE_CODE (rhs1) != SSA_NAME
4197 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1)))
4198 return false;
4199 rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1));
4200 tree type1 = TREE_TYPE (rhs1);
4201 if (!INTEGRAL_TYPE_P (type1)
4202 || !TYPE_UNSIGNED (type1)
4203 || TYPE_PRECISION (type1) >= TYPE_PRECISION (type)
4204 || (TYPE_PRECISION (type1)
4205 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1))))
4206 return false;
4207 if (TREE_CODE (rhs2) == INTEGER_CST)
4208 {
4209 if (wi::ne_p (x: wi::rshift (x: wi::to_wide (t: rhs2),
4210 TYPE_PRECISION (type1),
4211 sgn: UNSIGNED), y: 0))
4212 return false;
4213 rhs2 = fold_convert (type1, rhs2);
4214 }
4215 else
4216 {
4217 if (TREE_CODE (rhs2) != SSA_NAME
4218 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2)))
4219 return false;
4220 rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2));
4221 tree type2 = TREE_TYPE (rhs2);
4222 if (!INTEGRAL_TYPE_P (type2)
4223 || !TYPE_UNSIGNED (type2)
4224 || TYPE_PRECISION (type2) >= TYPE_PRECISION (type)
4225 || (TYPE_PRECISION (type2)
4226 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2))))
4227 return false;
4228 }
4229 if (TYPE_PRECISION (type1) >= TYPE_PRECISION (TREE_TYPE (rhs2)))
4230 type = type1;
4231 else
4232 type = TREE_TYPE (rhs2);
4233
4234 if (TREE_CODE (type) != INTEGER_TYPE
4235 || optab_handler (op: uaddv4_optab,
4236 TYPE_MODE (type)) == CODE_FOR_nothing)
4237 return false;
4238
4239 maxval = wide_int_to_tree (type, cst: wi::max_value (TYPE_PRECISION (type),
4240 UNSIGNED));
4241 ovf_use_seen = false;
4242 use_seen = false;
4243 basic_block use_bb = NULL;
4244 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4245 {
4246 use_stmt = USE_STMT (use_p);
4247 if (is_gimple_debug (gs: use_stmt))
4248 continue;
4249
4250 if (arith_overflow_check_p (stmt, NULL, use_stmt, maxval, NULL))
4251 {
4252 ovf_use_seen = true;
4253 use_bb = gimple_bb (g: use_stmt);
4254 }
4255 else
4256 {
4257 if (!gimple_assign_cast_p (s: use_stmt)
4258 || gimple_assign_rhs_code (gs: use_stmt) == VIEW_CONVERT_EXPR)
4259 return false;
4260 tree use_lhs = gimple_assign_lhs (gs: use_stmt);
4261 if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs))
4262 || (TYPE_PRECISION (TREE_TYPE (use_lhs))
4263 > TYPE_PRECISION (type)))
4264 return false;
4265 use_seen = true;
4266 }
4267 }
4268 if (!ovf_use_seen)
4269 return false;
4270 if (!useless_type_conversion_p (type, TREE_TYPE (rhs1)))
4271 {
4272 if (!use_seen)
4273 return false;
4274 tree new_rhs1 = make_ssa_name (var: type);
4275 gimple *g = gimple_build_assign (new_rhs1, NOP_EXPR, rhs1);
4276 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4277 rhs1 = new_rhs1;
4278 }
4279 else if (!useless_type_conversion_p (type, TREE_TYPE (rhs2)))
4280 {
4281 if (!use_seen)
4282 return false;
4283 tree new_rhs2 = make_ssa_name (var: type);
4284 gimple *g = gimple_build_assign (new_rhs2, NOP_EXPR, rhs2);
4285 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4286 rhs2 = new_rhs2;
4287 }
4288 else if (!use_seen)
4289 {
	  /* If there are no uses of the wider addition, check whether
	     forwprop has created a narrower addition.
4292 Require it to be in the same bb as the overflow check. */
4293 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs1)
4294 {
4295 use_stmt = USE_STMT (use_p);
4296 if (is_gimple_debug (gs: use_stmt))
4297 continue;
4298
4299 if (use_stmt == stmt)
4300 continue;
4301
4302 if (!is_gimple_assign (gs: use_stmt)
4303 || gimple_bb (g: use_stmt) != use_bb
4304 || gimple_assign_rhs_code (gs: use_stmt) != PLUS_EXPR)
4305 continue;
4306
4307 if (gimple_assign_rhs1 (gs: use_stmt) == rhs1)
4308 {
4309 if (!operand_equal_p (gimple_assign_rhs2 (gs: use_stmt),
4310 rhs2, flags: 0))
4311 continue;
4312 }
4313 else if (gimple_assign_rhs2 (gs: use_stmt) == rhs1)
4314 {
4315 if (gimple_assign_rhs1 (gs: use_stmt) != rhs2)
4316 continue;
4317 }
4318 else
4319 continue;
4320
4321 add_stmt = use_stmt;
4322 break;
4323 }
4324 if (add_stmt == NULL)
4325 return false;
4326
4327 /* If stmt and add_stmt are in the same bb, we need to find out
4328 which one is earlier. If they are in different bbs, we've
4329 checked add_stmt is in the same bb as one of the uses of the
4330 stmt lhs, so stmt needs to dominate add_stmt too. */
4331 if (gimple_bb (g: stmt) == gimple_bb (g: add_stmt))
4332 {
4333 gimple_stmt_iterator gsif = *gsi;
4334 gimple_stmt_iterator gsib = *gsi;
4335 int i;
4336 /* Search both forward and backward from stmt and have a small
4337 upper bound. */
4338 for (i = 0; i < 128; i++)
4339 {
4340 if (!gsi_end_p (i: gsib))
4341 {
4342 gsi_prev_nondebug (i: &gsib);
4343 if (gsi_stmt (i: gsib) == add_stmt)
4344 {
4345 add_first = true;
4346 break;
4347 }
4348 }
4349 else if (gsi_end_p (i: gsif))
4350 break;
4351 if (!gsi_end_p (i: gsif))
4352 {
4353 gsi_next_nondebug (i: &gsif);
4354 if (gsi_stmt (i: gsif) == add_stmt)
4355 break;
4356 }
4357 }
4358 if (i == 128)
4359 return false;
4360 if (add_first)
4361 *gsi = gsi_for_stmt (add_stmt);
4362 }
4363 }
4364 }
4365
4366 if (code == BIT_NOT_EXPR)
4367 *gsi = gsi_for_stmt (cond_stmt);
4368
4369 auto_vec<gimple *, 8> mul_stmts;
4370 if (code == MULT_EXPR && cast_stmt)
4371 {
4372 type = TREE_TYPE (cast_lhs);
4373 gimple *g = SSA_NAME_DEF_STMT (rhs1);
4374 if (gimple_assign_cast_p (s: g)
4375 && useless_type_conversion_p (type,
4376 TREE_TYPE (gimple_assign_rhs1 (g)))
4377 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4378 rhs1 = gimple_assign_rhs1 (gs: g);
4379 else
4380 {
4381 g = gimple_build_assign (make_ssa_name (var: type), NOP_EXPR, rhs1);
4382 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4383 rhs1 = gimple_assign_lhs (gs: g);
4384 mul_stmts.quick_push (obj: g);
4385 }
4386 if (TREE_CODE (rhs2) == INTEGER_CST)
4387 rhs2 = fold_convert (type, rhs2);
4388 else
4389 {
4390 g = SSA_NAME_DEF_STMT (rhs2);
4391 if (gimple_assign_cast_p (s: g)
4392 && useless_type_conversion_p (type,
4393 TREE_TYPE (gimple_assign_rhs1 (g)))
4394 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4395 rhs2 = gimple_assign_rhs1 (gs: g);
4396 else
4397 {
4398 g = gimple_build_assign (make_ssa_name (var: type), NOP_EXPR, rhs2);
4399 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4400 rhs2 = gimple_assign_lhs (gs: g);
4401 mul_stmts.quick_push (obj: g);
4402 }
4403 }
4404 }
4405 tree ctype = build_complex_type (type);
4406 gcall *g = gimple_build_call_internal (code == MULT_EXPR
4407 ? IFN_MUL_OVERFLOW
4408 : code != MINUS_EXPR
4409 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
4410 2, rhs1, rhs2);
4411 tree ctmp = make_ssa_name (var: ctype);
4412 gimple_call_set_lhs (gs: g, lhs: ctmp);
4413 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4414 tree new_lhs = (maxval || cast_stmt) ? make_ssa_name (var: type) : lhs;
4415 gassign *g2;
4416 if (code != BIT_NOT_EXPR)
4417 {
4418 g2 = gimple_build_assign (new_lhs, REALPART_EXPR,
4419 build1 (REALPART_EXPR, type, ctmp));
4420 if (maxval || cast_stmt)
4421 {
4422 gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4423 if (add_first)
4424 *gsi = gsi_for_stmt (stmt);
4425 }
4426 else
4427 gsi_replace (gsi, g2, true);
4428 if (code == MULT_EXPR)
4429 {
4430 mul_stmts.quick_push (obj: g);
4431 mul_stmts.quick_push (obj: g2);
4432 if (cast_stmt)
4433 {
4434 g2 = gimple_build_assign (lhs, NOP_EXPR, new_lhs);
4435 gsi_replace (gsi, g2, true);
4436 mul_stmts.quick_push (obj: g2);
4437 }
4438 }
4439 }
4440 tree ovf = make_ssa_name (var: type);
4441 g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
4442 build1 (IMAGPART_EXPR, type, ctmp));
4443 if (code != BIT_NOT_EXPR)
4444 gsi_insert_after (gsi, g2, GSI_NEW_STMT);
4445 else
4446 gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4447 if (code == MULT_EXPR)
4448 mul_stmts.quick_push (obj: g2);
4449
4450 FOR_EACH_IMM_USE_STMT (use_stmt, iter, cast_lhs ? cast_lhs : lhs)
4451 {
4452 if (is_gimple_debug (gs: use_stmt))
4453 continue;
4454
4455 gimple *orig_use_stmt = use_stmt;
4456 int ovf_use = arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4457 maxval, NULL);
4458 if (ovf_use == 0)
4459 {
4460 gcc_assert (code != BIT_NOT_EXPR);
4461 if (maxval)
4462 {
4463 tree use_lhs = gimple_assign_lhs (gs: use_stmt);
4464 gimple_assign_set_rhs1 (gs: use_stmt, rhs: new_lhs);
4465 if (useless_type_conversion_p (TREE_TYPE (use_lhs),
4466 TREE_TYPE (new_lhs)))
4467 gimple_assign_set_rhs_code (s: use_stmt, code: SSA_NAME);
4468 update_stmt (s: use_stmt);
4469 }
4470 continue;
4471 }
4472 if (gimple_code (g: use_stmt) == GIMPLE_COND)
4473 {
4474 gcond *cond_stmt = as_a <gcond *> (p: use_stmt);
4475 gimple_cond_set_lhs (gs: cond_stmt, lhs: ovf);
4476 gimple_cond_set_rhs (gs: cond_stmt, rhs: build_int_cst (type, 0));
4477 gimple_cond_set_code (gs: cond_stmt, code: ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4478 }
4479 else
4480 {
4481 gcc_checking_assert (is_gimple_assign (use_stmt));
4482 if (gimple_assign_rhs_class (gs: use_stmt) == GIMPLE_BINARY_RHS)
4483 {
4484 gimple_assign_set_rhs1 (gs: use_stmt, rhs: ovf);
4485 gimple_assign_set_rhs2 (gs: use_stmt, rhs: build_int_cst (type, 0));
4486 gimple_assign_set_rhs_code (s: use_stmt,
4487 code: ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4488 }
4489 else
4490 {
4491 gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
4492 == COND_EXPR);
4493 tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
4494 boolean_type_node, ovf,
4495 build_int_cst (type, 0));
4496 gimple_assign_set_rhs1 (gs: use_stmt, rhs: cond);
4497 }
4498 }
4499 update_stmt (s: use_stmt);
4500 if (code == MULT_EXPR && use_stmt != orig_use_stmt)
4501 {
4502 gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt);
4503 maybe_optimize_guarding_check (mul_stmts, cond_stmt: use_stmt, div_stmt: orig_use_stmt,
4504 cfg_changed);
4505 use_operand_p use;
4506 gimple *cast_stmt;
4507 if (single_imm_use (var: gimple_assign_lhs (gs: orig_use_stmt), use_p: &use,
4508 stmt: &cast_stmt)
4509 && gimple_assign_cast_p (s: cast_stmt))
4510 {
4511 gimple_stmt_iterator gsi3 = gsi_for_stmt (cast_stmt);
4512 gsi_remove (&gsi3, true);
4513 release_ssa_name (name: gimple_assign_lhs (gs: cast_stmt));
4514 }
4515 gsi_remove (&gsi2, true);
4516 release_ssa_name (name: gimple_assign_lhs (gs: orig_use_stmt));
4517 }
4518 }
4519 if (maxval)
4520 {
4521 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
4522 gsi_remove (&gsi2, true);
4523 if (add_stmt)
4524 {
4525 gimple *g = gimple_build_assign (gimple_assign_lhs (gs: add_stmt),
4526 new_lhs);
4527 gsi2 = gsi_for_stmt (add_stmt);
4528 gsi_replace (&gsi2, g, true);
4529 }
4530 }
4531 else if (code == BIT_NOT_EXPR)
4532 {
4533 *gsi = gsi_for_stmt (stmt);
4534 gsi_remove (gsi, true);
4535 release_ssa_name (name: lhs);
4536 return true;
4537 }
4538 return false;
4539}
4540
/* Helper of match_uaddc_usubc.  Look through an integral cast
   which should preserve the [0, 1] value range (unless the source
   has a 1-bit signed type) and which has a single use.  */
4544
4545static gimple *
4546uaddc_cast (gimple *g)
4547{
4548 if (!gimple_assign_cast_p (s: g))
4549 return g;
4550 tree op = gimple_assign_rhs1 (gs: g);
4551 if (TREE_CODE (op) == SSA_NAME
4552 && INTEGRAL_TYPE_P (TREE_TYPE (op))
4553 && (TYPE_PRECISION (TREE_TYPE (op)) > 1
4554 || TYPE_UNSIGNED (TREE_TYPE (op)))
4555 && has_single_use (var: gimple_assign_lhs (gs: g)))
4556 return SSA_NAME_DEF_STMT (op);
4557 return g;
4558}
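
/* E.g. a carry computed as a comparison result and widened as in

     carry = (unsigned long) (sum < a);

   reaches the next limb's addition through such a cast of a [0, 1]
   valued operand (an assumed example, not the only possible form).  */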
4559
4560/* Helper of match_uaddc_usubc. Look through a NE_EXPR
4561 comparison with 0 which also preserves [0, 1] value range. */
4562
4563static gimple *
4564uaddc_ne0 (gimple *g)
4565{
4566 if (is_gimple_assign (gs: g)
4567 && gimple_assign_rhs_code (gs: g) == NE_EXPR
4568 && integer_zerop (gimple_assign_rhs2 (gs: g))
4569 && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
4570 && has_single_use (var: gimple_assign_lhs (gs: g)))
4571 return SSA_NAME_DEF_STMT (gimple_assign_rhs1 (g));
4572 return g;
4573}
4574
4575/* Return true if G is {REAL,IMAG}PART_EXPR PART with SSA_NAME
4576 operand. */
4577
4578static bool
4579uaddc_is_cplxpart (gimple *g, tree_code part)
4580{
4581 return (is_gimple_assign (gs: g)
4582 && gimple_assign_rhs_code (gs: g) == part
4583 && TREE_CODE (TREE_OPERAND (gimple_assign_rhs1 (g), 0)) == SSA_NAME);
4584}
4585
4586/* Try to match e.g.
4587 _29 = .ADD_OVERFLOW (_3, _4);
4588 _30 = REALPART_EXPR <_29>;
4589 _31 = IMAGPART_EXPR <_29>;
4590 _32 = .ADD_OVERFLOW (_30, _38);
4591 _33 = REALPART_EXPR <_32>;
4592 _34 = IMAGPART_EXPR <_32>;
4593 _35 = _31 + _34;
4594 as
4595 _36 = .UADDC (_3, _4, _38);
4596 _33 = REALPART_EXPR <_36>;
4597 _35 = IMAGPART_EXPR <_36>;
4598 or
4599 _22 = .SUB_OVERFLOW (_6, _5);
4600 _23 = REALPART_EXPR <_22>;
4601 _24 = IMAGPART_EXPR <_22>;
4602 _25 = .SUB_OVERFLOW (_23, _37);
4603 _26 = REALPART_EXPR <_25>;
4604 _27 = IMAGPART_EXPR <_25>;
4605 _28 = _24 | _27;
4606 as
4607 _29 = .USUBC (_6, _5, _37);
4608 _26 = REALPART_EXPR <_29>;
     _28 = IMAGPART_EXPR <_29>;
4610 provided _38 or _37 above have [0, 1] range
4611 and _3, _4 and _30 or _6, _5 and _23 are unsigned
4612 integral types with the same precision. Whether + or | or ^ is
   used on the IMAGPART_EXPR results doesn't matter: with one of the
   added or subtracted operands in the [0, 1] range, at most one
   .ADD_OVERFLOW or .SUB_OVERFLOW will indicate overflow.  */
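
/* For illustration, an assumed user-level source of the addition pattern
   above, one middle limb of a multi-word addition (names are hypothetical):

     unsigned long sum;
     unsigned long c1 = __builtin_add_overflow (a, b, &sum);
     unsigned long c2 = __builtin_add_overflow (sum, carry_in, &sum);
     carry_out = c1 + c2;	// or c1 | c2, or c1 ^ c2

   Each __builtin_add_overflow call becomes a .ADD_OVERFLOW internal call,
   and the whole sequence is what the matcher below replaces with a single
   .UADDC (a, b, carry_in) call.  */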
4616
4617static bool
4618match_uaddc_usubc (gimple_stmt_iterator *gsi, gimple *stmt, tree_code code)
4619{
4620 tree rhs[4];
4621 rhs[0] = gimple_assign_rhs1 (gs: stmt);
4622 rhs[1] = gimple_assign_rhs2 (gs: stmt);
4623 rhs[2] = NULL_TREE;
4624 rhs[3] = NULL_TREE;
4625 tree type = TREE_TYPE (rhs[0]);
4626 if (!INTEGRAL_TYPE_P (type) || !TYPE_UNSIGNED (type))
4627 return false;
4628
4629 auto_vec<gimple *, 2> temp_stmts;
4630 if (code != BIT_IOR_EXPR && code != BIT_XOR_EXPR)
4631 {
      /* If the overflow flag is ignored on the MSB limb, we can end up with
4633 the most significant limb handled as r = op1 + op2 + ovf1 + ovf2;
4634 or r = op1 - op2 - ovf1 - ovf2; or various equivalent expressions
4635 thereof. Handle those like the ovf = ovf1 + ovf2; case to recognize
4636 the limb below the MSB, but also create another .UADDC/.USUBC call
4637 for the last limb.
4638
4639 First look through assignments with the same rhs code as CODE,
4640 with the exception that subtraction of a constant is canonicalized
	 into addition of its negation.  rhs[0] will be the minuend for
	 subtractions and one of the addends for addition; all other assigned
	 rhs[i] operands will be subtrahends or other addends.  */
4644 while (TREE_CODE (rhs[0]) == SSA_NAME && !rhs[3])
4645 {
4646 gimple *g = SSA_NAME_DEF_STMT (rhs[0]);
4647 if (has_single_use (var: rhs[0])
4648 && is_gimple_assign (gs: g)
4649 && (gimple_assign_rhs_code (gs: g) == code
4650 || (code == MINUS_EXPR
4651 && gimple_assign_rhs_code (gs: g) == PLUS_EXPR
4652 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST)))
4653 {
4654 tree r2 = gimple_assign_rhs2 (gs: g);
4655 if (gimple_assign_rhs_code (gs: g) != code)
4656 {
4657 r2 = const_unop (NEGATE_EXPR, TREE_TYPE (r2), r2);
4658 if (!r2)
4659 break;
4660 }
4661 rhs[0] = gimple_assign_rhs1 (gs: g);
4662 tree &r = rhs[2] ? rhs[3] : rhs[2];
4663 r = r2;
4664 temp_stmts.quick_push (obj: g);
4665 }
4666 else
4667 break;
4668 }
4669 for (int i = 1; i <= 2; ++i)
4670 while (rhs[i] && TREE_CODE (rhs[i]) == SSA_NAME && !rhs[3])
4671 {
4672 gimple *g = SSA_NAME_DEF_STMT (rhs[i]);
4673 if (has_single_use (var: rhs[i])
4674 && is_gimple_assign (gs: g)
4675 && gimple_assign_rhs_code (gs: g) == PLUS_EXPR)
4676 {
4677 rhs[i] = gimple_assign_rhs1 (gs: g);
4678 if (rhs[2])
4679 rhs[3] = gimple_assign_rhs2 (gs: g);
4680 else
4681 rhs[2] = gimple_assign_rhs2 (gs: g);
4682 temp_stmts.quick_push (obj: g);
4683 }
4684 else
4685 break;
4686 }
4687 /* If there are just 3 addends or one minuend and two subtrahends,
4688 check for UADDC or USUBC being pattern recognized earlier.
4689 Say r = op1 + op2 + ovf1 + ovf2; where the (ovf1 + ovf2) part
4690 got pattern matched earlier as __imag__ .UADDC (arg1, arg2, arg3)
4691 etc. */
4692 if (rhs[2] && !rhs[3])
4693 {
4694 for (int i = (code == MINUS_EXPR ? 1 : 0); i < 3; ++i)
4695 if (TREE_CODE (rhs[i]) == SSA_NAME)
4696 {
4697 gimple *im = uaddc_cast (SSA_NAME_DEF_STMT (rhs[i]));
4698 im = uaddc_ne0 (g: im);
4699 if (uaddc_is_cplxpart (g: im, part: IMAGPART_EXPR))
4700 {
4701 /* We found one of the 3 addends or 2 subtrahends to be
4702 __imag__ of something, verify it is .UADDC/.USUBC. */
4703 tree rhs1 = gimple_assign_rhs1 (gs: im);
4704 gimple *ovf = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs1, 0));
4705 tree ovf_lhs = NULL_TREE;
4706 tree ovf_arg1 = NULL_TREE, ovf_arg2 = NULL_TREE;
4707 if (gimple_call_internal_p (gs: ovf, fn: code == PLUS_EXPR
4708 ? IFN_ADD_OVERFLOW
4709 : IFN_SUB_OVERFLOW))
4710 {
4711 /* Or verify it is .ADD_OVERFLOW/.SUB_OVERFLOW.
4712 This is for the case of 2 chained .UADDC/.USUBC,
4713 where the first one uses 0 carry-in and the second
4714 one ignores the carry-out.
4715 So, something like:
4716 _16 = .ADD_OVERFLOW (_1, _2);
4717 _17 = REALPART_EXPR <_16>;
4718 _18 = IMAGPART_EXPR <_16>;
4719 _15 = _3 + _4;
4720 _12 = _15 + _18;
4721 where the first 3 statements come from the lower
4722 limb addition and the last 2 from the higher limb
4723 which ignores carry-out. */
4724 ovf_lhs = gimple_call_lhs (gs: ovf);
4725 tree ovf_lhs_type = TREE_TYPE (TREE_TYPE (ovf_lhs));
4726 ovf_arg1 = gimple_call_arg (gs: ovf, index: 0);
4727 ovf_arg2 = gimple_call_arg (gs: ovf, index: 1);
			    /* In that case we need to punt if the types
			       don't match.  */
4730 if (!types_compatible_p (type1: type, type2: ovf_lhs_type)
4731 || !types_compatible_p (type1: type, TREE_TYPE (ovf_arg1))
4732 || !types_compatible_p (type1: type,
4733 TREE_TYPE (ovf_arg2)))
4734 ovf_lhs = NULL_TREE;
4735 else
4736 {
4737 for (int i = (code == PLUS_EXPR ? 1 : 0);
4738 i >= 0; --i)
4739 {
4740 tree r = gimple_call_arg (gs: ovf, index: i);
4741 if (TREE_CODE (r) != SSA_NAME)
4742 continue;
4743 if (uaddc_is_cplxpart (SSA_NAME_DEF_STMT (r),
4744 part: REALPART_EXPR))
4745 {
4746 /* Punt if one of the args which isn't
4747 subtracted isn't __real__; that could
4748 then prevent better match later.
4749 Consider:
4750 _3 = .ADD_OVERFLOW (_1, _2);
4751 _4 = REALPART_EXPR <_3>;
4752 _5 = IMAGPART_EXPR <_3>;
4753 _7 = .ADD_OVERFLOW (_4, _6);
4754 _8 = REALPART_EXPR <_7>;
4755 _9 = IMAGPART_EXPR <_7>;
4756 _12 = _10 + _11;
4757 _13 = _12 + _9;
4758 _14 = _13 + _5;
4759 We want to match this when called on
4760 the last stmt as a pair of .UADDC calls,
4761 but without this check we could turn
4762 that prematurely on _13 = _12 + _9;
4763 stmt into .UADDC with 0 carry-in just
4764 on the second .ADD_OVERFLOW call and
4765 another replacing the _12 and _13
4766 additions. */
4767 ovf_lhs = NULL_TREE;
4768 break;
4769 }
4770 }
4771 }
4772 if (ovf_lhs)
4773 {
4774 use_operand_p use_p;
4775 imm_use_iterator iter;
4776 tree re_lhs = NULL_TREE;
4777 FOR_EACH_IMM_USE_FAST (use_p, iter, ovf_lhs)
4778 {
4779 gimple *use_stmt = USE_STMT (use_p);
4780 if (is_gimple_debug (gs: use_stmt))
4781 continue;
4782 if (use_stmt == im)
4783 continue;
4784 if (!uaddc_is_cplxpart (g: use_stmt,
4785 part: REALPART_EXPR))
4786 {
4787 ovf_lhs = NULL_TREE;
4788 break;
4789 }
4790 re_lhs = gimple_assign_lhs (gs: use_stmt);
4791 }
4792 if (ovf_lhs && re_lhs)
4793 {
4794 FOR_EACH_IMM_USE_FAST (use_p, iter, re_lhs)
4795 {
4796 gimple *use_stmt = USE_STMT (use_p);
4797 if (is_gimple_debug (gs: use_stmt))
4798 continue;
4799 internal_fn ifn
4800 = gimple_call_internal_fn (gs: ovf);
4801 /* Punt if the __real__ of lhs is used
4802 in the same .*_OVERFLOW call.
4803 Consider:
4804 _3 = .ADD_OVERFLOW (_1, _2);
4805 _4 = REALPART_EXPR <_3>;
4806 _5 = IMAGPART_EXPR <_3>;
4807 _7 = .ADD_OVERFLOW (_4, _6);
4808 _8 = REALPART_EXPR <_7>;
4809 _9 = IMAGPART_EXPR <_7>;
4810 _12 = _10 + _11;
4811 _13 = _12 + _5;
4812 _14 = _13 + _9;
4813 We want to match this when called on
4814 the last stmt as a pair of .UADDC calls,
4815 but without this check we could turn
4816 that prematurely on _13 = _12 + _5;
4817 stmt into .UADDC with 0 carry-in just
4818 on the first .ADD_OVERFLOW call and
4819 another replacing the _12 and _13
4820 additions. */
4821 if (gimple_call_internal_p (gs: use_stmt, fn: ifn))
4822 {
4823 ovf_lhs = NULL_TREE;
4824 break;
4825 }
4826 }
4827 }
4828 }
4829 }
4830 if ((ovf_lhs
4831 || gimple_call_internal_p (gs: ovf,
4832 fn: code == PLUS_EXPR
4833 ? IFN_UADDC : IFN_USUBC))
4834 && (optab_handler (op: code == PLUS_EXPR
4835 ? uaddc5_optab : usubc5_optab,
4836 TYPE_MODE (type))
4837 != CODE_FOR_nothing))
4838 {
4839 /* And in that case build another .UADDC/.USUBC
			 call for the most significant limb addition.
			 The overflow bit is ignored here.  */
4842 if (i != 2)
4843 std::swap (a&: rhs[i], b&: rhs[2]);
4844 gimple *g
4845 = gimple_build_call_internal (code == PLUS_EXPR
4846 ? IFN_UADDC
4847 : IFN_USUBC,
4848 3, rhs[0], rhs[1],
4849 rhs[2]);
4850 tree nlhs = make_ssa_name (var: build_complex_type (type));
4851 gimple_call_set_lhs (gs: g, lhs: nlhs);
4852 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4853 tree ilhs = gimple_assign_lhs (gs: stmt);
4854 g = gimple_build_assign (ilhs, REALPART_EXPR,
4855 build1 (REALPART_EXPR,
4856 TREE_TYPE (ilhs),
4857 nlhs));
4858 gsi_replace (gsi, g, true);
4859 /* And if it is initialized from result of __imag__
4860 of .{ADD,SUB}_OVERFLOW call, replace that
4861 call with .U{ADD,SUB}C call with the same arguments,
4862 just 0 added as third argument. This isn't strictly
4863 necessary, .ADD_OVERFLOW (x, y) and .UADDC (x, y, 0)
4864 produce the same result, but may result in better
4865 generated code on some targets where the backend can
			     better prepare for how the result will be used.  */
4867 if (ovf_lhs)
4868 {
4869 tree zero = build_zero_cst (type);
4870 g = gimple_build_call_internal (code == PLUS_EXPR
4871 ? IFN_UADDC
4872 : IFN_USUBC,
4873 3, ovf_arg1,
4874 ovf_arg2, zero);
4875 gimple_call_set_lhs (gs: g, lhs: ovf_lhs);
4876 gimple_stmt_iterator gsi2 = gsi_for_stmt (ovf);
4877 gsi_replace (&gsi2, g, true);
4878 }
4879 return true;
4880 }
4881 }
4882 }
4883 return false;
4884 }
4885 if (code == MINUS_EXPR && !rhs[2])
4886 return false;
4887 if (code == MINUS_EXPR)
4888 /* Code below expects rhs[0] and rhs[1] to have the IMAGPART_EXPRs.
4889 So, for MINUS_EXPR swap the single added rhs operand (others are
4890 subtracted) to rhs[3]. */
4891 std::swap (a&: rhs[0], b&: rhs[3]);
4892 }
4893 /* Walk from both operands of STMT (for +/- even sometimes from
4894 all the 4 addends or 3 subtrahends), see through casts and != 0
4895 statements which would preserve [0, 1] range of values and
4896 check which is initialized from __imag__. */
4897 gimple *im1 = NULL, *im2 = NULL;
4898 for (int i = 0; i < (code == MINUS_EXPR ? 3 : 4); i++)
4899 if (rhs[i] && TREE_CODE (rhs[i]) == SSA_NAME)
4900 {
4901 gimple *im = uaddc_cast (SSA_NAME_DEF_STMT (rhs[i]));
4902 im = uaddc_ne0 (g: im);
4903 if (uaddc_is_cplxpart (g: im, part: IMAGPART_EXPR))
4904 {
4905 if (im1 == NULL)
4906 {
4907 im1 = im;
4908 if (i != 0)
4909 std::swap (a&: rhs[0], b&: rhs[i]);
4910 }
4911 else
4912 {
4913 im2 = im;
4914 if (i != 1)
4915 std::swap (a&: rhs[1], b&: rhs[i]);
4916 break;
4917 }
4918 }
4919 }
4920 /* If we don't find at least two, punt. */
4921 if (!im2)
4922 return false;
4923 /* Check they are __imag__ of .ADD_OVERFLOW or .SUB_OVERFLOW call results,
4924 either both .ADD_OVERFLOW or both .SUB_OVERFLOW and that we have
4925 uaddc5/usubc5 named pattern for the corresponding mode. */
4926 gimple *ovf1
4927 = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im1), 0));
4928 gimple *ovf2
4929 = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im2), 0));
4930 internal_fn ifn;
4931 if (!is_gimple_call (gs: ovf1)
4932 || !gimple_call_internal_p (gs: ovf1)
4933 || ((ifn = gimple_call_internal_fn (gs: ovf1)) != IFN_ADD_OVERFLOW
4934 && ifn != IFN_SUB_OVERFLOW)
4935 || !gimple_call_internal_p (gs: ovf2, fn: ifn)
4936 || optab_handler (op: ifn == IFN_ADD_OVERFLOW ? uaddc5_optab : usubc5_optab,
4937 TYPE_MODE (type)) == CODE_FOR_nothing
4938 || (rhs[2]
4939 && optab_handler (op: code == PLUS_EXPR ? uaddc5_optab : usubc5_optab,
4940 TYPE_MODE (type)) == CODE_FOR_nothing))
4941 return false;
4942 tree arg1, arg2, arg3 = NULL_TREE;
4943 gimple *re1 = NULL, *re2 = NULL;
4944 /* On one of the two calls, one of the .ADD_OVERFLOW/.SUB_OVERFLOW arguments
4945 should be initialized from __real__ of the other of the two calls.
4946 Though, for .SUB_OVERFLOW, it has to be the first argument, not the
4947 second one. */
4948 for (int i = (ifn == IFN_ADD_OVERFLOW ? 1 : 0); i >= 0; --i)
4949 for (gimple *ovf = ovf1; ovf; ovf = (ovf == ovf1 ? ovf2 : NULL))
4950 {
4951 tree arg = gimple_call_arg (gs: ovf, index: i);
4952 if (TREE_CODE (arg) != SSA_NAME)
4953 continue;
4954 re1 = SSA_NAME_DEF_STMT (arg);
4955 if (uaddc_is_cplxpart (g: re1, part: REALPART_EXPR)
4956 && (SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (re1), 0))
4957 == (ovf == ovf1 ? ovf2 : ovf1)))
4958 {
4959 if (ovf == ovf1)
4960 {
4961 /* Make sure ovf2 is the .*_OVERFLOW call with argument
4962 initialized from __real__ of ovf1. */
4963 std::swap (a&: rhs[0], b&: rhs[1]);
4964 std::swap (a&: im1, b&: im2);
4965 std::swap (a&: ovf1, b&: ovf2);
4966 }
4967 arg3 = gimple_call_arg (gs: ovf, index: 1 - i);
4968 i = -1;
4969 break;
4970 }
4971 }
4972 if (!arg3)
4973 return false;
4974 arg1 = gimple_call_arg (gs: ovf1, index: 0);
4975 arg2 = gimple_call_arg (gs: ovf1, index: 1);
4976 if (!types_compatible_p (type1: type, TREE_TYPE (arg1)))
4977 return false;
4978 int kind[2] = { 0, 0 };
4979 tree arg_im[2] = { NULL_TREE, NULL_TREE };
  /* At least one of arg2 and arg3 should have a type compatible
     with arg1/rhs[0], and the other one should have a value in the
     [0, 1] range.  If both are in the [0, 1] range and have a type
     compatible with arg1/rhs[0], try harder to find out, after looking
     through casts and != 0 comparisons, which one is initialized from
     __imag__ of a .{ADD,SUB}_OVERFLOW or .U{ADD,SUB}C call result.  */
4986 for (int i = 0; i < 2; ++i)
4987 {
4988 tree arg = i == 0 ? arg2 : arg3;
4989 if (types_compatible_p (type1: type, TREE_TYPE (arg)))
4990 kind[i] = 1;
4991 if (!INTEGRAL_TYPE_P (TREE_TYPE (arg))
4992 || (TYPE_PRECISION (TREE_TYPE (arg)) == 1
4993 && !TYPE_UNSIGNED (TREE_TYPE (arg))))
4994 continue;
4995 if (tree_zero_one_valued_p (arg))
4996 kind[i] |= 2;
4997 if (TREE_CODE (arg) == SSA_NAME)
4998 {
4999 gimple *g = SSA_NAME_DEF_STMT (arg);
5000 if (gimple_assign_cast_p (s: g))
5001 {
5002 tree op = gimple_assign_rhs1 (gs: g);
5003 if (TREE_CODE (op) == SSA_NAME
5004 && INTEGRAL_TYPE_P (TREE_TYPE (op)))
5005 g = SSA_NAME_DEF_STMT (op);
5006 }
5007 g = uaddc_ne0 (g);
5008 if (!uaddc_is_cplxpart (g, part: IMAGPART_EXPR))
5009 continue;
5010 arg_im[i] = gimple_assign_lhs (gs: g);
5011 g = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (g), 0));
5012 if (!is_gimple_call (gs: g) || !gimple_call_internal_p (gs: g))
5013 continue;
5014 switch (gimple_call_internal_fn (gs: g))
5015 {
5016 case IFN_ADD_OVERFLOW:
5017 case IFN_SUB_OVERFLOW:
5018 case IFN_UADDC:
5019 case IFN_USUBC:
5020 break;
5021 default:
5022 continue;
5023 }
5024 kind[i] |= 4;
5025 }
5026 }
5027 /* Make arg2 the one with compatible type and arg3 the one
     with [0, 1] range.  If both are true for both operands,
     prefer as arg3 the result of __imag__ of some ifn.  */
5030 if ((kind[0] & 1) == 0 || ((kind[1] & 1) != 0 && kind[0] > kind[1]))
5031 {
5032 std::swap (a&: arg2, b&: arg3);
5033 std::swap (a&: kind[0], b&: kind[1]);
5034 std::swap (a&: arg_im[0], b&: arg_im[1]);
5035 }
5036 if ((kind[0] & 1) == 0 || (kind[1] & 6) == 0)
5037 return false;
5038 if (!has_single_use (var: gimple_assign_lhs (gs: im1))
5039 || !has_single_use (var: gimple_assign_lhs (gs: im2))
5040 || !has_single_use (var: gimple_assign_lhs (gs: re1))
5041 || num_imm_uses (var: gimple_call_lhs (gs: ovf1)) != 2)
5042 return false;
5043 /* Check that ovf2's result is used in __real__ and set re2
5044 to that statement. */
5045 use_operand_p use_p;
5046 imm_use_iterator iter;
5047 tree lhs = gimple_call_lhs (gs: ovf2);
5048 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
5049 {
5050 gimple *use_stmt = USE_STMT (use_p);
5051 if (is_gimple_debug (gs: use_stmt))
5052 continue;
5053 if (use_stmt == im2)
5054 continue;
5055 if (re2)
5056 return false;
5057 if (!uaddc_is_cplxpart (g: use_stmt, part: REALPART_EXPR))
5058 return false;
5059 re2 = use_stmt;
5060 }
5061 /* Build .UADDC/.USUBC call which will be placed before the stmt. */
5062 gimple_stmt_iterator gsi2 = gsi_for_stmt (ovf2);
5063 gimple *g;
5064 if ((kind[1] & 4) != 0 && types_compatible_p (type1: type, TREE_TYPE (arg_im[1])))
5065 arg3 = arg_im[1];
5066 if ((kind[1] & 1) == 0)
5067 {
5068 if (TREE_CODE (arg3) == INTEGER_CST)
5069 arg3 = fold_convert (type, arg3);
5070 else
5071 {
5072 g = gimple_build_assign (make_ssa_name (var: type), NOP_EXPR, arg3);
5073 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5074 arg3 = gimple_assign_lhs (gs: g);
5075 }
5076 }
5077 g = gimple_build_call_internal (ifn == IFN_ADD_OVERFLOW
5078 ? IFN_UADDC : IFN_USUBC,
5079 3, arg1, arg2, arg3);
5080 tree nlhs = make_ssa_name (TREE_TYPE (lhs));
5081 gimple_call_set_lhs (gs: g, lhs: nlhs);
5082 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5083 /* In the case where stmt is | or ^ of two overflow flags
5084 or addition of those, replace stmt with __imag__ of the above
5085 added call. In case of arg1 + arg2 + (ovf1 + ovf2) or
5086 arg1 - arg2 - (ovf1 + ovf2) just emit it before stmt. */
5087 tree ilhs = rhs[2] ? make_ssa_name (var: type) : gimple_assign_lhs (gs: stmt);
5088 g = gimple_build_assign (ilhs, IMAGPART_EXPR,
5089 build1 (IMAGPART_EXPR, TREE_TYPE (ilhs), nlhs));
5090 if (rhs[2])
5091 {
5092 gsi_insert_before (gsi, g, GSI_SAME_STMT);
5093 /* Remove some further statements which can't be kept in the IL because
5094 they can use SSA_NAMEs whose setter is going to be removed too. */
5095 for (gimple *g2 : temp_stmts)
5096 {
5097 gsi2 = gsi_for_stmt (g2);
5098 gsi_remove (&gsi2, true);
5099 release_defs (g2);
5100 }
5101 }
5102 else
5103 gsi_replace (gsi, g, true);
5104 /* Remove some statements which can't be kept in the IL because they
5105 use SSA_NAME whose setter is going to be removed too. */
5106 tree rhs1 = rhs[1];
5107 for (int i = 0; i < 2; i++)
5108 if (rhs1 == gimple_assign_lhs (gs: im2))
5109 break;
5110 else
5111 {
5112 g = SSA_NAME_DEF_STMT (rhs1);
5113 rhs1 = gimple_assign_rhs1 (gs: g);
5114 gsi2 = gsi_for_stmt (g);
5115 gsi_remove (&gsi2, true);
5116 release_defs (g);
5117 }
5118 gcc_checking_assert (rhs1 == gimple_assign_lhs (im2));
5119 gsi2 = gsi_for_stmt (im2);
5120 gsi_remove (&gsi2, true);
5121 release_defs (im2);
5122 /* Replace the re2 statement with __real__ of the newly added
5123 .UADDC/.USUBC call. */
5124 if (re2)
5125 {
5126 gsi2 = gsi_for_stmt (re2);
5127 tree rlhs = gimple_assign_lhs (gs: re2);
5128 g = gimple_build_assign (rlhs, REALPART_EXPR,
5129 build1 (REALPART_EXPR, TREE_TYPE (rlhs), nlhs));
5130 gsi_replace (&gsi2, g, true);
5131 }
5132 if (rhs[2])
5133 {
5134 /* If this is the arg1 + arg2 + (ovf1 + ovf2) or
5135 arg1 - arg2 - (ovf1 + ovf2) case for the most significant limb,
5136 replace stmt with __real__ of another .UADDC/.USUBC call which
5137 handles the most significant limb. Overflow flag from this is
5138 ignored. */
5139 g = gimple_build_call_internal (code == PLUS_EXPR
5140 ? IFN_UADDC : IFN_USUBC,
5141 3, rhs[3], rhs[2], ilhs);
5142 nlhs = make_ssa_name (TREE_TYPE (lhs));
5143 gimple_call_set_lhs (gs: g, lhs: nlhs);
5144 gsi_insert_before (gsi, g, GSI_SAME_STMT);
5145 ilhs = gimple_assign_lhs (gs: stmt);
5146 g = gimple_build_assign (ilhs, REALPART_EXPR,
5147 build1 (REALPART_EXPR, TREE_TYPE (ilhs), nlhs));
5148 gsi_replace (gsi, g, true);
5149 }
5150 if (TREE_CODE (arg3) == SSA_NAME)
5151 {
5152 /* When pattern recognizing the second least significant limb
	 above (i.e. the first pair of .{ADD,SUB}_OVERFLOW calls for one limb),
	 check whether the [0, 1] range argument (i.e. the carry-in) is the
5155 result of another .{ADD,SUB}_OVERFLOW call (one handling the
5156 least significant limb). Again look through casts and != 0. */
5157 gimple *im3 = SSA_NAME_DEF_STMT (arg3);
5158 for (int i = 0; i < 2; ++i)
5159 {
5160 gimple *im4 = uaddc_cast (g: im3);
5161 if (im4 == im3)
5162 break;
5163 else
5164 im3 = im4;
5165 }
5166 im3 = uaddc_ne0 (g: im3);
5167 if (uaddc_is_cplxpart (g: im3, part: IMAGPART_EXPR))
5168 {
5169 gimple *ovf3
5170 = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im3), 0));
5171 if (gimple_call_internal_p (gs: ovf3, fn: ifn))
5172 {
5173 lhs = gimple_call_lhs (gs: ovf3);
5174 arg1 = gimple_call_arg (gs: ovf3, index: 0);
5175 arg2 = gimple_call_arg (gs: ovf3, index: 1);
5176 if (types_compatible_p (type1: type, TREE_TYPE (TREE_TYPE (lhs)))
5177 && types_compatible_p (type1: type, TREE_TYPE (arg1))
5178 && types_compatible_p (type1: type, TREE_TYPE (arg2)))
5179 {
5180 /* And if it is initialized from result of __imag__
5181 of .{ADD,SUB}_OVERFLOW call, replace that
5182 call with .U{ADD,SUB}C call with the same arguments,
5183 just 0 added as third argument. This isn't strictly
5184 necessary, .ADD_OVERFLOW (x, y) and .UADDC (x, y, 0)
5185 produce the same result, but may result in better
5186 generated code on some targets where the backend can
		       better prepare for how the result will be used.  */
5188 g = gimple_build_call_internal (ifn == IFN_ADD_OVERFLOW
5189 ? IFN_UADDC : IFN_USUBC,
5190 3, arg1, arg2,
5191 build_zero_cst (type));
5192 gimple_call_set_lhs (gs: g, lhs);
5193 gsi2 = gsi_for_stmt (ovf3);
5194 gsi_replace (&gsi2, g, true);
5195 }
5196 }
5197 }
5198 }
5199 return true;
5200}
5201
/* Replace .POPCOUNT (x) == 1 or .POPCOUNT (x) != 1 with
   (x ^ (x - 1)) > x - 1 or (x ^ (x - 1)) <= x - 1 (or, if x is known
   to be non-zero, with (x & (x - 1)) == 0 or != 0) if .POPCOUNT
   isn't a direct optab.  */
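
/* For example, with unsigned x:
     x == 8:  x - 1 == 7,   x ^ (x - 1) == 15,  and 15 > 7,  so one bit set;
     x == 6:  x - 1 == 5,   x ^ (x - 1) == 3,   and 3 <= 5,  so not one bit;
     x == 0:  x - 1 == ~0U, x ^ (x - 1) == ~0U, not > ~0U,   so not one bit.  */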
5205
5206static void
5207match_single_bit_test (gimple_stmt_iterator *gsi, gimple *stmt)
5208{
5209 tree clhs, crhs;
5210 enum tree_code code;
5211 if (gimple_code (g: stmt) == GIMPLE_COND)
5212 {
5213 clhs = gimple_cond_lhs (gs: stmt);
5214 crhs = gimple_cond_rhs (gs: stmt);
5215 code = gimple_cond_code (gs: stmt);
5216 }
5217 else
5218 {
5219 clhs = gimple_assign_rhs1 (gs: stmt);
5220 crhs = gimple_assign_rhs2 (gs: stmt);
5221 code = gimple_assign_rhs_code (gs: stmt);
5222 }
5223 if (code != EQ_EXPR && code != NE_EXPR)
5224 return;
5225 if (TREE_CODE (clhs) != SSA_NAME || !integer_onep (crhs))
5226 return;
5227 gimple *call = SSA_NAME_DEF_STMT (clhs);
5228 combined_fn cfn = gimple_call_combined_fn (call);
5229 switch (cfn)
5230 {
5231 CASE_CFN_POPCOUNT:
5232 break;
5233 default:
5234 return;
5235 }
5236 if (!has_single_use (var: clhs))
5237 return;
5238 tree arg = gimple_call_arg (gs: call, index: 0);
5239 tree type = TREE_TYPE (arg);
5240 if (!INTEGRAL_TYPE_P (type))
5241 return;
5242 bool nonzero_arg = tree_expr_nonzero_p (arg);
5243 if (direct_internal_fn_supported_p (IFN_POPCOUNT, type, OPTIMIZE_FOR_BOTH))
5244 {
5245 /* Tell expand_POPCOUNT the popcount result is only used in equality
5246 comparison with one, so that it can decide based on rtx costs. */
5247 gimple *g = gimple_build_call_internal (IFN_POPCOUNT, 2, arg,
5248 nonzero_arg ? integer_zero_node
5249 : integer_one_node);
5250 gimple_call_set_lhs (gs: g, lhs: gimple_call_lhs (gs: call));
5251 gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
5252 gsi_replace (&gsi2, g, true);
5253 return;
5254 }
5255 tree argm1 = make_ssa_name (var: type);
5256 gimple *g = gimple_build_assign (argm1, PLUS_EXPR, arg,
5257 build_int_cst (type, -1));
5258 gsi_insert_before (gsi, g, GSI_SAME_STMT);
5259 g = gimple_build_assign (make_ssa_name (var: type),
5260 nonzero_arg ? BIT_AND_EXPR : BIT_XOR_EXPR,
5261 arg, argm1);
5262 gsi_insert_before (gsi, g, GSI_SAME_STMT);
5263 tree_code cmpcode;
5264 if (nonzero_arg)
5265 {
5266 argm1 = build_zero_cst (type);
5267 cmpcode = code;
5268 }
5269 else
5270 cmpcode = code == EQ_EXPR ? GT_EXPR : LE_EXPR;
5271 if (gcond *cond = dyn_cast <gcond *> (p: stmt))
5272 {
5273 gimple_cond_set_lhs (gs: cond, lhs: gimple_assign_lhs (gs: g));
5274 gimple_cond_set_rhs (gs: cond, rhs: argm1);
5275 gimple_cond_set_code (gs: cond, code: cmpcode);
5276 }
5277 else
5278 {
5279 gimple_assign_set_rhs1 (gs: stmt, rhs: gimple_assign_lhs (gs: g));
5280 gimple_assign_set_rhs2 (gs: stmt, rhs: argm1);
5281 gimple_assign_set_rhs_code (s: stmt, code: cmpcode);
5282 }
5283 update_stmt (s: stmt);
5284 gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
5285 gsi_remove (&gsi2, true);
5286 release_defs (call);
5287}
5288
5289/* Return true if target has support for divmod. */
5290
5291static bool
5292target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
5293{
5294 /* If target supports hardware divmod insn, use it for divmod. */
5295 if (optab_handler (op: divmod_optab, mode) != CODE_FOR_nothing)
5296 return true;
5297
5298 /* Check if libfunc for divmod is available. */
5299 rtx libfunc = optab_libfunc (divmod_optab, mode);
5300 if (libfunc != NULL_RTX)
5301 {
 /* If optab_handler exists for div_optab, perhaps in a wider mode,
 we don't want to use the libfunc even if it exists for the given mode.  */
5304 machine_mode div_mode;
5305 FOR_EACH_MODE_FROM (div_mode, mode)
5306 if (optab_handler (op: div_optab, mode: div_mode) != CODE_FOR_nothing)
5307 return false;
5308
5309 return targetm.expand_divmod_libfunc != NULL;
5310 }
5311
5312 return false;
5313}
5314
5315/* Check if stmt is candidate for divmod transform. */
5316
5317static bool
5318divmod_candidate_p (gassign *stmt)
5319{
5320 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
5321 machine_mode mode = TYPE_MODE (type);
5322 optab divmod_optab, div_optab;
5323
5324 if (TYPE_UNSIGNED (type))
5325 {
5326 divmod_optab = udivmod_optab;
5327 div_optab = udiv_optab;
5328 }
5329 else
5330 {
5331 divmod_optab = sdivmod_optab;
5332 div_optab = sdiv_optab;
5333 }
5334
5335 tree op1 = gimple_assign_rhs1 (gs: stmt);
5336 tree op2 = gimple_assign_rhs2 (gs: stmt);
5337
5338 /* Disable the transform if either is a constant, since division-by-constant
5339 may have specialized expansion. */
5340 if (CONSTANT_CLASS_P (op1))
5341 return false;
5342
5343 if (CONSTANT_CLASS_P (op2))
5344 {
5345 if (integer_pow2p (op2))
5346 return false;
5347
5348 if (element_precision (type) <= HOST_BITS_PER_WIDE_INT
5349 && element_precision (type) <= BITS_PER_WORD)
5350 return false;
5351
 /* If the divisor is not a power of 2 and the precision is wider than
 a HWI, expand_divmod punts on that, so in that case it is better
 to use the divmod optab or libfunc.  Similarly if choose_multiplier
 might need pre/post shifts of BITS_PER_WORD or more.  */
5356 }
5357
5358 /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
5359 expand using the [su]divv optabs. */
5360 if (TYPE_OVERFLOW_TRAPS (type))
5361 return false;
5362
5363 if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
5364 return false;
5365
5366 return true;
5367}
5368
5369/* This function looks for:
5370 t1 = a TRUNC_DIV_EXPR b;
5371 t2 = a TRUNC_MOD_EXPR b;
5372 and transforms it to the following sequence:
5373 complex_tmp = DIVMOD (a, b);
 t1 = REALPART_EXPR<complex_tmp>;
 t2 = IMAGPART_EXPR<complex_tmp>;
5376 For conditions enabling the transform see divmod_candidate_p().
5377
5378 The pass has three parts:
5379 1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
5380 other trunc_div_expr and trunc_mod_expr stmts.
5381 2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
5382 to stmts vector.
5383 3) Insert DIVMOD call just before top_stmt and update entries in
 stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
5385 IMAGPART_EXPR for mod). */
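
/* For example (an illustrative sketch; the SSA names are made up), on a
   target with a divmod instruction or libfunc,

     q_1 = a_2(D) / b_3(D);
     r_4 = a_2(D) % b_3(D);

   becomes

     divmod_tmp_5 = DIVMOD (a_2(D), b_3(D));
     q_1 = REALPART_EXPR <divmod_tmp_5>;
     r_4 = IMAGPART_EXPR <divmod_tmp_5>;

   so that expansion emits a single divmod instruction or library call
   instead of a separate division and modulo.  */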
5386
5387static bool
5388convert_to_divmod (gassign *stmt)
5389{
5390 if (stmt_can_throw_internal (cfun, stmt)
5391 || !divmod_candidate_p (stmt))
5392 return false;
5393
5394 tree op1 = gimple_assign_rhs1 (gs: stmt);
5395 tree op2 = gimple_assign_rhs2 (gs: stmt);
5396
5397 imm_use_iterator use_iter;
5398 gimple *use_stmt;
5399 auto_vec<gimple *> stmts;
5400
5401 gimple *top_stmt = stmt;
5402 basic_block top_bb = gimple_bb (g: stmt);
5403
 /* Part 1: Try to set top_stmt to the "topmost" stmt that dominates
 at least stmt and possibly other trunc_div/trunc_mod stmts
 having the same operands as stmt.  */
5407
5408 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
5409 {
5410 if (is_gimple_assign (gs: use_stmt)
5411 && (gimple_assign_rhs_code (gs: use_stmt) == TRUNC_DIV_EXPR
5412 || gimple_assign_rhs_code (gs: use_stmt) == TRUNC_MOD_EXPR)
5413 && operand_equal_p (op1, gimple_assign_rhs1 (gs: use_stmt), flags: 0)
5414 && operand_equal_p (op2, gimple_assign_rhs2 (gs: use_stmt), flags: 0))
5415 {
5416 if (stmt_can_throw_internal (cfun, use_stmt))
5417 continue;
5418
5419 basic_block bb = gimple_bb (g: use_stmt);
5420
5421 if (bb == top_bb)
5422 {
5423 if (gimple_uid (g: use_stmt) < gimple_uid (g: top_stmt))
5424 top_stmt = use_stmt;
5425 }
5426 else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
5427 {
5428 top_bb = bb;
5429 top_stmt = use_stmt;
5430 }
5431 }
5432 }
5433
5434 tree top_op1 = gimple_assign_rhs1 (gs: top_stmt);
5435 tree top_op2 = gimple_assign_rhs2 (gs: top_stmt);
5436
5437 stmts.safe_push (obj: top_stmt);
5438 bool div_seen = (gimple_assign_rhs_code (gs: top_stmt) == TRUNC_DIV_EXPR);
5439
 /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
 to the stmts vector.  The 2nd loop will always add stmt to the stmts
 vector, since gimple_bb (top_stmt) dominates gimple_bb (stmt), so it
 ends up adding at least one trunc_mod_expr stmt.  */
5444
5445 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
5446 {
5447 if (is_gimple_assign (gs: use_stmt)
5448 && (gimple_assign_rhs_code (gs: use_stmt) == TRUNC_DIV_EXPR
5449 || gimple_assign_rhs_code (gs: use_stmt) == TRUNC_MOD_EXPR)
5450 && operand_equal_p (top_op1, gimple_assign_rhs1 (gs: use_stmt), flags: 0)
5451 && operand_equal_p (top_op2, gimple_assign_rhs2 (gs: use_stmt), flags: 0))
5452 {
5453 if (use_stmt == top_stmt
5454 || stmt_can_throw_internal (cfun, use_stmt)
5455 || !dominated_by_p (CDI_DOMINATORS, gimple_bb (g: use_stmt), top_bb))
5456 continue;
5457
5458 stmts.safe_push (obj: use_stmt);
5459 if (gimple_assign_rhs_code (gs: use_stmt) == TRUNC_DIV_EXPR)
5460 div_seen = true;
5461 }
5462 }
5463
5464 if (!div_seen)
5465 return false;
5466
5467 /* Part 3: Create libcall to internal fn DIVMOD:
5468 divmod_tmp = DIVMOD (op1, op2). */
5469
5470 gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
5471 tree res = make_temp_ssa_name (type: build_complex_type (TREE_TYPE (op1)),
5472 stmt: call_stmt, name: "divmod_tmp");
5473 gimple_call_set_lhs (gs: call_stmt, lhs: res);
5474 /* We rejected throwing statements above. */
5475 gimple_call_set_nothrow (s: call_stmt, nothrow_p: true);
5476
5477 /* Insert the call before top_stmt. */
5478 gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
5479 gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
5480
5481 widen_mul_stats.divmod_calls_inserted++;
5482
5483 /* Update all statements in stmts vector:
5484 lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
5485 lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */
5486
5487 for (unsigned i = 0; stmts.iterate (ix: i, ptr: &use_stmt); ++i)
5488 {
5489 tree new_rhs;
5490
5491 switch (gimple_assign_rhs_code (gs: use_stmt))
5492 {
5493 case TRUNC_DIV_EXPR:
5494 new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
5495 break;
5496
5497 case TRUNC_MOD_EXPR:
5498 new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
5499 break;
5500
5501 default:
5502 gcc_unreachable ();
5503 }
5504
5505 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
5506 gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
5507 update_stmt (s: use_stmt);
5508 }
5509
5510 return true;
5511}
5512
5513/* Process a single gimple assignment STMT, which has a RSHIFT_EXPR as
5514 its rhs, and try to convert it into a MULT_HIGHPART_EXPR. The return
5515 value is true iff we converted the statement. */
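
/* A sketch of the shape being matched (illustrative source, not from a
   testcase): with 32-bit int and 64-bit long long,

     unsigned int
     hipart (unsigned int a, unsigned int b)
     {
       unsigned long long w = (unsigned long long) a * b;
       return w >> 32;
     }

   the widening multiply followed by a shift by the operand precision is
   replaced by MULT_HIGHPART_EXPR <a, b> when the target supports an
   unsigned highpart multiply for that mode.  */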
5516
5517static bool
5518convert_mult_to_highpart (gassign *stmt, gimple_stmt_iterator *gsi)
5519{
5520 tree lhs = gimple_assign_lhs (gs: stmt);
5521 tree stype = TREE_TYPE (lhs);
5522 tree sarg0 = gimple_assign_rhs1 (gs: stmt);
5523 tree sarg1 = gimple_assign_rhs2 (gs: stmt);
5524
5525 if (TREE_CODE (stype) != INTEGER_TYPE
5526 || TREE_CODE (sarg1) != INTEGER_CST
5527 || TREE_CODE (sarg0) != SSA_NAME
5528 || !tree_fits_uhwi_p (sarg1)
5529 || !has_single_use (var: sarg0))
5530 return false;
5531
5532 gassign *def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (sarg0));
5533 if (!def)
5534 return false;
5535
5536 enum tree_code mcode = gimple_assign_rhs_code (gs: def);
5537 if (mcode == NOP_EXPR)
5538 {
5539 tree tmp = gimple_assign_rhs1 (gs: def);
5540 if (TREE_CODE (tmp) != SSA_NAME || !has_single_use (var: tmp))
5541 return false;
5542 def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (tmp));
5543 if (!def)
5544 return false;
5545 mcode = gimple_assign_rhs_code (gs: def);
5546 }
5547
5548 if (mcode != WIDEN_MULT_EXPR
5549 || gimple_bb (g: def) != gimple_bb (g: stmt))
5550 return false;
5551 tree mtype = TREE_TYPE (gimple_assign_lhs (def));
5552 if (TREE_CODE (mtype) != INTEGER_TYPE
5553 || TYPE_PRECISION (mtype) != TYPE_PRECISION (stype))
5554 return false;
5555
5556 tree mop1 = gimple_assign_rhs1 (gs: def);
5557 tree mop2 = gimple_assign_rhs2 (gs: def);
5558 tree optype = TREE_TYPE (mop1);
5559 bool unsignedp = TYPE_UNSIGNED (optype);
5560 unsigned int prec = TYPE_PRECISION (optype);
5561
5562 if (unsignedp != TYPE_UNSIGNED (mtype)
5563 || TYPE_PRECISION (mtype) != 2 * prec)
5564 return false;
5565
5566 unsigned HOST_WIDE_INT bits = tree_to_uhwi (sarg1);
5567 if (bits < prec || bits >= 2 * prec)
5568 return false;
5569
5570 /* For the time being, require operands to have the same sign. */
5571 if (unsignedp != TYPE_UNSIGNED (TREE_TYPE (mop2)))
5572 return false;
5573
5574 machine_mode mode = TYPE_MODE (optype);
5575 optab tab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
5576 if (optab_handler (op: tab, mode) == CODE_FOR_nothing)
5577 return false;
5578
5579 location_t loc = gimple_location (g: stmt);
5580 tree highpart1 = build_and_insert_binop (gsi, loc, name: "highparttmp",
5581 code: MULT_HIGHPART_EXPR, arg0: mop1, arg1: mop2);
5582 tree highpart2 = highpart1;
5583 tree ntype = optype;
5584
5585 if (TYPE_UNSIGNED (stype) != TYPE_UNSIGNED (optype))
5586 {
5587 ntype = TYPE_UNSIGNED (stype) ? unsigned_type_for (optype)
5588 : signed_type_for (optype);
5589 highpart2 = build_and_insert_cast (gsi, loc, type: ntype, val: highpart1);
5590 }
5591 if (bits > prec)
5592 highpart2 = build_and_insert_binop (gsi, loc, name: "highparttmp",
5593 code: RSHIFT_EXPR, arg0: highpart2,
5594 arg1: build_int_cst (ntype, bits - prec));
5595
5596 gassign *new_stmt = gimple_build_assign (lhs, NOP_EXPR, highpart2);
5597 gsi_replace (gsi, new_stmt, true);
5598
5599 widen_mul_stats.highpart_mults_inserted++;
5600 return true;
5601}
5602
5603/* If target has spaceship<MODE>3 expander, pattern recognize
5604 <bb 2> [local count: 1073741824]:
5605 if (a_2(D) == b_3(D))
5606 goto <bb 6>; [34.00%]
5607 else
5608 goto <bb 3>; [66.00%]
5609
5610 <bb 3> [local count: 708669601]:
5611 if (a_2(D) < b_3(D))
5612 goto <bb 6>; [1.04%]
5613 else
5614 goto <bb 4>; [98.96%]
5615
5616 <bb 4> [local count: 701299439]:
5617 if (a_2(D) > b_3(D))
5618 goto <bb 5>; [48.89%]
5619 else
5620 goto <bb 6>; [51.11%]
5621
5622 <bb 5> [local count: 342865295]:
5623
5624 <bb 6> [local count: 1073741824]:
5625 and turn it into:
5626 <bb 2> [local count: 1073741824]:
5627 _1 = .SPACESHIP (a_2(D), b_3(D));
5628 if (_1 == 0)
5629 goto <bb 6>; [34.00%]
5630 else
5631 goto <bb 3>; [66.00%]
5632
5633 <bb 3> [local count: 708669601]:
5634 if (_1 == -1)
5635 goto <bb 6>; [1.04%]
5636 else
5637 goto <bb 4>; [98.96%]
5638
5639 <bb 4> [local count: 701299439]:
5640 if (_1 == 1)
5641 goto <bb 5>; [48.89%]
5642 else
5643 goto <bb 6>; [51.11%]
5644
5645 <bb 5> [local count: 342865295]:
5646
5647 <bb 6> [local count: 1073741824]:
5648 so that the backend can emit optimal comparison and
5649 conditional jump sequence. */
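
/* This shape is typically what the C++ front end and the inlined
   libstdc++ <compare> machinery produce for a floating-point three-way
   comparison, e.g. (an illustrative sketch)

     double a, b;
     ...
     auto cmp = a <=> b;   // std::partial_ordering

   so recognizing it lets such comparisons expand through the target's
   spaceship<MODE>3 pattern.  */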
5650
5651static void
5652optimize_spaceship (gcond *stmt)
5653{
5654 enum tree_code code = gimple_cond_code (gs: stmt);
5655 if (code != EQ_EXPR && code != NE_EXPR)
5656 return;
5657 tree arg1 = gimple_cond_lhs (gs: stmt);
5658 tree arg2 = gimple_cond_rhs (gs: stmt);
5659 if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1))
5660 || optab_handler (op: spaceship_optab,
5661 TYPE_MODE (TREE_TYPE (arg1))) == CODE_FOR_nothing
5662 || operand_equal_p (arg1, arg2, flags: 0))
5663 return;
5664
5665 basic_block bb0 = gimple_bb (g: stmt), bb1, bb2 = NULL;
5666 edge em1 = NULL, e1 = NULL, e2 = NULL;
5667 bb1 = EDGE_SUCC (bb0, 1)->dest;
5668 if (((EDGE_SUCC (bb0, 0)->flags & EDGE_TRUE_VALUE) != 0) ^ (code == EQ_EXPR))
5669 bb1 = EDGE_SUCC (bb0, 0)->dest;
5670
5671 gcond *g = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb: bb1));
5672 if (g == NULL
5673 || !single_pred_p (bb: bb1)
5674 || (operand_equal_p (gimple_cond_lhs (gs: g), arg1, flags: 0)
5675 ? !operand_equal_p (gimple_cond_rhs (gs: g), arg2, flags: 0)
5676 : (!operand_equal_p (gimple_cond_lhs (gs: g), arg2, flags: 0)
5677 || !operand_equal_p (gimple_cond_rhs (gs: g), arg1, flags: 0)))
5678 || !cond_only_block_p (bb1))
5679 return;
5680
5681 enum tree_code ccode = (operand_equal_p (gimple_cond_lhs (gs: g), arg1, flags: 0)
5682 ? LT_EXPR : GT_EXPR);
5683 switch (gimple_cond_code (gs: g))
5684 {
5685 case LT_EXPR:
5686 case LE_EXPR:
5687 break;
5688 case GT_EXPR:
5689 case GE_EXPR:
5690 ccode = ccode == LT_EXPR ? GT_EXPR : LT_EXPR;
5691 break;
5692 default:
5693 return;
5694 }
5695
5696 for (int i = 0; i < 2; ++i)
5697 {
 /* With NaNs, </<=/>/>= are false, so we need to look for the
 third comparison on the false edge of whichever non-equality
 comparison the second comparison turns out to be.  */
5701 if (HONOR_NANS (TREE_TYPE (arg1))
5702 && (EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0)
5703 continue;
5704
5705 bb2 = EDGE_SUCC (bb1, i)->dest;
5706 g = safe_dyn_cast <gcond *> (p: *gsi_last_bb (bb: bb2));
5707 if (g == NULL
5708 || !single_pred_p (bb: bb2)
5709 || (operand_equal_p (gimple_cond_lhs (gs: g), arg1, flags: 0)
5710 ? !operand_equal_p (gimple_cond_rhs (gs: g), arg2, flags: 0)
5711 : (!operand_equal_p (gimple_cond_lhs (gs: g), arg2, flags: 0)
5712 || !operand_equal_p (gimple_cond_rhs (gs: g), arg1, flags: 0)))
5713 || !cond_only_block_p (bb2)
5714 || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest)
5715 continue;
5716
5717 enum tree_code ccode2
5718 = (operand_equal_p (gimple_cond_lhs (gs: g), arg1, flags: 0) ? LT_EXPR : GT_EXPR);
5719 switch (gimple_cond_code (gs: g))
5720 {
5721 case LT_EXPR:
5722 case LE_EXPR:
5723 break;
5724 case GT_EXPR:
5725 case GE_EXPR:
5726 ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR;
5727 break;
5728 default:
5729 continue;
5730 }
5731 if (HONOR_NANS (TREE_TYPE (arg1)) && ccode == ccode2)
5732 continue;
5733
5734 if ((ccode == LT_EXPR)
5735 ^ ((EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0))
5736 {
5737 em1 = EDGE_SUCC (bb1, 1 - i);
5738 e1 = EDGE_SUCC (bb2, 0);
5739 e2 = EDGE_SUCC (bb2, 1);
5740 if ((ccode2 == LT_EXPR) ^ ((e1->flags & EDGE_TRUE_VALUE) == 0))
5741 std::swap (a&: e1, b&: e2);
5742 }
5743 else
5744 {
5745 e1 = EDGE_SUCC (bb1, 1 - i);
5746 em1 = EDGE_SUCC (bb2, 0);
5747 e2 = EDGE_SUCC (bb2, 1);
5748 if ((ccode2 != LT_EXPR) ^ ((em1->flags & EDGE_TRUE_VALUE) == 0))
5749 std::swap (a&: em1, b&: e2);
5750 }
5751 break;
5752 }
5753
5754 if (em1 == NULL)
5755 {
5756 if ((ccode == LT_EXPR)
5757 ^ ((EDGE_SUCC (bb1, 0)->flags & EDGE_TRUE_VALUE) != 0))
5758 {
5759 em1 = EDGE_SUCC (bb1, 1);
5760 e1 = EDGE_SUCC (bb1, 0);
5761 e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
5762 }
5763 else
5764 {
5765 em1 = EDGE_SUCC (bb1, 0);
5766 e1 = EDGE_SUCC (bb1, 1);
5767 e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
5768 }
5769 }
5770
5771 gcall *gc = gimple_build_call_internal (IFN_SPACESHIP, 2, arg1, arg2);
5772 tree lhs = make_ssa_name (integer_type_node);
5773 gimple_call_set_lhs (gs: gc, lhs);
5774 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5775 gsi_insert_before (&gsi, gc, GSI_SAME_STMT);
5776
5777 gimple_cond_set_lhs (gs: stmt, lhs);
5778 gimple_cond_set_rhs (gs: stmt, integer_zero_node);
5779 update_stmt (s: stmt);
5780
5781 gcond *cond = as_a <gcond *> (p: *gsi_last_bb (bb: bb1));
5782 gimple_cond_set_lhs (gs: cond, lhs);
5783 if (em1->src == bb1 && e2 != em1)
5784 {
5785 gimple_cond_set_rhs (gs: cond, integer_minus_one_node);
5786 gimple_cond_set_code (gs: cond, code: (em1->flags & EDGE_TRUE_VALUE)
5787 ? EQ_EXPR : NE_EXPR);
5788 }
5789 else
5790 {
5791 gcc_assert (e1->src == bb1 && e2 != e1);
5792 gimple_cond_set_rhs (gs: cond, integer_one_node);
5793 gimple_cond_set_code (gs: cond, code: (e1->flags & EDGE_TRUE_VALUE)
5794 ? EQ_EXPR : NE_EXPR);
5795 }
5796 update_stmt (s: cond);
5797
5798 if (e2 != e1 && e2 != em1)
5799 {
5800 cond = as_a <gcond *> (p: *gsi_last_bb (bb: bb2));
5801 gimple_cond_set_lhs (gs: cond, lhs);
5802 if (em1->src == bb2)
5803 gimple_cond_set_rhs (gs: cond, integer_minus_one_node);
5804 else
5805 {
5806 gcc_assert (e1->src == bb2);
5807 gimple_cond_set_rhs (gs: cond, integer_one_node);
5808 }
5809 gimple_cond_set_code (gs: cond,
5810 code: (e2->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR);
5811 update_stmt (s: cond);
5812 }
5813
5814 wide_int wm1 = wi::minus_one (TYPE_PRECISION (integer_type_node));
5815 wide_int w2 = wi::two (TYPE_PRECISION (integer_type_node));
5816 value_range vr (TREE_TYPE (lhs), wm1, w2);
5817 set_range_info (lhs, vr);
5818}
5819
5820
5821/* Find integer multiplications where the operands are extended from
5822 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
5823 or MULT_HIGHPART_EXPR where appropriate. */
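
/* For instance (an illustrative sketch),

     long long
     widen (int a, int b)
     {
       return (long long) a * (long long) b;
     }

   is rewritten to use WIDEN_MULT_EXPR <a, b> when the target has a
   widening multiply instruction, instead of extending both operands
   and performing a full double-width multiplication.  */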
5824
5825namespace {
5826
const pass_data pass_data_optimize_widening_mul =
{
  GIMPLE_PASS, /* type */
  "widening_mul", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_WIDEN_MUL, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
5839
5840class pass_optimize_widening_mul : public gimple_opt_pass
5841{
5842public:
5843 pass_optimize_widening_mul (gcc::context *ctxt)
5844 : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
5845 {}
5846
5847 /* opt_pass methods: */
5848 bool gate (function *) final override
5849 {
5850 return flag_expensive_optimizations && optimize;
5851 }
5852
5853 unsigned int execute (function *) final override;
5854
5855}; // class pass_optimize_widening_mul
5856
5857/* Walker class to perform the transformation in reverse dominance order. */
5858
5859class math_opts_dom_walker : public dom_walker
5860{
5861public:
 /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set
 if walking modifies the CFG.  */
5864
5865 math_opts_dom_walker (bool *cfg_changed_p)
5866 : dom_walker (CDI_DOMINATORS), m_last_result_set (),
5867 m_cfg_changed_p (cfg_changed_p) {}
5868
5869 /* The actual actions performed in the walk. */
5870
5871 void after_dom_children (basic_block) final override;
5872
5873 /* Set of results of chains of multiply and add statement combinations that
5874 were not transformed into FMAs because of active deferring. */
5875 hash_set<tree> m_last_result_set;
5876
5877 /* Pointer to a flag of the user that needs to be set if CFG has been
5878 modified. */
5879 bool *m_cfg_changed_p;
5880};
5881
5882void
5883math_opts_dom_walker::after_dom_children (basic_block bb)
5884{
5885 gimple_stmt_iterator gsi;
5886
5887 fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
5888
5889 for (gsi = gsi_after_labels (bb); !gsi_end_p (i: gsi);)
5890 {
5891 gimple *stmt = gsi_stmt (i: gsi);
5892 enum tree_code code;
5893
5894 if (is_gimple_assign (gs: stmt))
5895 {
5896 code = gimple_assign_rhs_code (gs: stmt);
5897 switch (code)
5898 {
5899 case MULT_EXPR:
5900 if (!convert_mult_to_widen (stmt, gsi: &gsi)
5901 && !convert_expand_mult_copysign (stmt, gsi: &gsi)
5902 && convert_mult_to_fma (mul_stmt: stmt,
5903 op1: gimple_assign_rhs1 (gs: stmt),
5904 op2: gimple_assign_rhs2 (gs: stmt),
5905 state: &fma_state))
5906 {
5907 gsi_remove (&gsi, true);
5908 release_defs (stmt);
5909 continue;
5910 }
5911 match_arith_overflow (gsi: &gsi, stmt, code, cfg_changed: m_cfg_changed_p);
5912 break;
5913
5914 case PLUS_EXPR:
5915 case MINUS_EXPR:
5916 if (!convert_plusminus_to_widen (gsi: &gsi, stmt, code))
5917 {
5918 match_arith_overflow (gsi: &gsi, stmt, code, cfg_changed: m_cfg_changed_p);
5919 if (gsi_stmt (i: gsi) == stmt)
5920 match_uaddc_usubc (gsi: &gsi, stmt, code);
5921 }
5922 break;
5923
5924 case BIT_NOT_EXPR:
5925 if (match_arith_overflow (gsi: &gsi, stmt, code, cfg_changed: m_cfg_changed_p))
5926 continue;
5927 break;
5928
5929 case TRUNC_MOD_EXPR:
5930 convert_to_divmod (stmt: as_a<gassign *> (p: stmt));
5931 break;
5932
5933 case RSHIFT_EXPR:
5934 convert_mult_to_highpart (stmt: as_a<gassign *> (p: stmt), gsi: &gsi);
5935 break;
5936
5937 case BIT_IOR_EXPR:
5938 case BIT_XOR_EXPR:
5939 match_uaddc_usubc (gsi: &gsi, stmt, code);
5940 break;
5941
5942 case EQ_EXPR:
5943 case NE_EXPR:
5944 match_single_bit_test (gsi: &gsi, stmt);
5945 break;
5946
5947 default:;
5948 }
5949 }
5950 else if (is_gimple_call (gs: stmt))
5951 {
5952 switch (gimple_call_combined_fn (stmt))
5953 {
5954 CASE_CFN_POW:
5955 if (gimple_call_lhs (gs: stmt)
5956 && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
5957 && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
5958 &dconst2)
5959 && convert_mult_to_fma (mul_stmt: stmt,
5960 op1: gimple_call_arg (gs: stmt, index: 0),
5961 op2: gimple_call_arg (gs: stmt, index: 0),
5962 state: &fma_state))
5963 {
5964 unlink_stmt_vdef (stmt);
5965 if (gsi_remove (&gsi, true)
5966 && gimple_purge_dead_eh_edges (bb))
5967 *m_cfg_changed_p = true;
5968 release_defs (stmt);
5969 continue;
5970 }
5971 break;
5972
5973 case CFN_COND_MUL:
5974 if (convert_mult_to_fma (mul_stmt: stmt,
5975 op1: gimple_call_arg (gs: stmt, index: 1),
5976 op2: gimple_call_arg (gs: stmt, index: 2),
5977 state: &fma_state,
5978 mul_cond: gimple_call_arg (gs: stmt, index: 0)))
5979
5980 {
5981 gsi_remove (&gsi, true);
5982 release_defs (stmt);
5983 continue;
5984 }
5985 break;
5986
5987 case CFN_COND_LEN_MUL:
5988 if (convert_mult_to_fma (mul_stmt: stmt,
5989 op1: gimple_call_arg (gs: stmt, index: 1),
5990 op2: gimple_call_arg (gs: stmt, index: 2),
5991 state: &fma_state,
5992 mul_cond: gimple_call_arg (gs: stmt, index: 0),
5993 mul_len: gimple_call_arg (gs: stmt, index: 4),
5994 mul_bias: gimple_call_arg (gs: stmt, index: 5)))
5995
5996 {
5997 gsi_remove (&gsi, true);
5998 release_defs (stmt);
5999 continue;
6000 }
6001 break;
6002
6003 case CFN_LAST:
6004 cancel_fma_deferring (state: &fma_state);
6005 break;
6006
6007 default:
6008 break;
6009 }
6010 }
6011 else if (gimple_code (g: stmt) == GIMPLE_COND)
6012 {
6013 match_single_bit_test (gsi: &gsi, stmt);
6014 optimize_spaceship (stmt: as_a <gcond *> (p: stmt));
6015 }
6016 gsi_next (i: &gsi);
6017 }
6018 if (fma_state.m_deferring_p
6019 && fma_state.m_initial_phi)
6020 {
6021 gcc_checking_assert (fma_state.m_last_result);
6022 if (!last_fma_candidate_feeds_initial_phi (state: &fma_state,
6023 last_result_set: &m_last_result_set))
6024 cancel_fma_deferring (state: &fma_state);
6025 else
6026 m_last_result_set.add (k: fma_state.m_last_result);
6027 }
6028}
6029
6030
6031unsigned int
6032pass_optimize_widening_mul::execute (function *fun)
6033{
6034 bool cfg_changed = false;
6035
6036 memset (s: &widen_mul_stats, c: 0, n: sizeof (widen_mul_stats));
6037 calculate_dominance_info (CDI_DOMINATORS);
6038 renumber_gimple_stmt_uids (cfun);
6039
6040 math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6041
6042 statistics_counter_event (fun, "widening multiplications inserted",
6043 widen_mul_stats.widen_mults_inserted);
6044 statistics_counter_event (fun, "widening maccs inserted",
6045 widen_mul_stats.maccs_inserted);
6046 statistics_counter_event (fun, "fused multiply-adds inserted",
6047 widen_mul_stats.fmas_inserted);
6048 statistics_counter_event (fun, "divmod calls inserted",
6049 widen_mul_stats.divmod_calls_inserted);
6050 statistics_counter_event (fun, "highpart multiplications inserted",
6051 widen_mul_stats.highpart_mults_inserted);
6052
6053 return cfg_changed ? TODO_cleanup_cfg : 0;
6054}
6055
6056} // anon namespace
6057
6058gimple_opt_pass *
6059make_pass_optimize_widening_mul (gcc::context *ctxt)
6060{
6061 return new pass_optimize_widening_mul (ctxt);
6062}
6063
