/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"  /* FIXME: for insn_data */
#include "params.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"
#include "tree-vector-builder.h"


/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  gimple *stmt;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    /* After transform some stmts are removed and thus their vinfo is gone.  */
    if (vinfo_for_stmt (stmt))
      {
        gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
        STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
      }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}


/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}


/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple *> scalar_stmts)
{
  slp_tree node;
  gimple *stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

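  /* Record an SLP use on each scalar stmt; vect_free_slp_tree decrements
     these counts again when the node is freed.  */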
  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
    STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;

  return node;
}


/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;


/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}


/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}


/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

static int
vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
{
  gimple *next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

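  /* Walk the chain from FIRST_STMT, accumulating each element's GROUP_GAP so
     that the returned index also accounts for gaps in the group.  */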
  do
    {
      if (next_stmt == stmt)
        return result;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
        result += GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}


/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMT when possible.  A non-zero *SWAP on entry
   indicates a swap is required for cond_expr stmts.  Specifically, *SWAP is 1
   if STMT is a cond and the operands of its comparison need to be swapped;
   *SWAP is 2 if STMT is a cond and the code of its comparison needs to be
   inverted.  If any operands are swapped in this function, *SWAP is set to a
   non-zero value on exit.
   If there was a fatal error, return -1; if the error could be corrected by
   swapping operands of this node's father node, return 1; if everything is
   ok, return 0.  */

static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
                             gimple *stmt, unsigned stmt_num,
                             vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  gimple *def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swapping can only be done for cond_expr if asked to, otherwise we
         could end up with a different comparison code from the first stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
          && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
        {
          first_op_cond = true;
          number_of_oprnds++;
        }
      else
        commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
        {
          /* Map indicating how operands of cond_expr should be swapped.  */
          int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
          int *map = maps[*swap];
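          /* Row 1 exchanges the two comparison operands; row 2 keeps them
             and exchanges the then/else arms instead (the comparison code
             itself is inverted when the swap is applied below).  */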

          if (i < 2)
            oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);
          else
            oprnd = gimple_op (stmt, map[i]);
        }
      else
        oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't analyze def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && vect_stmt_in_region_p (vinfo, def_stmt)
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern
              /* Allow different pattern state for the defs of the
                 first stmt in reduction chains.  */
              && (oprnd_info->first_dt != vect_reduction_def
                  || (!second && !oprnd_info->second_pattern)))
            {
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              return 1;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.\n");
              return -1;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
            case GIMPLE_ASSIGN:
              break;

            default:
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported defining stmt:\n");
              return -1;
            }
        }

      if (second)
        oprnd_info->second_pattern = pattern;

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);
        }
      else
        {
          /* Not the first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def)
                && !((oprnd_info->first_dt == vect_external_def
                      || oprnd_info->first_dt == vect_constant_def)
                     && (dt == vect_external_def
                         || dt == vect_constant_def)))
               || !types_compatible_p (oprnd_info->first_op_type,
                                       TREE_TYPE (oprnd))))
            {
              /* Try swapping operands if we got a mismatch.  */
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types\n");

              return 1;
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
          break;

        case vect_reduction_def:
        case vect_induction_def:
        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }
    }

  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in an SLP instance then
         we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: cannot swap operands of "
                               "shared stmt ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          return -1;
        }

      if (first_op_cond)
        {
          tree cond = gimple_assign_rhs1 (stmt);
          enum tree_code code = TREE_CODE (cond);

          /* Swap.  */
          if (*swap == 1)
            {
              swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
                                 &TREE_OPERAND (cond, 1));
              TREE_SET_CODE (cond, swap_tree_comparison (code));
            }
          /* Invert.  */
          else
            {
              swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
                                 gimple_assign_rhs3_ptr (stmt));
              bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
              code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
              gcc_assert (code != ERROR_MARK);
              TREE_SET_CODE (cond, code);
            }
        }
      else
        swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                           gimple_assign_rhs2_ptr (stmt));
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "swapped operands to match def types in ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }
    }

  *swap = swapped;
  return 0;
}

/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for VINFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  VINFO, GROUP_SIZE and MAX_NUNITS are
   as for vect_build_slp_tree.  */

static bool
vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size,
                        tree vectype, unsigned int *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type in ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  if (is_a <bb_vec_info> (vinfo)
      && TYPE_VECTOR_SUBPARTS (vectype) > group_size)
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                       "Build SLP failed: unrolling required "
                       "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
    *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);

  return true;
}

/* Verify that the scalar stmts STMTS are isomorphic, do not require data
   permutation, and are not of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates that the comparison could not be
   carried out or that the stmts will never be vectorized by SLP.

   Note a COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is a COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of its comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of its comparison.  Take A1 >= B1 ? X1 : Y1 as an example: it can be
   swapped to (B1 <= A1 ? X1 : Y1) or inverted to (A1 < B1) ? Y1 : X1.  */

static bool
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                       vec<gimple *> stmts, unsigned int group_size,
                       unsigned nops, unsigned int *max_nunits,
                       bool *matches, bool *two_operators)
{
  unsigned int i;
  gimple *first_stmt = stmts[0], *stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  HOST_WIDE_INT dummy;
  gimple *first_load = NULL, *prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
                                   max_nunits))
        {
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (call_stmt)
              || gimple_call_tail_p (call_stmt)
              || gimple_call_noreturn_p (call_stmt)
              || !gimple_call_nothrow_p (call_stmt)
              || gimple_call_chain (call_stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    call_stmt, 0);
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                         vect_location,
                                         "Build SLP failed: no optab.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                         vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && alt_stmt_code == ERROR_MARK)
            alt_stmt_code = rhs_code;
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              /* Handle mismatches in plus/minus by computing both
                 and merging the results.  */
              && !((first_stmt_code == PLUS_EXPR
                    || first_stmt_code == MINUS_EXPR)
                   && (alt_stmt_code == PLUS_EXPR
                       || alt_stmt_code == MINUS_EXPR)
                   && rhs_code == alt_stmt_code)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    stmt, 0);
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "original stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    first_stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                                   "arguments in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple *first_stmt = stmts[0];
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls "
                                       "in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              ;
            }
          else
            {
              /* Load.  */
              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                        }
                      /* Mismatch.  */
                      continue;
                    }
                }
              else
                prev_first_load = first_load;
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not a grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    stmt, 0);
                }

              /* FORNOW: Non-grouped loads are not supported.  */
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          /* Not a memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && TREE_CODE_CLASS (rhs_code) != tcc_expression
              && TREE_CODE_CLASS (rhs_code) != tcc_comparison
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    stmt, 0);
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);
              enum tree_code cond_code = TREE_CODE (cond_expr);
              enum tree_code swap_code = ERROR_MARK;
              enum tree_code invert_code = ERROR_MARK;

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
                {
                  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
                  swap_code = swap_tree_comparison (cond_code);
                  invert_code = invert_tree_comparison (cond_code, honor_nans);
                }

              if (first_cond_code == cond_code)
                ;
              /* Isomorphism can be achieved by swapping.  */
              else if (first_cond_code == swap_code)
                swap[i] = 1;
              /* Isomorphism can be achieved by inverting.  */
              else if (first_cond_code == invert_code)
                swap[i] = 2;
              else
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      unsigned int count = TYPE_VECTOR_SUBPARTS (vectype);
      auto_vec_perm_indices sel (count);
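      /* Lane i selects from the first-operation result vector, or from the
         second one (offset by COUNT) when stmt i uses the alternate code.
         E.g. a PLUS/MINUS pair with COUNT == 4 yields the blend mask
         { 0, 5, 2, 7 }.  */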
      for (i = 0; i < count; ++i)
        {
          unsigned int elt = i;
          if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
            elt += count;
          sel.quick_push (elt);
        }
      if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
        {
          for (i = 0; i < group_size; ++i)
            if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
              {
                matches[i] = false;
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different operation "
                                     "in stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      stmts[i], 0);
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "original stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      first_stmt, 0);
                  }
              }
          return false;
        }
      *two_operators = true;
    }

  return true;
}

/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove entries except at destruction time, so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <gimple *> value_type;
  typedef vec <gimple *> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]));
  return h.end ();
}
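/* gimple_uid is used as the per-stmt hash key above; the vectorizer assigns
   UIDs to all stmts in the region when creating their stmt_vec_infos, so
   they are stable for the duration of the SLP build.  */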
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
static scalar_stmts_set_t *bst_fail;

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<gimple *> stmts, unsigned int group_size,
                       unsigned int *max_nunits,
                       vec<slp_tree> *loads,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       unsigned max_tree_size);

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
                     vec<gimple *> stmts, unsigned int group_size,
                     unsigned int *max_nunits,
                     vec<slp_tree> *loads,
                     bool *matches, unsigned *npermutes, unsigned *tree_size,
                     unsigned max_tree_size)
{
  if (bst_fail->contains (stmts))
    return NULL;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
                                        loads, matches, npermutes, tree_size,
                                        max_tree_size);
  /* When the SLP build fails for STMTS, record this; otherwise the SLP
     build can be exponential in time when we allow building parts of the
     tree from scalars, see PR81723.  */
  if (! res)
    {
      vec <gimple *> x;
      x.create (stmts.length ());
      x.splice (stmts);
      bst_fail->add (x);
    }
  return res;
}
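/* The copied stmt vector becomes owned by BST_FAIL; bst_traits::remove
   releases it again when the set is destroyed.  */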

/* Recursively build an SLP tree starting from NODE.
   Fail (and return NULL) if def-stmts are not isomorphic, require data
   permutation, or are of unsupported types of operation.  Otherwise
   return the built SLP tree; *MATCHES indicates which stmts failed to
   match the first one.  */

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<gimple *> stmts, unsigned int group_size,
                       unsigned int *max_nunits,
                       vec<slp_tree> *loads,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0, this_max_nunits = *max_nunits;
  gimple *stmt;
  slp_tree node;

  matches[0] = false;

  stmt = stmts[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
                                   max_nunits))
        return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt));
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
        {
          FOR_EACH_VEC_ELT (stmts, i, stmt)
            if (stmt != stmts[0])
              return NULL;
        }
      else
        {
          /* Else def types have to match.  */
          FOR_EACH_VEC_ELT (stmts, i, stmt)
            {
              /* But for reduction chains only check on the first stmt.  */
              if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt)
                continue;
              if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type)
                return NULL;
            }
        }
      node = vect_create_new_slp_node (stmts);
      return node;
    }


  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
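  /* SWAP[i] will be filled in by vect_build_slp_tree_1: non-zero when stmt i
     is a COND_EXPR that matches the first stmt only after swapping (1) or
     inverting (2) its comparison.  */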
  if (!vect_build_slp_tree_1 (vinfo, swap,
                              stmts, group_size, nops,
                              &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);
      return node;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
                                             stmt, i, &oprnds_info);
      if (res != 0)
        matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
        break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
        vect_free_oprnd_info (oprnds_info);
        return NULL;
      }

  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  stmt = stmts[0];

  if (tree_size)
    max_tree_size -= *tree_size;

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt != vect_internal_def
          && oprnd_info->first_dt != vect_reduction_def
          && oprnd_info->first_dt != vect_induction_def)
        continue;

      if (++this_tree_size > max_tree_size)
        {
          FOR_EACH_VEC_ELT (children, j, child)
            vect_free_slp_tree (child);
          vect_free_oprnd_info (oprnds_info);
          return NULL;
        }

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                        group_size, &this_max_nunits,
                                        &this_loads, matches, npermutes,
                                        &this_tree_size,
                                        max_tree_size)) != NULL)
        {
          /* If we have all children of CHILD built up from scalars then just
             throw that away and build this node up from scalars instead.  */
          if (!SLP_TREE_CHILDREN (child).is_empty ()
              /* ??? Rejecting patterns this way doesn't work.  We'd have to
                 do extra work to cancel the pattern so the uses see the
                 scalar version.  */
              && !is_pattern_stmt_p
                    (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
            {
              slp_tree grandchild;

              FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                  break;
              if (!grandchild)
                {
                  /* Roll back.  */
                  this_loads.truncate (old_nloads);
                  this_tree_size = old_tree_size;
                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    vect_free_slp_tree (grandchild);
                  SLP_TREE_CHILDREN (child).truncate (0);

                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "Building parent vector operands from "
                                   "scalars instead\n");
                  oprnd_info->def_stmts = vNULL;
                  SLP_TREE_DEF_TYPE (child) = vect_external_def;
                  children.safe_push (child);
                  continue;
                }
            }

          oprnd_info->def_stmts = vNULL;
          children.safe_push (child);
          continue;
        }

      /* If the SLP build failed fatally and we analyze a basic-block
         simply treat nodes we fail to build as externally defined
         (and thus build vectors from the scalar defs).
         The cost model will reject outright expensive cases.
         ??? This doesn't treat cases where permutation ultimately
         fails (or we don't try permutation below).  Ideally we'd
         even compute a permutation that will end up with the maximum
         SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
          && !matches[0]
          /* ??? Rejecting patterns this way doesn't work.  We'd have to
             do extra work to cancel the pattern so the uses see the
             scalar version.  */
          && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Building vector operands from scalars\n");
          child = vect_create_new_slp_node (oprnd_info->def_stmts);
          SLP_TREE_DEF_TYPE (child) = vect_external_def;
          children.safe_push (child);
          oprnd_info->def_stmts = vNULL;
          continue;
        }

      /* If the SLP build for operand zero failed and operand zero
         and one can be commuted try that for the scalar stmts
         that failed the match.  */
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ??? For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt)
          && commutative_tree_code (gimple_assign_rhs_code (stmt))
          && ! two_operators
          /* Do so only if the number of unsuccessful permutes was no more
             than a cut-off, as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          && *npermutes < 4)
        {
          /* Verify if we can safely swap or if we committed to a specific
             operand order already.  */
          for (j = 0; j < group_size; ++j)
            if (!matches[j]
                && (swap[j] != 0
                    || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts[j]))))
              {
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: cannot swap operands "
                                     "of shared stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      stmts[j], 0);
                  }
                goto fail;
              }

          /* Swap mismatched definition stmts.  */
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Re-trying with swapped operands of stmts ");
          for (j = 0; j < group_size; ++j)
            if (!matches[j])
              {
                std::swap (oprnds_info[0]->def_stmts[j],
                           oprnds_info[1]->def_stmts[j]);
                dump_printf (MSG_NOTE, "%d ", j);
              }
          dump_printf (MSG_NOTE, "\n");
          /* And try again with scratch 'matches' ...  */
          bool *tem = XALLOCAVEC (bool, group_size);
          if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                            group_size, &this_max_nunits,
                                            &this_loads, tem, npermutes,
                                            &this_tree_size,
                                            max_tree_size)) != NULL)
            {
              /* ... so if successful we can apply the operand swapping
                 to the GIMPLE IL.  This is necessary because for example
                 vect_get_slp_defs uses operand indexes and thus expects
                 canonical operand order.  This is also necessary even
                 if we end up building the operand from scalars as
                 we'll continue to process swapped operand two.  */
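              /* GF_PLF_1 is first cleared on every stmt and then used below
                 to make sure each stmt (which may appear more than once in
                 STMTS) has its operands swapped exactly once.  */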
              for (j = 0; j < group_size; ++j)
                {
                  gimple *stmt = stmts[j];
                  gimple_set_plf (stmt, GF_PLF_1, false);
                }
              for (j = 0; j < group_size; ++j)
                {
                  gimple *stmt = stmts[j];
                  if (!matches[j])
                    {
                      /* Avoid swapping operands twice.  */
                      if (gimple_plf (stmt, GF_PLF_1))
                        continue;
                      swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                         gimple_assign_rhs2_ptr (stmt));
                      gimple_set_plf (stmt, GF_PLF_1, true);
                    }
                }
              /* Verify we swap all duplicates or none.  */
              if (flag_checking)
                for (j = 0; j < group_size; ++j)
                  {
                    gimple *stmt = stmts[j];
                    gcc_assert (gimple_plf (stmt, GF_PLF_1) == ! matches[j]);
                  }

              /* If we have all children of CHILD built up from scalars then
                 just throw that away and build this node up from scalars
                 instead.  */
              if (!SLP_TREE_CHILDREN (child).is_empty ()
                  /* ??? Rejecting patterns this way doesn't work.  We'd have
                     to do extra work to cancel the pattern so the uses see
                     the scalar version.  */
                  && !is_pattern_stmt_p
                        (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
                {
                  unsigned int j;
                  slp_tree grandchild;

                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                      break;
                  if (!grandchild)
                    {
                      /* Roll back.  */
                      this_loads.truncate (old_nloads);
                      this_tree_size = old_tree_size;
                      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j,
                                        grandchild)
                        vect_free_slp_tree (grandchild);
                      SLP_TREE_CHILDREN (child).truncate (0);

                      dump_printf_loc (MSG_NOTE, vect_location,
                                       "Building parent vector operands from "
                                       "scalars instead\n");
                      oprnd_info->def_stmts = vNULL;
                      SLP_TREE_DEF_TYPE (child) = vect_external_def;
                      children.safe_push (child);
                      continue;
                    }
                }

              oprnd_info->def_stmts = vNULL;
              children.safe_push (child);
              continue;
            }

          ++*npermutes;
        }

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
        vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  if (tree_size)
    *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}

/* Dump an SLP tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (dump_flags_t dump_kind, location_t loc, slp_tree node)
{
  int i;
  gimple *stmt;
  slp_tree child;

  dump_printf_loc (dump_kind, loc, "node%s\n",
                   SLP_TREE_DEF_TYPE (node) != vect_internal_def
                   ? " (external)" : "");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child);
}


/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple *stmt;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}


/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple *stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}


/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation)
{
  gimple *stmt;
  vec<gimple *> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);
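  /* Scatter: the stmt at position i moves to position PERMUTATION[i].  */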
1446 | |

1447 | FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt) |

1448 | tmp_stmts[permutation[i]] = stmt; |

1449 | |

1450 | SLP_TREE_SCALAR_STMTS (node).release (); |

1451 | SLP_TREE_SCALAR_STMTS (node) = tmp_stmts; |

1452 | } |

1453 | |

1454 | |

1455 | /* Attempt to reorder stmts in a reduction chain so that we don't |

1456 | require any load permutation. Return true if that was possible, |

1457 | otherwise return false. */ |

1458 | |

1459 | static bool |

1460 | vect_attempt_slp_rearrange_stmts (slp_instance slp_instn) |

1461 | { |

1462 | unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn); |

1463 | unsigned int i, j; |

1464 | unsigned int lidx; |

1465 | slp_tree node, load; |

1466 | |

1467 | /* Compare all the permutation sequences to the first one. We know |

1468 | that at least one load is permuted. */ |

1469 | node = SLP_INSTANCE_LOADS (slp_instn)[0]; |

1470 | if (!node->load_permutation.exists ()) |

1471 | return false; |

1472 | for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i) |

1473 | { |

1474 | if (!load->load_permutation.exists ()) |

1475 | return false; |

1476 | FOR_EACH_VEC_ELT (load->load_permutation, j, lidx) |

1477 | if (lidx != node->load_permutation[j]) |

1478 | return false; |

1479 | } |

1480 | |

1481 | /* Check that the loads in the first sequence are different and there |

1482 | are no gaps between them. */ |

1483 | auto_sbitmap load_index (group_size); |

1484 | bitmap_clear (load_index); |

1485 | FOR_EACH_VEC_ELT (node->load_permutation, i, lidx) |

1486 | { |

1487 | if (lidx >= group_size) |

1488 | return false; |

1489 | if (bitmap_bit_p (load_index, lidx)) |

1490 | return false; |

1491 | |

1492 | bitmap_set_bit (load_index, lidx); |

1493 | } |

1494 | for (i = 0; i < group_size; i++) |

1495 | if (!bitmap_bit_p (load_index, i)) |

1496 | return false; |

1497 | |

1498 | /* This permutation is valid for reduction. Since the order of the |

1499 | statements in the nodes is not important unless they are memory |

1500 | accesses, we can rearrange the statements in all the nodes |

1501 | according to the order of the loads. */ |

1502 | vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size, |

1503 | node->load_permutation); |

1504 | |

1505 | /* We are done, no actual permutations need to be generated. */ |

1506 | unsigned int unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn); |

1507 | FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) |

1508 | { |

1509 | gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0]; |

1510 | first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)); |

1511 | /* But we have to keep those permutations that are required because |

1512 | of handling of gaps. */ |

1513 | if (unrolling_factor == 1 |

1514 | || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt)) |

1515 | && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)) |

1516 | SLP_TREE_LOAD_PERMUTATION (node).release (); |

1517 | else |

1518 | for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j) |

1519 | SLP_TREE_LOAD_PERMUTATION (node)[j] = j; |

1520 | } |

1521 | |

1522 | return true; |

1523 | } |

1524 | |

1525 | /* Check if the required load permutations in the SLP instance |

1526 | SLP_INSTN are supported. */ |

1527 | |

1528 | static bool |

1529 | vect_supported_load_permutation_p (slp_instance slp_instn) |

1530 | { |

1531 | unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn); |

1532 | unsigned int i, j, k, next; |

1533 | slp_tree node; |

1534 | gimple *stmt, *load, *next_load; |

1535 | |

1536 | if (dump_enabled_p ()) |

1537 | { |

1538 | dump_printf_loc (MSG_NOTE, vect_location, "Load permutation "); |

1539 | FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) |

1540 | if (node->load_permutation.exists ()) |

1541 | FOR_EACH_VEC_ELT (node->load_permutation, j, next) |

1542 | dump_printf (MSG_NOTE, "%d ", next); |

1543 | else |

1544 | for (k = 0; k < group_size; ++k) |

1545 | dump_printf (MSG_NOTE, "%d ", k); |

1546 | dump_printf (MSG_NOTE, "\n"); |

1547 | } |

1548 | |

1549 | /* In case of reduction every load permutation is allowed, since the order |

1550 | of the reduction statements is not important (as opposed to the case of |

1551 | grouped stores). The only condition we need to check is that all the |

1552 | load nodes are of the same size and have the same permutation (and then |

1553 | rearrange all the nodes of the SLP instance according to this |

1554 | permutation). */ |

1555 | |

1556 | /* Check that all the load nodes are of the same size. */ |

1557 | /* ??? Can't we assert this? */ |

1558 | FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) |

1559 | if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size) |

1560 | return false; |

1561 | |

1562 | node = SLP_INSTANCE_TREE (slp_instn); |

1563 | stmt = SLP_TREE_SCALAR_STMTS (node)[0]; |

1564 | |

1565 | /* Reduction (there are no data-refs in the root). |

1566 | In reduction chain the order of the loads is not important. */ |

1567 | if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)) |

1568 | && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) |

1569 | vect_attempt_slp_rearrange_stmts (slp_instn); |

1570 | |

1571 | /* In basic block vectorization we allow any subchain of an interleaving |

1572 | chain. |

1573 | FORNOW: not supported in loop SLP because of realignment compications. */ |

1574 | if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt))) |

1575 | { |

1576 | /* Check whether the loads in an instance form a subchain and thus |

1577 | no permutation is necessary. */ |

1578 | FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) |

1579 | { |

1580 | if (!SLP_TREE_LOAD_PERMUTATION (node).exists ()) |

1581 | continue; |

1582 | bool subchain_p = true; |

1583 | next_load = NULL; |

1584 | FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load) |

1585 | { |

1586 | if (j != 0 |

1587 | && (next_load != load |

1588 | || GROUP_GAP (vinfo_for_stmt (load)) != 1)) |

1589 | { |

1590 | subchain_p = false; |

1591 | break; |

1592 | } |

1593 | next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load)); |

1594 | } |

1595 | if (subchain_p) |

1596 | SLP_TREE_LOAD_PERMUTATION (node).release (); |

1597 | else |

1598 | { |

1599 | stmt_vec_info group_info |

1600 | = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]); |

1601 | group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info)); |

1602 | unsigned nunits |

1603 | = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info)); |

1604 | unsigned k, maxk = 0; |

1605 | FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k) |

1606 | if (k > maxk) |

1607 | maxk = k; |

1608 | /* In BB vectorization we may not actually use a loaded vector |

1609 | accessing elements in excess of GROUP_SIZE. */ |
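/* For example, GROUP_SIZE of 6 with nunits of 4 rounds down to 4, so a
   permutation index of 4 or 5 would need the second, excess-accessing
   vector load that BB SLP does not emit.  */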

1610 | if (maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1))) |

1611 | { |

1612 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |

1613 | "BB vectorization with gaps at the end of " |

1614 | "a load is not supported\n"); |

1615 | return false; |

1616 | } |

1617 | |

1618 | /* Verify the permutation can be generated. */ |

1619 | vec<tree> tem; |

1620 | unsigned n_perms; |

1621 | if (!vect_transform_slp_perm_load (node, tem, NULL, |

1622 | 1, slp_instn, true, &n_perms)) |

1623 | { |

1624 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, |

1625 | vect_location, |

1626 | "unsupported load permutation\n"); |

1627 | return false; |

1628 | } |

1629 | } |

1630 | } |

1631 | return true; |

1632 | } |

1633 | |

1634 | /* For loop vectorization verify we can generate the permutation. Be |

1635 | conservative about the vectorization factor, there are permutations |

1636 | that will use three vector inputs only starting from a specific factor |

1637 | and the vectorization factor is not yet final. |

1638 | ??? The SLP instance unrolling factor might not be the maximum one. */ |
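/* E.g. an instance unrolling factor of 2 and a tentative loop
   vectorization factor of 8 give test_vf == least_common_multiple (2, 8)
   == 8 as the conservative factor to verify against.  */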

1639 | unsigned n_perms; |

1640 | unsigned test_vf |

1641 | = least_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), |

1642 | LOOP_VINFO_VECT_FACTOR |

1643 | (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)))); |

1644 | FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) |

1645 | if (node->load_permutation.exists () |

1646 | && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf, |

1647 | slp_instn, true, &n_perms)) |

1648 | return false; |

1649 | |

1650 | return true; |

1651 | } |

1652 | |

1653 | |

1654 | /* Find the last scalar stmt in the SLP tree rooted at NODE. */ |

1655 | |

1656 | gimple * |

1657 | vect_find_last_scalar_stmt_in_slp (slp_tree node) |

1658 | { |

1659 | gimple *last = NULL, *stmt; |

1660 | |

1661 | for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++) |

1662 | { |

1663 | stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); |

1664 | if (is_pattern_stmt_p (stmt_vinfo)) |

1665 | last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last); |

1666 | else |

1667 | last = get_later_stmt (stmt, last); |

1668 | } |

1669 | |

1670 | return last; |

1671 | } |

1672 | |

1673 | /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */ |

1674 | |

1675 | static void |

1676 | vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node, |

1677 | stmt_vector_for_cost *prologue_cost_vec, |

1678 | stmt_vector_for_cost *body_cost_vec, |

1679 | unsigned ncopies_for_cost, |

1680 | scalar_stmts_set_t* visited) |

1681 | { |

1682 | unsigned i, j; |

1683 | slp_tree child; |

1684 | gimple *stmt; |

1685 | stmt_vec_info stmt_info; |

1686 | tree lhs; |

1687 | |

1688 | /* If we already costed the exact same set of scalar stmts we're done. |

1689 | We share the generated vector stmts for those. */ |

1690 | if (visited->contains (SLP_TREE_SCALAR_STMTS (node))) |

1691 | return; |

1692 | |

1693 | visited->add (SLP_TREE_SCALAR_STMTS (node).copy ()); |

1694 | |

1695 | /* Recurse down the SLP tree. */ |

1696 | FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |

1697 | if (SLP_TREE_DEF_TYPE (child) == vect_internal_def) |

1698 | vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec, |

1699 | body_cost_vec, ncopies_for_cost, visited); |

1700 | |

1701 | /* Look at the first scalar stmt to determine the cost. */ |

1702 | stmt = SLP_TREE_SCALAR_STMTS (node)[0]; |

1703 | stmt_info = vinfo_for_stmt (stmt); |

1704 | if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) |

1705 | { |

1706 | vect_memory_access_type memory_access_type |

1707 | = (STMT_VINFO_STRIDED_P (stmt_info) |

1708 | ? VMAT_STRIDED_SLP |

1709 | : VMAT_CONTIGUOUS); |

1710 | if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info))) |

1711 | vect_model_store_cost (stmt_info, ncopies_for_cost, |

1712 | memory_access_type, vect_uninitialized_def, |

1713 | node, prologue_cost_vec, body_cost_vec); |

1714 | else |

1715 | { |

1716 | gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))); |

1717 | if (SLP_TREE_LOAD_PERMUTATION (node).exists ()) |

1718 | { |

1719 | /* If the load is permuted then the alignment is determined by |

1720 | the first group element not by the first scalar stmt DR. */ |

1721 | stmt = GROUP_FIRST_ELEMENT (stmt_info); |

1722 | stmt_info = vinfo_for_stmt (stmt); |

1723 | /* Record the cost for the permutation. */ |

1724 | unsigned n_perms; |

1725 | vect_transform_slp_perm_load (node, vNULL, NULL, |

1726 | ncopies_for_cost, instance, true, |

1727 | &n_perms); |

1728 | record_stmt_cost (body_cost_vec, n_perms, vec_perm, |

1729 | stmt_info, 0, vect_body); |

1730 | unsigned nunits |

1731 | = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)); |

1732 | /* And adjust the number of loads performed. This handles |

1733 | redundancies as well as loads that are later dead. */ |
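/* E.g. with GROUP_SIZE of 8, nunits of 4 and a permutation using only
   indices {0,1,2,3}, the second vector of the group is never referenced
   and the load count computed below drops from 2 to 1.  */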

1734 | auto_sbitmap perm (GROUP_SIZE (stmt_info)); |

1735 | bitmap_clear (perm); |

1736 | for (i = 0; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i) |

1737 | bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (node)[i]); |

1738 | ncopies_for_cost = 0; |

1739 | bool load_seen = false; |

1740 | for (i = 0; i < GROUP_SIZE (stmt_info); ++i) |

1741 | { |

1742 | if (i % nunits == 0) |

1743 | { |

1744 | if (load_seen) |

1745 | ncopies_for_cost++; |

1746 | load_seen = false; |

1747 | } |

1748 | if (bitmap_bit_p (perm, i)) |

1749 | load_seen = true; |

1750 | } |

1751 | if (load_seen) |

1752 | ncopies_for_cost++; |

1753 | gcc_assert (ncopies_for_cost |

1754 | <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info) |

1755 | + nunits - 1) / nunits); |

1756 | ncopies_for_cost *= SLP_INSTANCE_UNROLLING_FACTOR (instance); |

1757 | } |

1758 | /* Record the cost for the vector loads. */ |

1759 | vect_model_load_cost (stmt_info, ncopies_for_cost, |

1760 | memory_access_type, node, prologue_cost_vec, |

1761 | body_cost_vec); |

1762 | return; |

1763 | } |

1764 | } |

1765 | else if (STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type) |

1766 | { |

1767 | /* ncopies_for_cost is the number of IVs we generate. */ |

1768 | record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt, |

1769 | stmt_info, 0, vect_body); |

1770 | |

1771 | /* Prologue cost for the initial values and step vector. */ |

1772 | record_stmt_cost (prologue_cost_vec, ncopies_for_cost, |

1773 | CONSTANT_CLASS_P |

1774 | (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED |

1775 | (stmt_info)) |

1776 | ? vector_load : vec_construct, |

1777 | stmt_info, 0, vect_prologue); |

1778 | record_stmt_cost (prologue_cost_vec, 1, |

1779 | CONSTANT_CLASS_P |

1780 | (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info)) |

1781 | ? vector_load : vec_construct, |

1782 | stmt_info, 0, vect_prologue); |

1783 | |

1784 | /* ??? No easy way to get at the actual number of vector stmts |

1785 | to be generated and thus the derived IVs. */ |

1786 | } |

1787 | else |

1788 | { |

1789 | record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt, |

1790 | stmt_info, 0, vect_body); |

1791 | if (SLP_TREE_TWO_OPERATORS (node)) |

1792 | { |

1793 | record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt, |

1794 | stmt_info, 0, vect_body); |

1795 | record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm, |

1796 | stmt_info, 0, vect_body); |

1797 | } |

1798 | } |

1799 | |

1800 | /* Push SLP node def-type to stmts. */ |

1801 | FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |

1802 | if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) |

1803 | FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt) |

1804 | STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child); |

1805 | |

1806 | /* Scan operands and account for prologue cost of constants/externals. |

1807 | ??? This over-estimates cost for multiple uses and should be |

1808 | re-engineered. */ |

1809 | stmt = SLP_TREE_SCALAR_STMTS (node)[0]; |

1810 | lhs = gimple_get_lhs (stmt); |

1811 | for (i = 0; i < gimple_num_ops (stmt); ++i) |

1812 | { |

1813 | tree op = gimple_op (stmt, i); |

1814 | gimple *def_stmt; |

1815 | enum vect_def_type dt; |

1816 | if (!op || op == lhs) |

1817 | continue; |

1818 | if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt)) |

1819 | { |

1820 | /* Without looking at the actual initializer a vector of |

1821 | constants can be implemented as load from the constant pool. |

1822 | ??? We need to pass down stmt_info for a vector type |

1823 | even if it points to the wrong stmt. */ |

1824 | if (dt == vect_constant_def) |

1825 | record_stmt_cost (prologue_cost_vec, 1, vector_load, |

1826 | stmt_info, 0, vect_prologue); |

1827 | else if (dt == vect_external_def) |

1828 | record_stmt_cost (prologue_cost_vec, 1, vec_construct, |

1829 | stmt_info, 0, vect_prologue); |

1830 | } |

1831 | } |

1832 | |

1833 | /* Restore stmt def-types. */ |

1834 | FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |

1835 | if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) |

1836 | FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt) |

1837 | STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def; |

1838 | } |

1839 | |

1840 | /* Compute the cost for the SLP instance INSTANCE. */ |

1841 | |

1842 | static void |

1843 | vect_analyze_slp_cost (slp_instance instance, void *data) |

1844 | { |

1845 | stmt_vector_for_cost body_cost_vec, prologue_cost_vec; |

1846 | unsigned ncopies_for_cost; |

1847 | stmt_info_for_cost *si; |

1848 | unsigned i; |

1849 | |

1850 | if (dump_enabled_p ()) |

1851 | dump_printf_loc (MSG_NOTE, vect_location, |

1852 | "=== vect_analyze_slp_cost ===\n"); |

1853 | |

1854 | /* Calculate the number of vector stmts to create based on the unrolling |

1855 | factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is |

1856 | GROUP_SIZE / NUNITS otherwise). */ |

1857 | unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance); |

1858 | slp_tree node = SLP_INSTANCE_TREE (instance); |

1859 | stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]); |

1860 | /* Adjust the group_size by the vectorization factor which is always one |

1861 | for basic-block vectorization. */ |

1862 | if (STMT_VINFO_LOOP_VINFO (stmt_info)) |

1863 | group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info)); |

1864 | unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)); |

1865 | /* For reductions look at a reduction operand in case the reduction |

1866 | operation is widening like DOT_PROD or SAD. */ |

1867 | if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)) |

1868 | { |

1869 | gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]; |

1870 | switch (gimple_assign_rhs_code (stmt)) |

1871 | { |

1872 | case DOT_PROD_EXPR: |

1873 | case SAD_EXPR: |

1874 | nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type |

1875 | (TREE_TYPE (gimple_assign_rhs1 (stmt)))); |

1876 | break; |

1877 | default:; |

1878 | } |

1879 | } |

1880 | ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits; |
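/* E.g. nunits of 4 with an adjusted group_size of 6 gives
   least_common_multiple (4, 6) == 12 and thus 3 vector stmts to cost.  */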

1881 | |

1882 | prologue_cost_vec.create (10); |

1883 | body_cost_vec.create (10); |

1884 | scalar_stmts_set_t *visited = new scalar_stmts_set_t (); |

1885 | vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance), |

1886 | &prologue_cost_vec, &body_cost_vec, |

1887 | ncopies_for_cost, visited); |

1888 | delete visited; |

1889 | |

1890 | /* Record the prologue costs, which were delayed until we were |

1891 | sure that SLP was successful. */ |

1892 | FOR_EACH_VEC_ELT (prologue_cost_vec, i, si) |

1893 | { |

1894 | struct _stmt_vec_info *stmt_info |

1895 | = si->stmt ? vinfo_for_stmt (si->stmt) : NULL; |

1896 | (void) add_stmt_cost (data, si->count, si->kind, stmt_info, |

1897 | si->misalign, vect_prologue); |

1898 | } |

1899 | |

1900 | /* Record the instance's instructions in the target cost model. */ |

1901 | FOR_EACH_VEC_ELT (body_cost_vec, i, si) |

1902 | { |

1903 | struct _stmt_vec_info *stmt_info |

1904 | = si->stmt ? vinfo_for_stmt (si->stmt) : NULL; |

1905 | (void) add_stmt_cost (data, si->count, si->kind, stmt_info, |

1906 | si->misalign, vect_body); |

1907 | } |

1908 | |

1909 | prologue_cost_vec.release (); |

1910 | body_cost_vec.release (); |

1911 | } |

1912 | |

1913 | /* Splits a group of stores, currently beginning at FIRST_STMT, into two groups: |

1914 | one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing |

1915 | the first GROUP1_SIZE stmts, since stores are consecutive), the second |

1916 | containing the remainder. |

1917 | Return the first stmt in the second group. */ |

1918 | |

1919 | static gimple * |

1920 | vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size) |

1921 | { |

1922 | stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt); |

1923 | gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt); |

1924 | gcc_assert (group1_size > 0); |

1925 | int group2_size = GROUP_SIZE (first_vinfo) - group1_size; |

1926 | gcc_assert (group2_size > 0); |

1927 | GROUP_SIZE (first_vinfo) = group1_size; |

1928 | |

1929 | gimple *stmt = first_stmt; |

1930 | for (unsigned i = group1_size; i > 1; i--) |

1931 | { |

1932 | stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)); |

1933 | gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1); |

1934 | } |

1935 | /* STMT is now the last element of the first group. */ |

1936 | gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)); |

1937 | GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0; |

1938 | |

1939 | GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size; |

1940 | for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt))) |

1941 | { |

1942 | GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2; |

1943 | gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1); |

1944 | } |

1945 | |

1946 | /* For the second group, the GROUP_GAP is that before the original group, |

1947 | plus skipping over the first vector. */ |

1948 | GROUP_GAP (vinfo_for_stmt (group2)) = |

1949 | GROUP_GAP (first_vinfo) + group1_size; |

1950 | |

1951 | /* GROUP_GAP of the first group now has to skip over the second group too. */ |

1952 | GROUP_GAP (first_vinfo) += group2_size; |
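/* Worked example, assuming the original group's GROUP_GAP was 0:
   splitting a group of 8 at group1_size == 4 leaves both halves with
   GROUP_GAP == 4, so each half skips over the other half's elements
   when stepping to its next group instance.  */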

1953 | |

1954 | if (dump_enabled_p ()) |

1955 | dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n", |

1956 | group1_size, group2_size); |

1957 | |

1958 | return group2; |

1959 | } |

1960 | |

1961 | /* Analyze an SLP instance starting from a group of grouped stores. Call |

1962 | vect_build_slp_tree to build a tree of packed stmts if possible. |

1963 | Return FALSE if it's impossible to SLP any stmt in the loop. */ |

1964 | |

1965 | static bool |

1966 | vect_analyze_slp_instance (vec_info *vinfo, |

1967 | gimple *stmt, unsigned max_tree_size) |

1968 | { |

1969 | slp_instance new_instance; |

1970 | slp_tree node; |

1971 | unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt)); |

1972 | unsigned int unrolling_factor = 1, nunits; |

1973 | tree vectype, scalar_type = NULL_TREE; |

1974 | gimple *next; |

1975 | unsigned int i; |

1976 | unsigned int max_nunits = 0; |

1977 | vec<slp_tree> loads; |

1978 | struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)); |

1979 | vec<gimple *> scalar_stmts; |

1980 | |

1981 | if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) |

1982 | { |

1983 | if (dr) |

1984 | { |

1985 | scalar_type = TREE_TYPE (DR_REF (dr)); |

1986 | vectype = get_vectype_for_scalar_type (scalar_type); |

1987 | } |

1988 | else |

1989 | { |

1990 | gcc_assert (is_a <loop_vec_info> (vinfo)); |

1991 | vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)); |

1992 | } |

1993 | |

1994 | group_size = GROUP_SIZE (vinfo_for_stmt (stmt)); |

1995 | } |

1996 | else |

1997 | { |

1998 | gcc_assert (is_a <loop_vec_info> (vinfo)); |

1999 | vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)); |

2000 | group_size = as_a <loop_vec_info> (vinfo)->reductions.length (); |

2001 | } |

2002 | |

2003 | if (!vectype) |

2004 | { |

2005 | if (dump_enabled_p ()) |

2006 | { |

2007 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |

2008 | "Build SLP failed: unsupported data-type "); |

2009 | dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type); |

2010 | dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); |

2011 | } |

2012 | |

2013 | return false; |

2014 | } |

2015 | nunits = TYPE_VECTOR_SUBPARTS (vectype); |

2016 | |

2017 | /* Create a node (a root of the SLP tree) for the packed grouped stores. */ |

2018 | scalar_stmts.create (group_size); |

2019 | next = stmt; |

2020 | if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) |

2021 | { |

2022 | /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */ |

2023 | while (next) |

2024 | { |

2025 | if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next)) |

2026 | && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next))) |

2027 | scalar_stmts.safe_push ( |

2028 | STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next))); |

2029 | else |

2030 | scalar_stmts.safe_push (next); |

2031 | next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next)); |

2032 | } |

2033 | /* Mark the first element of the reduction chain as reduction to properly |

2034 | transform the node. In the reduction analysis phase only the last |

2035 | element of the chain is marked as reduction. */ |

2036 | if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))) |

2037 | STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def; |

2038 | } |

2039 | else |

2040 | { |

2041 | /* Collect reduction statements. */ |

2042 | vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions; |

2043 | for (i = 0; reductions.iterate (i, &next); i++) |

2044 | scalar_stmts.safe_push (next); |

2045 | } |

2046 | |

2047 | loads.create (group_size); |

2048 | |

2049 | /* Build the tree for the SLP instance. */ |

2050 | bool *matches = XALLOCAVEC (bool, group_size); |

2051 | unsigned npermutes = 0; |

2052 | bst_fail = new scalar_stmts_set_t (); |

2053 | node = vect_build_slp_tree (vinfo, scalar_stmts, group_size, |

2054 | &max_nunits, &loads, matches, &npermutes, |

2055 | NULL, max_tree_size); |

2056 | delete bst_fail; |

2057 | if (node != NULL) |

2058 | { |

2059 | /* Calculate the unrolling factor based on the smallest type. */ |

2060 | unrolling_factor |

2061 | = least_common_multiple (max_nunits, group_size) / group_size; |
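/* E.g. a group of size 4 whose smallest operand type yields
   max_nunits == 8 gets unrolling_factor == least_common_multiple (8, 4)
   / 4 == 2: two scalar group instances are needed to fill a vector.  */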

2062 | |

2063 | if (unrolling_factor != 1 |

2064 | && is_a <bb_vec_info> (vinfo)) |

2065 | { |

2066 | |

2067 | if (max_nunits > group_size) |

2068 | { |

2069 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |

2070 | "Build SLP failed: store group " |

2071 | "size not a multiple of the vector size " |

2072 | "in basic block SLP\n"); |

2073 | vect_free_slp_tree (node); |

2074 | loads.release (); |

2075 | return false; |

2076 | } |

2077 | /* Fatal mismatch. */ |

2078 | matches[group_size/max_nunits * max_nunits] = false; |

2079 | vect_free_slp_tree (node); |

2080 | loads.release (); |

2081 | } |

2082 | else |

2083 | { |

2084 | /* Create a new SLP instance. */ |

2085 | new_instance = XNEW (struct _slp_instance); |

2086 | SLP_INSTANCE_TREE (new_instance) = node; |

2087 | SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size; |

2088 | SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor; |

2089 | SLP_INSTANCE_LOADS (new_instance) = loads; |

2090 | |

2091 | /* Compute the load permutation. */ |

2092 | slp_tree load_node; |

2093 | bool loads_permuted = false; |

2094 | FOR_EACH_VEC_ELT (loads, i, load_node) |

2095 | { |

2096 | vec<unsigned> load_permutation; |

2097 | int j; |

2098 | gimple *load, *first_stmt; |

2099 | bool this_load_permuted = false; |

2100 | load_permutation.create (group_size); |

2101 | first_stmt = GROUP_FIRST_ELEMENT |

2102 | (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0])); |

2103 | FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load) |

2104 | { |

2105 | int load_place = vect_get_place_in_interleaving_chain |

2106 | (load, first_stmt); |

2107 | gcc_assert (load_place != -1); |

2108 | if (load_place != j) |

2109 | this_load_permuted = true; |

2110 | load_permutation.safe_push (load_place); |

2111 | } |

2112 | if (!this_load_permuted |

2113 | /* The load requires permutation when unrolling exposes |

2114 | a gap either because the group is larger than the SLP |

2115 | group-size or because there is a gap between the groups. */ |

2116 | && (unrolling_factor == 1 |

2117 | || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt)) |

2118 | && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))) |

2119 | { |

2120 | load_permutation.release (); |

2121 | continue; |

2122 | } |

2123 | SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation; |

2124 | loads_permuted = true; |

2125 | } |

2126 | |

2127 | if (loads_permuted) |

2128 | { |

2129 | if (!vect_supported_load_permutation_p (new_instance)) |

2130 | { |

2131 | if (dump_enabled_p ()) |

2132 | { |

2133 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |

2134 | "Build SLP failed: unsupported load " |

2135 | "permutation "); |

2136 | dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, |

2137 | TDF_SLIM, stmt, 0); |

2138 | } |

2139 | vect_free_slp_instance (new_instance); |

2140 | return false; |

2141 | } |

2142 | } |

2143 | |

2144 | /* If the loads and stores can be handled with load/store-lanes |

2145 | instructions do not generate this SLP instance. */ |

2146 | if (is_a <loop_vec_info> (vinfo) |

2147 | && loads_permuted |

2148 | && dr && vect_store_lanes_supported (vectype, group_size)) |

2149 | { |

2150 | slp_tree load_node; |

2151 | FOR_EACH_VEC_ELT (loads, i, load_node) |

2152 | { |

2153 | gimple *first_stmt = GROUP_FIRST_ELEMENT |

2154 | (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0])); |

2155 | stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt); |

2156 | /* Use SLP for strided accesses (or if we |

2157 | can't load-lanes). */ |

2158 | if (STMT_VINFO_STRIDED_P (stmt_vinfo) |

2159 | || ! vect_load_lanes_supported |

2160 | (STMT_VINFO_VECTYPE (stmt_vinfo), |

2161 | GROUP_SIZE (stmt_vinfo))) |

2162 | break; |

2163 | } |

2164 | if (i == loads.length ()) |

2165 | { |

2166 | if (dump_enabled_p ()) |

2167 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |

2168 | "Built SLP cancelled: can use " |

2169 | "load/store-lanes\n"); |

2170 | vect_free_slp_instance (new_instance); |

2171 | return false; |

2172 | } |

2173 | } |

2174 | |

2175 | vinfo->slp_instances.safe_push (new_instance); |

2176 | |

2177 | if (dump_enabled_p ()) |

2178 | { |

2179 | dump_printf_loc (MSG_NOTE, vect_location, |

2180 | "Final SLP tree for instance:\n"); |

2181 | vect_print_slp_tree (MSG_NOTE, vect_location, node); |

2182 | } |

2183 | |

2184 | return true; |

2185 | } |

2186 | } |

2187 | else |

2188 | { |

2189 | /* Failed to SLP. */ |

2190 | /* Free the allocated memory. */ |

2191 | scalar_stmts.release (); |

2192 | loads.release (); |

2193 | } |

2194 | |

2195 | /* For basic block SLP, try to break the group up into multiples of the |

2196 | vector size. */ |

2197 | if (is_a <bb_vec_info> (vinfo) |

2198 | && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) |

2199 | && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))) |

2200 | { |

2201 | /* We consider breaking the group only on VF boundaries from the existing |

2202 | start. */ |

2203 | for (i = 0; i < group_size; i++) |

2204 | if (!matches[i]) break; |

2205 | |

2206 | if (i >= nunits && i < group_size) |

2207 | { |

2208 | /* Split into two groups at the first vector boundary before i. */ |

2209 | gcc_assert ((nunits & (nunits - 1)) == 0); |

2210 | unsigned group1_size = i & ~(nunits - 1); |
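/* E.g. nunits == 4 with the first mismatch at i == 6 gives
   group1_size == (6 & ~3) == 4; the partially mismatching second
   vector is handled separately below.  */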

2211 | |

2212 | gimple *rest = vect_split_slp_store_group (stmt, group1_size); |

2213 | bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size); |

2214 | /* If the first non-match was in the middle of a vector, |

2215 | skip the rest of that vector. */ |

2216 | if (group1_size < i) |

2217 | { |

2218 | i = group1_size + nunits; |

2219 | if (i < group_size) |

2220 | rest = vect_split_slp_store_group (rest, nunits); |

2221 | } |

2222 | if (i < group_size) |

2223 | res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size); |

2224 | return res; |

2225 | } |

2226 | /* Even though the first vector did not all match, we might be able to SLP |

2227 | (some) of the remainder. FORNOW ignore this possibility. */ |

2228 | } |

2229 | |

2230 | return false; |

2231 | } |

2232 | |

2233 | |

2234 | /* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP |

2235 | trees of packed scalar stmts if SLP is possible. */ |

2236 | |

2237 | bool |

2238 | vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size) |

2239 | { |

2240 | unsigned int i; |

2241 | gimple *first_element; |

2242 | |

2243 | if (dump_enabled_p ()) |

2244 | dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n"); |

2245 | |

2246 | /* Find SLP sequences starting from groups of grouped stores. */ |

2247 | FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element) |

2248 | vect_analyze_slp_instance (vinfo, first_element, max_tree_size); |

2249 | |

2250 | if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo)) |

2251 | { |

2252 | if (loop_vinfo->reduction_chains.length () > 0) |

2253 | { |

2254 | /* Find SLP sequences starting from reduction chains. */ |

2255 | FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element) |

2256 | if (! vect_analyze_slp_instance (vinfo, first_element, |

2257 | max_tree_size)) |

2258 | { |

2259 | /* Dissolve reduction chain group. */ |

2260 | gimple *next, *stmt = first_element; |

2261 | while (stmt) |

2262 | { |

2263 | stmt_vec_info vinfo = vinfo_for_stmt (stmt); |

2264 | next = GROUP_NEXT_ELEMENT (vinfo); |

2265 | GROUP_FIRST_ELEMENT (vinfo) = NULL; |

2266 | GROUP_NEXT_ELEMENT (vinfo) = NULL; |

2267 | stmt = next; |

2268 | } |

2269 | STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element)) |

2270 | = vect_internal_def; |

2271 | } |

2272 | } |

2273 | |

2274 | /* Find SLP sequences starting from groups of reductions. */ |

2275 | if (loop_vinfo->reductions.length () > 1) |

2276 | vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0], |

2277 | max_tree_size); |

2278 | } |

2279 | |

2280 | return true; |

2281 | } |

2282 | |

2283 | |

2284 | /* For each possible SLP instance decide whether to SLP it and calculate overall |

2285 | unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at |

2286 | least one instance. */ |

2287 | |

2288 | bool |

2289 | vect_make_slp_decision (loop_vec_info loop_vinfo) |

2290 | { |

2291 | unsigned int i, unrolling_factor = 1; |

2292 | vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo); |

2293 | slp_instance instance; |

2294 | int decided_to_slp = 0; |

2295 | |

2296 | if (dump_enabled_p ()) |

2297 | dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ===" |

2298 | "\n"); |

2299 | |

2300 | FOR_EACH_VEC_ELT (slp_instances, i, instance) |

2301 | { |

2302 | /* FORNOW: SLP if you can. */ |

2303 | if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance)) |

2304 | unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance); |

2305 | |

2306 | /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we |

2307 | call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and |

2308 | loop-based vectorization. Such stmts will be marked as HYBRID. */ |

2309 | vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1); |

2310 | decided_to_slp++; |

2311 | } |

2312 | |

2313 | LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor; |

2314 | |

2315 | if (decided_to_slp && dump_enabled_p ()) |

2316 | dump_printf_loc (MSG_NOTE, vect_location, |

2317 | "Decided to SLP %d instances. Unrolling factor %d\n", |

2318 | decided_to_slp, unrolling_factor); |

2319 | |

2320 | return (decided_to_slp > 0); |

2321 | } |

2322 | |

2323 | |

2324 | /* Find stmts that must be both vectorized and SLPed (since they feed stmts that |

2325 | can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */ |

2326 | |

2327 | static void |

2328 | vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype) |

2329 | { |

2330 | gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i]; |

2331 | imm_use_iterator imm_iter; |

2332 | gimple *use_stmt; |

2333 | stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt); |

2334 | slp_tree child; |

2335 | loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); |

2336 | struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |

2337 | int j; |

2338 | |

2339 | /* Propagate hybrid down the SLP tree. */ |

2340 | if (stype == hybrid) |

2341 | ; |

2342 | else if (HYBRID_SLP_STMT (stmt_vinfo)) |

2343 | stype = hybrid; |

2344 | else |

2345 | { |

2346 | /* Check if a pure SLP stmt has uses in non-SLP stmts. */ |

2347 | gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo)); |

2348 | /* If we get a pattern stmt here we have to use the LHS of the |

2349 | original stmt for immediate uses. */ |

2350 | if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo) |

2351 | && STMT_VINFO_RELATED_STMT (stmt_vinfo)) |

2352 | stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo); |

2353 | tree def; |

2354 | if (gimple_code (stmt) == GIMPLE_PHI) |

2355 | def = gimple_phi_result (stmt); |

2356 | else |

2357 | def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF); |

2358 | if (def) |

2359 | FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def) |

2360 | { |

2361 | if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) |

2362 | continue; |

2363 | use_vinfo = vinfo_for_stmt (use_stmt); |

2364 | if (STMT_VINFO_IN_PATTERN_P (use_vinfo) |

2365 | && STMT_VINFO_RELATED_STMT (use_vinfo)) |

2366 | use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo)); |

2367 | if (!STMT_SLP_TYPE (use_vinfo) |

2368 | && (STMT_VINFO_RELEVANT (use_vinfo) |

2369 | || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo))) |

2370 | && !(gimple_code (use_stmt) == GIMPLE_PHI |

2371 | && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def)) |

2372 | { |

2373 | if (dump_enabled_p ()) |

2374 | { |

2375 | dump_printf_loc (MSG_NOTE, vect_location, "use of SLP " |

2376 | "def in non-SLP stmt: "); |

2377 | dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0); |

2378 | } |

2379 | stype = hybrid; |

2380 | } |

2381 | } |

2382 | } |

2383 | |

2384 | if (stype == hybrid |

2385 | && !HYBRID_SLP_STMT (stmt_vinfo)) |

2386 | { |

2387 | if (dump_enabled_p ()) |

2388 | { |

2389 | dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: "); |

2390 | dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); |

2391 | } |

2392 | STMT_SLP_TYPE (stmt_vinfo) = hybrid; |

2393 | } |

2394 | |

2395 | FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) |

2396 | if (SLP_TREE_DEF_TYPE (child) != vect_external_def) |

2397 | vect_detect_hybrid_slp_stmts (child, i, stype); |

2398 | } |

2399 | |

2400 | /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */ |

2401 | |

2402 | static tree |

2403 | vect_detect_hybrid_slp_1 (tree *tp, int *, void *data) |

2404 | { |

2405 | walk_stmt_info *wi = (walk_stmt_info *)data; |

2406 | struct loop *loopp = (struct loop *)wi->info; |

2407 | |

2408 | if (wi->is_lhs) |

2409 | return NULL_TREE; |

2410 | |

2411 | if (TREE_CODE (*tp) == SSA_NAME |

2412 | && !SSA_NAME_IS_DEFAULT_DEF (*tp)) |

2413 | { |

2414 | gimple *def_stmt = SSA_NAME_DEF_STMT (*tp); |

2415 | if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt)) |

2416 | && PURE_SLP_STMT (vinfo_for_stmt (def_stmt))) |

2417 | { |

2418 | if (dump_enabled_p ()) |

2419 | { |

2420 | dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: "); |

2421 | dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0); |

2422 | } |

2423 | STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid; |

2424 | } |

2425 | } |

2426 | |

2427 | return NULL_TREE; |

2428 | } |

2429 | |

2430 | static tree |

2431 | vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled, |

2432 | walk_stmt_info *) |

2433 | { |

2434 | stmt_vec_info use_vinfo = vinfo_for_stmt (gsi_stmt (*gsi)); |

2435 | /* If the stmt is in a SLP instance then this isn't a reason |

2436 | to mark use definitions in other SLP instances as hybrid. */ |

2437 | if (! STMT_SLP_TYPE (use_vinfo) |

2438 | && (STMT_VINFO_RELEVANT (use_vinfo) |

2439 | || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo))) |

2440 | && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI |

2441 | && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def)) |

2442 | ; |

2443 | else |

2444 | *handled = true; |

2445 | return NULL_TREE; |

2446 | } |

2447 | |

2448 | /* Find stmts that must be both vectorized and SLPed. */ |

2449 | |

2450 | void |

2451 | vect_detect_hybrid_slp (loop_vec_info loop_vinfo) |

2452 | { |

2453 | unsigned int i; |

2454 | vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo); |

2455 | slp_instance instance; |

2456 | |

2457 | if (dump_enabled_p ()) |

2458 | dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ===" |

2459 | "\n"); |

2460 | |

2461 | /* First walk all pattern stmts in the loop and mark the defs of their |

2462 | uses as hybrid, because immediate uses inside pattern stmts are not recorded. */ |

2463 | for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i) |

2464 | { |

2465 | basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i]; |

2466 | for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); |

2467 | gsi_next (&gsi)) |

2468 | { |

2469 | gimple *stmt = gsi_stmt (gsi); |

2470 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); |

2471 | if (STMT_VINFO_IN_PATTERN_P (stmt_info)) |

2472 | { |

2473 | walk_stmt_info wi; |

2474 | memset (&wi, 0, sizeof (wi)); |

2475 | wi.info = LOOP_VINFO_LOOP (loop_vinfo); |

2476 | gimple_stmt_iterator gsi2 |

2477 | = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); |

2478 | walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2, |

2479 | vect_detect_hybrid_slp_1, &wi); |

2480 | walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info), |

2481 | vect_detect_hybrid_slp_2, |

2482 | vect_detect_hybrid_slp_1, &wi); |

2483 | } |

2484 | } |

2485 | } |

2486 | |

2487 | /* Then walk the SLP instance trees marking stmts with uses in |

2488 | non-SLP stmts as hybrid, also propagating hybrid down the |

2489 | SLP tree, collecting the above info on-the-fly. */ |

2490 | FOR_EACH_VEC_ELT (slp_instances, i, instance) |

2491 | { |

2492 | for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i) |

2493 | vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance), |

2494 | i, pure_slp); |

2495 | } |

2496 | } |

2497 | |

2498 | |

2499 | /* Initialize a bb_vec_info struct for the statements between |

2500 | REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */ |

2501 | |

2502 | _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in, |

2503 | gimple_stmt_iterator region_end_in) |

2504 | : vec_info (vec_info::bb, init_cost (NULL)), |

2505 | bb (gsi_bb (region_begin_in)), |

2506 | region_begin (region_begin_in), |

2507 | region_end (region_end_in) |

2508 | { |

2509 | gimple_stmt_iterator gsi; |

2510 | |

2511 | for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end); |

2512 | gsi_next (&gsi)) |

2513 | { |

2514 | gimple *stmt = gsi_stmt (gsi); |

2515 | gimple_set_uid (stmt, 0); |

2516 | set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this)); |

2517 | } |

2518 | |

2519 | bb->aux = this; |

2520 | } |

2521 | |

2522 | |

2523 | /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the |

2524 | stmts in the basic block. */ |

2525 | |

2526 | _bb_vec_info::~_bb_vec_info () |

2527 | { |

2528 | for (gimple_stmt_iterator si = region_begin; |

2529 | gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si)) |

2530 | { |

2531 | gimple *stmt = gsi_stmt (si); |

2532 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); |

2533 | |

2534 | if (stmt_info) |

2535 | /* Free stmt_vec_info. */ |

2536 | free_stmt_vec_info (stmt); |

2537 | |

2538 | /* Reset region marker. */ |

2539 | gimple_set_uid (stmt, -1); |

2540 | } |

2541 | |

2542 | bb->aux = NULL; |

2543 | } |

2544 | |

2545 | |

2546 | /* Analyze statements contained in SLP tree NODE after recursively analyzing |

2547 | the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE. |

2548 | |

2549 | Return true if the operations are supported. */ |

2550 | |

2551 | static bool |

2552 | vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node, |

2553 | slp_instance node_instance) |

2554 | { |

2555 | bool dummy; |

2556 | int i, j; |

2557 | gimple *stmt; |

2558 | slp_tree child; |

2559 | |

2560 | if (SLP_TREE_DEF_TYPE (node) != vect_internal_def) |

2561 | return true; |

2562 | |

2563 | FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |

2564 | if (!vect_slp_analyze_node_operations (vinfo, child, node_instance)) |

2565 | return false; |

2566 | |

2567 | stmt = SLP_TREE_SCALAR_STMTS (node)[0]; |

2568 | stmt_vec_info stmt_info = vinfo_for_stmt (stmt); |

2569 | gcc_assert (stmt_info); |

2570 | gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect); |

2571 | |

2572 | /* For BB vectorization vector types are assigned here. |

2573 | Memory accesses already got their vector type assigned |

2574 | in vect_analyze_data_refs. */ |

2575 | bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); |

2576 | if (bb_vinfo |

2577 | && ! STMT_VINFO_DATA_REF (stmt_info)) |

2578 | { |

2579 | gcc_assert (PURE_SLP_STMT (stmt_info)); |

2580 | |

2581 | tree scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); |

2582 | if (dump_enabled_p ()) |

2583 | { |

2584 | dump_printf_loc (MSG_NOTE, vect_location, |

2585 | "get vectype for scalar type: "); |

2586 | dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type); |

2587 | dump_printf (MSG_NOTE, "\n"); |

2588 | } |

2589 | |

2590 | tree vectype = get_vectype_for_scalar_type (scalar_type); |

2591 | if (!vectype) |

2592 | { |

2593 | if (dump_enabled_p ()) |

2594 | { |

2595 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |

2596 | "not SLPed: unsupported data-type "); |

2597 | dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, |

2598 | scalar_type); |

2599 | dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); |

2600 | } |

2601 | return false; |

2602 | } |

2603 | |

2604 | if (dump_enabled_p ()) |

2605 | { |

2606 | dump_printf_loc (MSG_NOTE, vect_location, "vectype: "); |

2607 | dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype); |

2608 | dump_printf (MSG_NOTE, "\n"); |

2609 | } |

2610 | |

2611 | gimple *sstmt; |

2612 | FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt) |

2613 | STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype; |

2614 | } |

2615 | |

2616 | /* Calculate the number of vector statements to be created for the |

2617 | scalar stmts in this node. For SLP reductions it is equal to the |

2618 | number of vector statements in the children (which has already been |

2619 | calculated by the recursive call). Otherwise it is the number of |

2620 | scalar elements in one scalar iteration (GROUP_SIZE) multiplied by |

2621 | VF divided by the number of elements in a vector. */ |
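/* E.g. a node of 2 scalar stmts with VF == 4 and 4-element vectors
   needs 4 * 2 / 4 == 2 vector stmts.  */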

2622 | if (GROUP_FIRST_ELEMENT (stmt_info) |

2623 | && !STMT_VINFO_GROUPED_ACCESS (stmt_info)) |

2624 | SLP_TREE_NUMBER_OF_VEC_STMTS (node) |

2625 | = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]); |

2626 | else |

2627 | { |

2628 | int vf; |

2629 | if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo)) |

2630 | vf = loop_vinfo->vectorization_factor; |

2631 | else |

2632 | vf = 1; |

2633 | unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance); |

2634 | tree vectype = STMT_VINFO_VECTYPE (stmt_info); |

2635 | SLP_TREE_NUMBER_OF_VEC_STMTS (node) |

2636 | = vf * group_size / TYPE_VECTOR_SUBPARTS (vectype); |

2637 | } |

2638 | |

2639 | /* Push SLP node def-type to stmt operands. */ |

2640 | FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) |

2641 | if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) |

2642 | STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])) |

2643 | = SLP_TREE_DEF_TYPE (child); |

2644 | bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance); |

2645 | /* Restore def-types. */ |

2646 | FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) |

2647 | if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) |

2648 | STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])) |

2649 | = vect_internal_def; |

2650 | if (! res) |

2651 | return false; |

2652 | |

2653 | return true; |

2654 | } |

2655 | |

2656 | |

2657 | /* Analyze statements in SLP instances of VINFO. Return true if the |

2658 | operations are supported. */ |

2659 | |

2660 | bool |

2661 | vect_slp_analyze_operations (vec_info *vinfo) |

2662 | { |

2663 | slp_instance instance; |

2664 | int i; |

2665 | |

2666 | if (dump_enabled_p ()) |

2667 | dump_printf_loc (MSG_NOTE, vect_location, |

2668 | "=== vect_slp_analyze_operations ===\n"); |

2669 | |

2670 | for (i = 0; vinfo->slp_instances.iterate (i, &instance); ) |

2671 | { |

2672 | if (!vect_slp_analyze_node_operations (vinfo, |

2673 | SLP_INSTANCE_TREE (instance), |

2674 | instance)) |

2675 | { |

2676 | dump_printf_loc (MSG_NOTE, vect_location, |

2677 | "removing SLP instance operations starting from: "); |

2678 | dump_gimple_stmt (MSG_NOTE, TDF_SLIM, |

2679 | SLP_TREE_SCALAR_STMTS |

2680 | (SLP_INSTANCE_TREE (instance))[0], 0); |

2681 | vect_free_slp_instance (instance); |

2682 | vinfo->slp_instances.ordered_remove (i); |

2683 | } |

2684 | else |

2685 | { |

2686 | /* Compute the costs of the SLP instance. */ |

2687 | vect_analyze_slp_cost (instance, vinfo->target_cost_data); |

2688 | i++; |

2689 | } |

2690 | } |

2691 | |

2692 | return !vinfo->slp_instances.is_empty (); |

2693 | } |

2694 | |

2695 | |

2696 | /* Compute the scalar cost of the SLP node NODE and its children |

2697 | and return it. Do not account defs that are marked in LIFE and |

2698 | update LIFE according to uses of NODE. */ |

2699 | |

2700 | static unsigned |

2701 | vect_bb_slp_scalar_cost (basic_block bb, |

2702 | slp_tree node, vec<bool, va_heap> *life) |

2703 | { |

2704 | unsigned scalar_cost = 0; |

2705 | unsigned i; |

2706 | gimple *stmt; |

2707 | slp_tree child; |

2708 | |

2709 | FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt) |

2710 | { |

2711 | unsigned stmt_cost; |

2712 | ssa_op_iter op_iter; |

2713 | def_operand_p def_p; |

2714 | stmt_vec_info stmt_info; |

2715 | |

2716 | if ((*life)[i]) |

2717 | continue; |

2718 | |

2719 | /* If there is a non-vectorized use of the defs then the scalar |

2720 | stmt is kept live in which case we do not account it or any |

2721 | required defs in the SLP children in the scalar cost. This |

2722 | way we make the vectorization more costly when compared to |

2723 | the scalar cost. */ |

2724 | FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF) |

2725 | { |

2726 | imm_use_iterator use_iter; |

2727 | gimple *use_stmt; |

2728 | FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p)) |

2729 | if (!is_gimple_debug (use_stmt) |

2730 | && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo, |

2731 | use_stmt) |

2732 | || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt)))) |

2733 | { |

2734 | (*life)[i] = true; |

2735 | BREAK_FROM_IMM_USE_STMT (use_iter); |

2736 | } |

2737 | } |

2738 | if ((*life)[i]) |

2739 | continue; |

2740 | |

2741 | /* Count scalar stmts only once. */ |

2742 | if (gimple_visited_p (stmt)) |

2743 | continue; |

2744 | gimple_set_visited (stmt, true); |

2745 | |

2746 | stmt_info = vinfo_for_stmt (stmt); |

2747 | if (STMT_VINFO_DATA_REF (stmt_info)) |

2748 | { |

2749 | if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))) |

2750 | stmt_cost = vect_get_stmt_cost (scalar_load); |

2751 | else |

2752 | stmt_cost = vect_get_stmt_cost (scalar_store); |

2753 | } |

2754 | else |

2755 | stmt_cost = vect_get_stmt_cost (scalar_stmt); |

2756 | |

2757 | scalar_cost += stmt_cost; |

2758 | } |

2759 | |

2760 | auto_vec<bool, 20> subtree_life; |

2761 | FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |

2762 | { |

2763 | if (SLP_TREE_DEF_TYPE (child) == vect_internal_def) |

2764 | { |

2765 | /* Do not directly pass LIFE to the recursive call, copy it to |

2766 | confine changes in the callee to the current child/subtree. */ |

2767 | subtree_life.safe_splice (*life); |

2768 | scalar_cost += vect_bb_slp_scalar_cost (bb, child, &subtree_life); |

2769 | subtree_life.truncate (0); |

2770 | } |

2771 | } |

2772 | |

2773 | return scalar_cost; |

2774 | } |

2775 | |

2776 | /* Check if vectorization of the basic block is profitable. */ |

2777 | |

2778 | static bool |

2779 | vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo) |

2780 | { |

2781 | vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo); |

2782 | slp_instance instance; |

2783 | int i; |

2784 | unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0; |

2785 | unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0; |

2786 | |

2787 | /* Calculate scalar cost. */ |

2788 | FOR_EACH_VEC_ELT (slp_instances, i, instance) |

2789 | { |

2790 | auto_vec<bool, 20> life; |

2791 | life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance)); |

2792 | scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo), |

2793 | SLP_INSTANCE_TREE (instance), |

2794 | &life); |

2795 | } |

2796 | |

2797 | /* Unset visited flag. */ |

2798 | for (gimple_stmt_iterator gsi = bb_vinfo->region_begin; |

2799 | gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi)) |

2800 | gimple_set_visited (gsi_stmt (gsi), false); |

2801 | |

2802 | /* Complete the target-specific cost calculation. */ |

2803 | finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost, |

2804 | &vec_inside_cost, &vec_epilogue_cost); |

2805 | |

2806 | vec_outside_cost = vec_prologue_cost + vec_epilogue_cost; |

2807 | |

2808 | if (dump_enabled_p ()) |

2809 | { |

2810 | dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n"); |

2811 | dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n", |

2812 | vec_inside_cost); |

2813 | dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost); |

2814 | dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost); |

2815 | dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost); |

2816 | } |

2817 | |

2818 | /* Vectorization is profitable if its cost is no more than the cost of the |

2819 | scalar version. Note that we err on the vector side for equal cost because |

2820 | the cost estimate is otherwise quite pessimistic (constant uses are |

2821 | free on the scalar side but cost a load on the vector side for |

2822 | example). */ |
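/* E.g. vec_outside_cost + vec_inside_cost == 8 against scalar_cost == 8
   still vectorizes; only a strictly larger vector cost gives up.  */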

2823 | if (vec_outside_cost + vec_inside_cost > scalar_cost) |

2824 | return false; |

2825 | |

2826 | return true; |

2827 | } |

2828 | |

2829 | /* Check if the basic block can be vectorized. Returns a bb_vec_info |

2830 | if so and sets fatal to true if failure is independent of |

2831 | current_vector_size. */ |

2832 | |

2833 | static bb_vec_info |

2834 | vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin, |

2835 | gimple_stmt_iterator region_end, |

2836 | vec<data_reference_p> datarefs, int n_stmts, |

2837 | bool &fatal) |

2838 | { |

2839 | bb_vec_info bb_vinfo; |

2840 | slp_instance instance; |

2841 | int i; |

2842 | int min_vf = 2; |

2843 | |

2844 | /* The first group of checks is independent of the vector size. */ |

2845 | fatal = true; |

2846 | |

2847 | if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) |

2848 | { |

2849 | if (dump_enabled_p ()) |

2850 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |

2851 | "not vectorized: too many instructions in " |

2852 | "basic block.\n"); |

2853 | free_data_refs (datarefs); |

2854 | return NULL; |

2855 | } |

2856 | |

2857 | bb_vinfo = new _bb_vec_info (region_begin, region_end); |

2858 | if (!bb_vinfo) |

2859 | return NULL; |

2860 | |

2861 | BB_VINFO_DATAREFS (bb_vinfo) = datarefs; |

2862 | |

2863 | /* Analyze the data references. */ |

2864 | |

2865 | if (!vect_analyze_data_refs (bb_vinfo, &min_vf)) |

2866 | { |

2867 | if (dump_enabled_p ()) |

2868 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |

2869 | "not vectorized: unhandled data-ref in basic " |

2870 | "block.\n"); |

2871 | |

2872 | delete bb_vinfo; |

2873 | return NULL; |

2874 | } |

2875 | |

2876 | if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2) |

2877 | { |

2878 | if (dump_enabled_p ()) |

2879 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |

2880 | "not vectorized: not enough data-refs in " |

2881 | "basic block.\n"); |

2882 | |

2883 | delete bb_vinfo; |

2884 | return NULL; |

2885 | } |

2886 | |

2887 | if (!vect_analyze_data_ref_accesses (bb_vinfo)) |

2888 | { |

2889 | if (dump_enabled_p ()) |

2890 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |

2891 | "not vectorized: unhandled data access in " |

2892 | "basic block.\n"); |

2893 | |

2894 | delete bb_vinfo; |

2895 | return NULL; |

2896 | } |

2897 | |

2898 | /* If there are no grouped stores in the region there is no need |

2899 | to continue with pattern recog as vect_analyze_slp will fail |

2900 | anyway. */ |

2901 | if (bb_vinfo->grouped_stores.is_empty ()) |

2902 | { |

2903 | if (dump_enabled_p ()) |

2904 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |

2905 | "not vectorized: no grouped stores in " |

2906 | "basic block.\n"); |

2907 | |

2908 | delete bb_vinfo; |

2909 | return NULL; |

2910 | } |

2911 | |

2912 | /* The checks from here on depend on the vector size in some way. */ |

2913 | fatal = false; |

2914 | |

2915 | vect_pattern_recog (bb_vinfo); |

2916 | |

2917 | /* Check the SLP opportunities in the basic block, analyze and build SLP |

2918 | trees. */ |

2919 | if (!vect_analyze_slp (bb_vinfo, n_stmts)) |

2920 | { |

2921 | if (dump_enabled_p ()) |

2922 | { |

2923 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |

2924 | "Failed to SLP the basic block.\n"); |

2925 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |

2926 | "not vectorized: failed to find SLP opportunities " |

2927 | "in basic block.\n"); |

2928 | } |

2929 | |

2930 | delete bb_vinfo; |

2931 | return NULL; |

2932 | } |

2933 | |

2934 | vect_record_base_alignments (bb_vinfo); |

2935 | |

2936 | /* Analyze and verify the alignment of data references and the |

2937 | dependence in the SLP instances. */ |

2938 | for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); ) |

2939 | { |

2940 | if (! vect_slp_analyze_and_verify_instance_alignment (instance) |

2941 | || ! vect_slp_analyze_instance_dependence (instance)) |

2942 | { |

2943 | dump_printf_loc (MSG_NOTE, vect_location, |

2944 | "removing SLP instance operations starting from: "); |

2945 | dump_gimple_stmt (MSG_NOTE, TDF_SLIM, |

2946 | SLP_TREE_SCALAR_STMTS |

2947 | (SLP_INSTANCE_TREE (instance))[0], 0); |

2948 | vect_free_slp_instance (instance); |

2949 | BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i); |

2950 | continue; |

2951 | } |

2952 | |

2953 | /* Mark all the statements that we want to vectorize as pure SLP and |

2954 | relevant. */ |

2955 | vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1); |

2956 | vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance)); |

2957 | |

2958 | i++; |

2959 | } |

2960 | if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ()) |

2961 | { |

2962 | delete bb_vinfo; |

2963 | return NULL; |

2964 | } |

2965 | |

2966 | if (!vect_slp_analyze_operations (bb_vinfo)) |

2967 | { |

2968 | if (dump_enabled_p ()) |

2969 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |

2970 | "not vectorized: bad operation in basic block.\n"); |

2971 | |

2972 | delete bb_vinfo; |

2973 | return NULL; |

2974 | } |

2975 | |

2976 | /* Cost model: check if the vectorization is worthwhile. */ |

2977 | if (!unlimited_cost_model (NULL) |

2978 | && !vect_bb_vectorization_profitable_p (bb_vinfo)) |

2979 | { |

2980 | if (dump_enabled_p ()) |

2981 | dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |

2982 | "not vectorized: vectorization is not " |

2983 | "profitable.\n"); |

2984 | |

2985 | delete bb_vinfo; |

2986 | return NULL; |

2987 | } |

2988 | |

2989 | if (dump_enabled_p ()) |

2990 | dump_printf_loc (MSG_NOTE, vect_location, |

2991 | "Basic block will be vectorized using SLP\n"); |

2992 | |

2993 | return bb_vinfo; |

2994 | } |

2995 | |

2996 | |

2997 | /* Main entry for the BB vectorizer. Analyze and transform BB, returns |

2998 | true if anything in the basic-block was vectorized. */ |

2999 | |

3000 | bool |

3001 | vect_slp_bb (basic_block bb) |

3002 | { |

3003 | bb_vec_info bb_vinfo; |

3004 | gimple_stmt_iterator gsi; |

3005 | unsigned int vector_sizes; |

3006 | bool any_vectorized = false; |

3007 | |

3008 | if (dump_enabled_p ()) |

3009 | dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n"); |

3010 | |

3011 | /* Autodetect first vector size we try. */ |

3012 | current_vector_size = 0; |

3013 | vector_sizes = targetm.vectorize.autovectorize_vector_sizes (); |

  gsi = gsi_start_bb (bb);

  while (1)
    {
      if (gsi_end_p (gsi))
        break;

      gimple_stmt_iterator region_begin = gsi;
      vec<data_reference_p> datarefs = vNULL;
      int insns = 0;

      for (; !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          if (is_gimple_debug (stmt))
            continue;
          insns++;

          if (gimple_location (stmt) != UNKNOWN_LOCATION)
            vect_location = gimple_location (stmt);

          if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
            break;
        }

      /* Skip leading unhandled stmts.  */
      if (gsi_stmt (region_begin) == gsi_stmt (gsi))
        {
          gsi_next (&gsi);
          continue;
        }

      gimple_stmt_iterator region_end = gsi;

      bool vectorized = false;
      bool fatal = false;
      bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
                                        datarefs, insns, fatal);
      if (bb_vinfo
          && dbg_cnt (vect_slp))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");

          vect_schedule_slp (bb_vinfo);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "basic block part vectorized\n");

          vectorized = true;
        }
      delete bb_vinfo;

      any_vectorized |= vectorized;

      vector_sizes &= ~current_vector_size;
      if (vectorized
          || vector_sizes == 0
          || current_vector_size == 0
          /* If vect_slp_analyze_bb_1 signaled that analysis for all
             vector sizes will fail do not bother iterating.  */
          || fatal)
        {
          if (gsi_end_p (region_end))
            break;

          /* Skip the unhandled stmt.  */
          gsi_next (&gsi);

          /* And reset vector sizes.  */
          current_vector_size = 0;
          vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
        }
      else
        {
          /* Try the next biggest vector size.  */
          current_vector_size = 1 << floor_log2 (vector_sizes);
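          /* FLOOR_LOG2 yields the position of the most-significant set bit,
             i.e. the largest supported size not yet tried; the bit of each
             already-tried size was cleared from VECTOR_SIZES above.  */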

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "***** Re-trying analysis with "
                             "vector size %d\n", current_vector_size);

          /* Start over.  */
          gsi = region_begin;
        }
    }

  return any_vectorized;
}


/* Return true if the vector type of the boolean constant that is the OPNUM
   operand of statement STMT is a boolean vector.  */

static bool
vect_mask_constant_operand_p (gimple *stmt, int opnum)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  enum tree_code code = gimple_expr_code (stmt);
  tree op, vectype;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* For comparisons and COND_EXPRs the type is chosen depending
     on the other comparison operand.  */
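  /* The constant's type must agree with the vectorized type of the other
     comparison operand, so for OPNUM == 0 we look at rhs2 and for
     OPNUM == 1 at rhs1 below; e.g. for B_1 == 0 the vector type chosen for
     the constant 0 follows the one of B_1's definition.  */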

  if (TREE_CODE_CLASS (code) == tcc_comparison)
    {
      if (opnum)
        op = gimple_assign_rhs1 (stmt);
      else
        op = gimple_assign_rhs2 (stmt);

      if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
                               &dt, &vectype))
        gcc_unreachable ();

      return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
    }

  if (code == COND_EXPR)
    {
      tree cond = gimple_assign_rhs1 (stmt);

      if (TREE_CODE (cond) == SSA_NAME)
        op = cond;
      else if (opnum)
        op = TREE_OPERAND (cond, 1);
      else
        op = TREE_OPERAND (cond, 0);

      if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
                               &dt, &vectype))
        gcc_unreachable ();

      return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
    }

  return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
}


/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS
   of the scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to
   create.  */
static void
vect_get_constant_vectors (tree op, slp_tree slp_node,
                           vec<tree> *vec_oprnds,
                           unsigned int op_num, unsigned int number_of_vectors)
{
  vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple *stmt = stmts[0];
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned nunits;
  tree vec_cst;
  unsigned j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = stmts.length ();
  unsigned int vec_num, i;
  unsigned number_of_copies = 1;
  vec<tree> voprnds;
  voprnds.create (number_of_vectors);
  bool constant_p, is_store;
  tree neutral_op = NULL;
  enum tree_code code = gimple_expr_code (stmt);
  gimple_seq ctor_seq = NULL;

  /* Check if the vector type is a boolean vector.  */
  if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
      && vect_mask_constant_operand_p (stmt, op_num))
    vector_type
      = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
  else
    vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    is_store = false;

  gcc_assert (op);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., a group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2} (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = nunits * number_of_vectors / group_size;
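  /* Checking this on the examples above: one vector built from a group of
     two scalars gives 4 * 1 / 2 == 2 copies, and two vectors built from a
     group of eight give 4 * 2 / 8 == 1 copy.  */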

  number_of_places_left_in_vector = nunits;
  constant_p = true;
  tree_vector_builder elts (vector_type, nunits, 1);
  elts.quick_grow (nunits);
  bool place_after_defs = false;
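  /* The group is walked backwards below and each vector is filled from its
     last element, so scalars keep their order within a vector while whole
     vectors are completed last-first; VOPRNDS is inverted after the loop.  */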

  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
            {
              switch (code)
                {
                  case COND_EXPR:
                    {
                      tree cond = gimple_assign_rhs1 (stmt);
                      if (TREE_CODE (cond) == SSA_NAME)
                        op = gimple_op (stmt, op_num + 1);
                      else if (op_num == 0 || op_num == 1)
                        op = TREE_OPERAND (cond, op_num);
                      else
                        {
                          if (op_num == 2)
                            op = gimple_assign_rhs2 (stmt);
                          else
                            op = gimple_assign_rhs3 (stmt);
                        }
                    }
                    break;

                  case CALL_EXPR:
                    op = gimple_call_arg (stmt, op_num);
                    break;

                  case LSHIFT_EXPR:
                  case RSHIFT_EXPR:
                  case LROTATE_EXPR:
                  case RROTATE_EXPR:
                    op = gimple_op (stmt, op_num + 1);
                    /* Unlike the other binary operators, shifts/rotates have
                       the shift count being int, instead of the same type as
                       the lhs, so make sure the scalar is the right type if
                       we are dealing with vectors of
                       long long/long/short/char.  */
                    if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
                      op = fold_convert (TREE_TYPE (vector_type), op);
                    break;

                  default:
                    op = gimple_op (stmt, op_num + 1);
                    break;
                }
            }

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          number_of_places_left_in_vector--;
          tree orig_op = op;
          if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
            {
              if (CONSTANT_CLASS_P (op))
                {
                  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
                    {
                      /* Can't use VIEW_CONVERT_EXPR for booleans because
                         of possibly different sizes of scalar value and
                         vector element.  */
                      if (integer_zerop (op))
                        op = build_int_cst (TREE_TYPE (vector_type), 0);
                      else if (integer_onep (op))
                        op = build_all_ones_cst (TREE_TYPE (vector_type));
                      else
                        gcc_unreachable ();
                    }
                  else
                    op = fold_unary (VIEW_CONVERT_EXPR,
                                     TREE_TYPE (vector_type), op);
                  gcc_assert (op && CONSTANT_CLASS_P (op));
                }
              else
                {
                  tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
                  gimple *init_stmt;
                  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
                    {
                      tree true_val
                        = build_all_ones_cst (TREE_TYPE (vector_type));
                      tree false_val
                        = build_zero_cst (TREE_TYPE (vector_type));
                      gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
                      init_stmt = gimple_build_assign (new_temp, COND_EXPR,
                                                       op, true_val,
                                                       false_val);
                    }
                  else
                    {
                      op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
                                   op);
                      init_stmt
                        = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
                                               op);
                    }
                  gimple_seq_add_stmt (&ctor_seq, init_stmt);
                  op = new_temp;
                }
            }
          elts[number_of_places_left_in_vector] = op;
          if (!CONSTANT_CLASS_P (op))
            constant_p = false;
          if (TREE_CODE (orig_op) == SSA_NAME
              && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
              && STMT_VINFO_BB_VINFO (stmt_vinfo)
              && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
                  == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
            place_after_defs = true;

          if (number_of_places_left_in_vector == 0)
            {
              if (constant_p)
                vec_cst = elts.build ();
              else
                {
                  vec<constructor_elt, va_gc> *v;
                  unsigned k;
                  vec_alloc (v, nunits);
                  for (k = 0; k < nunits; ++k)
                    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
                  vec_cst = build_constructor (vector_type, v);
                }
              tree init;
              gimple_stmt_iterator gsi;
              if (place_after_defs)
                {
                  gsi = gsi_for_stmt
                          (vect_find_last_scalar_stmt_in_slp (slp_node));
                  init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
                }
              else
                init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
              if (ctor_seq != NULL)
                {
                  gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
                  gsi_insert_seq_before_without_update (&gsi, ctor_seq,
                                                        GSI_SAME_STMT);
                  ctor_seq = NULL;
                }
              voprnds.quick_push (init);
              place_after_defs = false;
              number_of_places_left_in_vector = nunits;
              constant_p = true;
              elts.new_vector (vector_type, nunits, 1);
              elts.quick_grow (nunits);
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = voprnds.length ();
  for (j = vec_num; j != 0; j--)
    {
      vop = voprnds[j - 1];
      vec_oprnds->quick_push (vop);
    }

  voprnds.release ();

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
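  /* E.g. if two vector defs were built but four are required, the loop
     below pushes cyclic copies of the existing defs (NEUTRAL_OP is never
     set in this function) until NUMBER_OF_VECTORS defs exist.  */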

  while (number_of_vectors > vec_oprnds->length ())
    {
      tree neutral_vec = NULL;

      if (neutral_op)
        {
          if (!neutral_vec)
            neutral_vec = build_vector_from_val (vector_type, neutral_op);

          vec_oprnds->quick_push (neutral_vec);
        }
      else
        {
          for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
            vec_oprnds->quick_push (vop);
        }
    }
}


/* Get vectorized definitions from SLP_NODE that contains corresponding
   vectorized def-stmts.  */

static void
vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
{
  tree vec_oprnd;
  gimple *vec_def_stmt;
  unsigned int i;

  gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());

  FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
    {
      gcc_assert (vec_def_stmt);
      if (gimple_code (vec_def_stmt) == GIMPLE_PHI)
        vec_oprnd = gimple_phi_result (vec_def_stmt);
      else
        vec_oprnd = gimple_get_lhs (vec_def_stmt);
      vec_oprnds->quick_push (vec_oprnd);
    }
}


/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them
   and call vect_get_constant_vectors () to create vector stmts.
   Otherwise, the def-stmts must already be vectorized and the vectorized
   stmts must be stored in the corresponding child of SLP_NODE, and we call
   vect_get_slp_vect_defs () to retrieve them.  */

void
vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
                   vec<vec<tree> > *vec_oprnds)
{
  gimple *first_stmt;
  int number_of_vects = 0, i;
  unsigned int child_index = 0;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
  slp_tree child = NULL;
  vec<tree> vec_defs;
  tree oprnd;
  bool vectorized_defs;

  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
  FOR_EACH_VEC_ELT (ops, i, oprnd)
    {
      /* For each operand we check if it has vectorized definitions in a
         child node or we need to create them (for invariants and constants).
         We check if the LHS of the first stmt of the next child matches
         OPRND.  If it does, we found the correct child.  Otherwise, we call
         vect_get_constant_vectors () and do not advance CHILD_INDEX, so that
         this child node is checked again for the next operand.  */

      vectorized_defs = false;
      if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
        {
          child = SLP_TREE_CHILDREN (slp_node)[child_index];

          /* We have to check both pattern and original def, if available.  */
          if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
            {
              gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
              gimple *related
                = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
              tree first_def_op;

              if (gimple_code (first_def) == GIMPLE_PHI)
                first_def_op = gimple_phi_result (first_def);
              else
                first_def_op = gimple_get_lhs (first_def);
              if (operand_equal_p (oprnd, first_def_op, 0)
                  || (related
                      && operand_equal_p (oprnd,
                                          gimple_get_lhs (related), 0)))
                {
                  /* The number of vector defs is determined by the number of
                     vector statements in the node from which we get those
                     statements.  */
                  number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
                  vectorized_defs = true;
                  child_index++;
                }
            }
          else
            child_index++;
        }

      if (!vectorized_defs)
        {
          if (i == 0)
            {
              number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
              /* Number of vector stmts was calculated according to LHS in
                 vect_schedule_slp_instance (), fix it by replacing LHS with
                 RHS, if necessary.  See vect_get_smallest_scalar_type () for
                 details.  */
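              /* E.g. for a truncation from int to char the RHS scalars are
                 four times as wide as the LHS ones, so four times as many
                 vector defs are needed for the operand: the count is scaled
                 by 4 / 1 below.  */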

              vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                             &rhs_size_unit);
              if (rhs_size_unit != lhs_size_unit)
                {
                  number_of_vects *= rhs_size_unit;
                  number_of_vects /= lhs_size_unit;
                }
            }
        }

      /* Allocate memory for vectorized defs.  */
      vec_defs = vNULL;
      vec_defs.create (number_of_vects);

      /* For reduction defs we call vect_get_constant_vectors (), since we are
         looking for initial loop invariant values.  */
      if (vectorized_defs)
        /* The defs are already vectorized.  */
        vect_get_slp_vect_defs (child, &vec_defs);
      else
        /* Build vectors from scalar defs.  */
        vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
                                   number_of_vects);

      vec_oprnds->quick_push (vec_defs);
    }
}


/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for the SLP node NODE of the SLP instance
   SLP_NODE_INSTANCE.  */

bool
vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
                              gimple_stmt_iterator *gsi, int vf,
                              slp_instance slp_node_instance, bool analyze_only,
                              unsigned *n_perms)
{
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int nunits, vec_index = 0;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  int mask_element;
  machine_mode mode;

  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));

  mode = TYPE_MODE (vectype);

  /* The generic VEC_PERM_EXPR code always uses an integral type of the
     same size as the vector element being permuted.  */
  mask_element_type = lang_hooks.types.type_for_mode
    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
  mask_type = get_vectype_for_scalar_type (mask_element_type);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  auto_vec_perm_indices mask (nunits);
  mask.quick_grow (nunits);

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  if (! analyze_only)
    for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
         i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
      SLP_TREE_VEC_STMTS (node).quick_push (NULL);

  /* Generate permutation masks for every NODE.  Number of masks for each NODE
     is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have an
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for a permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */

  int vect_stmts_counter = 0;
  int index = 0;
  int first_vec_index = -1;
  int second_vec_index = -1;
  bool noop_p = true;
  *n_perms = 0;

  for (int j = 0; j < vf; j++)
    {
      for (int k = 0; k < group_size; k++)
        {

          int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
                   + j * STMT_VINFO_GROUP_SIZE (stmt_info));
          vec_index = i / nunits;
          mask_element = i % nunits;
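          /* I is the position of this scalar in the unrolled load sequence,
             so VEC_INDEX is the loaded vector holding it and MASK_ELEMENT its
             lane there; MASK_ELEMENT is biased by NUNITS below when the lane
             comes from the second input of the two-operand permute.  */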

          if (vec_index == first_vec_index
              || first_vec_index == -1)
            {
              first_vec_index = vec_index;
            }
          else if (vec_index == second_vec_index
                   || second_vec_index == -1)
            {
              second_vec_index = vec_index;
              mask_element += nunits;
            }
          else
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "permutation requires at "
                                   "least three vectors ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    stmt, 0);
                }
              gcc_assert (analyze_only);
              return false;
            }

          gcc_assert (mask_element >= 0
                      && mask_element < 2 * nunits);
          if (mask_element != index)
            noop_p = false;
          mask[index++] = mask_element;

          if (index == nunits)
            {
              if (! noop_p
                  && ! can_vec_perm_p (mode, false, &mask))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                       vect_location,
                                       "unsupported vect permute { ");
                      for (i = 0; i < nunits; ++i)
                        dump_printf (MSG_MISSED_OPTIMIZATION, "%d ", mask[i]);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
                    }
                  gcc_assert (analyze_only);
                  return false;
                }

              if (! noop_p)
                ++*n_perms;

              if (!analyze_only)
                {
                  tree mask_vec = NULL_TREE;

                  if (! noop_p)
                    {
                      tree_vector_builder mask_elts (mask_type, nunits, 1);
                      for (int l = 0; l < nunits; ++l)
                        mask_elts.quick_push (build_int_cst (mask_element_type,
                                                             mask[l]));
                      mask_vec = mask_elts.build ();
                    }

                  if (second_vec_index == -1)
                    second_vec_index = first_vec_index;

                  /* Generate the permute statement if necessary.  */
                  tree first_vec = dr_chain[first_vec_index];
                  tree second_vec = dr_chain[second_vec_index];
                  gimple *perm_stmt;
                  if (! noop_p)
                    {
                      tree perm_dest
                        = vect_create_destination_var
                            (gimple_assign_lhs (stmt), vectype);
                      perm_dest = make_ssa_name (perm_dest);
                      perm_stmt = gimple_build_assign (perm_dest,
                                                       VEC_PERM_EXPR,
                                                       first_vec, second_vec,
                                                       mask_vec);
                      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
                    }
                  else
                    /* If mask was NULL_TREE generate the requested
                       identity transform.  */
                    perm_stmt = SSA_NAME_DEF_STMT (first_vec);

                  /* Store the vector statement in NODE.  */
                  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
                }

              index = 0;
              first_vec_index = -1;
              second_vec_index = -1;
              noop_p = true;
            }
        }
    }

  return true;
}


typedef hash_map <vec <gimple *>, slp_tree,
                  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;
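/* BST_MAP maps the scalar stmts of an already-vectorized SLP node to that
   node, so that subtrees appearing more than once in an instance's SLP tree
   are vectorized only once and their vector stmts reused (see
   vect_schedule_slp_instance below).  */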


/* Vectorize SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            scalar_stmts_to_slp_tree_map_t *bst_map)
{
  gimple *stmt;
  bool grouped_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int group_size;
  tree vectype;
  int i, j;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return false;

  /* See if we have already vectorized the same set of stmts and reuse their
     vectorized stmts.  */
  slp_tree &leader
    = bst_map->get_or_insert (SLP_TREE_SCALAR_STMTS (node).copy ());
  if (leader)
    {
      SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (leader));
      return false;
    }

  leader = node;
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (child, instance, bst_map);

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt))
          = SLP_TREE_DEF_TYPE (child);

  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  if (!SLP_TREE_VEC_STMTS (node).exists ())
    SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "------>vectorizing SLP node starting from: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* Vectorized stmts go before the last scalar stmt which is where
     all uses are ready.  */
  si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (GROUP_FIRST_ELEMENT (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  /* Handle two-operation SLP nodes by vectorizing the group with
     both operations and then performing a merge.  */
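  /* E.g. for { a0 + b0, a1 - b1, a2 + b2, a3 - b3 } the node is vectorized
     once with all lanes doing PLUS_EXPR and once with all lanes doing
     MINUS_EXPR, and a VEC_PERM_EXPR then picks each lane from the vector
     computed with that lane's original operation.  */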

  if (SLP_TREE_TWO_OPERATORS (node))
    {
      enum tree_code code0 = gimple_assign_rhs_code (stmt);
      enum tree_code ocode = ERROR_MARK;
      gimple *ostmt;
      auto_vec_perm_indices mask (group_size);
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
        if (gimple_assign_rhs_code (ostmt) != code0)
          {
            mask.quick_push (1);
            ocode = gimple_assign_rhs_code (ostmt);
          }
        else
          mask.quick_push (0);
      if (ocode != ERROR_MARK)
        {
          vec<gimple *> v0;
          vec<gimple *> v1;
          unsigned j;
          tree tmask = NULL_TREE;
          vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
          v0 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
          gimple_assign_set_rhs_code (stmt, ocode);
          vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
          gimple_assign_set_rhs_code (stmt, code0);
          v1 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
          tree meltype = build_nonstandard_integer_type
            (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
          tree mvectype = get_same_sized_vectype (meltype, vectype);
          unsigned k = 0, l;
          for (j = 0; j < v0.length (); ++j)
            {
              unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
              tree_vector_builder melts (mvectype, nunits, 1);
              for (l = 0; l < nunits; ++l)
                {
                  if (k >= group_size)
                    k = 0;
                  tree t = build_int_cst (meltype, mask[k++] * nunits + l);
                  melts.quick_push (t);
                }
              tmask = melts.build ();

              /* ??? Not all targets support a VEC_PERM_EXPR with a
                 constant mask that would translate to a vec_merge RTX
                 (with their vec_perm_const_ok).  We can either not
                 vectorize in that case or let veclower do its job.
                 Unfortunately that isn't too great and at least for
                 plus/minus we'd eventually like to match a target's
                 vector addsub instructions.  */
              gimple *vstmt;
              vstmt = gimple_build_assign (make_ssa_name (vectype),
                                           VEC_PERM_EXPR,
                                           gimple_assign_lhs (v0[j]),
                                           gimple_assign_lhs (v1[j]), tmask);
              vect_finish_stmt_generation (stmt, vstmt, &si);
              SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
            }
          v0.release ();
          v1.release ();
          return false;
        }
    }
  is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;

  return is_store;
}


/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until the end of vect_schedule_slp, because
   multiple SLP instances may refer to the same scalar stmt.  */

static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  gimple *stmt, *new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
        continue;
      stmt_info = vinfo_for_stmt (stmt);
      if (stmt_info == NULL
          || is_pattern_stmt_p (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi = gsi_for_stmt (stmt);
      gsi_replace (&gsi, new_stmt, false);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}


/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (vec_info *vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i;
  bool is_store = false;

  slp_instances = vinfo->slp_instances;
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      scalar_stmts_to_slp_tree_map_t *bst_map
        = new scalar_stmts_to_slp_tree_map_t ();
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance, bst_map);
      delete bst_map;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vectorizing stmts using SLP.\n");
    }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple *store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      /* Remove scalar call stmts.  Do not do this for basic-block
         vectorization as not all uses may be vectorized.
         ??? Why should this be necessary?  DCE should be able to
         remove the stmts itself.
         ??? For BB vectorization we can as well remove scalar
         stmts starting from the SLP tree root if they have no
         uses.  */
      if (is_a <loop_vec_info> (vinfo))
        vect_remove_slp_scalar_calls (root);

      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
            break;

          if (is_pattern_stmt_p (vinfo_for_stmt (store)))
            store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
          /* Free the attached stmt_vec_info and remove the stmt.  */
          gsi = gsi_for_stmt (store);
          unlink_stmt_vdef (store);
          gsi_remove (&gsi, true);
          release_defs (store);
          free_stmt_vec_info (store);
        }
    }

  return is_store;
}