Warning: That file was not part of the compilation database. It may have many parsing errors.

1 | /**************************************************************************** |
---|---|

2 | ** |

3 | ** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com |

4 | ** Contact: https://www.qt.io/licensing/ |

5 | ** |

6 | ** This file is part of the QtGui module of the Qt Toolkit. |

7 | ** |

8 | ** $QT_BEGIN_LICENSE:LGPL$ |

9 | ** Commercial License Usage |

10 | ** Licensees holding valid commercial Qt licenses may use this file in |

11 | ** accordance with the commercial license agreement provided with the |

12 | ** Software or, alternatively, in accordance with the terms contained in |

13 | ** a written agreement between you and The Qt Company. For licensing terms |

14 | ** and conditions see https://www.qt.io/terms-conditions. For further |

15 | ** information use the contact form at https://www.qt.io/contact-us. |

16 | ** |

17 | ** GNU Lesser General Public License Usage |

18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |

19 | ** General Public License version 3 as published by the Free Software |

20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |

21 | ** packaging of this file. Please review the following information to |

22 | ** ensure the GNU Lesser General Public License version 3 requirements |

23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |

24 | ** |

25 | ** GNU General Public License Usage |

26 | ** Alternatively, this file may be used under the terms of the GNU |

27 | ** General Public License version 2.0 or (at your option) the GNU General |

28 | ** Public license version 3 or any later version approved by the KDE Free |

29 | ** Qt Foundation. The licenses are as published by the Free Software |

30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |

31 | ** included in the packaging of this file. Please review the following |

32 | ** information to ensure the GNU General Public License requirements will |

33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |

34 | ** https://www.gnu.org/licenses/gpl-3.0.html. |

35 | ** |

36 | ** $QT_END_LICENSE$ |

37 | ** |

38 | ****************************************************************************/ |

39 | |

40 | #include "qt_mips_asm_dsp_p.h" |

41 | |

LEAF_MIPS_DSP(destfetchARGB32_asm_mips_dsp)
/*
 * Reads 'length' 32-bit ARGB pixels from src and writes them to dst with
 * R, G and B each scaled by the pixel's own alpha; the alpha byte itself is
 * copied through unchanged.  The scaling is done per 16-bit lane as
 * (x + (x >> 8) + 0x80) >> 8, where x = channel * alpha.
 *
 * a0 - buffer address (dst)
 * a1 - data address (src)
 * a2 - length
 *
 * v0 - set to the incoming a0 (start of dst)
 *
 * When length is odd the first pixel is converted on its own; everything
 * else is processed two pixels per loop iteration.
 */

    beqz              a2, 2f
     move             v0, a0          /* delay slot, runs on both paths:
                                       * v0 = start of dst */
    andi              t1, a2, 0x1
    li                t7, 0x800080    /* rounding term for both 16-bit lanes */
    beqz              t1, 1f          /* even length: pair loop only */
     nop

    /* odd length: convert one leading pixel */
    lw                t8, 0(a1)
    addiu             a2, a2, -1
    srl               t6, t8, 24      /* t6 = alpha */

    preceu.ph.qbra    t0, t8          /* t0 = | 0 | R | 0 | B | */
    mul               t1, t0, t6      /* t1 = | R * alpha | B * alpha | */
    preceu.ph.qbla    t4, t8          /* t4 = | 0 | A | 0 | G | */
    mul               t5, t4, t6      /* t5 = | A * alpha | G * alpha | */

    preceu.ph.qbla    t2, t1
    addq.ph           t3, t1, t2
    addq.ph           t3, t3, t7
    preceu.ph.qbla    t1, t3          /* t1 holds R & B blended with alpha
                                       * | 0 | dRab | 0 | dBab | */
    preceu.ph.qbla    t2, t5
    addq.ph           t3, t2, t5
    addq.ph           t4, t3, t7
    preceu.ph.qbla    t2, t4          /* t2 holds A & G blended with alpha
                                       * | 0 | dAab | 0 | dGab | */
    andi              t2, t2, 255     /* keep only the blended G byte */

    sll               t0, t6, 24      /* untouched alpha back to bits 31..24 */
    sll               t3, t2, 8
    or                t4, t0, t3
    or                t0, t1, t4
    sw                t0, 0(a0)
    addiu             a0, a0, 4
    addiu             a1, a1, 4
    beqz              a2, 2f          /* there was only one member */
     nop

    /* main loop: two pixels per iteration */
1:
    lw                t0, 0(a1)       /* t0 = src1 */
    lw                t1, 4(a1)       /* t1 = src2 */
    precrq.qb.ph      t4, t0, t1      /* t4 = | A1 | G1 | A2 | G2 | */
    preceu.ph.qbra    t3, t4          /* t3 = | 0 | G1 | 0 | G2 | */
    preceu.ph.qbla    t2, t4          /* t2 = | 0 | A1 | 0 | A2 | */
    srl               t5, t2, 8
    or                t8, t2, t5      /* t8 = | 0 | A1 | A1 | A2 | */
    muleu_s.ph.qbr    t5, t8, t3      /* t5 = | A1 * G1 | A2 * G2 | */

    addiu             a2, a2, -2
    addiu             a1, a1, 8
    precrq.ph.w       t9, t0, t1      /* t9 = | A1 R1 | A2 R2 | */
    preceu.ph.qbra    t9, t9          /* t9 = | 0 | R1 | 0 | R2 | */

    preceu.ph.qbla    t6, t5
    addq.ph           t5, t5, t6
    addq.ph           t2, t5, t7      /* t2: dG1, dG2 in the high byte of
                                       * each lane */
    muleu_s.ph.qbr    t6, t8, t9      /* t6 = | A1 * R1 | A2 * R2 | */
    sll               t3, t1, 16
    packrl.ph         t3, t0, t3      /* t3 = | G1 B1 | G2 B2 | */
    preceu.ph.qbra    t3, t3          /* t3 = | 0 | B1 | 0 | B2 | */
    muleu_s.ph.qbr    t8, t8, t3      /* t8 = | A1 * B1 | A2 * B2 | */
    preceu.ph.qbla    t3, t6
    addq.ph           t3, t6, t3
    addq.ph           t3, t3, t7      /* t3: dR1, dR2 in the high byte of
                                       * each lane */
    preceu.ph.qbla    t5, t8
    addq.ph           t5, t8, t5
    addq.ph           t5, t5, t7      /* t5: dB1, dB2 in the high byte of
                                       * each lane */

    precrq.ph.w       t0, t4, t3      /* t0 = | A1 G1 | dR1 lane | */
    precrq.ph.w       t1, t2, t5      /* t1 = | dG1 lane | dB1 lane | */
    precrq.qb.ph      t6, t0, t1      /* t6 = | a1 | dR1 | dG1 | dB1 | */
    sll               t3, t3, 16
    sll               t5, t5, 16
    packrl.ph         t0, t4, t3      /* t0 = | A2 G2 | dR2 lane | */
    packrl.ph         t1, t2, t5      /* t1 = | dG2 lane | dB2 lane | */
    precrq.qb.ph      t8, t0, t1      /* t8 = | a2 | dR2 | dG2 | dB2 | */
    sw                t6, 0(a0)
    sw                t8, 4(a0)
    bnez              a2, 1b
     addiu            a0, a0, 8
2:
    jr                ra
     nop

END(destfetchARGB32_asm_mips_dsp)

134 | |

LEAF_MIPS_DSP(qt_memfill32_asm_mips_dsp)
/*
 * Stores the 32-bit 'value' into 'count' consecutive words starting at dst.
 *
 * a0 - destination address (dst)
 * a1 - value
 * a2 - count
 *
 * Layout of the work:
 *   - (count & 7) words are written one at a time first,
 *   - the remaining multiple of 8 is written in unrolled groups of eight,
 *     the last group always being emitted by the tail at label 4.
 */

    beqz              a2, 5f              /* nothing to fill */
     nop
    li                t8, 8
    andi              t0, a2, 0x7         /* t0 = count % 8 */
    beqzl             t0, 2f              /* count is a multiple of 8 */
     addiu            a2, a2, -8          /* (executed only when the branch is
                                           * taken) keep 8 words for the tail */
    subu              a2, a2, t0          /* a2 = words left after the singles */

    /* write the (count % 8) leading words one by one */
1:
    sw                a1, 0(a0)
    addiu             t0, t0, -1
    bnez              t0, 1b
     addiu            a0, a0, 4
    bgeu              a2, t8, 2f          /* at least one full group left? */
     addiu            a2, a2, -8          /* keep 8 words for the tail */
    b                 5f                  /* count was below 8: all done */
     nop

2:
    beqz              a2, 4f              /* exactly one group left: tail only */
     nop

    /* unrolled loop: eight words per pass */
3:
    pref              30, 32(a0)
    addiu             a2, a2, -8
    sw                a1, 0(a0)
    sw                a1, 4(a0)
    sw                a1, 8(a0)
    sw                a1, 12(a0)
    addiu             a0, a0, 32
    sw                a1, -16(a0)
    sw                a1, -12(a0)
    sw                a1, -8(a0)
    bnez              a2, 3b
     sw               a1, -4(a0)

    /* tail: the final group of eight words */
4:
    sw                a1, 0(a0)
    sw                a1, 4(a0)
    sw                a1, 8(a0)
    sw                a1, 12(a0)
    addiu             a0, a0, 32
    sw                a1, -16(a0)
    sw                a1, -12(a0)
    sw                a1, -8(a0)
    sw                a1, -4(a0)
5:
    jr                ra
     nop

END(qt_memfill32_asm_mips_dsp)

189 | |

LEAF_MIPS_DSP(comp_func_SourceOver_asm_mips_dsp)
/*
 * Source-over composition of 'length' pixels, one pixel per iteration.
 *
 * a0 - uint *dest
 * a1 - const uint *src
 * a2 - int length
 * a3 - uint const_alpha
 *
 * const_alpha == 255:  dst = s + BYTE_MUL(dst, ~qAlpha(s)), with shortcuts
 *                      for fully opaque s (plain copy) and s == 0 (dst kept).
 * otherwise:           s' = BYTE_MUL(s, const_alpha);
 *                      dst = s' + BYTE_MUL(dst, ~qAlpha(s')).
 *
 * BYTE_MUL is evaluated per 16-bit lane as (x + (x >> 8) + 0x80) >> 8.
 */

    beqz              a2, 5f
     nop
    li                t8, 255
    li                t7, 0x800080        /* rounding term for both lanes */
    bne               a3, t8, 4f
     nop

    /* part where const_alpha = 255 */
    b                 2f
     nop

    /* s == 0: leave dst[i] as it is and move on */
1:
    addiu             a0, a0, 4
    addiu             a2, a2, -1
    beqz              a2, 5f
     nop
2:
    lw                t0, 0(a1)           /* t0 = s = src[i] */
    addiu             a1, a1, 4
    not               t1, t0
    srl               t1, t1, 24          /* t1 = ~qAlpha(s) */
    bnez              t1, 3f              /* not fully opaque: blend */
     nop
    sw                t0, 0(a0)           /* dst[i] = src[i] */
    addiu             a2, a2, -1
    bnez              a2, 2b
     addiu            a0, a0, 4
    b                 5f
     nop
3:
    beqz              t0, 1b              /* s == 0: nothing to add */
     nop

    lw                t4, 0(a0)
    replv.ph          t6, t1              /* ~qAlpha(s) in both lanes */
    muleu_s.ph.qbl    t2, t4, t6          /* upper two bytes of dst * ialpha */
    muleu_s.ph.qbr    t3, t4, t6          /* lower two bytes of dst * ialpha */
    addiu             a2, a2, -1
    preceu.ph.qbla    t4, t2
    addq.ph           t4, t2, t4
    addq.ph           t4, t4, t7
    preceu.ph.qbla    t5, t3
    addq.ph           t5, t5, t3
    addq.ph           t5, t5, t7
    precrq.qb.ph      t8, t4, t5          /* t8 = | dsA | dsR | dsG | dsB | */
    addu              t8, t0, t8          /* dst[i] =
                                           * s + BYTE_MUL(dst[i],~qAlpha(s)) */
    sw                t8, 0(a0)
    bnez              a2, 2b
     addiu            a0, a0, 4
    b                 5f
     nop

    /* part where const_alpha != 255 */
4:
    lw                t0, 0(a0)           /* t0 = dst[i] */
    lw                t1, 0(a1)           /* t1 = src[i] */
    addiu             a1, a1, 4
    addiu             a2, a2, -1
    replv.ph          t6, a3              /* const_alpha in both lanes */
    muleu_s.ph.qbl    t2, t1, t6
    muleu_s.ph.qbr    t3, t1, t6
    preceu.ph.qbla    t4, t2
    addq.ph           t4, t2, t4
    addq.ph           t4, t4, t7
    preceu.ph.qbla    t5, t3
    addq.ph           t5, t5, t3
    addq.ph           t5, t5, t7
    precrq.qb.ph      t8, t4, t5          /* t8 = | dsA | dsR | dsG | dsB |
                                           * = BYTE_MUL(src[i], const_alpha) */

    not               t6, t8
    srl               t6, t6, 24          /* t6 = ~qAlpha(t8) */
    replv.ph          t6, t6

    muleu_s.ph.qbl    t2, t0, t6
    muleu_s.ph.qbr    t3, t0, t6
    preceu.ph.qbla    t4, t2
    addq.ph           t4, t2, t4
    addq.ph           t4, t4, t7
    preceu.ph.qbla    t5, t3
    addq.ph           t5, t5, t3
    addq.ph           t5, t5, t7
    precrq.qb.ph      t6, t4, t5          /* t6 = | ddA | ddR | ddG | ddB | */

    addu              t0, t8, t6
    sw                t0, 0(a0)
    bnez              a2, 4b
     addiu            a0, a0, 4
5:
    jr                ra
     nop

END(comp_func_SourceOver_asm_mips_dsp)

288 | |

LEAF_MIPS_DSPR2(qt_destStoreARGB32_asm_mips_dsp)
/*
 * Copies 'length' pixels from buffer to data, rescaling R, G and B of every
 * pixel by (0xff0000 / alpha) in 16.16 fixed point; alpha is kept.
 *
 * a0 - uint * data
 * a1 - const uint *buffer
 * a2 - int length
 *
 * Per pixel:
 *   alpha == 255 : the pixel is stored unchanged
 *   alpha == 0   : 0 is stored
 *   otherwise    : inv = 0xff0000 / alpha; each colour channel becomes
 *                  (channel * inv) >> 16, re-assembled under the old alpha
 *
 * v1 counts processed pixels; v0 is used as scratch for the pixel.
 *
 * The blue product lives in $ac2 rather than $ac0 (HI/LO): the plain 'mul'
 * used for the red channel leaves HI/LO UNPREDICTABLE on MIPS32, so a value
 * parked in $ac0 across that 'mul' must not be relied upon.
 */

    blez              a2, 6f
     move             v1, zero            /* v1 = number of pixels done */
    li                t0, 255
    lui               a3, 0xff            /* a3 = 0x00ff0000 */
    j                 2f
     lui              t2, 0xff00          /* t2 = 0xff000000 (alpha mask) */

    /* alpha == 0: store a zero pixel */
1:
    addiu             v1, v1, 1
    sw                zero, 0(a0)
    addiu             a1, a1, 4
    beq               v1, a2, 6f
     addiu            a0, a0, 4
2:
    lw                v0, 0(a1)
    srl               t3, v0, 24          /* t3 = alpha */
    beql              t3, t0, 5f          /* opaque: store as is */
     addiu            v1, v1, 1           /* (only when the branch is taken) */
    beqz              t3, 1b
     srl              t1, v0, 8           /* delay slot; result only used on
                                           * the fall-through path */
    andi              t1, t1, 0xff        /* t1 = G */

    teq               t3, zero, 0x7       /* trap on alpha == 0 (cannot happen
                                           * here, see the beqz above) */
    div               zero, a3, t3        /* LO = 0xff0000 / alpha */
    move              t8, t3              /* t8 = alpha */
    andi              t6, v0, 0xff        /* t6 = B */

    srl               t3, v0, 16
    andi              t3, t3, 0xff        /* t3 = R */

    and               t5, v0, t2          /* t5 = alpha, still in bits 31..24 */
    mflo              t4                  /* t4 = inv */

    mult              $ac2, t4, t6        /* $ac2 = inv * B */
    mult              $ac1, t1, t4        /* $ac1 = inv * G */
    mul               t4, t3, t4          /* t4   = inv * R (low 32 bits) */

    sltiu             t8, t8, 2
    beqz              t8, 3f              /* alpha >= 2: use extr.w */
     nop
    /* alpha == 1 */
    mflo              t6, $ac2
    mflo              t1, $ac1
    sra               t6, t6, 16
    sra               t1, t1, 8
    b                 4f
     nop
3:
    extr.w            t6, $ac2, 16        /* t6 = (inv * B) >> 16 */
    extr.w            t1, $ac1, 8         /* t1 = (inv * G) >> 8 */
4:
    and               v0, t4, a3          /* R result in bits 23..16 */
    or                v0, v0, t6          /* | B */
    or                v0, v0, t5          /* | alpha */
    andi              t1, t1, 0xff00
    or                v0, v0, t1          /* | G */
    addiu             v1, v1, 1
5:
    sw                v0, 0(a0)
    addiu             a1, a1, 4
    bne               v1, a2, 2b
     addiu            a0, a0, 4
6:
    jr                ra
     nop

END(qt_destStoreARGB32_asm_mips_dsp)

362 | |

LEAF_MIPS_DSP(comp_func_solid_Source_dsp_asm_x2)
/*
 * For every destination pixel: dest = color + BYTE_MUL(dest, ialpha).
 * Two pixels are handled per iteration.
 *
 * a0 - const uint *dest
 * a1 - int length
 * a2 - uint color
 * a3 - uint ialpha
 *
 * The counter is decremented by 2 per pass and the loop ends only when it
 * reaches exactly 0, so length has to be even (0 returns immediately).
 *
 * BYTE_MUL_x2 is a macro from qt_mips_asm_dsp_p.h (not visible here); it is
 * presumably the two-pixel form of BYTE_MUL with t6/t7 as outputs - verify
 * against the header.
 */

    beqz              a1, 2f
     nop
    replv.ph          a3, a3              /* ialpha in both 16-bit lanes */
    li                t9, 0x800080        /* rounding term for both lanes */
1:
    lw                t0, 0(a0)
    lw                t1, 4(a0)
    or                t2, t0, t1          /* if both dest are zero, no
                                           * computation needed */
    beqz              t2, 12f
     addiu            a1, a1, -2

    BYTE_MUL_x2 t0, t1, t6, t7, a3, a3, t9, t2, t3, t4, t5, 0
11:
    addu              t2, a2, t6
    addu              t3, a2, t7
    sw                t2, 0(a0)
    sw                t3, 4(a0)
    bnez              a1, 1b
     addiu            a0, a0, 8
    b                 2f
     nop                                  /* explicit delay slot: do not let
                                           * the first instruction of label 12
                                           * execute here by accident */

    /* both dest pixels are 0: the result is just color */
12:
    addu              t2, a2, t0
    addu              t3, a2, t1
    sw                t2, 0(a0)
    sw                t3, 4(a0)
    bnez              a1, 1b
     addiu            a0, a0, 8
2:
    jr                ra
     nop

END(comp_func_solid_Source_dsp_asm_x2)

403 | |

LEAF_MIPS_DSP(comp_func_solid_DestinationOver_dsp_asm_x2)
/*
 * For every destination pixel d: d = d + BYTE_MUL(color, qAlpha(~d)).
 * Two pixels are handled per iteration.
 *
 * a0 - uint *dest
 * a1 - int length
 * a2 - uint color
 *
 * Returns immediately when length or color is 0.  The counter drops by 2
 * per pass and the loop ends only at exactly 0, so length has to be even.
 *
 * s0 and s1 are saved and restored although no instruction visible in this
 * function names them (NOTE(review): check whether BYTE_MUL_x2 needs them).
 */

    addiu             sp, sp, -8
    sw                s0, 0(sp)
    sw                s1, 4(sp)
    beqz              a1, 2f
     nop
    beqz              a2, 2f              /* color == 0 adds nothing */
     nop
    li                t9, 0x800080        /* t9 = rounding term */

1:
    lw                t0, 0(a0)
    lw                t1, 4(a0)
    nor               t2, t0, zero
    nor               t3, t1, zero
    srl               t4, t2, 24          /* t4 = qAlpha(~d1) */
    srl               t5, t3, 24          /* t5 = qAlpha(~d2) */
    or                t2, t4, t5          /* both inverse alphas 0 (both dest
                                           * pixels opaque): store d unchanged */
    beqz              t2, 11f
     addiu            a1, a1, -2
    replv.ph          t2, t4
    replv.ph          t3, t5

    BYTE_MUL_x2 a2, a2, t8, a3, t2, t3, t9, t4, t5, t6, t7

    addu              t0, t0, t8
    addu              t1, t1, a3
11:
    sw                t0, 0(a0)
    sw                t1, 4(a0)
    bnez              a1, 1b
     addiu            a0, a0, 8

2:
    lw                s0, 0(sp)
    lw                s1, 4(sp)
    addiu             sp, sp, 8
    jr                ra
     nop

END(comp_func_solid_DestinationOver_dsp_asm_x2)

451 | |

LEAF_MIPS_DSP(comp_func_DestinationOver_dsp_asm_x2)
/*
 * Destination-over composition, two pixels per iteration.
 *
 * a0 - uint *dest
 * a1 - uint *src
 * a2 - int length
 * a3 - uint const_alpha
 *
 * const_alpha == 255:  d = d + BYTE_MUL(s, qAlpha(~d))
 * otherwise:           s' = BYTE_MUL(s, const_alpha);
 *                      d = d + BYTE_MUL(s', qAlpha(~d))
 *
 * The counter drops by 2 per pass and the loops end only at exactly 0, so
 * length has to be even (0 returns immediately).  AT is used as a data
 * register, hence the .set noat / .set at bracket.
 */

    .set noat
    addiu             sp, sp, -8
    sw                s0, 0(sp)
    sw                s1, 4(sp)
    beqz              a2, 3f
     nop
    li                t9, 0x800080        /* t9 = rounding term */
    li                t0, 255
    beq               a3, t0, 2f
     nop

    /* part where const_alpha != 255 */
1:
    replv.ph          a3, a3              /* const_alpha in both lanes */
11:
    lw                t0, 0(a1)           /* src_1 */
    lw                t1, 4(a1)           /* src_2 */
    addiu             a2, a2, -2

    BYTE_MUL_x2 t0, t1, t8, AT, a3, a3, t9, t4, t5, t6, t7, 0
                                          /* t8 = s1 */
                                          /* AT = s2 */
    lw                t0, 0(a0)           /* dest_1 */
    lw                t1, 4(a0)           /* dest_2 */
    addiu             a1, a1, 8
    nor               t2, t0, zero
    nor               t3, t1, zero
    srl               t4, t2, 24
    srl               t5, t3, 24
    replv.ph          t2, t4              /* qAlpha(~d) 1 */
    replv.ph          t3, t5              /* qAlpha(~d) 2 */

    BYTE_MUL_x2 t8, AT, s0, s1, t2, t3, t9, t4, t5, t6, t7

    addu              t0, t0, s0
    addu              t1, t1, s1
    sw                t0, 0(a0)
    sw                t1, 4(a0)
    bnez              a2, 11b
     addiu            a0, a0, 8
    b                 3f
     nop

    /* part where const_alpha = 255 */
2:
    lw                t0, 0(a0)           /* dest 1 */
    lw                t1, 4(a0)           /* dest 2 */
    lw                s0, 0(a1)           /* src 1 */
    lw                s1, 4(a1)           /* src 2 */
    nor               t2, t0, zero
    nor               t3, t1, zero
    srl               t4, t2, 24
    srl               t5, t3, 24
    replv.ph          t2, t4              /* qAlpha(~d) 1 */
    replv.ph          t3, t5              /* qAlpha(~d) 2 */
    addiu             a1, a1, 8
    addiu             a2, a2, -2

    BYTE_MUL_x2 s0, s1, t8, AT, t2, t3, t9, t4, t5, t6, t7

    addu              t0, t0, t8
    addu              t1, t1, AT
    sw                t0, 0(a0)
    sw                t1, 4(a0)
    bnez              a2, 2b
     addiu            a0, a0, 8

3:
    lw                s0, 0(sp)
    lw                s1, 4(sp)
    addiu             sp, sp, 8
    jr                ra
     nop
    .set at

END(comp_func_DestinationOver_dsp_asm_x2)

536 | |

LEAF_MIPS_DSP(comp_func_solid_SourceIn_dsp_asm_x2)
/*
 * Source-in composition with a solid color, two pixels per iteration.
 *
 * a0 - uint *dest
 * a1 - int length
 * a2 - uint color
 * a3 - uint const_alpha
 *
 * const_alpha == 255:  d = BYTE_MUL(color, qAlpha(d))
 * otherwise:           color = BYTE_MUL(color, const_alpha);
 *                      cia   = 255 - const_alpha;
 *                      d = INTERPOLATE_PIXEL_255(color, qAlpha(d), d, cia)
 *
 * The counter drops by 2 per pass and the loops end only at exactly 0, so
 * length has to be even (0 returns immediately).
 *
 * BYTE_MUL, BYTE_MUL_x2 and INTERPOLATE_PIXEL_255 are macros from
 * qt_mips_asm_dsp_p.h (not visible here); the formulas above follow the
 * operand order used below and should be checked against that header.
 *
 * The frame is 16 bytes (three registers saved, one word of padding) so
 * that sp stays 8-byte aligned as the o32 ABI requires.
 */

    .set noat
    addiu             sp, sp, -16
    sw                s0, 0(sp)
    sw                s1, 4(sp)
    sw                s2, 8(sp)
    beqz              a1, 3f
     nop
    li                t9, 0x800080        /* t9 = 0x800080 (rounding_factor) */
    lui               t8, 0xff00
    li                t0, 0xff
    beq               a3, t0, 2f
     ori              t8, t8, 0xff00      /* t8 = 0xff00ff00 (andi_factor) */

    /* part where const_alpha != 255 */
1:
    replv.ph          t0, a3
    li                t5, 0xff
    BYTE_MUL a2, a2, t0, t9, t1, t2, t3, t4   /* a2 = color ( = BYTE_MUL(color, const_alpha)); */
    subu              t1, t5, a3          /* t1 = cia = 255 - const_alpha */
11:
    lw                t2, 0(a0)           /* t2 = d1 */
    lw                s0, 4(a0)           /* s0 = d2 */
    addiu             a1, a1, -2
    srl               t3, t2, 24          /* t3 = qAlpha(d1) */
    srl               s2, s0, 24          /* s2 = qAlpha(d2) */

    INTERPOLATE_PIXEL_255 a2, t3, t2, t1, AT, t9, t8, t4, t5, t6, t7
    INTERPOLATE_PIXEL_255 a2, s2, s0, t1, s1, t9, t8, t4, t5, t6, t7

    sw                AT, 0(a0)
    sw                s1, 4(a0)
    bnez              a1, 11b
     addiu            a0, a0, 8
    b                 3f
     nop

    /* part where const_alpha = 255 */
2:
    lw                t0, 0(a0)           /* dest 1 */
    lw                t1, 4(a0)           /* dest 2 */
    srl               t4, t0, 24
    srl               t5, t1, 24
    replv.ph          t2, t4              /* qAlpha(d1) in both lanes */
    replv.ph          t3, t5              /* qAlpha(d2) in both lanes */
    addiu             a1, a1, -2

    BYTE_MUL_x2 a2, a2, t8, AT, t2, t3, t9, t4, t5, t6, t7

    sw                t8, 0(a0)
    sw                AT, 4(a0)
    bnez              a1, 2b
     addiu            a0, a0, 8

3:
    lw                s0, 0(sp)
    lw                s1, 4(sp)
    lw                s2, 8(sp)
    addiu             sp, sp, 16
    jr                ra
     nop
    .set at

END(comp_func_solid_SourceIn_dsp_asm_x2)

608 | |

LEAF_MIPS_DSP(comp_func_SourceIn_dsp_asm_x2)
/*
 * Source-in composition, two pixels per iteration.
 *
 * a0 - uint *dest
 * a1 - const uint *src
 * a2 - int length
 * a3 - uint const_alpha
 *
 * const_alpha == 255:  d = BYTE_MUL(s, qAlpha(d))
 * otherwise:           s' = BYTE_MUL(s, const_alpha);
 *                      cia = 255 - const_alpha;
 *                      d = INTERPOLATE_PIXEL_255(s', qAlpha(d), d, cia)
 *
 * The counter drops by 2 per pass and the loops end only at exactly 0, so
 * length has to be even (0 returns immediately).
 *
 * BYTE_MUL_x2 and INTERPOLATE_PIXEL_255 come from qt_mips_asm_dsp_p.h (not
 * visible here); the formulas follow the operand order used below.
 */

    .set noat
    addiu             sp, sp, -16
    sw                s0, 0(sp)
    sw                s1, 4(sp)
    sw                s2, 8(sp)
    sw                s3, 12(sp)
    beqz              a2, 3f
     nop
    li                t9, 0x800080        /* t9 = 0x800080 (rounding_factor) */
    lui               t8, 0xff00
    li                t0, 255
    beq               a3, t0, 2f
     ori              t8, t8, 0xff00      /* t8 = 0xff00ff00 (andi_factor) */

    /* part where const_alpha != 255 */
1:
    li                t5, 255
    subu              t7, t5, a3          /* t7 = cia = 255 - const_alpha */
    replv.ph          a3, a3              /* const_alpha in both lanes */
11:
    lw                t0, 0(a1)           /* t0 = src 1 */
    lw                t1, 4(a1)           /* t1 = src 2 */
    addiu             a2, a2, -2

    BYTE_MUL_x2 t0, t1, AT, s0, a3, a3, t9, t3, t4, t5, t6, 0

    lw                t0, 0(a0)           /* t0 = dest 1 */
    lw                t1, 4(a0)           /* t1 = dest 2 */
    addiu             a1, a1, 8

    srl               t2, t0, 24          /* t2 = qAlpha(d) 1 */
    srl               t3, t1, 24          /* t3 = qAlpha(d) 2 */

    INTERPOLATE_PIXEL_255 AT, t2, t0, t7, s1, t9, t8, t4, t5, t6, s3
    INTERPOLATE_PIXEL_255 s0, t3, t1, t7, s2, t9, t8, t4, t5, t6, s3

    sw                s1, 0(a0)
    sw                s2, 4(a0)
    bnez              a2, 11b
     addiu            a0, a0, 8
    b                 3f
     nop

    /* part where const_alpha = 255 */
2:
    lw                t2, 0(a0)           /* dest 1 */
    lw                t3, 4(a0)           /* dest 2 */
    lw                t0, 0(a1)           /* src 1 */
    lw                t1, 4(a1)           /* src 2 */
    srl               t4, t2, 24
    srl               t5, t3, 24
    replv.ph          t2, t4              /* qAlpha(d1) in both lanes */
    replv.ph          t3, t5              /* qAlpha(d2) in both lanes */
    addiu             a2, a2, -2

    BYTE_MUL_x2 t0, t1, t8, AT, t2, t3, t9, t4, t5, t6, t7

    addiu             a1, a1, 8
    sw                t8, 0(a0)
    sw                AT, 4(a0)
    bnez              a2, 2b
     addiu            a0, a0, 8

3:
    lw                s0, 0(sp)
    lw                s1, 4(sp)
    lw                s2, 8(sp)
    lw                s3, 12(sp)
    addiu             sp, sp, 16
    jr                ra
     nop
    .set at

END(comp_func_SourceIn_dsp_asm_x2)

691 | |

LEAF_MIPS_DSP(comp_func_solid_DestinationIn_dsp_asm_x2)
/*
 * For every destination pixel d: d = BYTE_MUL(d, a).
 * Two pixels are handled per iteration.
 *
 * a0 - uint *dest
 * a1 - int length
 * a2 - uint a
 *
 * The counter drops by 2 per pass and the loop ends only at exactly 0, so
 * length has to be even (0 returns immediately).  AT receives the second
 * result, hence the .set noat / .set at bracket.
 *
 * BYTE_MUL_x2 comes from qt_mips_asm_dsp_p.h (not visible here).
 */

    .set noat
    beqz              a1, 2f
     nop
    li                t9, 0x800080        /* t9 = 0x800080 (rounding_factor) */
    replv.ph          a2, a2              /* a in both 16-bit lanes */
1:
    lw                t0, 0(a0)           /* t0 = dest 1 */
    lw                t1, 4(a0)           /* t1 = dest 2 */
    addiu             a1, a1, -2

    BYTE_MUL_x2 t0, t1, t8, AT, a2, a2, t9, t4, t5, t6, t7, 0

    sw                t8, 0(a0)
    sw                AT, 4(a0)
    bnez              a1, 1b
     addiu            a0, a0, 8
2:
    jr                ra
     nop
    .set at

END(comp_func_solid_DestinationIn_dsp_asm_x2)

721 | |

LEAF_MIPS_DSP(comp_func_DestinationIn_dsp_asm_x2)
/*
 * Destination-in composition, two pixels per iteration.
 *
 * a0 - uint *dest
 * a1 - const uint *src
 * a2 - int length
 * a3 - uint const_alpha
 *
 * const_alpha == 255:  d = BYTE_MUL(d, qAlpha(s))
 * otherwise:           cia = 255 - const_alpha;
 *                      a = BYTE_MUL(qAlpha(s), const_alpha) + cia;
 *                      d = BYTE_MUL(d, a)
 *
 * The counter drops by 2 per pass and the loops end only at exactly 0, so
 * length has to be even (0 returns immediately).
 *
 * BYTE_MUL_x2 comes from qt_mips_asm_dsp_p.h (not visible here); the
 * formulas follow the operand order used below.
 */

    addiu             sp, sp, -8
    sw                s0, 0(sp)
    sw                s1, 4(sp)
    beqz              a2, 3f
     nop
    li                t9, 0x800080        /* t9 = 0x800080 (rounding_factor) */
    li                t0, 255
    beq               a3, t0, 2f
     nop

    /* part where const_alpha != 255 */
1:
    li                t5, 255
    subu              t8, t5, a3          /* t8 = cia = 255 - const_alpha */
    replv.ph          a3, a3              /* const_alpha in both lanes */
11:
    lw                t0, 0(a1)           /* t0 = src 1 */
    lw                t1, 4(a1)           /* t1 = src 2 */
    addiu             a2, a2, -2
    srl               t0, t0, 24          /* t0 = qAlpha(src 1) */
    srl               t1, t1, 24          /* t1 = qAlpha(src 2) */

    BYTE_MUL_x2 t0, t1, s1, t7, a3, a3, t9, t3, t4, t5, t6, 0

    lw                t0, 0(a0)           /* t0 = dest 1 */
    lw                t1, 4(a0)           /* t1 = dest 2 */
    addu              s1, s1, t8          /* a 1 */
    addu              t7, t7, t8          /* a 2 */
    replv.ph          t2, s1
    replv.ph          t3, t7

    BYTE_MUL_x2 t0, t1, s1, t7, t2, t3, t9, t4, t5, t6, s0

    addiu             a1, a1, 8
    sw                s1, 0(a0)
    sw                t7, 4(a0)
    bnez              a2, 11b
     addiu            a0, a0, 8
    b                 3f
     nop

    /* part where const_alpha = 255 */
2:
    lw                t2, 0(a1)           /* src 1 */
    lw                t3, 4(a1)           /* src 2 */
    lw                t0, 0(a0)           /* dest 1 */
    lw                t1, 4(a0)           /* dest 2 */
    srl               t4, t2, 24
    srl               t5, t3, 24
    replv.ph          t2, t4              /* t2 = qAlpha(src 1) */
    replv.ph          t3, t5              /* t3 = qAlpha(src 2) */
    addiu             a2, a2, -2

    BYTE_MUL_x2 t0, t1, t8, s1, t2, t3, t9, t4, t5, t6, t7

    addiu             a1, a1, 8
    sw                t8, 0(a0)
    sw                s1, 4(a0)
    bnez              a2, 2b
     addiu            a0, a0, 8

3:
    lw                s0, 0(sp)
    lw                s1, 4(sp)
    addiu             sp, sp, 8
    jr                ra
     nop

END(comp_func_DestinationIn_dsp_asm_x2)

799 | |

LEAF_MIPS_DSP(comp_func_DestinationOut_dsp_asm_x2)
/*
 * Destination-out composition, two pixels per iteration.
 *
 * a0 - uint *dest
 * a1 - const uint *src
 * a2 - int length
 * a3 - uint const_alpha
 *
 * const_alpha == 255:  d = BYTE_MUL(d, qAlpha(~s))
 * otherwise:           cia = 255 - const_alpha;
 *                      a = BYTE_MUL(qAlpha(~s), const_alpha) + cia;
 *                      d = BYTE_MUL(d, a)
 *
 * The counter drops by 2 per pass and the loops end only at exactly 0, so
 * length has to be even (0 returns immediately).
 *
 * BYTE_MUL_x2 comes from qt_mips_asm_dsp_p.h (not visible here); the
 * formulas follow the operand order used below.
 *
 * The frame is 8 bytes (one register saved, one word of padding) so that
 * sp stays 8-byte aligned as the o32 ABI requires.
 */

    .set noat
    addiu             sp, sp, -8
    sw                s0, 0(sp)
    beqz              a2, 3f
     nop
    li                t9, 0x800080        /* t9 = 0x800080 (rounding_factor) */
    li                t0, 0xff
    beq               a3, t0, 2f
     nop

    /* part where const_alpha != 255 */
1:
    li                t5, 0xff
    subu              t8, t5, a3          /* t8 = cia = 255 - const_alpha */
    replv.ph          a3, a3              /* const_alpha in both lanes */
11:
    lw                t0, 0(a1)           /* t0 = src 1 */
    lw                t1, 4(a1)           /* t1 = src 2 */
    not               t0, t0
    not               t1, t1
    addiu             a2, a2, -2
    srl               t0, t0, 24          /* t0 = qAlpha(~src 1) */
    srl               t1, t1, 24          /* t1 = qAlpha(~src 2) */

    BYTE_MUL_x2 t0, t1, AT, t7, a3, a3, t9, t3, t4, t5, t6, 0

    lw                t0, 0(a0)           /* t0 = dest 1 */
    lw                t1, 4(a0)           /* t1 = dest 2 */
    addu              AT, AT, t8          /* a 1 */
    addu              t7, t7, t8          /* a 2 */
    replv.ph          t2, AT
    replv.ph          t3, t7

    BYTE_MUL_x2 t0, t1, AT, t7, t2, t3, t9, t4, t5, t6, s0

    addiu             a1, a1, 8
    sw                AT, 0(a0)
    sw                t7, 4(a0)
    bnez              a2, 11b
     addiu            a0, a0, 8
    b                 3f
     nop

    /* part where const_alpha = 255 */
2:
    lw                t2, 0(a1)           /* src 1 */
    lw                t3, 4(a1)           /* src 2 */
    not               t2, t2
    not               t3, t3
    lw                t0, 0(a0)           /* dest 1 */
    lw                t1, 4(a0)           /* dest 2 */
    srl               t4, t2, 24
    srl               t5, t3, 24
    replv.ph          t2, t4              /* t2 = qAlpha(~src 1) */
    replv.ph          t3, t5              /* t3 = qAlpha(~src 2) */
    addiu             a2, a2, -2

    BYTE_MUL_x2 t0, t1, t8, AT, t2, t3, t9, t4, t5, t6, t7

    addiu             a1, a1, 8
    sw                t8, 0(a0)
    sw                AT, 4(a0)
    bnez              a2, 2b
     addiu            a0, a0, 8

3:
    lw                s0, 0(sp)
    addiu             sp, sp, 8
    jr                ra
     nop
    .set at

END(comp_func_DestinationOut_dsp_asm_x2)

881 | |

LEAF_MIPS_DSP(comp_func_solid_SourceAtop_dsp_asm_x2)
/*
 * Source-atop composition with a solid color, two pixels per iteration:
 *   d = INTERPOLATE_PIXEL_255(color, qAlpha(d), d, sia)
 *
 * a0 - uint *dest
 * a1 - int length
 * a2 - uint color
 * a3 - uint sia
 *
 * The counter drops by 2 per pass and the loop ends only at exactly 0, so
 * length has to be even (0 returns immediately).
 *
 * INTERPOLATE_PIXEL_255 comes from qt_mips_asm_dsp_p.h (not visible here);
 * the formula follows the operand order used below.
 *
 * The frame is 8 bytes (one register saved, one word of padding) so that
 * sp stays 8-byte aligned as the o32 ABI requires.
 */

    .set noat
    addiu             sp, sp, -8
    sw                s0, 0(sp)
    beqz              a1, 2f
     nop
    li                t9, 0x800080        /* t9 = 0x800080 (rounding_factor) */
    lui               t8, 0xff00
    ori               t8, t8, 0xff00      /* t8 = 0xff00ff00 (andi_factor) */
1:
    lw                t0, 0(a0)           /* t0 = dest 1 */
    lw                t1, 4(a0)           /* t1 = dest 2 */
    addiu             a1, a1, -2
    srl               t2, t0, 24          /* t2 = qAlpha(dest 1) */
    srl               t3, t1, 24          /* t3 = qAlpha(dest 2) */

    INTERPOLATE_PIXEL_255 a2, t2, t0, a3, AT, t9, t8, t4, t5, t6, t7
    INTERPOLATE_PIXEL_255 a2, t3, t1, a3, s0, t9, t8, t4, t5, t6, t7

    sw                AT, 0(a0)
    sw                s0, 4(a0)
    bnez              a1, 1b
     addiu            a0, a0, 8
2:
    lw                s0, 0(sp)
    addiu             sp, sp, 8
    jr                ra
     nop
    .set at

END(comp_func_solid_SourceAtop_dsp_asm_x2)

920 | |

LEAF_MIPS_DSP(comp_func_SourceAtop_dsp_asm_x2)
/*
 * Source-atop composition, two pixels per iteration.
 *
 * a0 - uint *dest
 * a1 - const uint *src
 * a2 - int length
 * a3 - uint const_alpha
 *
 * const_alpha == 255:  d = INTERPOLATE_PIXEL_255(s, qAlpha(d), d, qAlpha(~s))
 * otherwise:           the same, with s first replaced by
 *                      BYTE_MUL(s, const_alpha)
 *
 * The counter drops by 2 per pass and the loops end only at exactly 0, so
 * length has to be even (0 returns immediately).
 *
 * BYTE_MUL_x2 and INTERPOLATE_PIXEL_255 come from qt_mips_asm_dsp_p.h (not
 * visible here); the formulas follow the operand order used below.
 *
 * The frame is 24 bytes (five registers saved, one word of padding) so
 * that sp stays 8-byte aligned as the o32 ABI requires.
 */

    .set noat
    addiu             sp, sp, -24
    sw                s0, 0(sp)
    sw                s1, 4(sp)
    sw                s2, 8(sp)
    sw                s3, 12(sp)
    sw                s4, 16(sp)
    beqz              a2, 3f
     nop
    li                t9, 0x800080        /* t9 = 0x800080 (rounding_factor) */
    lui               t8, 0xff00
    li                t0, 0xff
    beq               a3, t0, 2f
     ori              t8, t8, 0xff00      /* t8 = 0xff00ff00 (andi_factor) */

    /* part where const_alpha != 255 */
1:
    replv.ph          a3, a3              /* const_alpha in both lanes */
11:
    lw                AT, 0(a1)           /* src 1 */
    lw                s0, 4(a1)           /* src 2 */

    BYTE_MUL_x2 AT, s0, t0, t1, a3, a3, t9, t3, t4, t5, t6, 0
                                          /* t0 = s1, t1 = s2 */

    lw                t2, 0(a0)           /* t2 = dest 1 */
    lw                t3, 4(a0)           /* t3 = dest 2 */

    srl               t4, t2, 24          /* t4 = qAlpha(dest 1) */
    srl               t5, t3, 24          /* t5 = qAlpha(dest 2) */
    not               t6, t0
    not               t7, t1
    srl               t6, t6, 24          /* t6 = qAlpha(~s1) */
    srl               t7, t7, 24          /* t7 = qAlpha(~s2) */
    addiu             a2, a2, -2

    INTERPOLATE_PIXEL_255 t0, t4, t2, t6, AT, t9, t8, s1, s2, s3, s4
    INTERPOLATE_PIXEL_255 t1, t5, t3, t7, s0, t9, t8, s1, s2, s3, s4

    addiu             a1, a1, 8
    sw                AT, 0(a0)
    sw                s0, 4(a0)
    bnez              a2, 11b
     addiu            a0, a0, 8
    b                 3f
     nop

    /* part where const_alpha = 255 */
2:
    lw                t2, 0(a0)           /* dest 1 */
    lw                t3, 4(a0)           /* dest 2 */
    lw                t0, 0(a1)           /* src 1 */
    lw                t1, 4(a1)           /* src 2 */
    srl               t4, t2, 24          /* t4 = qAlpha(dest 1) */
    srl               t5, t3, 24          /* t5 = qAlpha(dest 2) */
    not               t6, t0
    not               t7, t1
    srl               t6, t6, 24          /* t6 = qAlpha(~s1) */
    srl               t7, t7, 24          /* t7 = qAlpha(~s2) */
    addiu             a2, a2, -2

    INTERPOLATE_PIXEL_255 t0, t4, t2, t6, AT, t9, t8, s1, s2, s3, s4
    INTERPOLATE_PIXEL_255 t1, t5, t3, t7, s0, t9, t8, s1, s2, s3, s4

    addiu             a1, a1, 8
    sw                AT, 0(a0)
    sw                s0, 4(a0)
    bnez              a2, 2b
     addiu            a0, a0, 8

3:
    lw                s0, 0(sp)
    lw                s1, 4(sp)
    lw                s2, 8(sp)
    lw                s3, 12(sp)
    lw                s4, 16(sp)
    addiu             sp, sp, 24
    jr                ra
     nop
    .set at

END(comp_func_SourceAtop_dsp_asm_x2)

1011 | |

LEAF_MIPS_DSP(comp_func_solid_DestinationAtop_dsp_asm_x2)
/*
 * Destination-atop composition with a solid color, two pixels per
 * iteration:
 *   d = INTERPOLATE_PIXEL_255(d, a, color, qAlpha(~d))
 *
 * a0 - uint *dest
 * a1 - int length
 * a2 - uint color
 * a3 - uint a
 *
 * The counter drops by 2 per pass and the loop ends only at exactly 0, so
 * length has to be even (0 returns immediately).
 *
 * INTERPOLATE_PIXEL_255 comes from qt_mips_asm_dsp_p.h (not visible here);
 * the formula follows the operand order used below.
 *
 * The frame is 8 bytes (one register saved, one word of padding) so that
 * sp stays 8-byte aligned as the o32 ABI requires.
 */

    .set noat
    addiu             sp, sp, -8
    sw                s0, 0(sp)
    beqz              a1, 2f
     nop
    li                t9, 0x800080        /* t9 = 0x800080 (rounding_factor) */
    lui               t8, 0xff00
    ori               t8, t8, 0xff00      /* t8 = 0xff00ff00 (andi_factor) */
1:
    lw                t0, 0(a0)           /* t0 = dest 1 */
    lw                t1, 4(a0)           /* t1 = dest 2 */
    addiu             a1, a1, -2
    not               t2, t0
    not               t3, t1
    srl               t2, t2, 24          /* t2 = qAlpha(~(dest 1)) */
    srl               t3, t3, 24          /* t3 = qAlpha(~(dest 2)) */

    INTERPOLATE_PIXEL_255 t0, a3, a2, t2, AT, t9, t8, t4, t5, t6, t7
    INTERPOLATE_PIXEL_255 t1, a3, a2, t3, s0, t9, t8, t4, t5, t6, t7

    sw                AT, 0(a0)
    sw                s0, 4(a0)
    bnez              a1, 1b
     addiu            a0, a0, 8
2:
    lw                s0, 0(sp)
    addiu             sp, sp, 8
    jr                ra
     nop
    .set at

END(comp_func_solid_DestinationAtop_dsp_asm_x2)

1052 | |

LEAF_MIPS_DSP(comp_func_DestinationAtop_dsp_asm_x2)
/*
 * Destination-atop composition, two pixels per iteration.
 *
 * a0 - uint *dest
 * a1 - const uint *src
 * a2 - int length
 * a3 - uint const_alpha
 *
 * const_alpha == 255:  d = INTERPOLATE_PIXEL_255(d, qAlpha(s), s, qAlpha(~d))
 * otherwise:           cia = 255 - const_alpha;
 *                      s'  = BYTE_MUL(s, const_alpha);
 *                      a   = qAlpha(s') + cia;
 *                      d = INTERPOLATE_PIXEL_255(d, a, s', qAlpha(~d))
 *
 * The counter drops by 2 per pass and the loops end only at exactly 0, so
 * length has to be even (0 returns immediately).
 *
 * BYTE_MUL_x2 and INTERPOLATE_PIXEL_255 come from qt_mips_asm_dsp_p.h (not
 * visible here); the formulas follow the operand order used below.
 */

    .set noat
    addiu             sp, sp, -24
    sw                s0, 0(sp)
    sw                s1, 4(sp)
    sw                s2, 8(sp)
    sw                s3, 12(sp)
    sw                s4, 16(sp)
    sw                s5, 20(sp)
    beqz              a2, 3f
     nop
    li                t9, 0x800080        /* t9 = 0x800080 (rounding_factor) */
    lui               t8, 0xff00
    li                t0, 255
    beq               a3, t0, 2f
     ori              t8, t8, 0xff00      /* t8 = 0xff00ff00 (andi_factor) */

    /* part where const_alpha != 255 */
1:
    li                s5, 255
    subu              s5, s5, a3          /* s5 = cia = 255 - const_alpha */
    replv.ph          a3, a3              /* const_alpha in both lanes */
11:
    lw                AT, 0(a1)           /* src 1 */
    lw                s0, 4(a1)           /* src 2 */

    BYTE_MUL_x2 AT, s0, t0, t1, a3, a3, t9, t3, t4, t5, t6, 0
                                          /* t0 = s1, t1 = s2 */

    lw                t2, 0(a0)           /* t2 = dest 1 */
    lw                t3, 4(a0)           /* t3 = dest 2 */

    nor               t4, t2, zero
    nor               t5, t3, zero
    srl               t4, t4, 24          /* t4 = qAlpha(~(dest 1)) */
    srl               t5, t5, 24          /* t5 = qAlpha(~(dest 2)) */
    srl               t6, t0, 24
    srl               t7, t1, 24
    addu              t6, t6, s5          /* t6 = a = qAlpha(s1) + cia */
    addu              t7, t7, s5          /* t7 = a = qAlpha(s2) + cia */
    addiu             a2, a2, -2

    INTERPOLATE_PIXEL_255 t2, t6, t0, t4, AT, t9, t8, s1, s2, s3, s4
    INTERPOLATE_PIXEL_255 t3, t7, t1, t5, s0, t9, t8, s1, s2, s3, s4

    addiu             a1, a1, 8
    sw                AT, 0(a0)
    sw                s0, 4(a0)
    bnez              a2, 11b
     addiu            a0, a0, 8
    b                 3f
     nop

    /* part where const_alpha = 255 */
2:
    lw                t2, 0(a0)           /* d1 */
    lw                t3, 4(a0)           /* d2 */
    lw                t0, 0(a1)           /* s1 */
    lw                t1, 4(a1)           /* s2 */
    srl               t4, t0, 24          /* t4 = qAlpha(s1) */
    srl               t5, t1, 24          /* t5 = qAlpha(s2) */
    nor               t6, t2, zero
    nor               t7, t3, zero
    srl               t6, t6, 24          /* t6 = qAlpha(~d1) */
    srl               t7, t7, 24          /* t7 = qAlpha(~d2) */
    addiu             a2, a2, -2

    INTERPOLATE_PIXEL_255 t2, t4, t0, t6, AT, t9, t8, s1, s2, s3, s4
    INTERPOLATE_PIXEL_255 t3, t5, t1, t7, s0, t9, t8, s1, s2, s3, s4

    addiu             a1, a1, 8
    sw                AT, 0(a0)
    sw                s0, 4(a0)
    bnez              a2, 2b
     addiu            a0, a0, 8

3:
    lw                s0, 0(sp)
    lw                s1, 4(sp)
    lw                s2, 8(sp)
    lw                s3, 12(sp)
    lw                s4, 16(sp)
    lw                s5, 20(sp)
    addiu             sp, sp, 24
    jr                ra
     nop
    .set at

END(comp_func_DestinationAtop_dsp_asm_x2)

1149 | |

LEAF_MIPS_DSP(comp_func_solid_XOR_dsp_asm_x2)
/*
 * XOR composition with a solid color, two pixels per iteration:
 *   d = INTERPOLATE_PIXEL_255(color, qAlpha(~d), d, sia)
 *
 * a0 - uint *dest
 * a1 - int length
 * a2 - uint color
 * a3 - uint sia
 *
 * The counter drops by 2 per pass and the loop ends only at exactly 0, so
 * length has to be even (0 returns immediately).
 *
 * INTERPOLATE_PIXEL_255 comes from qt_mips_asm_dsp_p.h (not visible here);
 * the formula follows the operand order used below.
 *
 * The frame is 8 bytes (one register saved, one word of padding) so that
 * sp stays 8-byte aligned as the o32 ABI requires.
 */

    .set noat
    addiu             sp, sp, -8
    sw                s0, 0(sp)
    beqz              a1, 2f
     nop
    li                t9, 0x800080        /* t9 = 0x800080 (rounding_factor) */
    lui               t8, 0xff00
    ori               t8, t8, 0xff00      /* t8 = 0xff00ff00 (andi_factor) */
1:
    lw                t0, 0(a0)           /* t0 = dest 1 */
    lw                t1, 4(a0)           /* t1 = dest 2 */
    addiu             a1, a1, -2
    not               t2, t0
    not               t3, t1
    srl               t2, t2, 24          /* t2 = qAlpha(~(dest 1)) */
    srl               t3, t3, 24          /* t3 = qAlpha(~(dest 2)) */

    INTERPOLATE_PIXEL_255 a2, t2, t0, a3, AT, t9, t8, t4, t5, t6, t7
    INTERPOLATE_PIXEL_255 a2, t3, t1, a3, s0, t9, t8, t4, t5, t6, t7

    sw                AT, 0(a0)
    sw                s0, 4(a0)
    bnez              a1, 1b
     addiu            a0, a0, 8
2:
    lw                s0, 0(sp)
    addiu             sp, sp, 8
    jr                ra
     nop
    .set at

END(comp_func_solid_XOR_dsp_asm_x2)

1190 | |

1191 | LEAF_MIPS_DSP(comp_func_XOR_dsp_asm_x2) |

1192 | /* XOR composition of src onto dest, two pixels per loop pass. |

1193 | * a0 - uint *dest |

1194 | * a1 - const uint *src |

1195 | * a2 - int length (decremented by 2 per pass; the loops end only on exactly 0) |

1196 | * a3 - uint const_alpha |

1197 | */ |

1198 | |

1199 | .set noat |

1200 | addiu sp, -20 |

1201 | sw s0, 0(sp) |

1202 | sw s1, 4(sp) |

1203 | sw s2, 8(sp) |

1204 | sw s3, 12(sp) |

1205 | sw s4, 16(sp) |

1206 | beqz a2, 3f |

1207 | nop |

1208 | li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ |

1209 | lui t8, 0xff00 |

1210 | li t0, 0xff |

1211 | beq a3, t0, 2f /* const_alpha == 255: src needs no pre-scaling */ |

1212 | ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */ |

1213 | |

1214 | /* part where const_alpha != 255 */ |

1215 | 1: |

1216 | replv.ph a3, a3 |

1217 | 11: |

1218 | lw AT, 0(a1) /* src 1 */ |

1219 | lw s0, 4(a1) /* src 2 */ |

1220 | |

1221 | BYTE_MUL_x2 AT, s0, t0, t1, a3, a3, t9, t3, t4, t5, t6, 0 |

1222 | /* t0 = s1 */ |

1223 | /* t1 = s2 */ |

1224 | |

1225 | lw t2, 0(a0) /* t2 = dest 1 */ |

1226 | lw t3, 4(a0) /* t3 = dest 2 */ |

1227 | |

1228 | not t4, t2 |

1229 | not t5, t3 |

1230 | srl t4, t4, 24 /* t4 = qAlpha(~(dest 1)) */ |

1231 | srl t5, t5, 24 /* t5 = qAlpha(~(dest 2)) */ |

1232 | not t6, t0 |

1233 | not t7, t1 |

1234 | srl t6, t6, 24 /* t6 = qAlpha(~s) */ |

1235 | srl t7, t7, 24 |

1236 | addiu a2, -2 |

1237 | /* dest = INTERPOLATE_PIXEL_255(s, qAlpha(~dest), dest, qAlpha(~s)) */ |

1238 | INTERPOLATE_PIXEL_255 t0, t4, t2, t6, AT, t9, t8, s1, s2, s3, s4 |

1239 | INTERPOLATE_PIXEL_255 t1, t5, t3, t7, s0, t9, t8, s1, s2, s3, s4 |

1240 | |

1241 | addiu a1, 8 |

1242 | sw AT, 0(a0) |

1243 | sw s0, 4(a0) |

1244 | bnez a2, 11b |

1245 | addiu a0, 8 /* dest += 2 pixels (branch delay slot) */ |

1246 | b 3f |

1247 | nop |

1248 | |

1249 | /* part where const_alpha = 255 */ |

1250 | 2: |

1251 | lw t2, 0(a0) /* d1 */ |

1252 | lw t3, 4(a0) /* d2 */ |

1253 | lw t0, 0(a1) /* s1 */ |

1254 | lw t1, 4(a1) /* s2 */ |

1255 | not t4, t0 |

1256 | not t5, t1 |

1257 | srl t4, t4, 24 /* t4 = qAlpha(~s1) */ |

1258 | srl t5, t5, 24 |

1259 | not t6, t2 |

1260 | not t7, t3 |

1261 | srl t6, t6, 24 /* qAlpha(~d1) */ |

1262 | srl t7, t7, 24 |

1263 | addiu a2, -2 |

1264 | /* same blend as the loop above, with the unscaled src pixels */ |

1265 | INTERPOLATE_PIXEL_255 t0, t6, t2, t4, AT, t9, t8, s1, s2, s3, s4 |

1266 | INTERPOLATE_PIXEL_255 t1, t7, t3, t5, s0, t9, t8, s1, s2, s3, s4 |

1267 | |

1268 | addiu a1, 8 |

1269 | sw AT, 0(a0) |

1270 | sw s0, 4(a0) |

1271 | bnez a2, 2b |

1272 | addiu a0, 8 /* dest += 2 pixels (branch delay slot) */ |

1273 | |

1274 | 3: |

1275 | lw s0, 0(sp) |

1276 | lw s1, 4(sp) |

1277 | lw s2, 8(sp) |

1278 | lw s3, 12(sp) |

1279 | lw s4, 16(sp) |

1280 | addiu sp, 20 |

1281 | jr ra |

1282 | nop |

1283 | .set at |

1284 | |

1285 | END(comp_func_XOR_dsp_asm_x2) |

1286 | |

1287 | LEAF_MIPS_DSP(comp_func_solid_SourceOut_dsp_asm_x2) |

1288 | /* Solid-color SourceOut onto dest, two pixels per loop pass. |

1289 | * a0 - uint *dest |

1290 | * a1 - int length (decremented by 2 per pass; the loops end only on exactly 0) |

1291 | * a2 - uint color |

1292 | * a3 - uint const_alpha |

1293 | */ |

1294 | |

1295 | .set noat |

1296 | addiu sp, -12 |

1297 | sw s0, 0(sp) |

1298 | sw s1, 4(sp) |

1299 | sw s2, 8(sp) |

1300 | beqz a1, 3f |

1301 | nop |

1302 | li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ |

1303 | lui t8, 0xff00 |

1304 | li t0, 0xff |

1305 | beq a3, t0, 2f |

1306 | ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */ |

1307 | |

1308 | /* part where const_alpha != 255 */ |

1309 | 1: |

1310 | replv.ph t0, a3 |

1311 | li t5, 0xff |

1312 | BYTE_MUL a2, a2, t0, t9, t1, t2, t3, t4 /* a2 = color ( = BYTE_MUL(color, const_alpha)); */ |

1313 | subu t1, t5, a3 /* t1 = cia = 255 - const_alpha */ |

1314 | 11: |

1315 | lw t2, 0(a0) /* t2 = d1 */ |

1316 | lw s0, 4(a0) /* s0 = d2 */ |

1317 | addiu a1, -2 |

1318 | not t3, t2 |

1319 | not s2, s0 |

1320 | srl t3, t3, 24 /* t3 = qAlpha(~d1) */ |

1321 | srl s2, s2, 24 /* s2 = qAlpha(~d2) */ |

1322 | /* dest = INTERPOLATE_PIXEL_255(color, qAlpha(~dest), dest, cia) */ |

1323 | INTERPOLATE_PIXEL_255 a2, t3, t2, t1, AT, t9, t8, t4, t5, t6, t7 |

1324 | INTERPOLATE_PIXEL_255 a2, s2, s0, t1, s1, t9, t8, t4, t5, t6, t7 |

1325 | |

1326 | sw AT, 0(a0) |

1327 | sw s1, 4(a0) |

1328 | bnez a1, 11b |

1329 | addiu a0, 8 /* dest += 2 pixels (branch delay slot) */ |

1330 | b 3f |

1331 | nop |

1332 | |

1333 | /* part where const_alpha = 255 */ |

1334 | 2: |

1335 | lw t0, 0(a0) /* dest 1 */ |

1336 | lw t1, 4(a0) /* dest 2 */ |

1337 | not t4, t0 |

1338 | not t5, t1 |

1339 | srl t4, t4, 24 /* t4 = qAlpha(~(dest 1)) */ |

1340 | srl t5, t5, 24 /* t5 = qAlpha(~(dest 2)) */ |

1341 | replv.ph t2, t4 |

1342 | replv.ph t3, t5 |

1343 | addiu a1, -2 |

1344 | /* t8 and AT receive the two stored results; t8 no longer holds andi_factor after this */ |

1345 | BYTE_MUL_x2 a2, a2, t8, AT, t2, t3, t9, t4, t5, t6, t7 |

1346 | |

1347 | sw t8, 0(a0) |

1348 | sw AT, 4(a0) |

1349 | bnez a1, 2b |

1350 | addiu a0, 8 /* dest += 2 pixels (branch delay slot) */ |

1351 | |

1352 | 3: |

1353 | lw s0, 0(sp) |

1354 | lw s1, 4(sp) |

1355 | lw s2, 8(sp) |

1356 | addiu sp, 12 |

1357 | jr ra |

1358 | nop |

1359 | .set at |

1360 | |

1361 | END(comp_func_solid_SourceOut_dsp_asm_x2) |

1362 | |

1363 | LEAF_MIPS_DSP(comp_func_SourceOut_dsp_asm_x2) |

1364 | /* SourceOut composition of src onto dest, two pixels per loop pass. |

1365 | * a0 - uint *dest |

1366 | * a1 - const uint *src |

1367 | * a2 - int length (decremented by 2 per pass; the loops end only on exactly 0) |

1368 | * a3 - uint const_alpha |

1369 | */ |

1370 | |

1371 | .set noat |

1372 | addiu sp, -16 |

1373 | sw s0, 0(sp) |

1374 | sw s1, 4(sp) |

1375 | sw s2, 8(sp) |

1376 | sw s3, 12(sp) |

1377 | beqz a2, 3f |

1378 | nop |

1379 | li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ |

1380 | lui t8, 0xff00 |

1381 | li t0, 0xff |

1382 | beq a3, t0, 2f |

1383 | ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */ |

1384 | |

1385 | /* part where const_alpha != 255 */ |

1386 | 1: |

1387 | li t5, 0xff |

1388 | subu t7, t5, a3 /* t7 = cia = 255 - const_alpha */ |

1389 | replv.ph a3, a3 /* must follow the subu above, which needs the scalar const_alpha */ |

1390 | 11: |

1391 | lw t0, 0(a1) /* t0 = src 1 */ |

1392 | lw t1, 4(a1) /* t1 = src 2 */ |

1393 | addiu a2, -2 |

1394 | /* AT and s0 receive the two BYTE_MUL_x2 results that feed the interpolation below */ |

1395 | BYTE_MUL_x2 t0, t1, AT, s0, a3, a3, t9, t3, t4, t5, t6, 0 |

1396 | |

1397 | lw t0, 0(a0) /* t0 = dest 1 */ |

1398 | lw t1, 4(a0) /* t1 = dest 2 */ |

1399 | addiu a1, 8 |

1400 | |

1401 | not t2, t0 |

1402 | not t3, t1 |

1403 | srl t2, t2, 24 /* t2 = qAlpha(~d1) */ |

1404 | srl t3, t3, 24 /* t3 = qAlpha(~d2) */ |

1405 | /* dest = INTERPOLATE_PIXEL_255(s, qAlpha(~dest), dest, cia) */ |

1406 | INTERPOLATE_PIXEL_255 AT, t2, t0, t7, s1, t9, t8, t4, t5, t6, s3 |

1407 | INTERPOLATE_PIXEL_255 s0, t3, t1, t7, s2, t9, t8, t4, t5, t6, s3 |

1408 | |

1409 | sw s1, 0(a0) |

1410 | sw s2, 4(a0) |

1411 | bnez a2, 11b |

1412 | addiu a0, 8 /* dest += 2 pixels (branch delay slot) */ |

1413 | b 3f |

1414 | nop |

1415 | |

1416 | /* part where const_alpha = 255 */ |

1417 | 2: |

1418 | lw t2, 0(a0) /* dest 1 */ |

1419 | lw t3, 4(a0) /* dest 2 */ |

1420 | lw t0, 0(a1) /* src 1 */ |

1421 | lw t1, 4(a1) /* src 2 */ |

1422 | not t4, t2 |

1423 | not t5, t3 |

1424 | srl t4, t4, 24 /* qAlpha(~d1) */ |

1425 | srl t5, t5, 24 /* qAlpha(~d2) */ |

1426 | replv.ph t2, t4 |

1427 | replv.ph t3, t5 |

1428 | addiu a2, -2 |

1429 | /* t8 and AT receive the two stored results; t8 no longer holds andi_factor after this */ |

1430 | BYTE_MUL_x2 t0, t1, t8, AT, t2, t3, t9, t4, t5, t6, t7 |

1431 | |

1432 | addiu a1, 8 |

1433 | sw t8, 0(a0) |

1434 | sw AT, 4(a0) |

1435 | bnez a2, 2b |

1436 | addiu a0, 8 /* dest += 2 pixels (branch delay slot) */ |

1437 | |

1438 | 3: |

1439 | lw s0, 0(sp) |

1440 | lw s1, 4(sp) |

1441 | lw s2, 8(sp) |

1442 | lw s3, 12(sp) |

1443 | addiu sp, 16 |

1444 | jr ra |

1445 | nop |

1446 | .set at |

1447 | |

1448 | END(comp_func_SourceOut_dsp_asm_x2) |

1449 | |

1450 | LEAF_MIPS_DSP(comp_func_Source_dsp_asm_x2) |

1451 | /* Source composition: mixes src into dest by const_alpha, two pixels per loop pass. |

1452 | * a0 - uint *dest |

1453 | * a1 - const uint *src |

1454 | * a2 - int length (decremented by 2 per pass; the loop ends only on exactly 0) |

1455 | * a3 - uint const_alpha |

1456 | */ |

1457 | |

1458 | .set noat |

1459 | addiu sp, -8 |

1460 | sw s0, 0(sp) |

1461 | sw s1, 4(sp) |

1462 | beqz a2, 2f |

1463 | nop |

1464 | li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ |

1465 | lui t8, 0xff00 |

1466 | ori t8, t8, 0xff00 /* t8 = 0xff00ff00 (andi_factor) */ |

1467 | li t7, 0xff |

1468 | subu t7, t7, a3 /* t7 = ialpha = 255 - const_alpha */ |

1469 | 1: |

1470 | lw t0, 0(a0) /* t0 = dest 1 */ |

1471 | lw t1, 4(a0) /* t1 = dest 2 */ |

1472 | lw t2, 0(a1) /* t2 = src 1 */ |

1473 | lw t3, 4(a1) /* t3 = src 2 */ |

1474 | addiu a2, -2 |

1475 | addiu a1, 8 |

1476 | /* dest = INTERPOLATE_PIXEL_255(src, const_alpha, dest, ialpha) */ |

1477 | INTERPOLATE_PIXEL_255 t2, a3, t0, t7, AT, t9, t8, t4, t5, t6, s1 |

1478 | INTERPOLATE_PIXEL_255 t3, a3, t1, t7, s0, t9, t8, t4, t5, t6, s1 |

1479 | |

1480 | sw AT, 0(a0) |

1481 | sw s0, 4(a0) |

1482 | bnez a2, 1b |

1483 | addiu a0, 8 /* dest += 2 pixels (branch delay slot) */ |

1484 | 2: |

1485 | lw s0, 0(sp) |

1486 | lw s1, 4(sp) |

1487 | addiu sp, 8 |

1488 | jr ra |

1489 | nop |

1490 | .set at |

1491 | |

1492 | END(comp_func_Source_dsp_asm_x2) |

1493 | |

1494 | LEAF_MIPS_DSP(qt_blend_argb32_on_argb32_mips_dsp_asm_x2) |

1495 | /* Blends src over dest with a constant alpha, two pixels per loop pass. |

1496 | * a0 - uint *dest |

1497 | * a1 - const uint *src |

1498 | * a2 - int length (decremented by 2 per pass; the loop ends only on exactly 0) |

1499 | * a3 - uint const_alpha |

1500 | */ |

1501 | |

1502 | .set noat |

1503 | addiu sp, -12 |

1504 | sw s0, 0(sp) |

1505 | sw s1, 4(sp) |

1506 | sw s2, 8(sp) |

1507 | beqz a2, 2f |

1508 | nop |

1509 | replv.ph a3, a3 |

1510 | li t9, 8388736 /* t9 = 0x800080 (rounding_factor) */ |

1511 | |

1512 | 1: |

1513 | lw t0, 0(a1) /* t0 = src 1 */ |

1514 | lw t1, 4(a1) /* t1 = src 2 */ |

1515 | addiu a2, -2 |

1516 | /* AT and t7 receive the two results (presumably src * const_alpha per the macro name - defined outside this chunk) */ |

1517 | BYTE_MUL_x2 t0, t1, AT, t7, a3, a3, t9, t3, t4, t5, t6, 0 |

1518 | |

1519 | lw t0, 0(a0) /* t0 = dest 1 */ |

1520 | lw t1, 4(a0) /* t1 = dest 2 */ |

1521 | not s1, AT |

1522 | not s2, t7 |

1523 | srl s1, s1, 24 /* register s1 = qAlpha(~AT) */ |

1524 | srl s2, s2, 24 /* register s2 = qAlpha(~t7) */ |

1525 | replv.ph s1, s1 |

1526 | replv.ph s2, s2 |

1527 | /* t2 and t3 receive the two dest terms that are added to AT and t7 below */ |

1528 | BYTE_MUL_x2 t0, t1, t2, t3, s1, s2, t9, t4, t5, t6, s0 |

1529 | |

1530 | addiu a1, 8 |

1531 | addu AT, AT, t2 /* dest 1 = AT + t2 */ |

1532 | addu t7, t7, t3 /* dest 2 = t7 + t3 */ |

1533 | sw AT, 0(a0) |

1534 | sw t7, 4(a0) |

1535 | bnez a2, 1b |

1536 | addiu a0, 8 /* dest += 2 pixels (branch delay slot) */ |

1537 | |

1538 | 2: |

1539 | lw s0, 0(sp) |

1540 | lw s1, 4(sp) |

1541 | lw s2, 8(sp) |

1542 | addiu sp, 12 |

1543 | jr ra |

1544 | nop |

1545 | .set at |

1546 | |

1547 | END(qt_blend_argb32_on_argb32_mips_dsp_asm_x2) |

1548 | |

1549 | LEAF_MIPS_DSP(qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm) |

1550 | /* Blends src over dest one pixel at a time, with shortcuts for opaque and all-zero src pixels. |

1551 | * a0 - uint *dest |

1552 | * a1 - const uint *src |

1553 | * a2 - int length |

1554 | */ |

1555 | |

1556 | beqz a2, 5f |

1557 | nop |

1558 | li t7, 8388736 /* t7 = 0x800080 */ |

1559 | b 2f |

1560 | nop |

1561 | 1: /* src pixel was 0: leave dst[i] untouched and advance */ |

1562 | addiu a0, a0, 4 |

1563 | addiu a2, a2, -1 |

1564 | beqz a2, 5f |

1565 | nop |

1566 | 2: |

1567 | lw t0, 0(a1) /* t0 = s = src[i] */ |

1568 | addiu a1, a1, 4 |

1569 | nor t1, t0, zero |

1570 | srl t1, t1, 24 /* t1 = qAlpha(~s), i.e. 255 - qAlpha(s) */ |

1571 | bnez t1, 3f /* taken unless qAlpha(s) == 255 */ |

1572 | nop |

1573 | sw t0, 0(a0) /* dst[i] = src[i] */ |

1574 | addiu a2, a2, -1 |

1575 | bnez a2, 2b |

1576 | addiu a0, a0, 4 |

1577 | b 5f |

1578 | nop |

1579 | 3: |

1580 | beqz t0, 1b |

1581 | replv.ph t6, t1 /* | 0 | qAlpha(~s) | 0 | qAlpha(~s) | */ |

1582 | |

1583 | lw t4, 0(a0) |

1584 | addiu a2, a2, -1 |

1585 | beqz t4, 31f /* dst[i] == 0: skip the multiply, t8 stays 0 */ |

1586 | move t8, zero |

1587 | /* NOTE(review): t4 is passed both as the source and as the last temp; confirm the macro reads src before writing that temp */ |

1588 | BYTE_MUL t4, t8, t6, t7, t1, t2, t3, t4 |

1589 | 31: |

1590 | addu t8, t0, t8 /* dst[i] = |

1591 | * s + BYTE_MUL(dst[i],~qAlpha(s)) */ |

1592 | sw t8, 0(a0) |

1593 | bnez a2, 2b |

1594 | addiu a0, a0, 4 |

1595 | b 5f /* redundant: label 5 is the next instruction anyway */ |

1596 | nop |

1597 | 5: |

1598 | jr ra |

1599 | nop |

1600 | |

1601 | END(qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm) |

1602 | |

1603 | |

1604 | #if defined(__MIPSEL) && __MIPSEL /* little-endian build */ |

1605 | # define PACK(r, s, t) packrl.ph r, s, t /* operand order of s and t is swapped in the #else branch */ |

1606 | # define SWHI(r, o, b) swl r, o + 1 (b) |

1607 | # define SWLO(r, o, b) swr r, o + 0 (b) |

1608 | # define LDHI(r, o, b) lwl r, o + 1 (b) |

1609 | # define LDLO(r, o, b) lwr r, o + 2 (b) |

1610 | #else /* otherwise: the left/right instruction of each pair is exchanged */ |

1611 | # define PACK(r, s, t) packrl.ph r, t, s |

1612 | # define SWHI(r, o, b) swr r, o + 1 (b) |

1613 | # define SWLO(r, o, b) swl r, o + 0 (b) |

1614 | # define LDHI(r, o, b) lwr r, o + 1 (b) |

1615 | # define LDLO(r, o, b) lwl r, o + 2 (b) |

1616 | #endif /* __MIPSEL */ |

1617 | |

1618 | LEAF_MIPS_DSP(qt_blend_rgb16_on_rgb16_const_alpha_256_mips_dsp_asm) |

1619 | /* Copies len 16-bit pixels from src to dst, using word accesses where alignment allows. |

1620 | * a0 - dst (*r5g6b5) |

1621 | * a1 - src (const *r5g6b5) |

1622 | * a2 - len (unsigned int) |

1623 | * |

1624 | * Register usage: |

1625 | * t0-3 - Scratch registers |

1626 | * t4 - Number of iterations to do in unrolled loops |

1627 | * t5-7 - Auxiliary scratch registers. |

1628 | * |

1629 | * Check if base addresses of src/dst are aligned, cases: |

1630 | * a) Both aligned. |

1631 | * b) Both unaligned: |

1632 | * 1. Copy a halfword |

1633 | * 2. Use aligned case. |

1634 | * c) dst aligned, src unaligned: |

1635 | * 1. Read a word from dst, halfword from src. |

1636 | * 2. Continue reading words from both. |

1637 | * d) dst unaligned, src aligned: |

1638 | * 1. Read a word from src, halfword from dst. |

1639 | * 2. Continue reading words from both. |

1640 | */ |

1641 | |

1642 | beqz a2, 0f /* if (a2:len == 0): return */ |

1643 | andi t0, a0, 0x3 /* t0 = a0:dst % 4 */ |

1644 | andi t1, a1, 0x3 /* t1 = a1:src % 4 */ |

1645 | or t2, t0, t1 /* t2 = t0 | t1 */ |

1646 | |

1647 | beqz t2, 4f /* both aligned */ |

1648 | nop |

1649 | beqz t0, 3f /* dst aligned, src unaligned */ |

1650 | nop |

1651 | beqz t1, 2f /* src aligned, dst unaligned */ |

1652 | nop |

1653 | |

1654 | /* |

1655 | * Both src/dst are unaligned: copy 1 halfword from src to dst, |

1656 | * then fall through to continue with word-aligned copy. |

1657 | */ |

1658 | lhu t0, 0 (a1) /* t0 <- ((uint16_t*) src)[0] */ |

1659 | addiu a1, a1, 2 /* src++ */ |

1660 | addiu a2, a2,-1 /* len-- */ |

1661 | sh t0, 0 (a0) /* t0 -> ((uint16_t*) dst)[0] */ |

1662 | addiu a0, a0, 2 /* dst++ */ |

1663 | |

1664 | /* |

1665 | * Both src/dst pointers are word-aligned, process eight |

1666 | * items at a time in an unrolled loop. |

1667 | */ |

1668 | 4: beqz a2, 0f /* if (len == 0): return */ |

1669 | srl t4, a2, 3 /* t4 = len / 8 */ |

1670 | |

1671 | beqz t4, 5f /* if (t4 == 0): tail */ |

1672 | andi a2, a2, 0x07 /* len = len % 8 */ |

1673 | |

1674 | 1: lw t0, 0 (a1) |

1675 | lw t1, 4 (a1) |

1676 | lw t2, 8 (a1) |

1677 | lw t3, 12 (a1) |

1678 | |

1679 | addiu t4, t4, -1 /* t4-- */ |

1680 | addiu a1, a1, 16 /* src += 8 */ |

1681 | |

1682 | sw t0, 0 (a0) |

1683 | sw t1, 4 (a0) |

1684 | sw t2, 8 (a0) |

1685 | sw t3, 12 (a0) |

1686 | |

1687 | bnez t4, 1b |

1688 | addiu a0, a0, 16 /* dst += 8 */ |

1689 | |

1690 | b 5f |

1691 | nop |

1692 | |

1693 | |

1694 | /* |

1695 | * dst pointer is unaligned |

1696 | */ |

1697 | 2: beqz a2, 0f /* if (len == 0): return */ |

1698 | srl t4, a2, 3 /* t4 = len / 8 */ |

1699 | beqz t4, 5f /* if (t4 == 0): tail */ |

1700 | andi a2, a2, 0x07 /* len = len % 8 */ |

1701 | |

1702 | 1: lw t0, 0 (a1) |

1703 | lw t1, 4 (a1) |

1704 | lw t2, 8 (a1) |

1705 | lw t3, 12 (a1) |

1706 | |

1707 | addiu t4, t4, -1 /* t4-- */ |

1708 | addiu a1, a1, 16 /* src += 8 */ |

1709 | |

1710 | SWLO (t0, 0, a0) /* partial store of t0 at dst + 0 */ |

1711 | PACK (t5, t1, t0) /* t5..t7: halfwords regrouped for the word stores at dst + 2/6/10 */ |

1712 | PACK (t6, t2, t1) |

1713 | PACK (t7, t3, t2) |

1714 | SWHI (t3, 14, a0) /* partial store of t3 at dst + 14 */ |

1715 | sw t5, 2 (a0) |

1716 | sw t6, 6 (a0) |

1717 | sw t7, 10 (a0) |

1718 | |

1719 | bnez t4, 1b |

1720 | addiu a0, a0, 16 /* dst += 8 */ |

1721 | |

1722 | b 5f |

1723 | nop |

1724 | |

1725 | /* |

1726 | * src pointer is unaligned |

1727 | */ |

1728 | 3: beqz a2, 0f /* if (len == 0): return */ |

1729 | srl t4, a2, 3 /* t4 = len / 8 */ |

1730 | beqz t4, 5f /* if (t4 == 0): tail */ |

1731 | andi a2, a2, 0x07 /* len = len % 8 */ |

1732 | |

1733 | 1: LDHI (t0, 0, a1) /* partial load around src + 0 */ |

1734 | lw t1, 2 (a1) |

1735 | lw t2, 6 (a1) |

1736 | lw t3, 10 (a1) |

1737 | LDLO (t5, 12, a1) /* partial load around src + 14 */ |

1738 | |

1739 | addiu t4, t4, -1 /* t4-- */ |

1740 | addiu a1, a1, 16 /* src += 8 */ |

1741 | |

1742 | PACK (t0, t1, t0) /* regroup halfwords into the four words stored at dst + 0/4/8/12 */ |

1743 | PACK (t6, t2, t1) |

1744 | PACK (t7, t3, t2) |

1745 | sw t0, 0 (a0) |

1746 | PACK (t0, t5, t3) |

1747 | sw t6, 4 (a0) |

1748 | sw t7, 8 (a0) |

1749 | sw t0, 12 (a0) |

1750 | |

1751 | bnez t4, 1b |

1752 | addiu a0, a0, 16 /* dst += 8 */ |

1753 | |

1754 | |

1755 | 5: /* Process remaining items (a2:len < 8 after the unrolled loops), one at a time */ |

1756 | beqz a2, 0f |

1757 | nop |

1758 | |

1759 | 1: lhu t0, 0 (a1) /* t0 <- ((uint16_t*) src)[0] */ |

1760 | addiu a2, a2,-1 /* len-- */ |

1761 | addiu a1, a1, 2 /* src++ */ |

1762 | sh t0, 0 (a0) /* t0 -> ((uint16_t*) dst)[0] */ |

1763 | bnez a2, 1b /* if (len != 0): loop */ |

1764 | addiu a0, a0, 2 /* dst++ */ |

1765 | |

1766 | 0: jr ra |

1767 | nop |

1768 | |

1769 | END(qt_blend_rgb16_on_rgb16_const_alpha_256_mips_dsp_asm) |

1770 | |

1771 | |

1772 | #undef LDHI /* helper macros are only used by qt_blend_rgb16_on_rgb16_const_alpha_256_mips_dsp_asm */ |

1773 | #undef LDLO |

1774 | #undef PACK |

1775 | #undef SWHI |

1776 | #undef SWLO |

1777 | |

1778 | |

1779 | LEAF_MIPS_DSP(qt_blend_rgb16_on_rgb16_mips_dsp_asm) |

1780 | /* Per pixel: dst = src scaled by alpha + dst scaled by (255 - alpha), one pixel per pass. |

1781 | * a0 - dst (*r5g6b5) |

1782 | * a1 - src (const *r5g6b5) |

1783 | * a2 - len (unsigned int) - batch length |

1784 | * a3 - alpha (int) |

1785 | */ |

1786 | |

1787 | beqz a2, 2f |

1788 | li t9, 255 |

1789 | sll t8, a3, 8 /* t8 = alpha * 256 */ |

1790 | subu a3, t8, a3 /* a3 = alpha * 255 */ |

1791 | srl a3, a3, 8 /* a3 = (alpha * 255) >> 8 */ |

1792 | subu t9, t9, a3 /* t9 = 255 - a3 */ |

1793 | addiu a3, a3, 1 /* a3 = src weight for the 0x07e0 field (result is >> 8) */ |

1794 | srl t4, a3, 2 /* t4 = src weight for the 0xf81f fields (result is >> 6) */ |

1795 | addiu t9, t9, 1 /* t9 = dst weight for the 0x07e0 field */ |

1796 | srl t5, t9, 2 /* t5 = dst weight for the 0xf81f fields */ |

1797 | 1: |

1798 | lhu t0, 0(a1) /* t0 = src pixel */ |

1799 | lhu t1, 0(a0) /* t1 = dst pixel */ |

1800 | addiu a2, a2, -1 |

1801 | andi t2, t0, 0x07e0 /* t2 = src middle (6-bit) field */ |

1802 | andi t0, t0, 0xf81f /* t0 = src outer (5-bit) fields */ |

1803 | mul t2, t2, a3 |

1804 | mul t0, t0, t4 |

1805 | andi t3, t1, 0x07e0 /* t3 = dst middle (6-bit) field */ |

1806 | andi t1, t1, 0xf81f /* t1 = dst outer (5-bit) fields */ |

1807 | mul t3, t3, t9 |

1808 | mul t1, t1, t5 |

1809 | addiu a1, a1, 2 |

1810 | srl t2, t2, 8 |

1811 | srl t0, t0, 6 |

1812 | andi t2, t2, 0x07e0 |

1813 | andi t0, t0, 0xf81f |

1814 | or t0, t0, t2 /* t0 = scaled src pixel */ |

1815 | srl t3, t3, 8 |

1816 | srl t1, t1, 6 |

1817 | andi t3, t3, 0x07e0 |

1818 | andi t1, t1, 0xf81f |

1819 | or t1, t1, t3 /* t1 = scaled dst pixel */ |

1820 | addu t0, t0, t1 |

1821 | sh t0, 0(a0) |

1822 | bgtz a2, 1b |

1823 | addiu a0, a0, 2 /* dst++ (branch delay slot) */ |

1824 | 2: |

1825 | jr ra |

1826 | nop |

1827 | |

1828 | END(qt_blend_rgb16_on_rgb16_mips_dsp_asm) |

1829 | |

1830 | |

1831 | LEAF_MIPS_DSP(fetchUntransformed_888_asm_mips_dsp) |

1832 | /* Expands 3-byte source pixels into 32-bit words with the top byte forced to 0xff. |

1833 | * a0 - dst address (address of 32-bit aRGB value) |

1834 | * a1 - src address |

1835 | * a2 - length |

1836 | */ |

1837 | |

1838 | beqz a2, 4f |

1839 | lui t8, 0xff00 /* t8 = 0xff000000, OR-ed into every output pixel */ |

1840 | andi t0, a2, 0x1 |

1841 | beqz t0, 1f /* even length: go straight to the two-pixel loops */ |

1842 | nop |

1843 | /* case for one pixel */ |

1844 | lbu t1, 0(a1) |

1845 | lbu v1, 2(a1) |

1846 | lbu t0, 1(a1) |

1847 | addiu a1, a1, 3 |

1848 | addiu a2, a2, -1 |

1849 | sll t1, t1, 0x10 |

1850 | or v1, v1, t8 |

1851 | sll t0, t0, 0x8 |

1852 | or v1, v1, t1 |

1853 | or v1, v1, t0 /* v1 = | ff | byte 0 | byte 1 | byte 2 | */ |

1854 | sw v1, 0(a0) |

1855 | addiu a0, a0, 4 |

1856 | |

1857 | beqz a2, 4f /* only one pixel is present (length = 1) */ |

1858 | nop |

1859 | 1: |

1860 | andi t0, a1, 0x1 /* src parity decides which of the loads below may be lhu */ |

1861 | beqz t0, 3f |

1862 | nop |

1863 | 2: /* src address is odd: byte + halfword, then halfword + byte */ |

1864 | lbu t0, 0(a1) /* t0 = | 0 | 0 | 0 | R1 | */ |

1865 | lhu t1, 1(a1) /* t1 = | 0 | 0 | B1 | G1 | */ |

1866 | addiu a1, a1, 3 |

1867 | lhu t2, 0(a1) /* t2 = | 0 | 0 | G2 | R2 | */ |

1868 | lbu t3, 2(a1) /* t3 = | 0 | 0 | 0 | B2 | */ |

1869 | |

1870 | sll t0, t0, 16 |

1871 | or t0, t0, t8 /* t0 = | ff | R1 | 0 | 0 | */ |

1872 | shll.ph t4, t1, 8 /* t4 = | 0 | 0 | G1 | 0 | */ |

1873 | srl t5, t1, 8 |

1874 | or t4, t4, t5 /* t4 = | 0 | 0 | G1 | B1 | */ |

1875 | or t0, t0, t4 /* t0 = | ff | R1 | G1 | B1 | */ |

1876 | |

1877 | shll.ph t4, t2, 8 /* t4 = | 0 | 0 | R2 | 0 | */ |

1878 | srl t5, t2, 8 /* t5 = | 0 | 0 | 0 | G2 | */ |

1879 | or t4, t4, t5 |

1880 | sll t4, t4, 8 /* t4 = | 0 | R2 | G2 | 0 | */ |

1881 | or t5, t3, t8 |

1882 | or t2, t4, t5 /* t2 = | ff | R2 | G2 | B2 | */ |

1883 | |

1884 | sw t0, 0(a0) |

1885 | addiu a1, a1, 3 |

1886 | sw t2, 4(a0) |

1887 | addiu a2, a2, -2 |

1888 | bnez a2, 2b |

1889 | addiu a0, a0, 8 |

1890 | b 4f |

1891 | nop |

1892 | 3: /* src address is even: halfword + byte, then byte + halfword */ |

1893 | lhu t0, 0(a1) /* t0 = | 0 | 0 | G1 | R1 | */ |

1894 | lbu t1, 2(a1) /* t1 = | 0 | 0 | 0 | B1 | */ |

1895 | addiu a1, a1, 3 |

1896 | lbu t2, 0(a1) /* t2 = | 0 | 0 | 0 | R2 | */ |

1897 | lhu t3, 1(a1) /* t3 = | 0 | 0 | B2 | G2 | */ |

1898 | |

1899 | srl t4, t0, 8 /* t4 = | 0 | 0 | 0 | G1 | */ |

1900 | shll.ph t5, t0, 8 /* t5 = | 0 | 0 | R1 | 0 | */ |

1901 | or t0, t4, t5 |

1902 | sll t6, t0, 8 /* t6 = | 0 | R1 | G1 | 0 | */ |

1903 | or t4, t1, t8 /* t4 = | ff | 0 | 0 | B1 | */ |

1904 | or t0, t6, t4 /* t0 = | ff | R1 | G1 | B1 | */ |

1905 | |

1906 | sll t2, t2, 16 /* t2 = | 0 | R2 | 0 | 0 | */ |

1907 | srl t4, t3, 8 /* t4 = | 0 | 0 | 0 | B2 | */ |

1908 | shll.ph t5, t3, 8 /* t5 = | 0 | 0 | G2 | 0 | */ |

1909 | or t3, t4, t5 |

1910 | or t2, t2, t3 |

1911 | or t2, t2, t8 /* t2 = | ff | R2 | G2 | B2 | */ |

1912 | |

1913 | sw t0, 0(a0) |

1914 | addiu a1, a1, 3 |

1915 | sw t2, 4(a0) |

1916 | addiu a2, a2, -2 |

1917 | bnez a2, 3b |

1918 | addiu a0, a0, 8 |

1919 | 4: |

1920 | jr ra |

1921 | nop |

1922 | |

1923 | END(fetchUntransformed_888_asm_mips_dsp) |

1924 | |

1925 | |

1926 | LEAF_MIPS_DSP(fetchUntransformed_444_asm_mips_dsp) |

1927 | /* Expands 16-bit source pixels (4 bits per channel) to 32 bits by duplicating each nibble; top byte is forced to 0xff. |

1928 | * a0 - dst address (address of 32-bit aRGB value) |

1929 | * a1 - src address |

1930 | * a2 - length |

1931 | */ |

1932 | |

1933 | lui t8, 0xff00 |

1934 | li t4, 0x1 |

1935 | |

1936 | beqz a2, 5f |

1937 | move v0, a0 /* v0 = dst buffer address (delay slot, so also set when length == 0); |

1938 | * NOTE(review): labels 1 and 4 below reuse v0 as scratch */ |

1939 | andi t0, a2, 0x1 |

1940 | beqz t0, 2f /* even length: skip the single-pixel step |

1941 | * (go check src memory alignment (word)) */ |

1942 | nop |

1943 | 1: |

1944 | lhu v0, 0(a1) |

1945 | addiu a1, a1, 2 |

1946 | addiu a2, a2, -1 |

1947 | andi t0, v0, 0xf00 /* t0 = nibble in bits 11..8 */ |

1948 | andi v1, v0, 0xf /* v1 = nibble in bits 3..0 */ |

1949 | andi v0, v0, 0xf0 /* v0 = nibble in bits 7..4 */ |

1950 | sra t3, t0, 0x4 |

1951 | sra t1, v0, 0x4 |

1952 | sra t0, t0, 0x8 |

1953 | sll t2, v1, 0x4 |

1954 | or t0, t0, t3 /* each nibble duplicated into a full byte */ |

1955 | or v0, t1, v0 |

1956 | lui t1, 0xff00 |

1957 | or v1, t2, v1 |

1958 | sll t0, t0, 0x10 |

1959 | or v1, v1, t1 |

1960 | sll v0, v0, 0x8 |

1961 | or v1, v1, t0 |

1962 | or v0, v1, v0 |

1963 | sw v0, 0(a0) |

1964 | addiu a0, a0, 4 |

1965 | beqz a2, 5f /* no more pixels for processing */ |

1966 | nop |

1967 | beq a2, t4, 4f /* only one more pixel remained (t4 is still 1 here) */ |

1968 | nop |

1969 | /* check if src memory address is word aligned */ |

1970 | 2: |

1971 | andi t0, a1, 0x3 |

1972 | beqz t0, 3f /* memory is word aligned */ |

1973 | andi a3, a2, 0x1 /* set the a3 register as the comparison value |

1974 | * for ending the unrolled loop |

1975 | * (1 if odd, 0 if even) */ |

1976 | b 1b /* not word aligned, |

1977 | * go another turn with |

1978 | * just one pixel processing */ |

1979 | nop |

1980 | 3: |

1981 | lw t0, 0(a1) |

1982 | addiu a2, a2, -2 |

1983 | preceu.ph.qbr t1, t0 /* t1 = | 0 | aR1 | 0 | G1B1 | */ |

1984 | preceu.ph.qbl t2, t0 /* t2 = | 0 | aR2 | 0 | G2B2 | */ |

1985 | shll.qb t3, t1, 4 /* t3 = | 0 | R1 0 | 0 | B1 0 | */ |

1986 | srl t4, t3, 4 |

1987 | or t0, t3, t4 /* t0 = | 0 | R1R1 | 0 | B1B1 | */ |

1988 | andi t3, t1, 0xf0 |

1989 | sll t3, t3, 8 |

1990 | srl t4, t3, 4 |

1991 | or t1, t3, t4 |

1992 | or t0, t0, t1 /* t0 = | 0 | R1R1 | G1G1 | B1B1 | */ |

1993 | or t0, t0, t8 /* t0 = | ff | R1R1 | G1G1 | B1B1 | */ |

1994 | |

1995 | shll.qb t3, t2, 4 /* t3 = | 0 | R2 0 | 0 | B2 0 | */ |

1996 | srl t4, t3, 4 |

1997 | or t7, t3, t4 /* t7 = | 0 | R2R2 | 0 | B2B2 | */ |

1998 | andi t3, t2, 0xf0 |

1999 | sll t3, t3, 8 |

2000 | srl t4, t3, 4 |

2001 | or t1, t3, t4 |

2002 | or t2, t7, t1 /* t2 = | 0 | R2R2 | G2G2 | B2B2 | */ |

2003 | or t2, t2, t8 /* t2 = | ff | R2R2 | G2G2 | B2B2 | */ |

2004 | |

2005 | sw t0, 0(a0) |

2006 | addiu a1, a1, 4 |

2007 | sw t2, 4(a0) |

2008 | bne a2, a3, 3b |

2009 | addiu a0, a0, 8 |

2010 | beqz a2, 5f /* no more pixels for processing */ |

2011 | nop |

2012 | 4: |

2013 | /* one more pixel remained (after loop unrolling process finished) */ |

2014 | lhu v0, 0(a1) |

2015 | addiu a1, a1, 2 |

2016 | addiu a2, a2, -1 |

2017 | andi t0, v0, 0xf00 |

2018 | andi v1, v0, 0xf |

2019 | andi v0, v0, 0xf0 |

2020 | sra t3, t0, 0x4 |

2021 | sra t1, v0, 0x4 |

2022 | sra t0, t0, 0x8 |

2023 | sll t2, v1, 0x4 |

2024 | or t0, t0, t3 |

2025 | or v0, t1, v0 |

2026 | lui t1, 0xff00 |

2027 | or v1, t2, v1 |

2028 | sll t0, t0, 0x10 |

2029 | or v1, v1, t1 |

2030 | sll v0, v0, 0x8 |

2031 | or v1, v1, t0 |

2032 | or v0, v1, v0 |

2033 | sw v0, 0(a0) |

2034 | addiu a0, a0, 4 |

2035 | 5: |

2036 | jr ra |

2037 | nop |

2038 | |

2039 | END(fetchUntransformed_444_asm_mips_dsp) |

2040 | |

2041 | |

2042 | LEAF_MIPS_DSP(fetchUntransformed_argb8565_premultiplied_asm_mips_dsp) |

2043 | /* Expands 3-byte source pixels (16-bit color word + alpha byte) into 32-bit words, one pixel per pass. |

2044 | * a0 - dst address |

2045 | * a1 - src address |

2046 | * a2 - length |

2047 | */ |

2048 | |

2049 | beqz a2, 2f |

2050 | nop |

2051 | |

2052 | 1: |

2053 | ulh t1, 0(a1) /* unaligned halfword load: src pixels are 3 bytes apart */ |

2054 | lbu t2, 2(a1) /* t2 = byte that ends up in bits 31..24 (alpha) */ |

2055 | addiu a2, a2, -1 |

2056 | wsbh t1, t1 /* swap the two bytes of the loaded halfword */ |

2057 | sll t0, t1, 8 /* t0 = 00000000rrrrrggggggbbbbb00000000 */ |

2058 | ins t0, t1, 3, 16 /* t0 = 00000000rrrrrrrrrrggggggbbbbb000 */ |

2059 | ins t0, t1, 5, 11 /* t0 = 00000000rrrrrrrrggggggbbbbbbb000 */ |

2060 | srl t4, t1, 9 /* t4 = 0000000000000000000000000rrrrrgg */ |

2061 | replv.qb t3, t2 /* t3 = alpha byte replicated into all four byte lanes */ |

2062 | ins t0, t4, 8, 2 /* t0 = 00000000rrrrrrrrggggggggbbbbb000 */ |

2063 | ins t0, t1, 3, 5 /* t0 = 00000000rrrrrrrrggggggggbbbbb000 */ |

2064 | srl t4, t1, 2 /* t4 = 000000000000000000rrrrrggggggbbb */ |

2065 | ins t0, t4, 0, 3 /* t0 = 00000000rrrrrrrrggggggggbbbbbbbb */ |

2066 | ins t0, t2, 24, 8 /* t0 =aaaaaaaarrrrrrrrggggggggbbbbbbbb */ |

2067 | cmpu.lt.qb t3, t0 /* per byte lane: flag where alpha < channel */ |

2068 | pick.qb t0, t3, t0 /* flagged lanes take alpha, so every byte becomes min(alpha, channel) */ |

2069 | addiu a1, a1, 3 |

2070 | sw t0, 0(a0) |

2071 | bgtz a2, 1b |

2072 | addiu a0, a0, 4 /* dst++ (branch delay slot) */ |

2073 | 2: |

2074 | jr ra |

2075 | nop |

2076 | |

2077 | END(fetchUntransformed_argb8565_premultiplied_asm_mips_dsp) |

2078 |

Warning: That file was not part of the compilation database. It may have many parsing errors.