1 | /* |
2 | * Copyright 2013 Advanced Micro Devices, Inc. |
3 | * All Rights Reserved. |
4 | * |
5 | * Permission is hereby granted, free of charge, to any person obtaining a |
6 | * copy of this software and associated documentation files (the |
7 | * "Software"), to deal in the Software without restriction, including |
8 | * without limitation the rights to use, copy, modify, merge, publish, |
9 | * distribute, sub license, and/or sell copies of the Software, and to |
10 | * permit persons to whom the Software is furnished to do so, subject to |
11 | * the following conditions: |
12 | * |
13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
15 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
16 | * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, |
17 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
18 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
19 | * USE OR OTHER DEALINGS IN THE SOFTWARE. |
20 | * |
21 | * The above copyright notice and this permission notice (including the |
22 | * next paragraph) shall be included in all copies or substantial portions |
23 | * of the Software. |
24 | * |
25 | * Authors: Christian König <christian.koenig@amd.com> |
26 | */ |
27 | |
28 | #include <linux/firmware.h> |
29 | |
30 | #include "radeon.h" |
31 | #include "radeon_asic.h" |
32 | #include "sid.h" |
33 | #include "vce.h" |
34 | |
/*
 * Sizes of the three consecutive regions that vce_v1_0_resume() programs
 * into the VCE_VCPU_CACHE_SIZE0/1/2 registers (firmware, stack, data).
 * Their sum is what vce_v1_0_bo_size() returns.
 */
#define VCE_V1_0_FW_SIZE (256 * 1024)
#define VCE_V1_0_STACK_SIZE (64 * 1024)
#define VCE_V1_0_DATA_SIZE (7808 * (RADEON_MAX_VCE_HANDLES + 1))
38 | |
/*
 * Header located at the very start of the VCE firmware blob, as it is
 * interpreted by vce_v1_0_load_fw(). The scalar fields are read through
 * le32_to_cpu() there; nonce[] and sigval[] are copied without conversion.
 */
struct vce_v1_0_fw_signature
{
	int32_t off;	/* not used by vce_v1_0_load_fw() */
	uint32_t len;	/* length; len + 64 is stored in the upload header */
	int32_t num;	/* number of val[] entries searched for a chip match */
	struct {
		uint32_t chip_id;	/* compared against the per-family id */
		uint32_t keyselect;	/* saved in rdev->vce.keyselect */
		uint32_t nonce[4];	/* copied to the start of the upload header */
		uint32_t sigval[4];	/* copied behind the payload, at len + 64 */
	} val[8];
};
51 | |
52 | /** |
53 | * vce_v1_0_get_rptr - get read pointer |
54 | * |
55 | * @rdev: radeon_device pointer |
56 | * @ring: radeon_ring pointer |
57 | * |
58 | * Returns the current hardware read pointer |
59 | */ |
60 | uint32_t vce_v1_0_get_rptr(struct radeon_device *rdev, |
61 | struct radeon_ring *ring) |
62 | { |
63 | if (ring->idx == TN_RING_TYPE_VCE1_INDEX) |
64 | return RREG32(VCE_RB_RPTR); |
65 | else |
66 | return RREG32(VCE_RB_RPTR2); |
67 | } |
68 | |
69 | /** |
70 | * vce_v1_0_get_wptr - get write pointer |
71 | * |
72 | * @rdev: radeon_device pointer |
73 | * @ring: radeon_ring pointer |
74 | * |
75 | * Returns the current hardware write pointer |
76 | */ |
77 | uint32_t vce_v1_0_get_wptr(struct radeon_device *rdev, |
78 | struct radeon_ring *ring) |
79 | { |
80 | if (ring->idx == TN_RING_TYPE_VCE1_INDEX) |
81 | return RREG32(VCE_RB_WPTR); |
82 | else |
83 | return RREG32(VCE_RB_WPTR2); |
84 | } |
85 | |
86 | /** |
87 | * vce_v1_0_set_wptr - set write pointer |
88 | * |
89 | * @rdev: radeon_device pointer |
90 | * @ring: radeon_ring pointer |
91 | * |
92 | * Commits the write pointer to the hardware |
93 | */ |
94 | void vce_v1_0_set_wptr(struct radeon_device *rdev, |
95 | struct radeon_ring *ring) |
96 | { |
97 | if (ring->idx == TN_RING_TYPE_VCE1_INDEX) |
98 | WREG32(VCE_RB_WPTR, ring->wptr); |
99 | else |
100 | WREG32(VCE_RB_WPTR2, ring->wptr); |
101 | } |
102 | |
103 | void vce_v1_0_enable_mgcg(struct radeon_device *rdev, bool enable) |
104 | { |
105 | u32 tmp; |
106 | |
107 | if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_VCE_MGCG)) { |
108 | tmp = RREG32(VCE_CLOCK_GATING_A); |
109 | tmp |= CGC_DYN_CLOCK_MODE; |
110 | WREG32(VCE_CLOCK_GATING_A, tmp); |
111 | |
112 | tmp = RREG32(VCE_UENC_CLOCK_GATING); |
113 | tmp &= ~0x1ff000; |
114 | tmp |= 0xff800000; |
115 | WREG32(VCE_UENC_CLOCK_GATING, tmp); |
116 | |
117 | tmp = RREG32(VCE_UENC_REG_CLOCK_GATING); |
118 | tmp &= ~0x3ff; |
119 | WREG32(VCE_UENC_REG_CLOCK_GATING, tmp); |
120 | } else { |
121 | tmp = RREG32(VCE_CLOCK_GATING_A); |
122 | tmp &= ~CGC_DYN_CLOCK_MODE; |
123 | WREG32(VCE_CLOCK_GATING_A, tmp); |
124 | |
125 | tmp = RREG32(VCE_UENC_CLOCK_GATING); |
126 | tmp |= 0x1ff000; |
127 | tmp &= ~0xff800000; |
128 | WREG32(VCE_UENC_CLOCK_GATING, tmp); |
129 | |
130 | tmp = RREG32(VCE_UENC_REG_CLOCK_GATING); |
131 | tmp |= 0x3ff; |
132 | WREG32(VCE_UENC_REG_CLOCK_GATING, tmp); |
133 | } |
134 | } |
135 | |
136 | static void vce_v1_0_init_cg(struct radeon_device *rdev) |
137 | { |
138 | u32 tmp; |
139 | |
140 | tmp = RREG32(VCE_CLOCK_GATING_A); |
141 | tmp |= CGC_DYN_CLOCK_MODE; |
142 | WREG32(VCE_CLOCK_GATING_A, tmp); |
143 | |
144 | tmp = RREG32(VCE_CLOCK_GATING_B); |
145 | tmp |= 0x1e; |
146 | tmp &= ~0xe100e1; |
147 | WREG32(VCE_CLOCK_GATING_B, tmp); |
148 | |
149 | tmp = RREG32(VCE_UENC_CLOCK_GATING); |
150 | tmp &= ~0xff9ff000; |
151 | WREG32(VCE_UENC_CLOCK_GATING, tmp); |
152 | |
153 | tmp = RREG32(VCE_UENC_REG_CLOCK_GATING); |
154 | tmp &= ~0x3ff; |
155 | WREG32(VCE_UENC_REG_CLOCK_GATING, tmp); |
156 | } |
157 | |
158 | int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data) |
159 | { |
160 | struct vce_v1_0_fw_signature *sign = (void*)rdev->vce_fw->data; |
161 | uint32_t chip_id; |
162 | int i; |
163 | |
164 | switch (rdev->family) { |
165 | case CHIP_TAHITI: |
166 | chip_id = 0x01000014; |
167 | break; |
168 | case CHIP_VERDE: |
169 | chip_id = 0x01000015; |
170 | break; |
171 | case CHIP_PITCAIRN: |
172 | chip_id = 0x01000016; |
173 | break; |
174 | case CHIP_ARUBA: |
175 | chip_id = 0x01000017; |
176 | break; |
177 | default: |
178 | return -EINVAL; |
179 | } |
180 | |
181 | for (i = 0; i < le32_to_cpu(sign->num); ++i) { |
182 | if (le32_to_cpu(sign->val[i].chip_id) == chip_id) |
183 | break; |
184 | } |
185 | |
186 | if (i == le32_to_cpu(sign->num)) |
187 | return -EINVAL; |
188 | |
189 | data += (256 - 64) / 4; |
190 | data[0] = sign->val[i].nonce[0]; |
191 | data[1] = sign->val[i].nonce[1]; |
192 | data[2] = sign->val[i].nonce[2]; |
193 | data[3] = sign->val[i].nonce[3]; |
194 | data[4] = cpu_to_le32(le32_to_cpu(sign->len) + 64); |
195 | |
196 | memset(&data[5], 0, 44); |
197 | memcpy(&data[16], &sign[1], rdev->vce_fw->size - sizeof(*sign)); |
198 | |
199 | data += (le32_to_cpu(sign->len) + 64) / 4; |
200 | data[0] = sign->val[i].sigval[0]; |
201 | data[1] = sign->val[i].sigval[1]; |
202 | data[2] = sign->val[i].sigval[2]; |
203 | data[3] = sign->val[i].sigval[3]; |
204 | |
205 | rdev->vce.keyselect = le32_to_cpu(sign->val[i].keyselect); |
206 | |
207 | return 0; |
208 | } |
209 | |
210 | unsigned vce_v1_0_bo_size(struct radeon_device *rdev) |
211 | { |
212 | WARN_ON(VCE_V1_0_FW_SIZE < rdev->vce_fw->size); |
213 | return VCE_V1_0_FW_SIZE + VCE_V1_0_STACK_SIZE + VCE_V1_0_DATA_SIZE; |
214 | } |
215 | |
216 | int vce_v1_0_resume(struct radeon_device *rdev) |
217 | { |
218 | uint64_t addr = rdev->vce.gpu_addr; |
219 | uint32_t size; |
220 | int i; |
221 | |
222 | WREG32_P(VCE_CLOCK_GATING_A, 0, ~(1 << 16)); |
223 | WREG32_P(VCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000); |
224 | WREG32_P(VCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F); |
225 | WREG32(VCE_CLOCK_GATING_B, 0); |
226 | |
227 | WREG32_P(VCE_LMI_FW_PERIODIC_CTRL, 0x4, ~0x4); |
228 | |
229 | WREG32(VCE_LMI_CTRL, 0x00398000); |
230 | WREG32_P(VCE_LMI_CACHE_CTRL, 0x0, ~0x1); |
231 | WREG32(VCE_LMI_SWAP_CNTL, 0); |
232 | WREG32(VCE_LMI_SWAP_CNTL1, 0); |
233 | WREG32(VCE_LMI_VM_CTRL, 0); |
234 | |
235 | WREG32(VCE_VCPU_SCRATCH7, RADEON_MAX_VCE_HANDLES); |
236 | |
237 | addr += 256; |
238 | size = VCE_V1_0_FW_SIZE; |
239 | WREG32(VCE_VCPU_CACHE_OFFSET0, addr & 0x7fffffff); |
240 | WREG32(VCE_VCPU_CACHE_SIZE0, size); |
241 | |
242 | addr += size; |
243 | size = VCE_V1_0_STACK_SIZE; |
244 | WREG32(VCE_VCPU_CACHE_OFFSET1, addr & 0x7fffffff); |
245 | WREG32(VCE_VCPU_CACHE_SIZE1, size); |
246 | |
247 | addr += size; |
248 | size = VCE_V1_0_DATA_SIZE; |
249 | WREG32(VCE_VCPU_CACHE_OFFSET2, addr & 0x7fffffff); |
250 | WREG32(VCE_VCPU_CACHE_SIZE2, size); |
251 | |
252 | WREG32_P(VCE_LMI_CTRL2, 0x0, ~0x100); |
253 | |
254 | WREG32(VCE_LMI_FW_START_KEYSEL, rdev->vce.keyselect); |
255 | |
256 | for (i = 0; i < 10; ++i) { |
257 | mdelay(10); |
258 | if (RREG32(VCE_FW_REG_STATUS) & VCE_FW_REG_STATUS_DONE) |
259 | break; |
260 | } |
261 | |
262 | if (i == 10) |
263 | return -ETIMEDOUT; |
264 | |
265 | if (!(RREG32(VCE_FW_REG_STATUS) & VCE_FW_REG_STATUS_PASS)) |
266 | return -EINVAL; |
267 | |
268 | for (i = 0; i < 10; ++i) { |
269 | mdelay(10); |
270 | if (!(RREG32(VCE_FW_REG_STATUS) & VCE_FW_REG_STATUS_BUSY)) |
271 | break; |
272 | } |
273 | |
274 | if (i == 10) |
275 | return -ETIMEDOUT; |
276 | |
277 | vce_v1_0_init_cg(rdev); |
278 | |
279 | return 0; |
280 | } |
281 | |
282 | /** |
283 | * vce_v1_0_start - start VCE block |
284 | * |
285 | * @rdev: radeon_device pointer |
286 | * |
287 | * Setup and start the VCE block |
288 | */ |
289 | int vce_v1_0_start(struct radeon_device *rdev) |
290 | { |
291 | struct radeon_ring *ring; |
292 | int i, j, r; |
293 | |
294 | /* set BUSY flag */ |
295 | WREG32_P(VCE_STATUS, 1, ~1); |
296 | |
297 | ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; |
298 | WREG32(VCE_RB_RPTR, ring->wptr); |
299 | WREG32(VCE_RB_WPTR, ring->wptr); |
300 | WREG32(VCE_RB_BASE_LO, ring->gpu_addr); |
301 | WREG32(VCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); |
302 | WREG32(VCE_RB_SIZE, ring->ring_size / 4); |
303 | |
304 | ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; |
305 | WREG32(VCE_RB_RPTR2, ring->wptr); |
306 | WREG32(VCE_RB_WPTR2, ring->wptr); |
307 | WREG32(VCE_RB_BASE_LO2, ring->gpu_addr); |
308 | WREG32(VCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); |
309 | WREG32(VCE_RB_SIZE2, ring->ring_size / 4); |
310 | |
311 | WREG32_P(VCE_VCPU_CNTL, VCE_CLK_EN, ~VCE_CLK_EN); |
312 | |
313 | WREG32_P(VCE_SOFT_RESET, |
314 | VCE_ECPU_SOFT_RESET | |
315 | VCE_FME_SOFT_RESET, ~( |
316 | VCE_ECPU_SOFT_RESET | |
317 | VCE_FME_SOFT_RESET)); |
318 | |
319 | mdelay(100); |
320 | |
321 | WREG32_P(VCE_SOFT_RESET, 0, ~( |
322 | VCE_ECPU_SOFT_RESET | |
323 | VCE_FME_SOFT_RESET)); |
324 | |
325 | for (i = 0; i < 10; ++i) { |
326 | uint32_t status; |
327 | for (j = 0; j < 100; ++j) { |
328 | status = RREG32(VCE_STATUS); |
329 | if (status & 2) |
330 | break; |
331 | mdelay(10); |
332 | } |
333 | r = 0; |
334 | if (status & 2) |
335 | break; |
336 | |
337 | DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n" ); |
338 | WREG32_P(VCE_SOFT_RESET, VCE_ECPU_SOFT_RESET, ~VCE_ECPU_SOFT_RESET); |
339 | mdelay(10); |
340 | WREG32_P(VCE_SOFT_RESET, 0, ~VCE_ECPU_SOFT_RESET); |
341 | mdelay(10); |
342 | r = -1; |
343 | } |
344 | |
345 | /* clear BUSY flag */ |
346 | WREG32_P(VCE_STATUS, 0, ~1); |
347 | |
348 | if (r) { |
349 | DRM_ERROR("VCE not responding, giving up!!!\n" ); |
350 | return r; |
351 | } |
352 | |
353 | return 0; |
354 | } |
355 | |
356 | int vce_v1_0_init(struct radeon_device *rdev) |
357 | { |
358 | struct radeon_ring *ring; |
359 | int r; |
360 | |
361 | r = vce_v1_0_start(rdev); |
362 | if (r) |
363 | return r; |
364 | |
365 | ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; |
366 | ring->ready = true; |
367 | r = radeon_ring_test(rdev, TN_RING_TYPE_VCE1_INDEX, ring); |
368 | if (r) { |
369 | ring->ready = false; |
370 | return r; |
371 | } |
372 | |
373 | ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; |
374 | ring->ready = true; |
375 | r = radeon_ring_test(rdev, TN_RING_TYPE_VCE2_INDEX, ring); |
376 | if (r) { |
377 | ring->ready = false; |
378 | return r; |
379 | } |
380 | |
381 | DRM_INFO("VCE initialized successfully.\n" ); |
382 | |
383 | return 0; |
384 | } |
385 | |