/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include <drm/drm.h>

#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_vcn.h"
#include "soc15d.h"
#include "soc15_common.h"

#include "vcn/vcn_1_0_offset.h"
#include "vcn/vcn_1_0_sh_mask.h"

/* 1 second timeout */
#define VCN_IDLE_TIMEOUT	msecs_to_jiffies(1000)

/* Firmware Names */
#define FIRMWARE_RAVEN		"amdgpu/raven_vcn.bin"
#define FIRMWARE_PICASSO	"amdgpu/picasso_vcn.bin"
#define FIRMWARE_RAVEN2		"amdgpu/raven2_vcn.bin"

MODULE_FIRMWARE(FIRMWARE_RAVEN);
MODULE_FIRMWARE(FIRMWARE_PICASSO);
MODULE_FIRMWARE(FIRMWARE_RAVEN2);

static void amdgpu_vcn_idle_work_handler(struct work_struct *work);

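/**
 * amdgpu_vcn_sw_init - load VCN firmware and allocate the VCPU buffer
 *
 * @adev: amdgpu_device pointer
 *
 * Request and validate the VCN firmware for the current ASIC, log the
 * firmware version information and allocate the VRAM buffer object that
 * holds the VCPU stack and context (plus the firmware image itself when
 * it is not loaded through PSP).
 */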
int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
{
	unsigned long bo_size;
	const char *fw_name;
	const struct common_firmware_header *hdr;
	unsigned char fw_check;
	int r;

	INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		if (adev->rev_id >= 8)
			fw_name = FIRMWARE_RAVEN2;
		else if (adev->pdev->device == 0x15d8)
			fw_name = FIRMWARE_PICASSO;
		else
			fw_name = FIRMWARE_RAVEN;
		break;
	default:
		return -EINVAL;
	}

	r = request_firmware(&adev->vcn.fw, fw_name, adev->dev);
	if (r) {
		dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n",
			fw_name);
		return r;
	}

	r = amdgpu_ucode_validate(adev->vcn.fw);
	if (r) {
		dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->vcn.fw);
		adev->vcn.fw = NULL;
		return r;
	}

	hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
	adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);

	/* Bit 20-23, it is encode major and non-zero for new naming convention.
	 * This field is part of version minor and DRM_DISABLED_FLAG in old naming
	 * convention. Since the latest version minor is 0x5B and DRM_DISABLED_FLAG
	 * is zero in old naming convention, this field is always zero so far.
	 * These four bits are used to tell which naming convention is present.
	 */
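	/* Layout of ucode_version, as decoded below:
	 *
	 *   new convention (bits 23:20 non-zero):
	 *     [31:28] VEP interface version
	 *     [27:24] decode interface version
	 *     [23:20] encode interface major version
	 *     [19:12] encode interface minor version
	 *     [11:0]  firmware revision
	 *
	 *   legacy convention (bits 23:20 zero):
	 *     [31:24] version major
	 *     [15:8]  version minor
	 *     [7:0]   family id
	 */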
	fw_check = (le32_to_cpu(hdr->ucode_version) >> 20) & 0xf;
	if (fw_check) {
		unsigned int dec_ver, enc_major, enc_minor, vep, fw_rev;

		fw_rev = le32_to_cpu(hdr->ucode_version) & 0xfff;
		enc_minor = (le32_to_cpu(hdr->ucode_version) >> 12) & 0xff;
		enc_major = fw_check;
		dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf;
		vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf;
		DRM_INFO("Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n",
			 enc_major, enc_minor, dec_ver, vep, fw_rev);
	} else {
		unsigned int version_major, version_minor, family_id;

		family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
		version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
		version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
		DRM_INFO("Found VCN firmware Version: %u.%u Family ID: %u\n",
			 version_major, version_minor, family_id);
	}

	bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
	r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo,
				    &adev->vcn.gpu_addr, &adev->vcn.cpu_addr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
		return r;
	}

	return 0;
}

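/**
 * amdgpu_vcn_sw_fini - free VCN resources
 *
 * @adev: amdgpu_device pointer
 *
 * Free the saved firmware state, the VCPU buffer object and all VCN
 * rings, then release the firmware.
 */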
int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
{
	int i;

	kvfree(adev->vcn.saved_bo);

	amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo,
			      &adev->vcn.gpu_addr,
			      (void **)&adev->vcn.cpu_addr);

	amdgpu_ring_fini(&adev->vcn.ring_dec);

	for (i = 0; i < adev->vcn.num_enc_rings; ++i)
		amdgpu_ring_fini(&adev->vcn.ring_enc[i]);

	amdgpu_ring_fini(&adev->vcn.ring_jpeg);

	release_firmware(adev->vcn.fw);

	return 0;
}

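/**
 * amdgpu_vcn_suspend - save VCN VCPU state
 *
 * @adev: amdgpu_device pointer
 *
 * Cancel the pending idle work and copy the contents of the VCPU buffer
 * object to system memory so it can be restored on resume.
 */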
int amdgpu_vcn_suspend(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;

	cancel_delayed_work_sync(&adev->vcn.idle_work);

	if (adev->vcn.vcpu_bo == NULL)
		return 0;

	size = amdgpu_bo_size(adev->vcn.vcpu_bo);
	ptr = adev->vcn.cpu_addr;

	adev->vcn.saved_bo = kvmalloc(size, GFP_KERNEL);
	if (!adev->vcn.saved_bo)
		return -ENOMEM;

	memcpy_fromio(adev->vcn.saved_bo, ptr, size);

	return 0;
}

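/**
 * amdgpu_vcn_resume - restore VCN VCPU state
 *
 * @adev: amdgpu_device pointer
 *
 * Restore the VCPU buffer object from the state saved at suspend time,
 * or reinitialize it from the firmware image when no saved state exists.
 */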
int amdgpu_vcn_resume(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;

	if (adev->vcn.vcpu_bo == NULL)
		return -EINVAL;

	size = amdgpu_bo_size(adev->vcn.vcpu_bo);
	ptr = adev->vcn.cpu_addr;

	if (adev->vcn.saved_bo != NULL) {
		memcpy_toio(ptr, adev->vcn.saved_bo, size);
		kvfree(adev->vcn.saved_bo);
		adev->vcn.saved_bo = NULL;
	} else {
		const struct common_firmware_header *hdr;
		unsigned offset;

		hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
		if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
			offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
			memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset,
				    le32_to_cpu(hdr->ucode_size_bytes));
			size -= le32_to_cpu(hdr->ucode_size_bytes);
			ptr += le32_to_cpu(hdr->ucode_size_bytes);
		}
		memset_io(ptr, 0, size);
	}

	return 0;
}

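/**
 * amdgpu_vcn_pause_dpg_mode - pause or unpause DPG mode
 *
 * @adev: amdgpu_device pointer
 * @new_state: requested pause state for the firmware-based and JPEG paths
 *
 * When the requested state differs from the current one, program
 * UVD_DPG_PAUSE accordingly, wait for the hardware acknowledge and
 * restore the encode/decode/JPEG ring registers after a pause request.
 */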
static int amdgpu_vcn_pause_dpg_mode(struct amdgpu_device *adev,
				     struct dpg_pause_state *new_state)
{
	int ret_code;
	uint32_t reg_data = 0;
	uint32_t reg_data2 = 0;
	struct amdgpu_ring *ring;

	/* pause/unpause if state is changed */
	if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
		DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
			adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
			new_state->fw_based, new_state->jpeg);

		reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
			(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);

		if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
			ret_code = 0;

			if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK))
				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
						   UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);

			if (!ret_code) {
				/* pause DPG non-jpeg */
				reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
				WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
						   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
						   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);

				/* Restore */
				ring = &adev->vcn.ring_enc[0];
				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
				WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
				WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
				WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));

				ring = &adev->vcn.ring_enc[1];
				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
				WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
				WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
				WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));

				ring = &adev->vcn.ring_dec;
				WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
					     RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
						   UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
			}
		} else {
			/* unpause dpg non-jpeg, no need to wait */
			reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
			WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
		}
		adev->vcn.pause_state.fw_based = new_state->fw_based;
	}

	/* pause/unpause if state is changed */
	if (adev->vcn.pause_state.jpeg != new_state->jpeg) {
		DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
			adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
			new_state->fw_based, new_state->jpeg);

		reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
			(~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK);

		if (new_state->jpeg == VCN_DPG_STATE__PAUSE) {
			ret_code = 0;

			if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK))
				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
						   UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);

			if (!ret_code) {
				/* Make sure JRBC Snoop is disabled before sending the pause */
				reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
				reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK;
				WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2);

				/* pause DPG jpeg */
				reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
				WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
						   UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK,
						   UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code);

				/* Restore */
				ring = &adev->vcn.ring_jpeg;
				WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
					     UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
					     UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
				WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
					     lower_32_bits(ring->gpu_addr));
				WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
					     upper_32_bits(ring->gpu_addr));
				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr);
				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr);
				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
					     UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);

				ring = &adev->vcn.ring_dec;
				WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
					     RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
						   UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
			}
		} else {
			/* unpause dpg jpeg, no need to wait */
			reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
			WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
		}
		adev->vcn.pause_state.jpeg = new_state->jpeg;
	}

	return 0;
}

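/**
 * amdgpu_vcn_idle_work_handler - VCN idle handling
 *
 * @work: pointer to the delayed work
 *
 * Count the fences still outstanding on the VCN rings, update the DPG
 * pause state accordingly, and power gate the block (or drop the UVD
 * clocks) once all rings are idle; otherwise re-arm the idle timer.
 */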
static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, vcn.idle_work.work);
	unsigned int fences = 0;
	unsigned int i;

	for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
		fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]);
	}

	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
		struct dpg_pause_state new_state;

		if (fences)
			new_state.fw_based = VCN_DPG_STATE__PAUSE;
		else
			new_state.fw_based = VCN_DPG_STATE__UNPAUSE;

		if (amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg))
			new_state.jpeg = VCN_DPG_STATE__PAUSE;
		else
			new_state.jpeg = VCN_DPG_STATE__UNPAUSE;

		amdgpu_vcn_pause_dpg_mode(adev, &new_state);
	}

	fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg);
	fences += amdgpu_fence_count_emitted(&adev->vcn.ring_dec);

	if (fences == 0) {
		amdgpu_gfx_off_ctrl(adev, true);
		if (adev->pm.dpm_enabled)
			amdgpu_dpm_enable_uvd(adev, false);
		else
			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
							       AMD_PG_STATE_GATE);
	} else {
		schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
	}
}

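/**
 * amdgpu_vcn_ring_begin_use - power up VCN before a ring is used
 *
 * @ring: ring about to be used
 *
 * Cancel the pending idle work.  If it was not pending, the block may
 * have been powered down, so disable GFXOFF and re-enable the UVD
 * clocks or ungate the block.  In DPG mode also request the pause
 * state that matches the rings that currently have work.
 */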
void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);

	if (set_clocks) {
		amdgpu_gfx_off_ctrl(adev, false);
		if (adev->pm.dpm_enabled)
			amdgpu_dpm_enable_uvd(adev, true);
		else
			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
							       AMD_PG_STATE_UNGATE);
	}

	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
		struct dpg_pause_state new_state;
		unsigned int fences = 0;
		unsigned int i;

		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
			fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]);
		}
		if (fences)
			new_state.fw_based = VCN_DPG_STATE__PAUSE;
		else
			new_state.fw_based = VCN_DPG_STATE__UNPAUSE;

		if (amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg))
			new_state.jpeg = VCN_DPG_STATE__PAUSE;
		else
			new_state.jpeg = VCN_DPG_STATE__UNPAUSE;

		if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
			new_state.fw_based = VCN_DPG_STATE__PAUSE;
		else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
			new_state.jpeg = VCN_DPG_STATE__PAUSE;

		amdgpu_vcn_pause_dpg_mode(adev, &new_state);
	}
}

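/**
 * amdgpu_vcn_ring_end_use - schedule VCN power down
 *
 * @ring: ring that has just been used
 *
 * (Re)arm the delayed idle work so the block is powered down again
 * once it has been idle for VCN_IDLE_TIMEOUT.
 */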
void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
{
	schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
}

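/**
 * amdgpu_vcn_dec_ring_test_ring - register write/read test via the decode ring
 *
 * @ring: the decode ring to test
 *
 * Write a poison value to UVD_SCRATCH9 directly, emit a register write
 * of 0xDEADBEEF through the ring and poll the register until the value
 * shows up.
 */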
int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		return r;

	amdgpu_ring_write(ring,
		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9));
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

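/**
 * amdgpu_vcn_dec_send_msg - submit a decoder message to the ring
 *
 * @ring: decode ring to submit to
 * @bo: reserved buffer object containing the message
 * @fence: optional fence to return for the submission
 *
 * Build a small IB that points the VCPU at the message buffer and
 * submit it directly to the decode ring; the BO is fenced, unreserved
 * and unreferenced regardless of the outcome.
 */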
static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
				   struct amdgpu_bo *bo,
				   struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *f = NULL;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
	if (r)
		goto err;

	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);
	ib->ptr[0] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0);
	ib->ptr[1] = addr;
	ib->ptr[2] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0);
	ib->ptr[3] = addr >> 32;
	ib->ptr[4] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0);
	ib->ptr[5] = 0;
	for (i = 6; i < 16; i += 2) {
		ib->ptr[i] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0);
		ib->ptr[i+1] = 0;
	}
	ib->length_dw = 16;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err_free;

	amdgpu_bo_fence(bo, f, false);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err_free:
	amdgpu_job_free(job);

err:
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);
	return r;
}

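/**
 * amdgpu_vcn_dec_get_create_msg - build and send a decoder create message
 *
 * @ring: decode ring
 * @handle: session handle to embed in the message
 * @fence: optional fence to return
 *
 * Allocate a message buffer in VRAM, fill it with a dummy create message
 * for @handle and submit it through amdgpu_vcn_dec_send_msg().
 */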
static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
					 struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_bo *bo = NULL;
	uint32_t *msg;
	int r, i;

	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &bo, NULL, (void **)&msg);
	if (r)
		return r;

	msg[0] = cpu_to_le32(0x00000028);
	msg[1] = cpu_to_le32(0x00000038);
	msg[2] = cpu_to_le32(0x00000001);
	msg[3] = cpu_to_le32(0x00000000);
	msg[4] = cpu_to_le32(handle);
	msg[5] = cpu_to_le32(0x00000000);
	msg[6] = cpu_to_le32(0x00000001);
	msg[7] = cpu_to_le32(0x00000028);
	msg[8] = cpu_to_le32(0x00000010);
	msg[9] = cpu_to_le32(0x00000000);
	msg[10] = cpu_to_le32(0x00000007);
	msg[11] = cpu_to_le32(0x00000000);
	msg[12] = cpu_to_le32(0x00000780);
	msg[13] = cpu_to_le32(0x00000440);
	for (i = 14; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	return amdgpu_vcn_dec_send_msg(ring, bo, fence);
}

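/**
 * amdgpu_vcn_dec_get_destroy_msg - build and send a decoder destroy message
 *
 * @ring: decode ring
 * @handle: session handle to destroy
 * @fence: optional fence to return
 *
 * Allocate a message buffer in VRAM, fill it with a destroy message for
 * @handle and submit it through amdgpu_vcn_dec_send_msg().
 */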
static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
					  struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_bo *bo = NULL;
	uint32_t *msg;
	int r, i;

	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &bo, NULL, (void **)&msg);
	if (r)
		return r;

	msg[0] = cpu_to_le32(0x00000028);
	msg[1] = cpu_to_le32(0x00000018);
	msg[2] = cpu_to_le32(0x00000000);
	msg[3] = cpu_to_le32(0x00000002);
	msg[4] = cpu_to_le32(handle);
	msg[5] = cpu_to_le32(0x00000000);
	for (i = 6; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	return amdgpu_vcn_dec_send_msg(ring, bo, fence);
}

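/**
 * amdgpu_vcn_dec_ring_test_ib - test decode IB submission
 *
 * @ring: decode ring
 * @timeout: how long to wait for the fence, in jiffies
 *
 * Submit a create and a destroy message and wait for the fence of the
 * destroy message to signal.
 */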
int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence;
	long r;

	r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

	dma_fence_put(fence);
error:
	return r;
}

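/**
 * amdgpu_vcn_enc_ring_test_ring - simple encode ring test
 *
 * @ring: encode ring
 *
 * Emit an END command and wait for the read pointer to advance past it.
 */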
int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr = amdgpu_ring_get_rptr(ring);
	unsigned i;
	int r;

	r = amdgpu_ring_alloc(ring, 16);
	if (r)
		return r;

	amdgpu_ring_write(ring, VCN_ENC_CMD_END);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		DRM_UDELAY(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

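/**
 * amdgpu_vcn_enc_get_create_msg - build and send an encoder create message
 *
 * @ring: encode ring
 * @handle: session handle
 * @fence: optional fence to return
 *
 * Build a small session-init IB for @handle and submit it directly to
 * the encode ring.
 */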
static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
					 struct dma_fence **fence)
{
	const unsigned ib_size_dw = 16;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t dummy;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	dummy = ib->gpu_addr + 1024;

	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x00000018;
	ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
	ib->ptr[ib->length_dw++] = handle;
	ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
	ib->ptr[ib->length_dw++] = dummy;
	ib->ptr[ib->length_dw++] = 0x0000000b;

	ib->ptr[ib->length_dw++] = 0x00000014;
	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
	ib->ptr[ib->length_dw++] = 0x0000001c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008;
	ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

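/**
 * amdgpu_vcn_enc_get_destroy_msg - build and send an encoder destroy message
 *
 * @ring: encode ring
 * @handle: session handle to close
 * @fence: optional fence to return
 *
 * Build a small close-session IB for @handle and submit it directly to
 * the encode ring.
 */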
static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
					  struct dma_fence **fence)
{
	const unsigned ib_size_dw = 16;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t dummy;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	dummy = ib->gpu_addr + 1024;

	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x00000018;
	ib->ptr[ib->length_dw++] = 0x00000001;
	ib->ptr[ib->length_dw++] = handle;
	ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
	ib->ptr[ib->length_dw++] = dummy;
	ib->ptr[ib->length_dw++] = 0x0000000b;

	ib->ptr[ib->length_dw++] = 0x00000014;
	ib->ptr[ib->length_dw++] = 0x00000002;
	ib->ptr[ib->length_dw++] = 0x0000001c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008;
	ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

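/**
 * amdgpu_vcn_enc_ring_test_ib - test encode IB submission
 *
 * @ring: encode ring
 * @timeout: how long to wait for the fence, in jiffies
 *
 * Submit a create and a destroy session message and wait for the fence
 * of the destroy message to signal.
 */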
int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	long r;

	r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL);
	if (r)
		goto error;

	r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

error:
	dma_fence_put(fence);
	return r;
}

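/**
 * amdgpu_vcn_jpeg_ring_test_ring - register write/read test via the JPEG ring
 *
 * @ring: the JPEG ring to test
 *
 * Write a poison value to UVD_SCRATCH9 directly, emit a register write
 * of 0xDEADBEEF through the ring and poll the register until the value
 * shows up.
 */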
int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);

	if (r)
		return r;

	amdgpu_ring_write(ring,
		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, 0));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9));
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

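/**
 * amdgpu_vcn_jpeg_set_reg - write a register through the JPEG ring
 *
 * @ring: JPEG ring
 * @handle: unused session handle
 * @fence: optional fence to return
 *
 * Build an IB that writes 0xDEADBEEF to UVD_SCRATCH9 and submit it
 * directly to the JPEG ring.
 */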
static int amdgpu_vcn_jpeg_set_reg(struct amdgpu_ring *ring, uint32_t handle,
				   struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	const unsigned ib_size_dw = 16;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
	if (r)
		return r;

	ib = &job->ibs[0];

	ib->ptr[0] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, PACKETJ_TYPE0);
	ib->ptr[1] = 0xDEADBEEF;
	for (i = 2; i < 16; i += 2) {
		ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
		ib->ptr[i+1] = 0;
	}
	ib->length_dw = 16;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

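/**
 * amdgpu_vcn_jpeg_ring_test_ib - test JPEG IB submission
 *
 * @ring: JPEG ring
 * @timeout: how long to wait for the fence, in jiffies
 *
 * Submit an IB that writes to UVD_SCRATCH9, wait for its fence and then
 * poll the register until the written value shows up.
 */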
int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t tmp = 0;
	unsigned i;
	struct dma_fence *fence = NULL;
	long r = 0;

	r = amdgpu_vcn_jpeg_set_reg(ring, 1, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto error;
	} else if (r < 0) {
		goto error;
	} else {
		r = 0;
	}

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9));
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error:
	dma_fence_put(fence);
	return r;
}