// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * Copyright (c) 2014 The Linux Foundation. All rights reserved.
 */

#include <linux/ascii85.h>
#include <linux/interconnect.h>
#include <linux/firmware/qcom/qcom_scm.h>
#include <linux/kernel.h>
#include <linux/of_address.h>
#include <linux/pm_opp.h>
#include <linux/slab.h>
#include <linux/soc/qcom/mdt_loader.h>
#include <linux/nvmem-consumer.h>
#include <soc/qcom/ocmem.h>
#include "adreno_gpu.h"
#include "a6xx_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"

static u64 address_space_size;
MODULE_PARM_DESC(address_space_size, "Override for size of a process's private GPU address space");
module_param(address_space_size, ullong, 0600);
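/*
 * Usage sketch (illustrative; this file is built into msm.ko, so the
 * override would typically be passed as a module option, e.g.
 * msm.address_space_size=0x100000000 on the kernel command line).
 * Zero, the default, keeps the per-chip or fallback size chosen in
 * adreno_private_address_space_size() below.
 */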

static bool zap_available = true;

static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname,
		u32 pasid)
{
	struct device *dev = &gpu->pdev->dev;
	const struct firmware *fw;
	const char *signed_fwname = NULL;
	struct device_node *np, *mem_np;
	struct resource r;
	phys_addr_t mem_phys;
	ssize_t mem_size;
	void *mem_region = NULL;
	int ret;

	if (!IS_ENABLED(CONFIG_ARCH_QCOM)) {
		zap_available = false;
		return -EINVAL;
	}

	np = of_get_child_by_name(dev->of_node, "zap-shader");
	if (!np) {
		zap_available = false;
		return -ENODEV;
	}

	mem_np = of_parse_phandle(np, "memory-region", 0);
	of_node_put(np);
	if (!mem_np) {
		zap_available = false;
		return -EINVAL;
	}

	ret = of_address_to_resource(mem_np, 0, &r);
	of_node_put(mem_np);
	if (ret)
		return ret;

	mem_phys = r.start;

	/*
	 * Check for a firmware-name property.  This is the new scheme
	 * to handle firmware that may be signed with device specific
	 * keys, allowing us to have a different zap fw path for different
	 * devices.
	 *
	 * If the firmware-name property is found, we bypass the
	 * adreno_request_fw() mechanism, because we don't need to handle
	 * the /lib/firmware/qcom/... vs /lib/firmware/... case.
	 *
	 * If the firmware-name property is not found, for backwards
	 * compatibility we fall back to the fwname from the gpulist
	 * table.
	 */
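	/*
	 * For reference, the node consumed here usually looks roughly like
	 * this (illustrative only; region and firmware names are board
	 * specific):
	 *
	 *	zap-shader {
	 *		memory-region = <&gpu_zap_mem>;
	 *		firmware-name = "qcom/myboard/a6xx_zap.mbn";
	 *	};
	 */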
	of_property_read_string_index(np, "firmware-name", 0, &signed_fwname);
	if (signed_fwname) {
		fwname = signed_fwname;
		ret = request_firmware_direct(&fw, fwname, gpu->dev->dev);
		if (ret)
			fw = ERR_PTR(ret);
	} else if (fwname) {
		/* Request the MDT file from the default location: */
		fw = adreno_request_fw(to_adreno_gpu(gpu), fwname);
	} else {
		/*
		 * For new targets, we require the firmware-name property,
		 * if a zap-shader is required, rather than falling back
		 * to a firmware name specified in gpulist.
		 *
		 * Because the firmware is signed with a (potentially)
		 * device specific key, having the name come from gpulist
		 * was a bad idea, and is only provided for backwards
		 * compatibility for older targets.
		 */
		return -ENODEV;
	}

	if (IS_ERR(fw)) {
		DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname);
		return PTR_ERR(fw);
	}

	/* Figure out how much memory we need */
	mem_size = qcom_mdt_get_size(fw);
	if (mem_size < 0) {
		ret = mem_size;
		goto out;
	}

	if (mem_size > resource_size(&r)) {
		DRM_DEV_ERROR(dev,
			"memory region is too small to load the MDT\n");
		ret = -E2BIG;
		goto out;
	}

	/* Map the reserved memory region for the firmware image */
	mem_region = memremap(mem_phys, mem_size, MEMREMAP_WC);
	if (!mem_region) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * Load the rest of the MDT
	 *
	 * Note that we could be dealing with two different paths, since
	 * with upstream linux-firmware it would be in a qcom/ subdir..
	 * adreno_request_fw() handles this, but qcom_mdt_load() does
	 * not.  But since we've already gotten through adreno_request_fw()
	 * we know which of the two cases it is:
	 */
	if (signed_fwname || (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY)) {
		ret = qcom_mdt_load(dev, fw, fwname, pasid,
				mem_region, mem_phys, mem_size, NULL);
	} else {
		char *newname;

		newname = kasprintf(GFP_KERNEL, "qcom/%s", fwname);
		if (!newname) {
			ret = -ENOMEM;
			goto out;
		}

		ret = qcom_mdt_load(dev, fw, newname, pasid,
				mem_region, mem_phys, mem_size, NULL);
		kfree(newname);
	}
	if (ret)
		goto out;

	/* Send the image to the secure world */
	ret = qcom_scm_pas_auth_and_reset(pasid);

	/*
	 * If the scm call returns -EOPNOTSUPP we assume that this target
	 * doesn't need/support the zap shader so quietly fail
	 */
	if (ret == -EOPNOTSUPP)
		zap_available = false;
	else if (ret)
		DRM_DEV_ERROR(dev, "Unable to authorize the image\n");

out:
	if (mem_region)
		memunmap(mem_region);

	release_firmware(fw);

	return ret;
}

int adreno_zap_shader_load(struct msm_gpu *gpu, u32 pasid)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct platform_device *pdev = gpu->pdev;

	/* Short cut if we determine the zap shader isn't available/needed */
	if (!zap_available)
		return -ENODEV;

	/* We need SCM to be able to load the firmware */
	if (!qcom_scm_is_available()) {
		DRM_DEV_ERROR(&pdev->dev, "SCM is not available\n");
		return -EPROBE_DEFER;
	}

	return zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw, pasid);
}

struct msm_gem_address_space *
adreno_create_address_space(struct msm_gpu *gpu,
			    struct platform_device *pdev)
{
	return adreno_iommu_create_address_space(gpu, pdev, 0);
}

struct msm_gem_address_space *
adreno_iommu_create_address_space(struct msm_gpu *gpu,
				  struct platform_device *pdev,
				  unsigned long quirks)
{
	struct iommu_domain_geometry *geometry;
	struct msm_mmu *mmu;
	struct msm_gem_address_space *aspace;
	u64 start, size;

	mmu = msm_iommu_gpu_new(&pdev->dev, gpu, quirks);
	if (IS_ERR_OR_NULL(mmu))
		return ERR_CAST(mmu);

	geometry = msm_iommu_get_geometry(mmu);
	if (IS_ERR(geometry))
		return ERR_CAST(geometry);

	/*
	 * Use the aperture start or SZ_16M, whichever is greater. This will
	 * ensure that we align with the allocated pagetable range while still
	 * allowing room in the lower 32 bits for GMEM and whatnot
	 */
	start = max_t(u64, SZ_16M, geometry->aperture_start);
	size = geometry->aperture_end - start + 1;

	aspace = msm_gem_address_space_create(mmu, "gpu",
		start & GENMASK_ULL(48, 0), size);

	if (IS_ERR(aspace) && !IS_ERR(mmu))
		mmu->funcs->destroy(mmu);

	return aspace;
}

u64 adreno_private_address_space_size(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	if (address_space_size)
		return address_space_size;

	if (adreno_gpu->info->address_space_size)
		return adreno_gpu->info->address_space_size;

	return SZ_4G;
}

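/*
 * These values mirror the Arm SMMU FSR layout (as also defined in the
 * arm-smmu driver): bit 1 flags a translation fault, bit 3 a permission
 * fault, and bit 4 an external abort.
 */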
#define ARM_SMMU_FSR_TF			BIT(1)
#define ARM_SMMU_FSR_PF			BIT(3)
#define ARM_SMMU_FSR_EF			BIT(4)

int adreno_fault_handler(struct msm_gpu *gpu, unsigned long iova, int flags,
			 struct adreno_smmu_fault_info *info, const char *block,
			 u32 scratch[4])
{
	const char *type = "UNKNOWN";
	bool do_devcoredump = info && !READ_ONCE(gpu->crashstate);

	/*
	 * If we aren't going to be resuming later from fault_worker, then do
	 * it now.
	 */
	if (!do_devcoredump) {
		gpu->aspace->mmu->funcs->resume_translation(gpu->aspace->mmu);
	}

	/*
	 * Print a default message if we couldn't get the data from the
	 * adreno-smmu-priv
	 */
	if (!info) {
		pr_warn_ratelimited("*** gpu fault: iova=%.16lx flags=%d (%u,%u,%u,%u)\n",
			iova, flags,
			scratch[0], scratch[1], scratch[2], scratch[3]);

		return 0;
	}

	if (info->fsr & ARM_SMMU_FSR_TF)
		type = "TRANSLATION";
	else if (info->fsr & ARM_SMMU_FSR_PF)
		type = "PERMISSION";
	else if (info->fsr & ARM_SMMU_FSR_EF)
		type = "EXTERNAL";

	pr_warn_ratelimited("*** gpu fault: ttbr0=%.16llx iova=%.16lx dir=%s type=%s source=%s (%u,%u,%u,%u)\n",
			info->ttbr0, iova,
			flags & IOMMU_FAULT_WRITE ? "WRITE" : "READ",
			type, block,
			scratch[0], scratch[1], scratch[2], scratch[3]);

	if (do_devcoredump) {
		/* Turn off the hangcheck timer to keep it from bothering us */
		del_timer(&gpu->hangcheck_timer);

		gpu->fault_info.ttbr0 = info->ttbr0;
		gpu->fault_info.iova = iova;
		gpu->fault_info.flags = flags;
		gpu->fault_info.type = type;
		gpu->fault_info.block = block;

		kthread_queue_work(gpu->worker, &gpu->fault_work);
	}

	return 0;
}

int adreno_get_param(struct msm_gpu *gpu, struct msm_file_private *ctx,
		     uint32_t param, uint64_t *value, uint32_t *len)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	/* No pointer params yet */
	if (*len != 0)
		return -EINVAL;

	switch (param) {
	case MSM_PARAM_GPU_ID:
		*value = adreno_gpu->info->revn;
		return 0;
	case MSM_PARAM_GMEM_SIZE:
		*value = adreno_gpu->info->gmem;
		return 0;
	case MSM_PARAM_GMEM_BASE:
		if (adreno_is_a650_family(adreno_gpu) ||
		    adreno_is_a740_family(adreno_gpu))
			*value = 0;
		else
			*value = 0x100000;
		return 0;
	case MSM_PARAM_CHIP_ID:
		*value = adreno_gpu->chip_id;
		if (!adreno_gpu->info->revn)
			*value |= ((uint64_t) adreno_gpu->speedbin) << 32;
		return 0;
	case MSM_PARAM_MAX_FREQ:
		*value = adreno_gpu->base.fast_rate;
		return 0;
	case MSM_PARAM_TIMESTAMP:
		if (adreno_gpu->funcs->get_timestamp) {
			int ret;

			pm_runtime_get_sync(&gpu->pdev->dev);
			ret = adreno_gpu->funcs->get_timestamp(gpu, value);
			pm_runtime_put_autosuspend(&gpu->pdev->dev);

			return ret;
		}
		return -EINVAL;
	case MSM_PARAM_PRIORITIES:
		*value = gpu->nr_rings * NR_SCHED_PRIORITIES;
		return 0;
	case MSM_PARAM_PP_PGTABLE:
		*value = 0;
		return 0;
	case MSM_PARAM_FAULTS:
		if (ctx->aspace)
			*value = gpu->global_faults + ctx->aspace->faults;
		else
			*value = gpu->global_faults;
		return 0;
	case MSM_PARAM_SUSPENDS:
		*value = gpu->suspend_count;
		return 0;
	case MSM_PARAM_VA_START:
		if (ctx->aspace == gpu->aspace)
			return -EINVAL;
		*value = ctx->aspace->va_start;
		return 0;
	case MSM_PARAM_VA_SIZE:
		if (ctx->aspace == gpu->aspace)
			return -EINVAL;
		*value = ctx->aspace->va_size;
		return 0;
	default:
		DBG("%s: invalid param: %u", gpu->name, param);
		return -EINVAL;
	}
}
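/*
 * A minimal userspace sketch of reading one of these params (illustrative;
 * error handling omitted, and the exact uapi struct is defined in
 * include/uapi/drm/msm_drm.h):
 *
 *	struct drm_msm_param req = {
 *		.pipe = MSM_PIPE_3D0,
 *		.param = MSM_PARAM_GMEM_SIZE,
 *	};
 *
 *	if (ioctl(fd, DRM_IOCTL_MSM_GET_PARAM, &req) == 0)
 *		printf("gmem: %llu\n", (unsigned long long)req.value);
 */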

int adreno_set_param(struct msm_gpu *gpu, struct msm_file_private *ctx,
		     uint32_t param, uint64_t value, uint32_t len)
{
	switch (param) {
	case MSM_PARAM_COMM:
	case MSM_PARAM_CMDLINE:
		/* kstrdup_quotable_cmdline() limits to PAGE_SIZE, so
		 * that should be a reasonable upper bound
		 */
		if (len > PAGE_SIZE)
			return -EINVAL;
		break;
	default:
		if (len != 0)
			return -EINVAL;
	}

	switch (param) {
	case MSM_PARAM_COMM:
	case MSM_PARAM_CMDLINE: {
		char *str, **paramp;

		str = memdup_user_nul(u64_to_user_ptr(value), len);
		if (IS_ERR(str))
			return PTR_ERR(str);

		mutex_lock(&gpu->lock);

		if (param == MSM_PARAM_COMM) {
			paramp = &ctx->comm;
		} else {
			paramp = &ctx->cmdline;
		}

		kfree(*paramp);
		*paramp = str;

		mutex_unlock(&gpu->lock);

		return 0;
	}
	case MSM_PARAM_SYSPROF:
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
		return msm_file_private_set_sysprof(ctx, gpu, value);
	default:
		DBG("%s: invalid param: %u", gpu->name, param);
		return -EINVAL;
	}
}

const struct firmware *
adreno_request_fw(struct adreno_gpu *adreno_gpu, const char *fwname)
{
	struct drm_device *drm = adreno_gpu->base.dev;
	const struct firmware *fw = NULL;
	char *newname;
	int ret;

	newname = kasprintf(GFP_KERNEL, "qcom/%s", fwname);
	if (!newname)
		return ERR_PTR(-ENOMEM);

	/*
	 * Try first to load from qcom/$fwfile using a direct load (to avoid
	 * a potential timeout waiting for usermode helper)
	 */
	if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) ||
	    (adreno_gpu->fwloc == FW_LOCATION_NEW)) {

		ret = request_firmware_direct(&fw, newname, drm->dev);
		if (!ret) {
			DRM_DEV_INFO(drm->dev, "loaded %s from new location\n",
				newname);
			adreno_gpu->fwloc = FW_LOCATION_NEW;
			goto out;
		} else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) {
			DRM_DEV_ERROR(drm->dev, "failed to load %s: %d\n",
				newname, ret);
			fw = ERR_PTR(ret);
			goto out;
		}
	}

	/*
	 * Then try the legacy location without qcom/ prefix
	 */
	if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) ||
	    (adreno_gpu->fwloc == FW_LOCATION_LEGACY)) {

		ret = request_firmware_direct(&fw, fwname, drm->dev);
		if (!ret) {
			DRM_DEV_INFO(drm->dev, "loaded %s from legacy location\n",
				fwname);
			adreno_gpu->fwloc = FW_LOCATION_LEGACY;
			goto out;
		} else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) {
			DRM_DEV_ERROR(drm->dev, "failed to load %s: %d\n",
				fwname, ret);
			fw = ERR_PTR(ret);
			goto out;
		}
	}

	/*
	 * Finally fall back to request_firmware() for cases where the
	 * usermode helper is needed (I think mainly android)
	 */
	if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) ||
	    (adreno_gpu->fwloc == FW_LOCATION_HELPER)) {

		ret = request_firmware(&fw, newname, drm->dev);
		if (!ret) {
			DRM_DEV_INFO(drm->dev, "loaded %s with helper\n",
				newname);
			adreno_gpu->fwloc = FW_LOCATION_HELPER;
			goto out;
		} else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) {
			DRM_DEV_ERROR(drm->dev, "failed to load %s: %d\n",
				newname, ret);
			fw = ERR_PTR(ret);
			goto out;
		}
	}

	DRM_DEV_ERROR(drm->dev, "failed to load %s\n", fwname);
	fw = ERR_PTR(-ENOENT);
out:
	kfree(newname);
	return fw;
}

int adreno_load_fw(struct adreno_gpu *adreno_gpu)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(adreno_gpu->info->fw); i++) {
		const struct firmware *fw;

		if (!adreno_gpu->info->fw[i])
			continue;

		/* Skip loading GMU firmware with GMU Wrapper */
		if (adreno_has_gmu_wrapper(adreno_gpu) && i == ADRENO_FW_GMU)
			continue;

		/* Skip if the firmware has already been loaded */
		if (adreno_gpu->fw[i])
			continue;

		fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->fw[i]);
		if (IS_ERR(fw))
			return PTR_ERR(fw);

		adreno_gpu->fw[i] = fw;
	}

	return 0;
}

struct drm_gem_object *adreno_fw_create_bo(struct msm_gpu *gpu,
		const struct firmware *fw, u64 *iova)
{
	struct drm_gem_object *bo;
	void *ptr;

	ptr = msm_gem_kernel_new(gpu->dev, fw->size - 4,
		MSM_BO_WC | MSM_BO_GPU_READONLY, gpu->aspace, &bo, iova);

	if (IS_ERR(ptr))
		return ERR_CAST(ptr);

	memcpy(ptr, &fw->data[4], fw->size - 4);

	msm_gem_put_vaddr(bo);

	return bo;
}

int adreno_hw_init(struct msm_gpu *gpu)
{
	VERB("%s", gpu->name);

	for (int i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		if (!ring)
			continue;

		ring->cur = ring->start;
		ring->next = ring->start;
		ring->memptrs->rptr = 0;
		ring->memptrs->bv_fence = ring->fctx->completed_fence;

		/* Detect and clean up an impossible fence, ie. if GPU managed
		 * to scribble something invalid, we don't want that to confuse
		 * us into mistakenly believing that submits have completed.
		 */
		if (fence_before(ring->fctx->last_fence, ring->memptrs->fence)) {
			ring->memptrs->fence = ring->fctx->last_fence;
		}
	}

	return 0;
}

/* Use this helper to read rptr, since a430 doesn't update rptr in memory */
static uint32_t get_rptr(struct adreno_gpu *adreno_gpu,
		struct msm_ringbuffer *ring)
{
	struct msm_gpu *gpu = &adreno_gpu->base;

	return gpu->funcs->get_rptr(gpu, ring);
}

struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu)
{
	return gpu->rb[0];
}

void adreno_recover(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	int ret;

	// XXX pm-runtime??  we *need* the device to be off after this
	// so maybe continuing to call ->pm_suspend/resume() is better?

	gpu->funcs->pm_suspend(gpu);
	gpu->funcs->pm_resume(gpu);

	ret = msm_gpu_hw_init(gpu);
	if (ret) {
		DRM_DEV_ERROR(dev->dev, "gpu hw init failed: %d\n", ret);
		/* hmm, oh well? */
	}
}

void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring, u32 reg)
{
	uint32_t wptr;

	/* Copy the shadow to the actual register */
	ring->cur = ring->next;

	/*
	 * Mask wptr value that we calculate to fit in the HW range. This is
	 * to account for the possibility that the last command fit exactly into
	 * the ringbuffer and rb->next hasn't wrapped to zero yet
	 */
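	/*
	 * Worked example (assuming, per get_wptr() in adreno_gpu.h, the
	 * offset is taken modulo the ring size in dwords): if a submit
	 * ends exactly at the ring boundary, ring->next sits a full ring
	 * past ring->start, so the raw offset equals the ring size, one
	 * past the valid HW range, and the modulo wraps it back to 0.
	 */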
	wptr = get_wptr(ring);

	/* ensure writes to ringbuffer have hit system memory: */
	mb();

	gpu_write(gpu, reg, wptr);
}

bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	uint32_t wptr = get_wptr(ring);

	/* wait for CP to drain ringbuffer: */
	if (!spin_until(get_rptr(adreno_gpu, ring) == wptr))
		return true;

	/* TODO maybe we need to reset GPU here to recover from hang? */
	DRM_ERROR("%s: timeout waiting to drain ringbuffer %d rptr/wptr = %X/%X\n",
		gpu->name, ring->id, get_rptr(adreno_gpu, ring), wptr);

	return false;
}

int adreno_gpu_state_get(struct msm_gpu *gpu, struct msm_gpu_state *state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i, count = 0;

	WARN_ON(!mutex_is_locked(&gpu->lock));

	kref_init(&state->ref);

	ktime_get_real_ts64(&state->time);

	for (i = 0; i < gpu->nr_rings; i++) {
		int size = 0, j;

		state->ring[i].fence = gpu->rb[i]->memptrs->fence;
		state->ring[i].iova = gpu->rb[i]->iova;
		state->ring[i].seqno = gpu->rb[i]->fctx->last_fence;
		state->ring[i].rptr = get_rptr(adreno_gpu, gpu->rb[i]);
		state->ring[i].wptr = get_wptr(gpu->rb[i]);

		/* Copy at least 'wptr' dwords of the data */
		size = state->ring[i].wptr;

		/* After wptr find the last non zero dword to save space */
		for (j = state->ring[i].wptr; j < MSM_GPU_RINGBUFFER_SZ >> 2; j++)
			if (gpu->rb[i]->start[j])
				size = j + 1;

		if (size) {
			state->ring[i].data = kvmalloc(size << 2, GFP_KERNEL);
			if (state->ring[i].data) {
				memcpy(state->ring[i].data, gpu->rb[i]->start, size << 2);
				state->ring[i].data_size = size << 2;
			}
		}
	}

	/* Some targets prefer to collect their own registers */
	if (!adreno_gpu->registers)
		return 0;

	/* Count the number of registers */
	for (i = 0; adreno_gpu->registers[i] != ~0; i += 2)
		count += adreno_gpu->registers[i + 1] -
			adreno_gpu->registers[i] + 1;

	state->registers = kcalloc(count * 2, sizeof(u32), GFP_KERNEL);
	if (state->registers) {
		int pos = 0;

		for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) {
			u32 start = adreno_gpu->registers[i];
			u32 end = adreno_gpu->registers[i + 1];
			u32 addr;

			for (addr = start; addr <= end; addr++) {
				state->registers[pos++] = addr;
				state->registers[pos++] = gpu_read(gpu, addr);
			}
		}

		state->nr_registers = count;
	}

	return 0;
}

void adreno_gpu_state_destroy(struct msm_gpu_state *state)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(state->ring); i++)
		kvfree(state->ring[i].data);

	for (i = 0; state->bos && i < state->nr_bos; i++)
		kvfree(state->bos[i].data);

	kfree(state->bos);
	kfree(state->comm);
	kfree(state->cmd);
	kfree(state->registers);
}

static void adreno_gpu_state_kref_destroy(struct kref *kref)
{
	struct msm_gpu_state *state = container_of(kref,
		struct msm_gpu_state, ref);

	adreno_gpu_state_destroy(state);
	kfree(state);
}

int adreno_gpu_state_put(struct msm_gpu_state *state)
{
	if (IS_ERR_OR_NULL(state))
		return 1;

	return kref_put(&state->ref, adreno_gpu_state_kref_destroy);
}

#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)

static char *adreno_gpu_ascii85_encode(u32 *src, size_t len)
{
	void *buf;
	size_t buf_itr = 0, buffer_size;
	char out[ASCII85_BUFSZ];
	long l;
	int i;

	if (!src || !len)
		return NULL;

	l = ascii85_encode_len(len);

	/*
	 * Ascii85 outputs either a 5 byte string or a 1 byte string. So we
	 * account for the worst case of 5 bytes per dword plus the 1 for '\0'
	 */
	buffer_size = (l * 5) + 1;

	buf = kvmalloc(buffer_size, GFP_KERNEL);
	if (!buf)
		return NULL;

	for (i = 0; i < l; i++)
		buf_itr += scnprintf(buf + buf_itr, buffer_size - buf_itr, "%s",
				ascii85_encode(src[i], out));

	return buf;
}
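/*
 * Sizing sketch for the helper above (assuming ascii85_encode_len()
 * rounds the byte count up to 4-byte groups): a 16-byte source is 4
 * groups, so the worst case output is 4 * 5 + 1 = 21 bytes, including
 * the terminating '\0'.
 */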

/* len is expected to be in bytes
 *
 * WARNING: *ptr should be allocated with kvmalloc or friends.  It can be free'd
 * with kvfree() and replaced with a newly kvmalloc'd buffer on the first call
 * when the unencoded raw data is encoded
 */
void adreno_show_object(struct drm_printer *p, void **ptr, int len,
		bool *encoded)
{
	if (!*ptr || !len)
		return;

	if (!*encoded) {
		long datalen, i;
		u32 *buf = *ptr;

		/*
		 * Only dump the non-zero part of the buffer - rarely will
		 * any data completely fill the entire allocated size of
		 * the buffer.
		 */
		for (datalen = 0, i = 0; i < len >> 2; i++)
			if (buf[i])
				datalen = ((i + 1) << 2);

		/*
		 * If we reach here, then the originally captured binary buffer
		 * will be replaced with the ascii85 encoded string
		 */
		*ptr = adreno_gpu_ascii85_encode(buf, datalen);

		kvfree(buf);

		*encoded = true;
	}

	if (!*ptr)
		return;

	drm_puts(p, "    data: !!ascii85 |\n");
	drm_puts(p, "     ");

	drm_puts(p, *ptr);

	drm_puts(p, "\n");
}

void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
		struct drm_printer *p)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i;

	if (IS_ERR_OR_NULL(state))
		return;

	drm_printf(p, "revision: %u (%" ADRENO_CHIPID_FMT ")\n",
			adreno_gpu->info->revn,
			ADRENO_CHIPID_ARGS(adreno_gpu->chip_id));
	/*
	 * If this state was collected due to an iova fault, include the
	 * fault-related info.  A non-zero TTBR0 is a good way to
	 * distinguish that case.
	 */
	if (state->fault_info.ttbr0) {
		const struct msm_gpu_fault_info *info = &state->fault_info;

		drm_puts(p, "fault-info:\n");
		drm_printf(p, "  - ttbr0=%.16llx\n", info->ttbr0);
		drm_printf(p, "  - iova=%.16lx\n", info->iova);
		drm_printf(p, "  - dir=%s\n", info->flags & IOMMU_FAULT_WRITE ? "WRITE" : "READ");
		drm_printf(p, "  - type=%s\n", info->type);
		drm_printf(p, "  - source=%s\n", info->block);
	}

	drm_printf(p, "rbbm-status: 0x%08x\n", state->rbbm_status);

	drm_puts(p, "ringbuffer:\n");

	for (i = 0; i < gpu->nr_rings; i++) {
		drm_printf(p, "  - id: %d\n", i);
		drm_printf(p, "    iova: 0x%016llx\n", state->ring[i].iova);
		drm_printf(p, "    last-fence: %u\n", state->ring[i].seqno);
		drm_printf(p, "    retired-fence: %u\n", state->ring[i].fence);
		drm_printf(p, "    rptr: %u\n", state->ring[i].rptr);
		drm_printf(p, "    wptr: %u\n", state->ring[i].wptr);
		drm_printf(p, "    size: %u\n", MSM_GPU_RINGBUFFER_SZ);

		adreno_show_object(p, &state->ring[i].data,
			state->ring[i].data_size, &state->ring[i].encoded);
	}

	if (state->bos) {
		drm_puts(p, "bos:\n");

		for (i = 0; i < state->nr_bos; i++) {
			drm_printf(p, "  - iova: 0x%016llx\n",
				state->bos[i].iova);
			drm_printf(p, "    size: %zd\n", state->bos[i].size);
			drm_printf(p, "    name: %-32s\n", state->bos[i].name);

			adreno_show_object(p, &state->bos[i].data,
				state->bos[i].size, &state->bos[i].encoded);
		}
	}

	if (state->nr_registers) {
		drm_puts(p, "registers:\n");

		for (i = 0; i < state->nr_registers; i++) {
			drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
				state->registers[i * 2] << 2,
				state->registers[(i * 2) + 1]);
		}
	}
}
#endif

/* Dump common gpu status and scratch registers on any hang, to make
 * the hangcheck logs more useful.  The scratch registers seem always
 * safe to read when GPU has hung (unlike some other regs, depending
 * on how the GPU hung), and they are useful to match up to cmdstream
 * dumps when debugging hangs:
 */
void adreno_dump_info(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i;

	printk("revision: %u (%" ADRENO_CHIPID_FMT ")\n",
			adreno_gpu->info->revn,
			ADRENO_CHIPID_ARGS(adreno_gpu->chip_id));

	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		printk("rb %d: fence: %d/%d\n", i,
			ring->memptrs->fence,
			ring->fctx->last_fence);

		printk("rptr: %d\n", get_rptr(adreno_gpu, ring));
		printk("rb wptr: %d\n", get_wptr(ring));
	}
}

/* would be nice to not have to duplicate the _show() stuff with printk(): */
void adreno_dump(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i;

	if (!adreno_gpu->registers)
		return;

	/* dump these out in a form that can be parsed by demsm: */
	printk("IO:region %s 00000000 00020000\n", gpu->name);
	for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) {
		uint32_t start = adreno_gpu->registers[i];
		uint32_t end = adreno_gpu->registers[i + 1];
		uint32_t addr;

		for (addr = start; addr <= end; addr++) {
			uint32_t val = gpu_read(gpu, addr);
			printk("IO:R %08x %08x\n", addr << 2, val);
		}
	}
}

static uint32_t ring_freewords(struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(ring->gpu);
	uint32_t size = MSM_GPU_RINGBUFFER_SZ >> 2;
	/* Use ring->next to calculate free size */
	uint32_t wptr = ring->next - ring->start;
	uint32_t rptr = get_rptr(adreno_gpu, ring);
	return (rptr + (size - 1) - wptr) % size;
}
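/*
 * A worked example for the formula above (assuming an 8192-dword ring):
 * with rptr = 100 and wptr = 4000, 3900 dwords are in flight, so
 * (100 + 8191 - 4000) % 8192 = 4291 dwords remain free.  One slot is
 * always kept unused so that an empty ring (rptr == wptr) stays
 * distinguishable from a full one.
 */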

void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords)
{
	if (spin_until(ring_freewords(ring) >= ndwords))
		DRM_DEV_ERROR(ring->gpu->dev->dev,
			"timeout waiting for space in ringbuffer %d\n",
			ring->id);
}

static int adreno_get_pwrlevels(struct device *dev,
		struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	unsigned long freq = ULONG_MAX;
	struct dev_pm_opp *opp;
	int ret;

	gpu->fast_rate = 0;

	/* devm_pm_opp_of_add_table may error out but will still create an OPP table */
	ret = devm_pm_opp_of_add_table(dev);
	if (ret == -ENODEV) {
		/* Special cases for ancient hw with ancient DT bindings */
		if (adreno_is_a2xx(adreno_gpu)) {
			dev_warn(dev, "Unable to find the OPP table. Falling back to 200 MHz.\n");
			dev_pm_opp_add(dev, 200000000, 0);
		} else if (adreno_is_a320(adreno_gpu)) {
			dev_warn(dev, "Unable to find the OPP table. Falling back to 450 MHz.\n");
			dev_pm_opp_add(dev, 450000000, 0);
		} else {
			DRM_DEV_ERROR(dev, "Unable to find the OPP table\n");
			return -ENODEV;
		}
	} else if (ret) {
		DRM_DEV_ERROR(dev, "Unable to set the OPP table\n");
		return ret;
	}

	/* Find the fastest defined rate */
	opp = dev_pm_opp_find_freq_floor(dev, &freq);
	if (IS_ERR(opp))
		return PTR_ERR(opp);

	gpu->fast_rate = freq;
	dev_pm_opp_put(opp);

	DBG("fast_rate=%u, slow_rate=27000000", gpu->fast_rate);

	return 0;
}
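/*
 * For reference, the OPP table parsed above typically comes from a DT
 * fragment like the following (illustrative; real tables add per-OPP
 * properties such as opp-level or bandwidth as the platform requires):
 *
 *	gpu_opp_table: opp-table {
 *		compatible = "operating-points-v2";
 *
 *		opp-500000000 {
 *			opp-hz = /bits/ 64 <500000000>;
 *		};
 *	};
 */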

int adreno_gpu_ocmem_init(struct device *dev, struct adreno_gpu *adreno_gpu,
			  struct adreno_ocmem *adreno_ocmem)
{
	struct ocmem_buf *ocmem_hdl;
	struct ocmem *ocmem;

	ocmem = of_get_ocmem(dev);
	if (IS_ERR(ocmem)) {
		if (PTR_ERR(ocmem) == -ENODEV) {
			/*
			 * Return success since either the ocmem property was
			 * not specified in device tree, or ocmem support is
			 * not compiled into the kernel.
			 */
			return 0;
		}

		return PTR_ERR(ocmem);
	}

	ocmem_hdl = ocmem_allocate(ocmem, OCMEM_GRAPHICS, adreno_gpu->info->gmem);
	if (IS_ERR(ocmem_hdl))
		return PTR_ERR(ocmem_hdl);

	adreno_ocmem->ocmem = ocmem;
	adreno_ocmem->base = ocmem_hdl->addr;
	adreno_ocmem->hdl = ocmem_hdl;

	if (WARN_ON(ocmem_hdl->len != adreno_gpu->info->gmem))
		return -ENOMEM;

	return 0;
}

void adreno_gpu_ocmem_cleanup(struct adreno_ocmem *adreno_ocmem)
{
	if (adreno_ocmem && adreno_ocmem->base)
		ocmem_free(adreno_ocmem->ocmem, OCMEM_GRAPHICS,
			   adreno_ocmem->hdl);
}

int adreno_read_speedbin(struct device *dev, u32 *speedbin)
{
	return nvmem_cell_read_variable_le_u32(dev, "speed_bin", speedbin);
}
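/*
 * The "speed_bin" cell read above is typically wired up in the GPU's DT
 * node roughly as follows (illustrative; the fuse offsets behind the
 * nvmem cell are SoC specific):
 *
 *	nvmem-cells = <&gpu_speed_bin>;
 *	nvmem-cell-names = "speed_bin";
 */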

int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct adreno_gpu *adreno_gpu,
		const struct adreno_gpu_funcs *funcs, int nr_rings)
{
	struct device *dev = &pdev->dev;
	struct adreno_platform_config *config = dev->platform_data;
	struct msm_gpu_config adreno_gpu_config = { 0 };
	struct msm_gpu *gpu = &adreno_gpu->base;
	const char *gpu_name;
	u32 speedbin;
	int ret;

	adreno_gpu->funcs = funcs;
	adreno_gpu->info = config->info;
	adreno_gpu->chip_id = config->chip_id;

	gpu->allow_relocs = config->info->family < ADRENO_6XX_GEN1;

	/* Only handle the core clock when GMU is not in use (or is absent). */
	if (adreno_has_gmu_wrapper(adreno_gpu) ||
	    adreno_gpu->info->family < ADRENO_6XX_GEN1) {
		/*
		 * This can only be done before devm_pm_opp_of_add_table(), or
		 * dev_pm_opp_set_config() will WARN_ON()
		 */
		if (IS_ERR(devm_clk_get(dev, "core"))) {
			/*
			 * If "core" is absent, go for the legacy clock name.
			 * If we got this far in probing, it's a given one of
			 * them exists.
			 */
			devm_pm_opp_set_clkname(dev, "core_clk");
		} else {
			devm_pm_opp_set_clkname(dev, "core");
		}
	}

	if (adreno_read_speedbin(dev, &speedbin) || !speedbin)
		speedbin = 0xffff;
	adreno_gpu->speedbin = (uint16_t) (0xffff & speedbin);

	gpu_name = devm_kasprintf(dev, GFP_KERNEL, "%" ADRENO_CHIPID_FMT,
			ADRENO_CHIPID_ARGS(config->chip_id));
	if (!gpu_name)
		return -ENOMEM;

	adreno_gpu_config.ioname = "kgsl_3d0_reg_memory";

	adreno_gpu_config.nr_rings = nr_rings;

	ret = adreno_get_pwrlevels(dev, gpu);
	if (ret)
		return ret;

	pm_runtime_set_autosuspend_delay(dev,
		adreno_gpu->info->inactive_period);
	pm_runtime_use_autosuspend(dev);

	return msm_gpu_init(drm, pdev, &adreno_gpu->base, &funcs->base,
			gpu_name, &adreno_gpu_config);
}

void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu)
{
	struct msm_gpu *gpu = &adreno_gpu->base;
	struct msm_drm_private *priv = gpu->dev ? gpu->dev->dev_private : NULL;
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(adreno_gpu->info->fw); i++)
		release_firmware(adreno_gpu->fw[i]);

	if (priv && pm_runtime_enabled(&priv->gpu_pdev->dev))
		pm_runtime_disable(&priv->gpu_pdev->dev);

	msm_gpu_cleanup(&adreno_gpu->base);
}