/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 */

#ifndef __MSM_GPU_H__
#define __MSM_GPU_H__

#include <linux/adreno-smmu-priv.h>
#include <linux/clk.h>
#include <linux/devfreq.h>
#include <linux/interconnect.h>
#include <linux/pm_opp.h>
#include <linux/regulator/consumer.h>

#include "msm_drv.h"
#include "msm_fence.h"
#include "msm_ringbuffer.h"
#include "msm_gem.h"

struct msm_gem_submit;
struct msm_gpu_perfcntr;
struct msm_gpu_state;
struct msm_file_private;

struct msm_gpu_config {
	const char *ioname;
	unsigned int nr_rings;
};
/* So far, with the hardware I've seen to date, we can have:
 *  + zero, one, or two z180 2d cores
 *  + a3xx or a2xx 3d core, which share a common CP (the firmware
 *    for the CP seems to implement some different PM4 packet types
 *    but the basics of cmdstream submission are the same)
 *
 * Which means that the eventual complete "class" hierarchy, once
 * support for all past and present hw is in place, becomes:
 *  + msm_gpu
 *    + adreno_gpu
 *      + a3xx_gpu
 *      + a2xx_gpu
 *    + z180_gpu
 */
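
/*
 * For illustration, a rough sketch of how a concrete GPU is layered on
 * top of msm_gpu (the real adreno_gpu definition lives in adreno_gpu.h;
 * fields here are elided):
 *
 *	struct adreno_gpu {
 *		struct msm_gpu base;
 *		...
 *	};
 *	#define to_adreno_gpu(x) container_of(x, struct adreno_gpu, base)
 *
 * The funcs table below is filled in by the subclass, and its callbacks
 * upcast from the msm_gpu pointer with container_of() to reach the
 * subclass state.
 */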
struct msm_gpu_funcs {
	int (*get_param)(struct msm_gpu *gpu, struct msm_file_private *ctx,
			 uint32_t param, uint64_t *value, uint32_t *len);
	int (*set_param)(struct msm_gpu *gpu, struct msm_file_private *ctx,
			 uint32_t param, uint64_t value, uint32_t len);
	int (*hw_init)(struct msm_gpu *gpu);

	/**
	 * @ucode_load: Optional hook to upload fw to GEM objs
	 */
	int (*ucode_load)(struct msm_gpu *gpu);

	int (*pm_suspend)(struct msm_gpu *gpu);
	int (*pm_resume)(struct msm_gpu *gpu);
	void (*submit)(struct msm_gpu *gpu, struct msm_gem_submit *submit);
	void (*flush)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
	irqreturn_t (*irq)(struct msm_gpu *gpu);
	struct msm_ringbuffer *(*active_ring)(struct msm_gpu *gpu);
	void (*recover)(struct msm_gpu *gpu);
	void (*destroy)(struct msm_gpu *gpu);
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
	/* show GPU status in debugfs: */
	void (*show)(struct msm_gpu *gpu, struct msm_gpu_state *state,
		     struct drm_printer *p);
	/* for generation specific debugfs: */
	void (*debugfs_init)(struct msm_gpu *gpu, struct drm_minor *minor);
#endif
	/* note: gpu_busy() can assume that we have been pm_resumed */
	u64 (*gpu_busy)(struct msm_gpu *gpu, unsigned long *out_sample_rate);
	struct msm_gpu_state *(*gpu_state_get)(struct msm_gpu *gpu);
	int (*gpu_state_put)(struct msm_gpu_state *state);
	unsigned long (*gpu_get_freq)(struct msm_gpu *gpu);
	/* note: gpu_set_freq() can assume that we have been pm_resumed */
	void (*gpu_set_freq)(struct msm_gpu *gpu, struct dev_pm_opp *opp,
			     bool suspended);
	struct msm_gem_address_space *(*create_address_space)
		(struct msm_gpu *gpu, struct platform_device *pdev);
	struct msm_gem_address_space *(*create_private_address_space)
		(struct msm_gpu *gpu);
	uint32_t (*get_rptr)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);

	/**
	 * @progress: Has the GPU made progress?
	 *
	 * Return true if the GPU position in the cmdstream has advanced (or
	 * changed) since the last call.  To avoid false negatives, this
	 * should account for cmdstream that is buffered in the ringbuffer
	 * FIFO upstream of the CP firmware.
	 */
	bool (*progress)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
};

/* Additional state for iommu faults: */
struct msm_gpu_fault_info {
	u64 ttbr0;
	unsigned long iova;
	int flags;
	const char *type;
	const char *block;
};

/**
 * struct msm_gpu_devfreq - devfreq related state
 */
struct msm_gpu_devfreq {
	/** devfreq: devfreq instance */
	struct devfreq *devfreq;

	/** lock: lock for "suspended", "busy_cycles", and "time" */
	struct mutex lock;

	/**
	 * idle_freq:
	 *
	 * Shadow frequency used while the GPU is idle.  From the PoV of
	 * the devfreq governor, we are continuing to sample busyness and
	 * adjust frequency while the GPU is idle, but we use this shadow
	 * value as the GPU is actually clamped to minimum frequency while
	 * it is inactive.
	 */
	unsigned long idle_freq;

	/**
	 * boost_freq:
	 *
	 * A PM QoS constraint to boost min freq for a period of time
	 * until the boost expires.
	 */
	struct dev_pm_qos_request boost_freq;

	/**
	 * busy_cycles: Last busy counter value, for calculating elapsed busy
	 * cycles since last sampling period.
	 */
	u64 busy_cycles;

	/** time: Time of last sampling period. */
	ktime_t time;

	/** idle_time: Time of last transition to idle. */
	ktime_t idle_time;

	/**
	 * idle_work:
	 *
	 * Used to delay clamping to idle freq on active->idle transition.
	 */
	struct msm_hrtimer_work idle_work;

	/**
	 * boost_work:
	 *
	 * Used to reset the boost_freq constraint after the boost period
	 * has elapsed.
	 */
	struct msm_hrtimer_work boost_work;

	/** suspended: tracks if we're suspended */
	bool suspended;
};

struct msm_gpu {
	const char *name;
	struct drm_device *dev;
	struct platform_device *pdev;
	const struct msm_gpu_funcs *funcs;

	struct adreno_smmu_priv adreno_smmu;

	/* performance counters (hw & sw): */
	spinlock_t perf_lock;
	bool perfcntr_active;
	struct {
		bool active;
		ktime_t time;
	} last_sample;
	uint32_t totaltime, activetime;  /* sw counters */
	uint32_t last_cntrs[5];          /* hw counters */
	const struct msm_gpu_perfcntr *perfcntrs;
	uint32_t num_perfcntrs;

	struct msm_ringbuffer *rb[MSM_GPU_MAX_RINGS];
	int nr_rings;

	/**
	 * sysprof_active:
	 *
	 * The count of contexts that have enabled system profiling.
	 */
	refcount_t sysprof_active;

	/**
	 * cur_ctx_seqno:
	 *
	 * The ctx->seqno value of the last context to submit rendering,
	 * and the one with current pgtables installed (for generations
	 * that support per-context pgtables).  Tracked by seqno rather
	 * than pointer value to avoid dangling pointers, and cases where
	 * a ctx can be freed and a new one created with the same address.
	 */
	int cur_ctx_seqno;

	/**
	 * lock:
	 *
	 * General lock for serializing all the gpu things.
	 *
	 * TODO move to per-ring locking where feasible (ie. submit/retire
	 * path, etc)
	 */
	struct mutex lock;

	/**
	 * active_submits:
	 *
	 * The number of submitted but not yet retired submits, used to
	 * determine transitions between active and idle.
	 *
	 * Protected by active_lock
	 */
	int active_submits;

	/** active_lock: protects active_submits and idle/active transitions */
	struct mutex active_lock;

	/* does gpu need hw_init? */
	bool needs_hw_init;

	/**
	 * global_faults: number of GPU hangs not attributed to a particular
	 * address space
	 */
	int global_faults;

	void __iomem *mmio;
	int irq;

	struct msm_gem_address_space *aspace;

	/* Power Control: */
	struct regulator *gpu_reg, *gpu_cx;
	struct clk_bulk_data *grp_clks;
	int nr_clocks;
	struct clk *ebi1_clk, *core_clk, *rbbmtimer_clk;
	uint32_t fast_rate;

	/* Hang and Inactivity Detection: */
#define DRM_MSM_INACTIVE_PERIOD  66 /* in ms (roughly four frames) */

#define DRM_MSM_HANGCHECK_DEFAULT_PERIOD 500 /* in ms */
#define DRM_MSM_HANGCHECK_PROGRESS_RETRIES 3
	struct timer_list hangcheck_timer;

	/* Fault info for most recent iova fault: */
	struct msm_gpu_fault_info fault_info;

	/* work for handling GPU iova faults: */
	struct kthread_work fault_work;

	/* work for handling GPU recovery: */
	struct kthread_work recover_work;

	/** retire_event: notified when submits are retired: */
	wait_queue_head_t retire_event;

	/* work for handling active-list retiring: */
	struct kthread_work retire_work;

	/* worker for retire/recover: */
	struct kthread_worker *worker;

	struct drm_gem_object *memptrs_bo;

	struct msm_gpu_devfreq devfreq;

	uint32_t suspend_count;

	struct msm_gpu_state *crashstate;

	/* True if the hardware supports expanded apriv (a650 and newer) */
	bool hw_apriv;

	/**
	 * @allow_relocs: allow relocs in SUBMIT ioctl
	 *
	 * Mesa won't use relocs for driver version 1.4.0 and later.  This
	 * switch-over happened early enough in mesa a6xx bringup that we
	 * can disallow relocs for a6xx and newer.
	 */
	bool allow_relocs;

	struct thermal_cooling_device *cooling;
};

static inline struct msm_gpu *dev_to_gpu(struct device *dev)
{
	struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(dev);

	if (!adreno_smmu)
		return NULL;

	return container_of(adreno_smmu, struct msm_gpu, adreno_smmu);
}

/* It turns out that all targets use the same ringbuffer size */
#define MSM_GPU_RINGBUFFER_SZ SZ_32K
#define MSM_GPU_RINGBUFFER_BLKSIZE 32

#define MSM_GPU_RB_CNTL_DEFAULT \
		(AXXX_CP_RB_CNTL_BUFSZ(ilog2(MSM_GPU_RINGBUFFER_SZ / 8)) | \
		AXXX_CP_RB_CNTL_BLKSZ(ilog2(MSM_GPU_RINGBUFFER_BLKSIZE / 8)))
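
/*
 * For reference: the macro above encodes the sizes log2, in units of
 * 8 bytes (hence the "/ 8"), so with the values defined here:
 *
 *	BUFSZ = ilog2(SZ_32K / 8) = ilog2(4096) = 12
 *	BLKSZ = ilog2(32 / 8)     = ilog2(4)    = 2
 */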

static inline bool msm_gpu_active(struct msm_gpu *gpu)
{
	int i;

	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		if (fence_after(ring->fctx->last_fence, ring->memptrs->fence))
			return true;
	}

	return false;
}

/* Perf-Counters:
 * The select_reg and select_val are just there for the benefit of the child
 * class that actually enables the perf counter..  but msm_gpu base class
 * will handle sampling/displaying the counters.
 */

struct msm_gpu_perfcntr {
	uint32_t select_reg;
	uint32_t sample_reg;
	uint32_t select_val;
	const char *name;
};
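
/*
 * For illustration, a child class's counter table might look like the
 * following (the register and value names here are hypothetical
 * placeholders, not actual hardware definitions):
 *
 *	static const struct msm_gpu_perfcntr perfcntrs[] = {
 *		{ REG_XX_PERFCTR_SELECT, REG_XX_PERFCTR_SAMPLE,
 *			SELECT_VAL_BUSY_CYCLES, "busy" },
 *	};
 *
 * The child class writes select_val to select_reg to enable the counter;
 * the base class then handles sampling sample_reg and displaying the
 * result under "name".
 */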

/*
 * The number of priority levels provided by drm gpu scheduler.  The
 * DRM_SCHED_PRIORITY_KERNEL priority level is treated specially in some
 * cases, so we don't use it (no need for kernel generated jobs).
 */
#define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_LOW - DRM_SCHED_PRIORITY_HIGH)
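
/*
 * For reference: assuming the current drm_sched_priority enum layout
 * (KERNEL=0, HIGH=1, NORMAL=2, LOW=3), this evaluates to
 * 1 + 3 - 1 = 3 usable priority levels per ring.
 */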

/**
 * struct msm_file_private - per-drm_file context
 *
 * @queuelock: synchronizes access to submitqueues list
 * @submitqueues: list of &msm_gpu_submitqueue created by userspace
 * @queueid: counter incremented each time a submitqueue is created,
 *           used to assign &msm_gpu_submitqueue.id
 * @aspace: the per-process GPU address-space
 * @ref: reference count
 * @seqno: unique per process seqno
 */
struct msm_file_private {
	rwlock_t queuelock;
	struct list_head submitqueues;
	int queueid;
	struct msm_gem_address_space *aspace;
	struct kref ref;
	int seqno;

	/**
	 * sysprof:
	 *
	 * The value of MSM_PARAM_SYSPROF set by userspace.  This is
	 * intended to be used by system profiling tools like Mesa's
	 * pps-producer (perfetto), and restricted to CAP_SYS_ADMIN.
	 *
	 * Setting a value of 1 will preserve performance counters across
	 * context switches.  Setting a value of 2 will in addition
	 * suppress suspend.  (Performance counters lose state across
	 * power collapse, which is undesirable for profiling in some
	 * cases.)
	 *
	 * The value automatically reverts to zero when the drm device
	 * file is closed.
	 */
	int sysprof;

	/**
	 * comm: Overridden task comm, see MSM_PARAM_COMM
	 *
	 * Accessed under msm_gpu::lock
	 */
	char *comm;

	/**
	 * cmdline: Overridden task cmdline, see MSM_PARAM_CMDLINE
	 *
	 * Accessed under msm_gpu::lock
	 */
	char *cmdline;

	/**
	 * elapsed_ns:
	 *
	 * The total (cumulative) elapsed time the GPU was busy rendering
	 * from this context, in ns.
	 */
	uint64_t elapsed_ns;

	/**
	 * cycles:
	 *
	 * The total (cumulative) GPU cycles attributed to this context.
	 */
	uint64_t cycles;

	/**
	 * entities:
	 *
	 * Table of per-priority-level sched entities used by submitqueues
	 * associated with this &drm_file.  Because some userspace apps
	 * make assumptions about rendering from multiple gl contexts
	 * (of the same priority) within the process happening in FIFO
	 * order without requiring any fencing beyond MakeCurrent(), we
	 * create at most one &drm_sched_entity per-process per-priority-
	 * level.
	 */
	struct drm_sched_entity *entities[NR_SCHED_PRIORITIES * MSM_GPU_MAX_RINGS];
};

/**
 * msm_gpu_convert_priority - Map userspace priority to ring # and sched priority
 *
 * @gpu: the gpu instance
 * @prio: the userspace priority level
 * @ring_nr: [out] the ringbuffer the userspace priority maps to
 * @sched_prio: [out] the gpu scheduler priority level which the userspace
 *              priority maps to
 *
 * With drm/scheduler providing its own level of prioritization, our total
 * number of available priority levels is (nr_rings * NR_SCHED_PRIORITIES).
 * Each ring is associated with its own scheduler instance.  However, our
 * UABI is that lower numerical values are higher priority.  So mapping the
 * single userspace priority level into ring_nr and sched_prio takes some
 * care.  The userspace provided priority (when a submitqueue is created)
 * is mapped to ring nr and scheduler priority as such:
 *
 *   ring_nr    = userspace_prio / NR_SCHED_PRIORITIES
 *   sched_prio = NR_SCHED_PRIORITIES -
 *                (userspace_prio % NR_SCHED_PRIORITIES) - 1
 *
 * This allows generations without preemption (nr_rings==1) to have some
 * amount of prioritization, and provides more priority levels for gens
 * that do have preemption.
 */
static inline int msm_gpu_convert_priority(struct msm_gpu *gpu, int prio,
		unsigned *ring_nr, enum drm_sched_priority *sched_prio)
{
	unsigned rn, sp;

	rn = div_u64_rem(prio, NR_SCHED_PRIORITIES, &sp);

	/* invert sched priority to map to higher-numeric-is-higher-
	 * priority convention
	 */
	sp = NR_SCHED_PRIORITIES - sp - 1;

	if (rn >= gpu->nr_rings)
		return -EINVAL;

	*ring_nr = rn;
	*sched_prio = sp;

	return 0;
}
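
/*
 * Worked example (values assumed for illustration): with
 * NR_SCHED_PRIORITIES == 3 and a gpu where nr_rings == 4, a userspace
 * priority of 7 maps to:
 *
 *	ring_nr    = 7 / 3 = 2
 *	sched_prio = 3 - (7 % 3) - 1 = 1
 *
 * i.e. the third ring at the middle scheduler priority.  A userspace
 * priority of 12 or higher would fail the rn >= nr_rings check and
 * return -EINVAL.
 */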

/**
 * struct msm_gpu_submitqueue - Userspace created context.
 *
 * A submitqueue is associated with a gl context or vk queue (or equiv)
 * in userspace.
 *
 * @id:        userspace id for the submitqueue, unique within the drm_file
 * @flags:     userspace flags for the submitqueue, specified at creation
 *             (currently unused)
 * @ring_nr:   the ringbuffer used by this submitqueue, which is determined
 *             by the submitqueue's priority
 * @faults:    the number of GPU hangs associated with this submitqueue
 * @last_fence: the sequence number of the last allocated fence (for error
 *             checking)
 * @ctx:       the per-drm_file context associated with the submitqueue (ie.
 *             which set of pgtables do the jobs submitted to this
 *             submitqueue use)
 * @node:      node in the context's list of submitqueues
 * @fence_idr: maps fence-id to dma_fence for userspace visible fence
 *             seqno, protected by submitqueue lock
 * @idr_lock:  for serializing access to fence_idr
 * @lock:      submitqueue lock for serializing submits on a queue
 * @ref:       reference count
 * @entity:    the submit job-queue
 */
struct msm_gpu_submitqueue {
	int id;
	u32 flags;
	u32 ring_nr;
	int faults;
	uint32_t last_fence;
	struct msm_file_private *ctx;
	struct list_head node;
	struct idr fence_idr;
	struct spinlock idr_lock;
	struct mutex lock;
	struct kref ref;
	struct drm_sched_entity *entity;
};

struct msm_gpu_state_bo {
	u64 iova;
	size_t size;
	void *data;
	bool encoded;
	char name[32];
};

struct msm_gpu_state {
	struct kref ref;
	struct timespec64 time;

	struct {
		u64 iova;
		u32 fence;
		u32 seqno;
		u32 rptr;
		u32 wptr;
		void *data;
		int data_size;
		bool encoded;
	} ring[MSM_GPU_MAX_RINGS];

	int nr_registers;
	u32 *registers;

	u32 rbbm_status;

	char *comm;
	char *cmd;

	struct msm_gpu_fault_info fault_info;

	int nr_bos;
	struct msm_gpu_state_bo *bos;
};

static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data)
{
	msm_writel(data, gpu->mmio + (reg << 2));
}

static inline u32 gpu_read(struct msm_gpu *gpu, u32 reg)
{
	return msm_readl(gpu->mmio + (reg << 2));
}

static inline void gpu_rmw(struct msm_gpu *gpu, u32 reg, u32 mask, u32 or)
{
	msm_rmw(gpu->mmio + (reg << 2), mask, or);
}

static inline u64 gpu_read64(struct msm_gpu *gpu, u32 reg)
{
	u64 val;

	/*
	 * Why not a readq here? Two reasons: 1) many of the LO registers are
	 * not quad word aligned and 2) the GPU hardware designers have a bit
	 * of a history of putting registers where they fit, especially in
	 * spins.  The longer a GPU family goes the higher the chance that
	 * we'll get burned.  We could do a series of validity checks if we
	 * wanted to, but really is a readq() that much better?  Nah.
	 */

	/*
	 * For some lo/hi registers (like perfcounters), the hi value is
	 * latched when the lo is read, so make sure to read the lo first
	 * to trigger that.
	 */
	val = (u64) msm_readl(gpu->mmio + (reg << 2));
	val |= ((u64) msm_readl(gpu->mmio + ((reg + 1) << 2)) << 32);

	return val;
}

static inline void gpu_write64(struct msm_gpu *gpu, u32 reg, u64 val)
{
	/* Why not a writeq here? Read the screed above */
	msm_writel(lower_32_bits(val), gpu->mmio + (reg << 2));
	msm_writel(upper_32_bits(val), gpu->mmio + ((reg + 1) << 2));
}

int msm_gpu_pm_suspend(struct msm_gpu *gpu);
int msm_gpu_pm_resume(struct msm_gpu *gpu);

void msm_gpu_show_fdinfo(struct msm_gpu *gpu, struct msm_file_private *ctx,
			 struct drm_printer *p);

int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx);
struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx,
		u32 id);
int msm_submitqueue_create(struct drm_device *drm,
		struct msm_file_private *ctx,
		u32 prio, u32 flags, u32 *id);
int msm_submitqueue_query(struct drm_device *drm, struct msm_file_private *ctx,
		struct drm_msm_submitqueue_query *args);
int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id);
void msm_submitqueue_close(struct msm_file_private *ctx);

void msm_submitqueue_destroy(struct kref *kref);

int msm_file_private_set_sysprof(struct msm_file_private *ctx,
				 struct msm_gpu *gpu, int sysprof);
void __msm_file_private_destroy(struct kref *kref);

static inline void msm_file_private_put(struct msm_file_private *ctx)
{
	kref_put(&ctx->ref, __msm_file_private_destroy);
}

static inline struct msm_file_private *msm_file_private_get(
		struct msm_file_private *ctx)
{
	kref_get(&ctx->ref);
	return ctx;
}

void msm_devfreq_init(struct msm_gpu *gpu);
void msm_devfreq_cleanup(struct msm_gpu *gpu);
void msm_devfreq_resume(struct msm_gpu *gpu);
void msm_devfreq_suspend(struct msm_gpu *gpu);
void msm_devfreq_boost(struct msm_gpu *gpu, unsigned factor);
void msm_devfreq_active(struct msm_gpu *gpu);
void msm_devfreq_idle(struct msm_gpu *gpu);

int msm_gpu_hw_init(struct msm_gpu *gpu);

void msm_gpu_perfcntr_start(struct msm_gpu *gpu);
void msm_gpu_perfcntr_stop(struct msm_gpu *gpu);
int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs);

void msm_gpu_retire(struct msm_gpu *gpu);
void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit);

int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
		const char *name, struct msm_gpu_config *config);

struct msm_gem_address_space *
msm_gpu_create_private_address_space(struct msm_gpu *gpu, struct task_struct *task);

void msm_gpu_cleanup(struct msm_gpu *gpu);

struct msm_gpu *adreno_load_gpu(struct drm_device *dev);
void __init adreno_register(void);
void __exit adreno_unregister(void);

static inline void msm_submitqueue_put(struct msm_gpu_submitqueue *queue)
{
	if (queue)
		kref_put(&queue->ref, msm_submitqueue_destroy);
}

static inline struct msm_gpu_state *msm_gpu_crashstate_get(struct msm_gpu *gpu)
{
	struct msm_gpu_state *state = NULL;

	mutex_lock(&gpu->lock);

	if (gpu->crashstate) {
		kref_get(&gpu->crashstate->ref);
		state = gpu->crashstate;
	}

	mutex_unlock(&gpu->lock);

	return state;
}

static inline void msm_gpu_crashstate_put(struct msm_gpu *gpu)
{
	mutex_lock(&gpu->lock);

	if (gpu->crashstate) {
		if (gpu->funcs->gpu_state_put(gpu->crashstate))
			gpu->crashstate = NULL;
	}

	mutex_unlock(&gpu->lock);
}

/*
 * Simple macro to semi-cleanly add the MAP_PRIV flag for targets that can
 * support expanded privileges
 */
#define check_apriv(gpu, flags) \
	(((gpu)->hw_apriv ? MSM_BO_MAP_PRIV : 0) | (flags))
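
/*
 * For illustration (flag names assumed from msm_drv.h), allocating a
 * buffer that is privileged on targets which support it might look like:
 *
 *	msm_gem_new(dev, size, check_apriv(gpu, MSM_BO_WC));
 *
 * On a650 and newer this ORs in MSM_BO_MAP_PRIV; on older targets the
 * flags pass through unchanged.
 */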

#endif /* __MSM_GPU_H__ */