1 | /* |
2 | * Copyright © 2014-2015 Broadcom |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice (including the next |
12 | * paragraph) shall be included in all copies or substantial portions of the |
13 | * Software. |
14 | * |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
21 | * IN THE SOFTWARE. |
22 | */ |
23 | |
24 | #ifndef _UAPI_VC4_DRM_H_ |
25 | #define _UAPI_VC4_DRM_H_ |
26 | |
27 | #include "drm.h" |
28 | |
29 | #if defined(__cplusplus) |
30 | extern "C" { |
31 | #endif |
32 | |
33 | #define DRM_VC4_SUBMIT_CL 0x00 /* Driver-private ioctl indices; each one is added to DRM_COMMAND_BASE by the matching DRM_IOCTL_VC4_* request code below. */ |
34 | #define DRM_VC4_WAIT_SEQNO 0x01 |
35 | #define DRM_VC4_WAIT_BO 0x02 |
36 | #define DRM_VC4_CREATE_BO 0x03 |
37 | #define DRM_VC4_MMAP_BO 0x04 |
38 | #define DRM_VC4_CREATE_SHADER_BO 0x05 |
39 | #define DRM_VC4_GET_HANG_STATE 0x06 |
40 | #define DRM_VC4_GET_PARAM 0x07 |
41 | #define DRM_VC4_SET_TILING 0x08 |
42 | #define DRM_VC4_GET_TILING 0x09 |
43 | #define DRM_VC4_LABEL_BO 0x0a |
44 | #define DRM_VC4_GEM_MADVISE 0x0b |
45 | #define DRM_VC4_PERFMON_CREATE 0x0c |
46 | #define DRM_VC4_PERFMON_DESTROY 0x0d |
47 | #define DRM_VC4_PERFMON_GET_VALUES 0x0e |
48 | |
49 | #define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl) /* Request codes: every ioctl in this list is built with DRM_IOWR and takes the struct named as its second argument. */ |
50 | #define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno) |
51 | #define DRM_IOCTL_VC4_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_BO, struct drm_vc4_wait_bo) |
52 | #define DRM_IOCTL_VC4_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo) |
53 | #define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo) |
54 | #define DRM_IOCTL_VC4_CREATE_SHADER_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo) |
55 | #define DRM_IOCTL_VC4_GET_HANG_STATE DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_HANG_STATE, struct drm_vc4_get_hang_state) |
56 | #define DRM_IOCTL_VC4_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_PARAM, struct drm_vc4_get_param) |
57 | #define DRM_IOCTL_VC4_SET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SET_TILING, struct drm_vc4_set_tiling) |
58 | #define DRM_IOCTL_VC4_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_TILING, struct drm_vc4_get_tiling) |
59 | #define DRM_IOCTL_VC4_LABEL_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_LABEL_BO, struct drm_vc4_label_bo) |
60 | #define DRM_IOCTL_VC4_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GEM_MADVISE, struct drm_vc4_gem_madvise) |
61 | #define DRM_IOCTL_VC4_PERFMON_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_PERFMON_CREATE, struct drm_vc4_perfmon_create) |
62 | #define DRM_IOCTL_VC4_PERFMON_DESTROY DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_PERFMON_DESTROY, struct drm_vc4_perfmon_destroy) |
63 | #define DRM_IOCTL_VC4_PERFMON_GET_VALUES DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_PERFMON_GET_VALUES, struct drm_vc4_perfmon_get_values) |
64 | |
65 | struct drm_vc4_submit_rcl_surface { /* One surface slot of a submit; used for the color, zs and msaa read/write members of struct drm_vc4_submit_cl. */ |
66 | __u32 hindex; /* Handle index, or ~0 if not present. Presumably indexes the submit's bo_handles array - TODO confirm. */ |
67 | __u32 offset; /* Offset to start of buffer. */ |
68 | /* |
69 | * Bits for either render config (color_write) or load/store packet. |
70 | * Bits should all be 0 for MSAA load/stores. |
71 | */ |
72 | __u16 bits; |
73 | |
74 | #define VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES (1 << 0) |
75 | __u16 flags; /* Presumably a bitmask of the VC4_SUBMIT_RCL_SURFACE_* flag defined just above - TODO confirm. */ |
76 | }; |
77 | |
78 | /** |
79 | * struct drm_vc4_submit_cl - ioctl argument for submitting commands to the 3D |
80 | * engine. |
81 | * |
82 | * Drivers typically use GPU BOs to store batchbuffers / command lists and |
83 | * their associated state. However, because the VC4 lacks an MMU, we have to |
84 | * do validation of memory accesses by the GPU commands. If we were to store |
85 | * our commands in BOs, we'd need to do uncached readback from them to do the |
86 | * validation process, which is too expensive. Instead, userspace accumulates |
87 | * commands and associated state in plain memory, then the kernel copies the |
88 | * data to its own address space, and then validates and stores it in a GPU |
89 | * BO. |
90 | */ |
91 | struct drm_vc4_submit_cl { |
92 | /* Pointer to the binner command list. |
93 | * |
94 | * This is the first set of commands executed, which runs the |
95 | * coordinate shader to determine where primitives land on the screen, |
96 | * then writes out the state updates and draw calls necessary per tile |
97 | * to the tile allocation BO. |
98 | */ |
99 | __u64 bin_cl; |
100 | |
101 | /* Pointer to the shader records. |
102 | * |
103 | * Shader records are the structures read by the hardware that contain |
104 | * pointers to uniforms, shaders, and vertex attributes. The |
105 | * reference to the shader record has enough information to determine |
106 | * how many pointers are necessary (fixed number for shaders/uniforms, |
107 | * and an attribute count), so those BO indices into bo_handles are |
108 | * just stored as __u32s before each shader record passed in. |
109 | */ |
110 | __u64 shader_rec; |
111 | |
112 | /* Pointer to uniform data and texture handles for the textures |
113 | * referenced by the shader. |
114 | * |
115 | * For each shader state record, there is a set of uniform data in the |
116 | * order referenced by the record (FS, VS, then CS). Each set of |
117 | * uniform data has a __u32 index into bo_handles per texture |
118 | * sample operation, in the order the QPU_W_TMUn_S writes appear in |
119 | * the program. Following the texture BO handle indices is the actual |
120 | * uniform data. |
121 | * |
122 | * The individual uniform state blocks don't have sizes passed in, |
123 | * because the kernel has to determine the sizes anyway during shader |
124 | * code validation. |
125 | */ |
126 | __u64 uniforms; |
127 | __u64 bo_handles; /* Pointer to the BO handle array: bo_handle_count entries of 4 bytes each; the shader records and uniform data index into it. */ |
128 | |
129 | /* Size in bytes of the binner command list. */ |
130 | __u32 bin_cl_size; |
131 | /* Size in bytes of the set of shader records. */ |
132 | __u32 shader_rec_size; |
133 | /* Number of shader records. |
134 | * |
135 | * This could just be computed from the contents of shader_records and |
136 | * the address bits of references to them from the bin CL, but it |
137 | * keeps the kernel from having to resize some allocations it makes. |
138 | */ |
139 | __u32 shader_rec_count; |
140 | /* Size in bytes of the uniform state. */ |
141 | __u32 uniforms_size; |
142 | |
143 | /* Number of BO handles passed in (size is that times 4). */ |
144 | __u32 bo_handle_count; |
145 | |
146 | /* RCL setup: */ |
147 | __u16 width; |
148 | __u16 height; |
149 | __u8 min_x_tile; |
150 | __u8 min_y_tile; |
151 | __u8 max_x_tile; |
152 | __u8 max_y_tile; |
153 | struct drm_vc4_submit_rcl_surface color_read; |
154 | struct drm_vc4_submit_rcl_surface color_write; |
155 | struct drm_vc4_submit_rcl_surface zs_read; |
156 | struct drm_vc4_submit_rcl_surface zs_write; |
157 | struct drm_vc4_submit_rcl_surface msaa_color_write; |
158 | struct drm_vc4_submit_rcl_surface msaa_zs_write; |
159 | __u32 clear_color[2]; |
160 | __u32 clear_z; |
161 | __u8 clear_s; |
162 | |
163 | __u32 pad:24; /* 24-bit bitfield that, together with the 8-bit clear_s above, fills out 32 bits. Part of the ABI layout; do not resize. */ |
164 | |
165 | #define VC4_SUBMIT_CL_USE_CLEAR_COLOR (1 << 0) |
166 | /* By default, the kernel gets to choose the order that the tiles are |
167 | * rendered in. If this is set, then the tiles will be rendered in a |
168 | * raster order, with the right-to-left vs left-to-right and |
169 | * top-to-bottom vs bottom-to-top dictated by |
170 | * VC4_SUBMIT_CL_RCL_ORDER_INCREASING_*. This allows overlapping |
171 | * blits to be implemented using the 3D engine. |
172 | */ |
173 | #define VC4_SUBMIT_CL_FIXED_RCL_ORDER (1 << 1) |
174 | #define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X (1 << 2) |
175 | #define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y (1 << 3) |
176 | __u32 flags; /* Bitmask of the VC4_SUBMIT_CL_* flags defined just above. */ |
177 | |
178 | /* Returned value of the seqno of this render job (for the |
179 | * wait ioctl). |
180 | */ |
181 | __u64 seqno; |
182 | |
183 | /* ID of the perfmon to attach to this job. 0 means no perfmon. */ |
184 | __u32 perfmonid; |
185 | |
186 | /* Syncobj handle to wait on. If set, processing of this render job |
187 | * will not start until the syncobj is signaled. 0 means ignore. |
188 | */ |
189 | __u32 in_sync; |
190 | |
191 | /* Syncobj handle to export fence to. If set, the fence in the syncobj |
192 | * will be replaced with a fence that signals upon completion of this |
193 | * render job. 0 means ignore. |
194 | */ |
195 | __u32 out_sync; |
196 | |
197 | __u32 pad2; /* Explicit trailing padding; presumably keeps the struct size a multiple of 8 bytes after the three __u32 fields above - TODO confirm whether it must be 0. */ |
198 | }; |
199 | |
200 | /** |
201 | * struct drm_vc4_wait_seqno - ioctl argument for waiting for |
202 | * DRM_VC4_SUBMIT_CL completion using its returned seqno. |
203 | * |
204 | * timeout_ns is the timeout in nanoseconds, where "0" means "don't |
205 | * block, just return the status." |
206 | */ |
207 | struct drm_vc4_wait_seqno { |
208 | __u64 seqno; /* Seqno to wait for, as returned in the seqno field of struct drm_vc4_submit_cl. */ |
209 | __u64 timeout_ns; /* Timeout in nanoseconds; 0 means do not block. */ |
210 | }; |
211 | |
212 | /** |
213 | * struct drm_vc4_wait_bo - ioctl argument for waiting for |
214 | * completion of the last DRM_VC4_SUBMIT_CL on a BO. |
215 | * |
216 | * This is useful for cases where multiple processes might be |
217 | * rendering to a BO and you want to wait for all rendering to be |
218 | * completed. |
219 | */ |
220 | struct drm_vc4_wait_bo { |
221 | __u32 handle; /* Handle of the BO to wait on. */ |
222 | __u32 pad; /* Explicit padding so timeout_ns starts on an 8-byte boundary. */ |
223 | __u64 timeout_ns; /* Presumably a timeout in nanoseconds with the same meaning as in struct drm_vc4_wait_seqno - TODO confirm. */ |
224 | }; |
225 | |
226 | /** |
227 | * struct drm_vc4_create_bo - ioctl argument for creating VC4 BOs. |
228 | * |
229 | * There are currently no values for the flags argument, but it may be |
230 | * used in a future extension. |
231 | */ |
232 | struct drm_vc4_create_bo { |
233 | __u32 size; /* Requested size of the BO; presumably in bytes - TODO confirm. */ |
234 | __u32 flags; /* No flag values are currently defined. */ |
235 | /** Returned GEM handle for the BO. */ |
236 | __u32 handle; |
237 | __u32 pad; /* Explicit padding; brings the struct to 16 bytes. */ |
238 | }; |
239 | |
240 | /** |
241 | * struct drm_vc4_mmap_bo - ioctl argument for mapping VC4 BOs. |
242 | * |
243 | * This doesn't actually perform an mmap. Instead, it returns the |
244 | * offset you need to use in an mmap on the DRM device node. This |
245 | * means that tools like valgrind end up knowing about the mapped |
246 | * memory. |
247 | * |
248 | * There are currently no values for the flags argument, but it may be |
249 | * used in a future extension. |
250 | */ |
251 | struct drm_vc4_mmap_bo { |
252 | /** Handle for the object being mapped. */ |
253 | __u32 handle; |
254 | __u32 flags; /* No flag values are currently defined. */ |
255 | /** Offset into the DRM node to use for the subsequent mmap call. */ |
256 | __u64 offset; |
257 | }; |
258 | |
259 | /** |
260 | * struct drm_vc4_create_shader_bo - ioctl argument for creating VC4 |
261 | * shader BOs. |
262 | * |
263 | * Since allowing a shader to be overwritten while it's also being |
264 | * executed from would allow privilege escalation, shaders must be |
265 | * created using this ioctl, and they can't be mmapped later. |
266 | */ |
267 | struct drm_vc4_create_shader_bo { |
268 | /* Size of the data argument. */ |
269 | __u32 size; |
270 | /* Flags, currently must be 0. */ |
271 | __u32 flags; |
272 | |
273 | /* Pointer to the data. */ |
274 | __u64 data; |
275 | |
276 | /** Returned GEM handle for the BO. */ |
277 | __u32 handle; |
278 | /* Pad, must be 0. */ |
279 | __u32 pad; |
280 | }; |
281 | |
282 | struct drm_vc4_get_hang_state_bo { /* Element type of the array that the bo field of struct drm_vc4_get_hang_state points to. */ |
283 | __u32 handle; /* Presumably a handle for the BO - TODO confirm. */ |
284 | __u32 paddr; /* Presumably the BO's physical address - TODO confirm. */ |
285 | __u32 size; /* Presumably the BO's size in bytes - TODO confirm. */ |
286 | __u32 pad; /* Explicit padding; brings the element to 16 bytes. */ |
287 | }; |
288 | |
289 | /** |
290 | * struct drm_vc4_get_hang_state - ioctl argument for collecting state |
291 | * from a GPU hang for analysis. |
292 | */ |
293 | struct drm_vc4_get_hang_state { |
294 | /** Pointer to array of struct drm_vc4_get_hang_state_bo. */ |
295 | __u64 bo; |
296 | /** |
297 | * On input, the size of the bo array. Output is the number |
298 | * of bos to be returned. |
299 | */ |
300 | __u32 bo_count; |
301 | |
302 | __u32 start_bin, start_render; |
303 | |
304 | __u32 ct0ca, ct0ea; /* NOTE(review): this and the fields below look like saved hardware register values (see the pad comment at the end) - confirm names against the V3D register list. */ |
305 | __u32 ct1ca, ct1ea; |
306 | __u32 ct0cs, ct1cs; |
307 | __u32 ct0ra0, ct1ra0; |
308 | |
309 | __u32 bpca, bpcs; |
310 | __u32 bpoa, bpos; |
311 | |
312 | __u32 vpmbase; |
313 | |
314 | __u32 dbge; |
315 | __u32 fdbgo; |
316 | __u32 fdbgb; |
317 | __u32 fdbgr; |
318 | __u32 fdbgs; |
319 | __u32 errstat; |
320 | |
321 | /* Pad that we may save more registers into in the future. */ |
322 | __u32 pad[16]; |
323 | }; |
324 | |
325 | #define DRM_VC4_PARAM_V3D_IDENT0 0 /* Parameter identifiers; presumably the values accepted in the param field of struct drm_vc4_get_param - TODO confirm. */ |
326 | #define DRM_VC4_PARAM_V3D_IDENT1 1 |
327 | #define DRM_VC4_PARAM_V3D_IDENT2 2 |
328 | #define DRM_VC4_PARAM_SUPPORTS_BRANCHES 3 |
329 | #define DRM_VC4_PARAM_SUPPORTS_ETC1 4 |
330 | #define DRM_VC4_PARAM_SUPPORTS_THREADED_FS 5 |
331 | #define DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER 6 |
332 | #define DRM_VC4_PARAM_SUPPORTS_MADVISE 7 |
333 | #define DRM_VC4_PARAM_SUPPORTS_PERFMON 8 |
334 | |
335 | struct drm_vc4_get_param { /* Argument for DRM_IOCTL_VC4_GET_PARAM. */ |
336 | __u32 param; /* Presumably one of the DRM_VC4_PARAM_* identifiers above - TODO confirm. */ |
337 | __u32 pad; /* Explicit padding so value starts on an 8-byte boundary. */ |
338 | __u64 value; /* Presumably the returned value of the queried parameter - TODO confirm. */ |
339 | }; |
340 | |
341 | struct drm_vc4_get_tiling { /* Argument for DRM_IOCTL_VC4_GET_TILING; same field layout as struct drm_vc4_set_tiling. */ |
342 | __u32 handle; /* Presumably the handle of the BO being queried - TODO confirm. */ |
343 | __u32 flags; |
344 | __u64 modifier; /* Presumably the returned tiling modifier of the BO - TODO confirm. */ |
345 | }; |
346 | |
347 | struct drm_vc4_set_tiling { /* Argument for DRM_IOCTL_VC4_SET_TILING; same field layout as struct drm_vc4_get_tiling. */ |
348 | __u32 handle; /* Presumably the handle of the BO being modified - TODO confirm. */ |
349 | __u32 flags; |
350 | __u64 modifier; /* Presumably the tiling modifier to apply to the BO - TODO confirm. */ |
351 | }; |
352 | |
353 | /** |
354 | * struct drm_vc4_label_bo - Attach a name to a BO for debug purposes. |
355 | */ |
356 | struct drm_vc4_label_bo { |
357 | __u32 handle; /* Presumably the handle of the BO to label - TODO confirm. */ |
358 | __u32 len; /* Presumably the length of the name string - TODO confirm units and whether a terminator is counted. */ |
359 | __u64 name; /* Presumably a user pointer to the name string, carried as a 64-bit integer - TODO confirm. */ |
360 | }; |
361 | |
362 | /* |
363 | * States prefixed with '__' are internal states and cannot be passed to the |
364 | * DRM_IOCTL_VC4_GEM_MADVISE ioctl. |
365 | */ |
366 | #define VC4_MADV_WILLNEED 0 |
367 | #define VC4_MADV_DONTNEED 1 |
368 | #define __VC4_MADV_PURGED 2 |
369 | #define __VC4_MADV_NOTSUPP 3 |
370 | |
371 | struct drm_vc4_gem_madvise { /* Argument for DRM_IOCTL_VC4_GEM_MADVISE. */ |
372 | __u32 handle; /* Presumably the handle of the BO being advised on - TODO confirm. */ |
373 | __u32 madv; /* VC4_MADV_WILLNEED or VC4_MADV_DONTNEED; the '__'-prefixed states cannot be passed in. */ |
374 | __u32 retained; /* Presumably an output telling whether the BO's contents were kept - TODO confirm. */ |
375 | __u32 pad; /* Explicit padding; brings the struct to 16 bytes. */ |
376 | }; |
377 | |
378 | enum { /* Performance counter event identifiers, numbered consecutively from 0. */ |
379 | VC4_PERFCNT_FEP_VALID_PRIMS_NO_RENDER, |
380 | VC4_PERFCNT_FEP_VALID_PRIMS_RENDER, |
381 | VC4_PERFCNT_FEP_CLIPPED_QUADS, |
382 | VC4_PERFCNT_FEP_VALID_QUADS, |
383 | VC4_PERFCNT_TLB_QUADS_NOT_PASSING_STENCIL, |
384 | VC4_PERFCNT_TLB_QUADS_NOT_PASSING_Z_AND_STENCIL, |
385 | VC4_PERFCNT_TLB_QUADS_PASSING_Z_AND_STENCIL, |
386 | VC4_PERFCNT_TLB_QUADS_ZERO_COVERAGE, |
387 | VC4_PERFCNT_TLB_QUADS_NON_ZERO_COVERAGE, |
388 | VC4_PERFCNT_TLB_QUADS_WRITTEN_TO_COLOR_BUF, |
389 | VC4_PERFCNT_PLB_PRIMS_OUTSIDE_VIEWPORT, |
390 | VC4_PERFCNT_PLB_PRIMS_NEED_CLIPPING, |
391 | VC4_PERFCNT_PSE_PRIMS_REVERSED, |
392 | VC4_PERFCNT_QPU_TOTAL_IDLE_CYCLES, |
393 | VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_VERTEX_COORD_SHADING, |
394 | VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_FRAGMENT_SHADING, |
395 | VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_EXEC_VALID_INST, |
396 | VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_WAITING_TMUS, |
397 | VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_WAITING_SCOREBOARD, |
398 | VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_WAITING_VARYINGS, |
399 | VC4_PERFCNT_QPU_TOTAL_INST_CACHE_HIT, |
400 | VC4_PERFCNT_QPU_TOTAL_INST_CACHE_MISS, |
401 | VC4_PERFCNT_QPU_TOTAL_UNIFORM_CACHE_HIT, |
402 | VC4_PERFCNT_QPU_TOTAL_UNIFORM_CACHE_MISS, |
403 | VC4_PERFCNT_TMU_TOTAL_TEXT_QUADS_PROCESSED, |
404 | VC4_PERFCNT_TMU_TOTAL_TEXT_CACHE_MISS, |
405 | VC4_PERFCNT_VPM_TOTAL_CLK_CYCLES_VDW_STALLED, |
406 | VC4_PERFCNT_VPM_TOTAL_CLK_CYCLES_VCD_STALLED, |
407 | VC4_PERFCNT_L2C_TOTAL_L2_CACHE_HIT, |
408 | VC4_PERFCNT_L2C_TOTAL_L2_CACHE_MISS, |
409 | VC4_PERFCNT_NUM_EVENTS, /* Must stay last: its value equals the number of events listed above (currently 30). */ |
410 | }; |
411 | |
412 | #define DRM_VC4_MAX_PERF_COUNTERS 16 /* Capacity of the events array in struct drm_vc4_perfmon_create. */ |
413 | |
414 | struct drm_vc4_perfmon_create { /* Argument for DRM_IOCTL_VC4_PERFMON_CREATE. */ |
415 | __u32 id; /* Presumably the returned ID of the new perfmon, usable as perfmonid in struct drm_vc4_submit_cl - TODO confirm. */ |
416 | __u32 ncounters; /* Presumably the number of valid entries in events - TODO confirm. */ |
417 | __u8 events[DRM_VC4_MAX_PERF_COUNTERS]; /* Presumably VC4_PERFCNT_* event identifiers, one per counter - TODO confirm. */ |
418 | }; |
419 | |
420 | struct drm_vc4_perfmon_destroy { /* Argument for DRM_IOCTL_VC4_PERFMON_DESTROY. */ |
421 | __u32 id; /* Presumably the ID of the perfmon to destroy - TODO confirm. */ |
422 | }; |
423 | |
424 | /* |
425 | * Returns the values of the performance counters tracked by this |
426 | * perfmon (as an array of ncounters u64 values). |
427 | * |
428 | * No implicit synchronization is performed, so the user has to |
429 | * guarantee that any jobs using this perfmon have already been |
430 | * completed (probably by blocking on the seqno returned by the |
431 | * last exec that used the perfmon). |
432 | */ |
433 | struct drm_vc4_perfmon_get_values { |
434 | __u32 id; /* NOTE(review): there is no explicit pad after this field; where __u64 is 8-byte aligned the compiler inserts 4 bytes of implicit padding before values_ptr. The layout is ABI, so do not add or reorder fields. */ |
435 | __u64 values_ptr; /* Presumably a user pointer, carried as a 64-bit integer, to the array of ncounters u64 values described above - TODO confirm. */ |
436 | }; |
437 | |
438 | #if defined(__cplusplus) |
439 | } |
440 | #endif |
441 | |
442 | #endif /* _UAPI_VC4_DRM_H_ */ |
443 | |