/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2016 Intel Corporation
 */

#ifndef __I915_GEM_OBJECT_TYPES_H__
#define __I915_GEM_OBJECT_TYPES_H__

#include <linux/mmu_notifier.h>

#include <drm/drm_gem.h>
#include <drm/ttm/ttm_bo.h>
#include <uapi/drm/i915_drm.h>

#include "i915_active.h"
#include "i915_selftest.h"
#include "i915_vma_resource.h"

#include "gt/intel_gt_defines.h"

struct drm_i915_gem_object;
struct intel_frontbuffer;
struct intel_memory_region;

/*
 * struct i915_lut_handle tracks the fast lookups from handle to vma used
 * for execbuf. Although we use a radixtree for that mapping, in order to
 * remove them as the object or context is closed, we need a secondary list
 * and a translation entry (i915_lut_handle).
 */
struct i915_lut_handle {
	struct list_head obj_link;
	struct i915_gem_context *ctx;
	u32 handle;
};

struct drm_i915_gem_object_ops {
	unsigned int flags;
#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1)
/* Skip the shrinker management in set_pages/unset_pages */
#define I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST BIT(2)
#define I915_GEM_OBJECT_IS_PROXY BIT(3)
#define I915_GEM_OBJECT_NO_MMAP BIT(4)

	/* Interface between the GEM object and its backing storage.
	 * get_pages() is called once prior to the use of the associated set
	 * of pages, before binding them into the GTT, and put_pages() is
	 * called after we no longer need them. As we expect there to be
	 * an associated cost with migrating pages between the backing storage
	 * and making them available for the GPU (e.g. clflush), we may hold
	 * onto the pages after they are no longer referenced by the GPU
	 * in case they may be used again shortly (for example migrating the
	 * pages to a different memory domain within the GTT). put_pages()
	 * will therefore most likely be called when the object itself is
	 * being released or under memory pressure (where we attempt to
	 * reap pages for the shrinker).
	 */
	int (*get_pages)(struct drm_i915_gem_object *obj);
	void (*put_pages)(struct drm_i915_gem_object *obj,
			  struct sg_table *pages);
	int (*truncate)(struct drm_i915_gem_object *obj);
	/**
	 * shrink - Perform further backend specific actions to facilitate
	 * shrinking.
	 * @obj: The gem object
	 * @flags: Extra flags to control shrinking behaviour in the backend
	 *
	 * Possible values for @flags:
	 *
	 * I915_GEM_OBJECT_SHRINK_WRITEBACK - Try to perform writeback of the
	 * backing pages, if supported.
	 *
	 * I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT - Don't wait for the object to
	 * idle. Active objects can be considered later. The TTM backend for
	 * example might have async migrations going on, which don't use any
	 * i915_vma to track the active GTT binding, and hence having an unbound
	 * object might not be enough.
	 */
#define I915_GEM_OBJECT_SHRINK_WRITEBACK BIT(0)
#define I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT BIT(1)
	int (*shrink)(struct drm_i915_gem_object *obj, unsigned int flags);

	int (*pread)(struct drm_i915_gem_object *obj,
		     const struct drm_i915_gem_pread *arg);
	int (*pwrite)(struct drm_i915_gem_object *obj,
		      const struct drm_i915_gem_pwrite *arg);
	u64 (*mmap_offset)(struct drm_i915_gem_object *obj);
	void (*unmap_virtual)(struct drm_i915_gem_object *obj);

	int (*dmabuf_export)(struct drm_i915_gem_object *obj);

	/**
	 * adjust_lru - notify that the madvise value was updated
	 * @obj: The gem object
	 *
	 * The madvise value may have been updated, or the object may have
	 * been recently referenced, so act accordingly (perhaps changing an
	 * LRU list, etc).
	 */
	void (*adjust_lru)(struct drm_i915_gem_object *obj);

	/**
	 * delayed_free - Override the default delayed free implementation
	 */
	void (*delayed_free)(struct drm_i915_gem_object *obj);

	/**
	 * migrate - Migrate the object to a different region, either for
	 * pinning or for as long as the object lock is held.
	 */
	int (*migrate)(struct drm_i915_gem_object *obj,
		       struct intel_memory_region *mr,
		       unsigned int flags);

	void (*release)(struct drm_i915_gem_object *obj);

	const struct vm_operations_struct *mmap_ops;
	const char *name; /* friendly name for debug, e.g. lockdep classes */
};
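
/*
 * Illustrative only: a backing-store implementation wires itself up by
 * providing one of these ops tables. The member names are real, but the
 * "foo" backend and its helpers below are hypothetical:
 *
 *	static const struct drm_i915_gem_object_ops i915_gem_foo_ops = {
 *		.name = "i915_gem_object_foo",
 *		.flags = I915_GEM_OBJECT_IS_SHRINKABLE,
 *		.get_pages = i915_gem_foo_get_pages,
 *		.put_pages = i915_gem_foo_put_pages,
 *	};
 *
 * get_pages() then runs the first time the pages are pinned, and
 * put_pages() once they are finally released.
 */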

/**
 * enum i915_cache_level - The supported GTT caching values for system memory
 * pages.
 *
 * These translate to some special GTT PTE bits when binding pages into some
 * address space. It also determines whether an object, or rather its pages,
 * are coherent with the GPU, when also reading or writing through the CPU
 * cache with those pages.
 *
 * Userspace can also control this through struct drm_i915_gem_caching.
 */
enum i915_cache_level {
	/**
	 * @I915_CACHE_NONE:
	 *
	 * GPU access is not coherent with the CPU cache. If the cache is dirty
	 * and we need the underlying pages to be coherent with some later GPU
	 * access then we need to manually flush the pages.
	 *
	 * On shared LLC platforms reads and writes through the CPU cache are
	 * still coherent even with this setting. See also
	 * &drm_i915_gem_object.cache_coherent for more details. Due to this we
	 * should only ever use uncached for scanout surfaces, otherwise we end
	 * up over-flushing in some places.
	 *
	 * This is the default on non-LLC platforms.
	 */
	I915_CACHE_NONE = 0,
	/**
	 * @I915_CACHE_LLC:
	 *
	 * GPU access is coherent with the CPU cache. If the cache is dirty,
	 * then the GPU will ensure that access remains coherent, when both
	 * reading and writing through the CPU cache. GPU writes can dirty the
	 * CPU cache.
	 *
	 * Not used for scanout surfaces.
	 *
	 * Applies to both platforms with shared LLC(HAS_LLC), and snooping
	 * based platforms(HAS_SNOOP).
	 *
	 * This is the default on shared LLC platforms. The only exception is
	 * scanout objects, where the display engine is not coherent with the
	 * CPU cache. For such objects I915_CACHE_NONE or I915_CACHE_WT is
	 * automatically applied by the kernel in pin_for_display, if userspace
	 * has not done so already.
	 */
	I915_CACHE_LLC,
	/**
	 * @I915_CACHE_L3_LLC:
	 *
	 * Explicitly enable the Gfx L3 cache, with coherent LLC.
	 *
	 * The Gfx L3 sits between the domain specific caches, e.g.
	 * sampler/render caches, and the larger LLC. LLC is coherent with the
	 * GPU, but L3 is only visible to the GPU, so likely needs to be flushed
	 * when the workload completes.
	 *
	 * Not used for scanout surfaces.
	 *
	 * Only exposed on some gen7 + GGTT. More recent hardware has dropped
	 * this explicit setting, where it should now be enabled by default.
	 */
	I915_CACHE_L3_LLC,
	/**
	 * @I915_CACHE_WT:
	 *
	 * Write-through. Used for scanout surfaces.
	 *
	 * The GPU can utilise the caches, while still having the display engine
	 * be coherent with GPU writes, as a result we don't need to flush the
	 * CPU caches when moving out of the render domain. This is the default
	 * setting chosen by the kernel, if supported by the HW, otherwise we
	 * fallback to I915_CACHE_NONE. On the CPU side writes through the CPU
	 * cache still need to be flushed, to remain coherent with the display
	 * engine.
	 */
	I915_CACHE_WT,
	/**
	 * @I915_MAX_CACHE_LEVEL:
	 *
	 * Mark the last entry in the enum. Used for sizing the
	 * cachelevel_to_pat array that translates cache_level into a PAT
	 * index.
	 */
	I915_MAX_CACHE_LEVEL,
};
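
/*
 * Illustrative only: per-platform tables, indexed by enum i915_cache_level,
 * translate a cache level into a hardware PAT index (see LEGACY_CACHELEVEL
 * and PVC_CACHELEVEL in i915_pci.c). They are shaped roughly like this,
 * with made-up PAT values:
 *
 *	static const u8 example_cachelevel_to_pat[I915_MAX_CACHE_LEVEL] = {
 *		[I915_CACHE_NONE]	= 0,
 *		[I915_CACHE_LLC]	= 1,
 *		[I915_CACHE_L3_LLC]	= 2,
 *		[I915_CACHE_WT]		= 3,
 *	};
 *
 * I915_MAX_CACHE_LEVEL exists so that such arrays are always fully sized.
 */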

enum i915_map_type {
	I915_MAP_WB = 0,
	I915_MAP_WC,
#define I915_MAP_OVERRIDE BIT(31)
	I915_MAP_FORCE_WB = I915_MAP_WB | I915_MAP_OVERRIDE,
	I915_MAP_FORCE_WC = I915_MAP_WC | I915_MAP_OVERRIDE,
};
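
/*
 * Note (informative): the FORCE variants are the plain mapping type with
 * I915_MAP_OVERRIDE or'ed in, so the underlying type can always be
 * recovered by masking:
 *
 *	enum i915_map_type base = type & ~I915_MAP_OVERRIDE;
 *
 * The override bit asks for an existing mapping of a different type to be
 * discarded and re-created rather than reused.
 */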

enum i915_mmap_type {
	I915_MMAP_TYPE_GTT = 0,
	I915_MMAP_TYPE_WC,
	I915_MMAP_TYPE_WB,
	I915_MMAP_TYPE_UC,
	I915_MMAP_TYPE_FIXED,
};

struct i915_mmap_offset {
	struct drm_vma_offset_node vma_node;
	struct drm_i915_gem_object *obj;
	enum i915_mmap_type mmap_type;

	struct rb_node offset;
};

struct i915_gem_object_page_iter {
	struct scatterlist *sg_pos;
	unsigned int sg_idx; /* in pages, but 32bit eek! */

	struct radix_tree_root radix;
	struct mutex lock; /* protects this cache */
};
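
/*
 * Rough sketch of how this cache is used (the real lookup lives in
 * i915_gem_pages.c); the iterator resumes from the last visited
 * scatterlist chunk instead of rescanning from the start:
 *
 *	mutex_lock(&iter->lock);
 *	sg = iter->sg_pos;	// last visited chunk
 *	idx = iter->sg_idx;	// first page index of that chunk
 *	// walk forward to the wanted page, caching steps in iter->radix
 *	mutex_unlock(&iter->lock);
 */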

struct drm_i915_gem_object {
	/*
	 * We might have reason to revisit the below since it wastes
	 * a lot of space for non-ttm gem objects.
	 * In any case, always use the accessors for the ttm_buffer_object
	 * when accessing it.
	 */
	union {
		struct drm_gem_object base;
		struct ttm_buffer_object __do_not_access;
	};
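
	/*
	 * For example (informative): the TTM backend reaches the embedded
	 * ttm_buffer_object only through its accessor, roughly:
	 *
	 *	static inline struct ttm_buffer_object *
	 *	i915_gem_to_ttm(struct drm_i915_gem_object *obj)
	 *	{
	 *		return &obj->__do_not_access;
	 *	}
	 *
	 * (see gem/i915_gem_ttm.h), never by touching __do_not_access
	 * directly.
	 */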

	const struct drm_i915_gem_object_ops *ops;

	struct {
		/**
		 * @vma.lock: protect the list/tree of vmas
		 */
		spinlock_t lock;

		/**
		 * @vma.list: List of VMAs backed by this object
		 *
		 * The VMAs on this list are ordered by type: all GGTT vma are
		 * placed at the head and all ppGTT vma are placed at the tail.
		 * The different types of GGTT vma are unordered between
		 * themselves, use the @vma.tree (which has a defined order
		 * between all VMA) to quickly find an exact match.
		 */
		struct list_head list;

		/**
		 * @vma.tree: Ordered tree of VMAs backed by this object
		 *
		 * All VMA created for this object are placed in the @vma.tree
		 * for fast retrieval via a binary search in
		 * i915_vma_instance(). They are also added to @vma.list for
		 * easy iteration.
		 */
		struct rb_root tree;
	} vma;

	/**
	 * @lut_list: List of vma lookup entries in use for this object.
	 *
	 * If this object is closed, we need to remove all of its VMA from
	 * the fast lookup index in associated contexts; @lut_list provides
	 * this translation from object to context->handles_vma.
	 */
	struct list_head lut_list;
	spinlock_t lut_lock; /* guards lut_list */

	/**
	 * @obj_link: Link into @i915_gem_ww_ctx.obj_list
	 *
	 * When we lock this object through i915_gem_object_lock() with a
	 * context, we add it to the list to ensure we can unlock everything
	 * when i915_gem_ww_ctx_backoff() or i915_gem_ww_ctx_fini() are called.
	 */
	struct list_head obj_link;
	/**
	 * @shares_resv_from: The object shares the resv from this vm.
	 */
	struct i915_address_space *shares_resv_from;

	union {
		struct rcu_head rcu;
		struct llist_node freed;
	};

	/**
	 * Whether the object is currently mapped through the GGTT or any
	 * other supported fake offset mmap backed by lmem.
	 */
	unsigned int userfault_count;
	struct list_head userfault_link;

	struct {
		spinlock_t lock; /* Protects access to mmo offsets */
		struct rb_root offsets;
	} mmo;

	I915_SELFTEST_DECLARE(struct list_head st_link);

	unsigned long flags;
#define I915_BO_ALLOC_CONTIGUOUS BIT(0)
#define I915_BO_ALLOC_VOLATILE BIT(1)
#define I915_BO_ALLOC_CPU_CLEAR BIT(2)
#define I915_BO_ALLOC_USER BIT(3)
/* Object is allowed to lose its contents on suspend / resume, even if pinned */
#define I915_BO_ALLOC_PM_VOLATILE BIT(4)
/* Object needs to be restored early using memcpy during resume */
#define I915_BO_ALLOC_PM_EARLY BIT(5)
/*
 * Object is likely never accessed by the CPU. This will prioritise the BO to be
 * allocated in the non-mappable portion of lmem. This is merely a hint, and if
 * dealing with userspace objects the CPU fault handler is free to ignore this.
 */
#define I915_BO_ALLOC_GPU_ONLY BIT(6)
#define I915_BO_ALLOC_CCS_AUX BIT(7)
/*
 * Object is allowed to retain its initial data and will not be cleared on first
 * access if used along with I915_BO_ALLOC_USER. This is mainly to keep
 * preallocated framebuffer data intact while transitioning it to i915drmfb.
 */
#define I915_BO_PREALLOC BIT(8)
#define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \
			     I915_BO_ALLOC_VOLATILE | \
			     I915_BO_ALLOC_CPU_CLEAR | \
			     I915_BO_ALLOC_USER | \
			     I915_BO_ALLOC_PM_VOLATILE | \
			     I915_BO_ALLOC_PM_EARLY | \
			     I915_BO_ALLOC_GPU_ONLY | \
			     I915_BO_ALLOC_CCS_AUX | \
			     I915_BO_PREALLOC)
#define I915_BO_READONLY BIT(9)
#define I915_TILING_QUIRK_BIT 10 /* unknown swizzling; do not release! */
#define I915_BO_PROTECTED BIT(11)
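
/*
 * Usage sketch (illustrative, with caller-provided i915 and size): creation
 * helpers such as i915_gem_object_create_lmem() take a mask of the
 * I915_BO_ALLOC_* flags above:
 *
 *	obj = i915_gem_object_create_lmem(i915, size,
 *					  I915_BO_ALLOC_CONTIGUOUS |
 *					  I915_BO_ALLOC_CPU_CLEAR);
 *
 * Only the bits collected in I915_BO_ALLOC_FLAGS are allocation-time
 * requests; I915_BO_READONLY, I915_TILING_QUIRK_BIT and I915_BO_PROTECTED
 * track object state instead.
 */
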
	/**
	 * @mem_flags: Mutable placement-related flags
	 *
	 * These are flags that indicate specifics of the memory region
	 * the object is currently in. As such they are only stable
	 * either under the object lock or if the object is pinned.
	 */
	unsigned int mem_flags;
#define I915_BO_FLAG_STRUCT_PAGE BIT(0) /* Object backed by struct pages */
#define I915_BO_FLAG_IOMEM BIT(1) /* Object backed by IO memory */
	/**
	 * @pat_index: The desired PAT index.
	 *
	 * See hardware specification for valid PAT indices for each platform.
	 * This field replaces the @cache_level field that contained a value of
	 * enum i915_cache_level, since PAT indices are used by both userspace
	 * and the kernel mode driver for caching policy control after GEN12.
	 * In the meantime platform specific tables are created to translate
	 * i915_cache_level into a PAT index; for more details check the macros
	 * defined in i915/i915_pci.c, e.g. PVC_CACHELEVEL.
	 * For backward compatibility, this field contains values that exactly
	 * match the entries of enum i915_cache_level for pre-GEN12 platforms
	 * (see LEGACY_CACHELEVEL), so that the PTE encode functions for these
	 * legacy platforms can stay the same.
	 */
	unsigned int pat_index:6;
	/**
	 * @pat_set_by_user: Indicate whether pat_index is set by user space
	 *
	 * This field is set to false by default, only set to true if the
	 * pat_index is set by user space. By design, user space is capable of
	 * managing caching behavior by setting pat_index, in which case the
	 * kernel mode driver should never touch the pat_index.
	 */
	unsigned int pat_set_by_user:1;
	/**
	 * @cache_coherent:
	 *
	 * Note: with the change above which replaced @cache_level with
	 * pat_index, the use of @cache_coherent is limited to objects created
	 * by the kernel, or by userspace without a pat index specified.
	 * Check for @pat_set_by_user to find out if an object has pat index set
	 * by userspace. The ioctl's to change cache settings have also been
	 * disabled for the objects with pat index set by userspace. Please
	 * don't assume @cache_coherent has the flags set as described here. A
	 * helper function i915_gem_object_has_cache_level() provides one way
	 * to bypass the use of this field.
	 *
	 * Track whether the pages are coherent with the GPU if reading or
	 * writing through the CPU caches. This largely depends on the
	 * @cache_level setting.
	 *
	 * On platforms which don't have the shared LLC(HAS_SNOOP), like on Atom
	 * platforms, coherency must be explicitly requested with some special
	 * GTT caching bits(see enum i915_cache_level). When enabling coherency
	 * it does come at a performance and power cost on such platforms. On
	 * the flip side the kernel does not need to manually flush any buffers
	 * which need to be coherent with the GPU, if the object is not coherent
	 * i.e. @cache_coherent is zero.
	 *
	 * On platforms that share the LLC with the CPU(HAS_LLC), all GT memory
	 * access will automatically snoop the CPU caches(even with CACHE_NONE).
	 * The one exception is when dealing with the display engine, like with
	 * scanout surfaces. To handle this the kernel will always flush the
	 * surface out of the CPU caches when preparing it for scanout. Also
	 * note that since scanout surfaces are only ever read by the display
	 * engine we only need to care about flushing any writes through the CPU
	 * cache, reads on the other hand will always be coherent.
	 *
	 * Something strange here is why @cache_coherent is not a simple
	 * boolean, i.e. coherent vs non-coherent. The reasoning for this is
	 * back to the display engine not being fully coherent. As a result
	 * scanout surfaces will either be marked as I915_CACHE_NONE or
	 * I915_CACHE_WT.
	 * In the case of seeing I915_CACHE_NONE the kernel makes the assumption
	 * that this is likely a scanout surface, and will set @cache_coherent
	 * as only I915_BO_CACHE_COHERENT_FOR_READ, on platforms with the shared
	 * LLC. The kernel uses this to always flush writes through the CPU
	 * cache as early as possible, where it can, in effect keeping
	 * @cache_dirty clean, so we can potentially avoid stalling when
	 * flushing the surface just before doing the scanout. This does mean
	 * we might unnecessarily flush non-scanout objects in some places, but
	 * the default assumption is that all normal objects should be using
	 * I915_CACHE_LLC, at least on platforms with the shared LLC.
	 *
	 * Supported values:
	 *
	 * I915_BO_CACHE_COHERENT_FOR_READ:
	 *
	 * On shared LLC platforms, we use this for special scanout surfaces,
	 * where the display engine is not coherent with the CPU cache. As such
	 * we need to ensure we flush any writes before doing the scanout. As an
	 * optimisation we try to flush any writes as early as possible to avoid
	 * stalling later.
	 *
	 * Thus for scanout surfaces using I915_CACHE_NONE, on shared LLC
	 * platforms, we use:
	 *
	 * cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ
	 *
	 * While for normal objects that are fully coherent, including special
	 * scanout surfaces marked as I915_CACHE_WT, we use:
	 *
	 * cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ |
	 *		    I915_BO_CACHE_COHERENT_FOR_WRITE
	 *
	 * And then for objects that are not coherent at all we use:
	 *
	 * cache_coherent = 0
	 *
	 * I915_BO_CACHE_COHERENT_FOR_WRITE:
	 *
	 * When writing through the CPU cache, the GPU is still coherent. Note
	 * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
	 */
#define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
#define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
	unsigned int cache_coherent:2;

	/**
	 * @cache_dirty:
	 *
	 * Note: with the change above which replaced cache_level with
	 * pat_index, the use of @cache_dirty is limited to objects created
	 * by the kernel, or by userspace without a pat index specified.
	 * Check for @pat_set_by_user to find out if an object has pat index set
	 * by userspace. The ioctl's to change cache settings have also been
	 * disabled for the objects with pat_index set by userspace. Please
	 * don't assume @cache_dirty is set as described here. Also see helper
	 * function i915_gem_object_has_cache_level() for possible ways to
	 * bypass the use of this field.
	 *
	 * Track if we are dirty with writes through the CPU cache for this
	 * object. As a result reading directly from main memory might yield
	 * stale data.
	 *
	 * This also ties into whether the kernel is tracking the object as
	 * coherent with the GPU, as per @cache_coherent, as it determines if
	 * flushing might be needed at various points.
	 *
	 * Another part of @cache_dirty is managing flushing when first
	 * acquiring the pages for system memory; at this point the pages are
	 * considered foreign, so the default assumption is that the cache is
	 * dirty, for example the page zeroing done by the kernel might leave
	 * writes through the CPU cache, or swapping-in, while the actual data
	 * in main memory is potentially stale. Note that this is a potential
	 * security issue when dealing with userspace objects and zeroing. Now,
	 * whether we actually need to apply the big sledgehammer of flushing
	 * all the pages on acquire depends on if @cache_coherent is marked as
	 * I915_BO_CACHE_COHERENT_FOR_WRITE, i.e. that the GPU will be coherent
	 * for both reads and writes through the CPU cache.
	 *
	 * Note that on shared LLC platforms we still apply the heavy flush for
	 * I915_CACHE_NONE objects, under the assumption that this is going to
	 * be used for scanout.
	 *
	 * Update: On some hardware there is now also the 'Bypass LLC' MOCS
	 * entry, which defeats our @cache_coherent tracking, since userspace
	 * can freely bypass the CPU cache when touching the pages with the GPU,
	 * where the kernel is completely unaware. On such platforms we need to
	 * apply the sledgehammer-on-acquire regardless of the @cache_coherent.
	 *
	 * Special care is taken on non-LLC platforms, to prevent potential
	 * information leak. The driver currently ensures:
	 *
	 * 1. All userspace objects, by default, have @cache_level set as
	 * I915_CACHE_NONE. The only exception is userptr objects, where we
	 * instead force I915_CACHE_LLC, but we also don't allow userspace to
	 * ever change the @cache_level for such objects. Another special case
	 * is dma-buf, which doesn't rely on @cache_dirty, but there we
	 * always do a forced flush when acquiring the pages, if there is a
	 * chance that the pages can be read directly from main memory with
	 * the GPU.
	 *
	 * 2. All I915_CACHE_NONE objects have @cache_dirty initially true.
	 *
	 * 3. All swapped-out objects(i.e. shmem) have @cache_dirty set to
	 * true.
	 *
	 * 4. The @cache_dirty is never freely reset before the initial
	 * flush, even if userspace adjusts the @cache_level through the
	 * i915_gem_set_caching_ioctl.
	 *
	 * 5. All @cache_dirty objects(including swapped-in) are initially
	 * flushed with a synchronous call to drm_clflush_sg in
	 * __i915_gem_object_set_pages. The @cache_dirty can be freely reset
	 * at this point. All further asynchronous clflushes are never security
	 * critical, i.e. userspace is free to race against itself.
	 */
	unsigned int cache_dirty:1;

	/* @is_dpt: Object houses a display page table (DPT) */
	unsigned int is_dpt:1;

	/**
	 * @read_domains: Read memory domains.
	 *
	 * These monitor which caches contain read/write data related to the
	 * object. When transitioning from one set of domains to another,
	 * the driver is called to ensure that caches are suitably flushed and
	 * invalidated.
	 */
	u16 read_domains;

	/**
	 * @write_domain: Corresponding unique write memory domain.
	 */
	u16 write_domain;

	struct intel_frontbuffer __rcu *frontbuffer;

	/** Current tiling stride for the object, if it's tiled. */
	unsigned int tiling_and_stride;
#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */
#define TILING_MASK (FENCE_MINIMUM_STRIDE - 1)
#define STRIDE_MASK (~TILING_MASK)
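
	/*
	 * Informative example: tiling mode and stride share this one word,
	 * and the accessors unpack it as:
	 *
	 *	tiling = obj->tiling_and_stride & TILING_MASK;
	 *	stride = obj->tiling_and_stride & STRIDE_MASK;
	 *
	 * which works because a fenceable stride is always a multiple of
	 * FENCE_MINIMUM_STRIDE, leaving the low bits free for the tiling
	 * mode.
	 */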

	struct {
		/*
		 * Protects the pages and their use. Do not use directly, but
		 * instead go through the pin/unpin interfaces.
		 */
		atomic_t pages_pin_count;

		/**
		 * @shrink_pin: Prevents the pages from being made visible to
		 * the shrinker, while the shrink_pin is non-zero. Most users
		 * should pretty much never have to care about this, outside of
		 * some special use cases.
		 *
		 * By default most objects will start out as visible to the
		 * shrinker(if I915_GEM_OBJECT_IS_SHRINKABLE) as soon as the
		 * backing pages are attached to the object, like in
		 * __i915_gem_object_set_pages(). They will then be removed
		 * from the shrinker list once the pages are released.
		 *
		 * The @shrink_pin is incremented by calling
		 * i915_gem_object_make_unshrinkable(), which will also remove
		 * the object from the shrinker list, if the pin count was zero.
		 *
		 * Callers will then typically call
		 * i915_gem_object_make_shrinkable() or
		 * i915_gem_object_make_purgeable() to decrement the pin count,
		 * and make the pages visible again.
		 */
		atomic_t shrink_pin;
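
		/*
		 * Typical pairing, as a sketch (exact call sites vary):
		 *
		 *	i915_gem_object_make_unshrinkable(obj);
		 *	// ... the shrinker must not take the pages here ...
		 *	i915_gem_object_make_shrinkable(obj);
		 */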

		/**
		 * @ttm_shrinkable: True when the object is using shmem pages
		 * underneath. Protected by the object lock.
		 */
		bool ttm_shrinkable;

		/**
		 * @unknown_state: Indicate that the object is effectively
		 * borked. This is write-once and set if we somehow encounter a
		 * fatal error when moving/clearing the pages, and we are not
		 * able to fallback to memcpy/memset, like on small-BAR systems.
		 * The GPU should also be wedged (or in the process) at this
		 * point.
		 *
		 * Only valid to read this after acquiring the dma-resv lock and
		 * waiting for all DMA_RESV_USAGE_KERNEL fences to be signalled,
		 * or if we otherwise know that the moving fence has signalled,
		 * and we are certain the pages underneath are valid for
		 * immediate access (under normal operation), like just prior to
		 * binding the object or when setting up the CPU fault handler.
		 * See i915_gem_object_has_unknown_state().
		 */
		bool unknown_state;

		/**
		 * Priority list of potential placements for this object.
		 */
		struct intel_memory_region **placements;
		int n_placements;

		/**
		 * Memory region for this object.
		 */
		struct intel_memory_region *region;

		/**
		 * Memory manager resource allocated for this object. Only
		 * needed for the mock region.
		 */
		struct ttm_resource *res;

		/**
		 * Element within memory_region->objects or region->purgeable
		 * if the object is marked as DONTNEED. Access is protected by
		 * region->obj_lock.
		 */
		struct list_head region_link;

		struct i915_refct_sgt *rsgt;
		struct sg_table *pages;
		void *mapping;

		struct i915_page_sizes page_sizes;

		I915_SELFTEST_DECLARE(unsigned int page_mask);

		struct i915_gem_object_page_iter get_page;
		struct i915_gem_object_page_iter get_dma_page;

		/**
		 * Element within i915->mm.shrink_list or i915->mm.purge_list,
		 * locked by i915->mm.obj_lock.
		 */
		struct list_head link;

		/**
		 * Advice: are the backing pages purgeable?
		 */
		unsigned int madv:2;

		/**
		 * This is set if the object has been written to since the
		 * pages were last acquired.
		 */
		bool dirty:1;

		u32 tlb[I915_MAX_GT];
	} mm;

	struct {
		struct i915_refct_sgt *cached_io_rsgt;
		struct i915_gem_object_page_iter get_io_page;
		struct drm_i915_gem_object *backup;
		bool created:1;
	} ttm;

	/*
	 * Record which PXP key instance this object was created against (if
	 * any), so we can use it to determine if the encryption is valid by
	 * comparing against the current key instance.
	 */
	u32 pxp_key_instance;

	/** Record of address bit 17 of each page at last unbind. */
	unsigned long *bit_17;

	union {
#ifdef CONFIG_MMU_NOTIFIER
		struct i915_gem_userptr {
			uintptr_t ptr;
			unsigned long notifier_seq;

			struct mmu_interval_notifier notifier;
			struct page **pvec;
			int page_ref;
		} userptr;
#endif

		struct drm_mm_node *stolen;

		resource_size_t bo_offset;

		unsigned long scratch;
		u64 encode;

		void *gvt_info;
	};
};

#define intel_bo_to_drm_bo(bo) (&(bo)->base)
#define intel_bo_to_i915(bo) to_i915(intel_bo_to_drm_bo(bo)->dev)

static inline struct drm_i915_gem_object *
to_intel_bo(struct drm_gem_object *gem)
{
	/* Assert that to_intel_bo(NULL) == NULL */
	BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base));

	return container_of(gem, struct drm_i915_gem_object, base);
}
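
/*
 * Round-trip sketch: because @base sits at offset zero, converting in either
 * direction is a no-op cast, and to_intel_bo(NULL) yields NULL:
 *
 *	struct drm_gem_object *gem = intel_bo_to_drm_bo(obj);
 *	WARN_ON(to_intel_bo(gem) != obj);
 */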

#endif