1 | /* |
2 | * Copyright 2018 Advanced Micro Devices, Inc. |
3 | * All Rights Reserved. |
4 | * |
5 | * Permission is hereby granted, free of charge, to any person obtaining a |
6 | * copy of this software and associated documentation files (the |
7 | * "Software"), to deal in the Software without restriction, including |
8 | * without limitation the rights to use, copy, modify, merge, publish, |
9 | * distribute, sub license, and/or sell copies of the Software, and to |
10 | * permit persons to whom the Software is furnished to do so, subject to |
11 | * the following conditions: |
12 | * |
13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
15 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
16 | * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, |
17 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
18 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
19 | * USE OR OTHER DEALINGS IN THE SOFTWARE. |
20 | * |
21 | * The above copyright notice and this permission notice (including the |
22 | * next paragraph) shall be included in all copies or substantial portions |
23 | * of the Software. |
24 | * |
25 | */ |
26 | #ifndef __AMDGPU_GMC_H__ |
27 | #define __AMDGPU_GMC_H__ |
28 | |
29 | #include <linux/types.h> |
30 | |
31 | #include "amdgpu_irq.h" |
32 | #include "amdgpu_ras.h" |
33 | |
34 | /* VA hole for 48bit addresses on Vega10 */ |
35 | #define AMDGPU_GMC_HOLE_START 0x0000800000000000ULL |
36 | #define AMDGPU_GMC_HOLE_END 0xffff800000000000ULL |
37 | |
38 | /* |
39 | * Hardware is programmed as if the hole doesn't exists with start and end |
40 | * address values. |
41 | * |
42 | * This mask is used to remove the upper 16bits of the VA and so come up with |
43 | * the linear addr value. |
44 | */ |
45 | #define AMDGPU_GMC_HOLE_MASK 0x0000ffffffffffffULL |
46 | |
47 | /* |
48 | * Ring size as power of two for the log of recent faults. |
49 | */ |
50 | #define AMDGPU_GMC_FAULT_RING_ORDER 8 |
51 | #define AMDGPU_GMC_FAULT_RING_SIZE (1 << AMDGPU_GMC_FAULT_RING_ORDER) |
52 | |
53 | /* |
54 | * Hash size as power of two for the log of recent faults |
55 | */ |
56 | #define AMDGPU_GMC_FAULT_HASH_ORDER 8 |
57 | #define AMDGPU_GMC_FAULT_HASH_SIZE (1 << AMDGPU_GMC_FAULT_HASH_ORDER) |
58 | |
59 | /* |
60 | * Number of IH timestamp ticks until a fault is considered handled |
61 | */ |
62 | #define AMDGPU_GMC_FAULT_TIMEOUT 5000ULL |
63 | |
64 | struct firmware; |
65 | |
/* Memory (NPS) partition modes. The numeric value of each AMDGPU_NPSx
 * entry matches the partition count x, so the enum value can be used
 * directly as the number of memory partitions.
 */
enum amdgpu_memory_partition {
	UNKNOWN_MEMORY_PARTITION_MODE = 0,	/* mode not (yet) determined */
	AMDGPU_NPS1_PARTITION_MODE = 1,
	AMDGPU_NPS2_PARTITION_MODE = 2,
	AMDGPU_NPS3_PARTITION_MODE = 3,
	AMDGPU_NPS4_PARTITION_MODE = 4,
	AMDGPU_NPS6_PARTITION_MODE = 6,
	AMDGPU_NPS8_PARTITION_MODE = 8,
};
75 | |
76 | /* |
77 | * GMC page fault information |
78 | */ |
struct amdgpu_gmc_fault {
	/* IH timestamp (48 bit) taken when the fault was recorded */
	uint64_t timestamp:48;
	/* ring index of the next older fault in the same hash chain */
	uint64_t next:AMDGPU_GMC_FAULT_RING_ORDER;
	/* fault identity key; atomic so lookups can race with updates.
	 * NOTE(review): presumably encodes addr/pasid — see
	 * amdgpu_gmc_filter_faults() for the exact encoding.
	 */
	atomic64_t key;
	/* timestamp after which this entry is considered expired */
	uint64_t timestamp_expiry:48;
};
85 | |
86 | /* |
87 | * VMHUB structures, functions & helpers |
88 | */ |
/* per-ASIC callbacks for a VM hub */
struct amdgpu_vmhub_funcs {
	/* decode and print an L2 protection fault status register value */
	void (*print_l2_protection_fault_status)(struct amdgpu_device *adev,
						 uint32_t status);
	/* build the invalidation request register value for vmid/flush_type */
	uint32_t (*get_invalidate_req)(unsigned int vmid, uint32_t flush_type);
};
94 | |
/* Register layout and callbacks of one VM hub instance.
 * The uint32_t members below are register offsets; per-context and
 * per-engine registers are located by adding the *_distance strides.
 */
struct amdgpu_vmhub {
	uint32_t ctx0_ptb_addr_lo32;
	uint32_t ctx0_ptb_addr_hi32;
	uint32_t vm_inv_eng0_sem;
	uint32_t vm_inv_eng0_req;
	uint32_t vm_inv_eng0_ack;
	uint32_t vm_context0_cntl;
	uint32_t vm_l2_pro_fault_status;
	uint32_t vm_l2_pro_fault_cntl;

	/*
	 * store the register distances between two continuous context domain
	 * and invalidation engine.
	 */
	uint32_t ctx_distance;
	uint32_t ctx_addr_distance; /* include LO32/HI32 */
	uint32_t eng_distance;
	uint32_t eng_addr_distance; /* include LO32/HI32 */

	uint32_t vm_cntx_cntl;
	uint32_t vm_cntx_cntl_vm_fault;
	uint32_t vm_l2_bank_select_reserved_cid2;

	uint32_t vm_contexts_disable;

	/* when set, SDMA invalidation requests need a workaround sequence */
	bool sdma_invalidation_workaround;

	const struct amdgpu_vmhub_funcs *vmhub_funcs;
};
124 | |
125 | /* |
126 | * GPU MC structures, functions & helpers |
127 | */ |
/* per-ASIC callbacks implemented by each GMC generation */
struct amdgpu_gmc_funcs {
	/* flush the vm tlb via mmio */
	void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
				uint32_t vmhub, uint32_t flush_type);
	/* flush the vm tlb via pasid */
	void (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
				    uint32_t flush_type, bool all_hub,
				    uint32_t inst);
	/* flush the vm tlb via ring */
	uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
				       uint64_t pd_addr);
	/* Change the VMID -> PASID mapping */
	void (*emit_pasid_mapping)(struct amdgpu_ring *ring, unsigned vmid,
				   unsigned pasid);
	/* enable/disable PRT support */
	void (*set_prt)(struct amdgpu_device *adev, bool enable);
	/* map mtype to hardware flags */
	uint64_t (*map_mtype)(struct amdgpu_device *adev, uint32_t flags);
	/* get the pde for a given mc addr */
	void (*get_vm_pde)(struct amdgpu_device *adev, int level,
			   u64 *dst, u64 *flags);
	/* get the pte flags to use for a BO VA mapping */
	void (*get_vm_pte)(struct amdgpu_device *adev,
			   struct amdgpu_bo_va_mapping *mapping,
			   uint64_t *flags);
	/* override per-page pte flags */
	void (*override_vm_pte_flags)(struct amdgpu_device *dev,
				      struct amdgpu_vm *vm,
				      uint64_t addr, uint64_t *flags);
	/* get the amount of memory used by the vbios for pre-OS console */
	unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev);

	/* query the current memory (NPS) partition mode */
	enum amdgpu_memory_partition (*query_mem_partition_mode)(
		struct amdgpu_device *adev);
};
163 | |
/* RAS (reliability/availability/serviceability) block object for XGMI */
struct amdgpu_xgmi_ras {
	struct amdgpu_ras_block_object ras_block;
};
167 | |
/* per-device XGMI (inter-GPU link) topology state */
struct amdgpu_xgmi {
	/* from psp */
	u64 node_id;
	u64 hive_id;
	/* fixed per family */
	u64 node_segment_size;
	/* physical node (0-3) */
	unsigned physical_node_id;
	/* number of nodes (0-4) */
	unsigned num_physical_nodes;
	/* gpu list in the same hive */
	struct list_head head;
	/* true when XGMI is supported and enabled on this device */
	bool supported;
	struct ras_common_if *ras_if;
	/* true when the GPU is connected to the CPU over XGMI */
	bool connected_to_cpu;
	bool pending_reset;
	struct amdgpu_xgmi_ras *ras;
};
186 | |
/* describes one memory partition; either a VRAM page-frame range or a
 * NUMA node, depending on how the device partitions memory
 */
struct amdgpu_mem_partition_info {
	union {
		struct {
			uint32_t fpfn;	/* first page frame number */
			uint32_t lpfn;	/* last page frame number */
		} range;
		struct {
			int node;	/* NUMA node id backing the partition */
		} numa;
	};
	uint64_t size;	/* partition size in bytes */
};
199 | |
200 | #define INVALID_PFN -1 |
201 | |
/* where to place the GART aperture inside the MC address space */
enum amdgpu_gart_placement {
	AMDGPU_GART_PLACEMENT_BEST_FIT = 0,	/* pick the best fitting hole */
	AMDGPU_GART_PLACEMENT_HIGH,		/* force high addresses */
	AMDGPU_GART_PLACEMENT_LOW,		/* force low addresses */
};
207 | |
208 | struct amdgpu_gmc { |
209 | /* FB's physical address in MMIO space (for CPU to |
210 | * map FB). This is different compared to the agp/ |
211 | * gart/vram_start/end field as the later is from |
212 | * GPU's view and aper_base is from CPU's view. |
213 | */ |
214 | resource_size_t aper_size; |
215 | resource_size_t aper_base; |
216 | /* for some chips with <= 32MB we need to lie |
217 | * about vram size near mc fb location */ |
218 | u64 mc_vram_size; |
219 | u64 visible_vram_size; |
220 | /* AGP aperture start and end in MC address space |
221 | * Driver find a hole in the MC address space |
222 | * to place AGP by setting MC_VM_AGP_BOT/TOP registers |
223 | * Under VMID0, logical address == MC address. AGP |
224 | * aperture maps to physical bus or IOVA addressed. |
225 | * AGP aperture is used to simulate FB in ZFB case. |
226 | * AGP aperture is also used for page table in system |
227 | * memory (mainly for APU). |
228 | * |
229 | */ |
230 | u64 agp_size; |
231 | u64 agp_start; |
232 | u64 agp_end; |
233 | /* GART aperture start and end in MC address space |
234 | * Driver find a hole in the MC address space |
235 | * to place GART by setting VM_CONTEXT0_PAGE_TABLE_START/END_ADDR |
236 | * registers |
237 | * Under VMID0, logical address inside GART aperture will |
238 | * be translated through gpuvm gart page table to access |
239 | * paged system memory |
240 | */ |
241 | u64 gart_size; |
242 | u64 gart_start; |
243 | u64 gart_end; |
244 | /* Frame buffer aperture of this GPU device. Different from |
245 | * fb_start (see below), this only covers the local GPU device. |
246 | * If driver uses FB aperture to access FB, driver get fb_start from |
247 | * MC_VM_FB_LOCATION_BASE (set by vbios) and calculate vram_start |
248 | * of this local device by adding an offset inside the XGMI hive. |
249 | * If driver uses GART table for VMID0 FB access, driver finds a hole in |
250 | * VMID0's virtual address space to place the SYSVM aperture inside |
251 | * which the first part is vram and the second part is gart (covering |
252 | * system ram). |
253 | */ |
254 | u64 vram_start; |
255 | u64 vram_end; |
256 | /* FB region , it's same as local vram region in single GPU, in XGMI |
257 | * configuration, this region covers all GPUs in the same hive , |
258 | * each GPU in the hive has the same view of this FB region . |
259 | * GPU0's vram starts at offset (0 * segment size) , |
260 | * GPU1 starts at offset (1 * segment size), etc. |
261 | */ |
262 | u64 fb_start; |
263 | u64 fb_end; |
264 | unsigned vram_width; |
265 | u64 real_vram_size; |
266 | int vram_mtrr; |
267 | u64 mc_mask; |
268 | const struct firmware *fw; /* MC firmware */ |
269 | uint32_t fw_version; |
270 | struct amdgpu_irq_src vm_fault; |
271 | uint32_t vram_type; |
272 | uint8_t vram_vendor; |
273 | uint32_t srbm_soft_reset; |
274 | bool prt_warning; |
275 | uint32_t sdpif_register; |
276 | /* apertures */ |
277 | u64 shared_aperture_start; |
278 | u64 shared_aperture_end; |
279 | u64 private_aperture_start; |
280 | u64 private_aperture_end; |
281 | /* protects concurrent invalidation */ |
282 | spinlock_t invalidate_lock; |
283 | bool translate_further; |
284 | struct kfd_vm_fault_info *vm_fault_info; |
285 | atomic_t vm_fault_info_updated; |
286 | |
287 | struct amdgpu_gmc_fault fault_ring[AMDGPU_GMC_FAULT_RING_SIZE]; |
288 | struct { |
289 | uint64_t idx:AMDGPU_GMC_FAULT_RING_ORDER; |
290 | } fault_hash[AMDGPU_GMC_FAULT_HASH_SIZE]; |
291 | uint64_t last_fault:AMDGPU_GMC_FAULT_RING_ORDER; |
292 | |
293 | bool tmz_enabled; |
294 | bool is_app_apu; |
295 | |
296 | struct amdgpu_mem_partition_info *mem_partitions; |
297 | uint8_t num_mem_partitions; |
298 | const struct amdgpu_gmc_funcs *gmc_funcs; |
299 | |
300 | struct amdgpu_xgmi xgmi; |
301 | struct amdgpu_irq_src ecc_irq; |
302 | int noretry; |
303 | |
304 | uint32_t vmid0_page_table_block_size; |
305 | uint32_t vmid0_page_table_depth; |
306 | struct amdgpu_bo *pdb0_bo; |
307 | /* CPU kmapped address of pdb0*/ |
308 | void *ptr_pdb0; |
309 | |
310 | /* MALL size */ |
311 | u64 mall_size; |
312 | uint32_t m_half_use; |
313 | |
314 | /* number of UMC instances */ |
315 | int num_umc; |
316 | /* mode2 save restore */ |
317 | u64 VM_L2_CNTL; |
318 | u64 VM_L2_CNTL2; |
319 | u64 VM_DUMMY_PAGE_FAULT_CNTL; |
320 | u64 VM_DUMMY_PAGE_FAULT_ADDR_LO32; |
321 | u64 VM_DUMMY_PAGE_FAULT_ADDR_HI32; |
322 | u64 VM_L2_PROTECTION_FAULT_CNTL; |
323 | u64 VM_L2_PROTECTION_FAULT_CNTL2; |
324 | u64 VM_L2_PROTECTION_FAULT_MM_CNTL3; |
325 | u64 VM_L2_PROTECTION_FAULT_MM_CNTL4; |
326 | u64 VM_L2_PROTECTION_FAULT_ADDR_LO32; |
327 | u64 VM_L2_PROTECTION_FAULT_ADDR_HI32; |
328 | u64 VM_DEBUG; |
329 | u64 VM_L2_MM_GROUP_RT_CLASSES; |
330 | u64 VM_L2_BANK_SELECT_RESERVED_CID; |
331 | u64 VM_L2_BANK_SELECT_RESERVED_CID2; |
332 | u64 VM_L2_CACHE_PARITY_CNTL; |
333 | u64 VM_L2_IH_LOG_CNTL; |
334 | u64 VM_CONTEXT_CNTL[16]; |
335 | u64 VM_CONTEXT_PAGE_TABLE_BASE_ADDR_LO32[16]; |
336 | u64 VM_CONTEXT_PAGE_TABLE_BASE_ADDR_HI32[16]; |
337 | u64 VM_CONTEXT_PAGE_TABLE_START_ADDR_LO32[16]; |
338 | u64 VM_CONTEXT_PAGE_TABLE_START_ADDR_HI32[16]; |
339 | u64 VM_CONTEXT_PAGE_TABLE_END_ADDR_LO32[16]; |
340 | u64 VM_CONTEXT_PAGE_TABLE_END_ADDR_HI32[16]; |
341 | u64 MC_VM_MX_L1_TLB_CNTL; |
342 | |
343 | u64 noretry_flags; |
344 | |
345 | bool ; |
346 | bool ; |
347 | bool flush_pasid_uses_kiq; |
348 | }; |
349 | |
/* convenience wrappers dispatching to the per-ASIC gmc_funcs callbacks */
#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
#define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
#define amdgpu_gmc_get_vm_pte(adev, mapping, flags) (adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (flags))
#define amdgpu_gmc_override_vm_pte_flags(adev, vm, addr, pte_flags) \
	(adev)->gmc.gmc_funcs->override_vm_pte_flags \
		((adev), (vm), (addr), (pte_flags))
#define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
359 | |
360 | /** |
361 | * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR |
362 | * |
363 | * @adev: amdgpu_device pointer |
364 | * |
365 | * Returns: |
366 | * True if full VRAM is visible through the BAR |
367 | */ |
368 | static inline bool amdgpu_gmc_vram_full_visible(struct amdgpu_gmc *gmc) |
369 | { |
370 | WARN_ON(gmc->real_vram_size < gmc->visible_vram_size); |
371 | |
372 | return (gmc->real_vram_size == gmc->visible_vram_size); |
373 | } |
374 | |
375 | /** |
376 | * amdgpu_gmc_sign_extend - sign extend the given gmc address |
377 | * |
378 | * @addr: address to extend |
379 | */ |
380 | static inline uint64_t amdgpu_gmc_sign_extend(uint64_t addr) |
381 | { |
382 | if (addr >= AMDGPU_GMC_HOLE_START) |
383 | addr |= AMDGPU_GMC_HOLE_END; |
384 | |
385 | return addr; |
386 | } |
387 | |
/* PDB0 (VMID0 root page table) allocation and PTE/PDE helpers */
int amdgpu_gmc_pdb0_alloc(struct amdgpu_device *adev);
void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
			       uint64_t *addr, uint64_t *flags);
int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
			   uint32_t gpu_page_idx, uint64_t addr,
			   uint64_t flags);
uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo);
uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo);
/* MC address space layout (SYSVM/VRAM/GART/AGP aperture placement) */
void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc);
void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
			      u64 base);
void amdgpu_gmc_gart_location(struct amdgpu_device *adev,
			      struct amdgpu_gmc *mc,
			      enum amdgpu_gart_placement gart_placement);
void amdgpu_gmc_agp_location(struct amdgpu_device *adev,
			     struct amdgpu_gmc *mc);
void amdgpu_gmc_set_agp_default(struct amdgpu_device *adev,
				struct amdgpu_gmc *mc);
/* page fault filtering using gmc->fault_ring / fault_hash */
bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev,
			      struct amdgpu_ih_ring *ih, uint64_t addr,
			      uint16_t pasid, uint64_t timestamp);
void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
				     uint16_t pasid);
/* RAS lifecycle for the GMC block */
int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
/* TLB invalidation engine allocation and flush entry points */
int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
			      uint32_t vmhub, uint32_t flush_type);
int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
				   uint32_t flush_type, bool all_hub,
				   uint32_t inst);

extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev);
extern void amdgpu_gmc_noretry_set(struct amdgpu_device *adev);

extern void
amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type,
			      bool enable);

void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev);

/* VRAM MC-address to physical-address translation helpers */
void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev);
uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr);
uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo);
uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo);
int amdgpu_gmc_vram_checking(struct amdgpu_device *adev);
int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev);
void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev);
437 | |
438 | #endif |
439 | |