1 | /* |
2 | * Copyright 2014 Advanced Micro Devices, Inc. |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice shall be included in |
12 | * all copies or substantial portions of the Software. |
13 | * |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
20 | * OTHER DEALINGS IN THE SOFTWARE. |
21 | * |
22 | */ |
23 | |
24 | #include <linux/types.h> |
25 | #include <linux/kernel.h> |
26 | #include <linux/log2.h> |
27 | #include <linux/sched.h> |
28 | #include <linux/slab.h> |
29 | #include <linux/mutex.h> |
30 | #include <linux/device.h> |
31 | |
32 | #include "kfd_pm4_headers.h" |
33 | #include "kfd_pm4_headers_diq.h" |
34 | #include "kfd_kernel_queue.h" |
35 | #include "kfd_priv.h" |
36 | #include "kfd_pm4_opcodes.h" |
37 | #include "cik_regs.h" |
38 | #include "kfd_dbgmgr.h" |
39 | #include "kfd_dbgdev.h" |
40 | #include "kfd_device_queue_manager.h" |
41 | |
42 | static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev) |
43 | { |
44 | dev->kfd2kgd->address_watch_disable(dev->kgd); |
45 | } |
46 | |
47 | static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, |
48 | unsigned int pasid, uint64_t vmid0_address, |
49 | uint32_t *packet_buff, size_t size_in_bytes) |
50 | { |
51 | struct pm4__release_mem *rm_packet; |
52 | struct pm4__indirect_buffer_pasid *ib_packet; |
53 | struct kfd_mem_obj *mem_obj; |
54 | size_t pq_packets_size_in_bytes; |
55 | union ULARGE_INTEGER *largep; |
56 | union ULARGE_INTEGER addr; |
57 | struct kernel_queue *kq; |
58 | uint64_t *rm_state; |
59 | unsigned int *ib_packet_buff; |
60 | int status; |
61 | |
62 | if (WARN_ON(!size_in_bytes)) |
63 | return -EINVAL; |
64 | |
65 | kq = dbgdev->kq; |
66 | |
67 | pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) + |
68 | sizeof(struct pm4__indirect_buffer_pasid); |
69 | |
70 | /* |
71 | * We acquire a buffer from DIQ |
72 | * The receive packet buff will be sitting on the Indirect Buffer |
73 | * and in the PQ we put the IB packet + sync packet(s). |
74 | */ |
75 | status = kq->ops.acquire_packet_buffer(kq, |
76 | pq_packets_size_in_bytes / sizeof(uint32_t), |
77 | &ib_packet_buff); |
78 | if (status) { |
79 | pr_err("acquire_packet_buffer failed\n" ); |
80 | return status; |
81 | } |
82 | |
83 | memset(ib_packet_buff, 0, pq_packets_size_in_bytes); |
84 | |
85 | ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff); |
86 | |
87 | ib_packet->header.count = 3; |
88 | ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID; |
89 | ib_packet->header.type = PM4_TYPE_3; |
90 | |
91 | largep = (union ULARGE_INTEGER *) &vmid0_address; |
92 | |
93 | ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2; |
94 | ib_packet->bitfields3.ib_base_hi = largep->u.high_part; |
95 | |
96 | ib_packet->control = (1 << 23) | (1 << 31) | |
97 | ((size_in_bytes / 4) & 0xfffff); |
98 | |
99 | ib_packet->bitfields5.pasid = pasid; |
100 | |
101 | /* |
102 | * for now we use release mem for GPU-CPU synchronization |
103 | * Consider WaitRegMem + WriteData as a better alternative |
104 | * we get a GART allocations ( gpu/cpu mapping), |
105 | * for the sync variable, and wait until: |
106 | * (a) Sync with HW |
107 | * (b) Sync var is written by CP to mem. |
108 | */ |
109 | rm_packet = (struct pm4__release_mem *) (ib_packet_buff + |
110 | (sizeof(struct pm4__indirect_buffer_pasid) / |
111 | sizeof(unsigned int))); |
112 | |
113 | status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t), |
114 | &mem_obj); |
115 | |
116 | if (status) { |
117 | pr_err("Failed to allocate GART memory\n" ); |
118 | kq->ops.rollback_packet(kq); |
119 | return status; |
120 | } |
121 | |
122 | rm_state = (uint64_t *) mem_obj->cpu_ptr; |
123 | |
124 | *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING; |
125 | |
126 | rm_packet->header.opcode = IT_RELEASE_MEM; |
127 | rm_packet->header.type = PM4_TYPE_3; |
128 | rm_packet->header.count = sizeof(struct pm4__release_mem) / 4 - 2; |
129 | |
130 | rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; |
131 | rm_packet->bitfields2.event_index = |
132 | event_index___release_mem__end_of_pipe; |
133 | |
134 | rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru; |
135 | rm_packet->bitfields2.atc = 0; |
136 | rm_packet->bitfields2.tc_wb_action_ena = 1; |
137 | |
138 | addr.quad_part = mem_obj->gpu_addr; |
139 | |
140 | rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2; |
141 | rm_packet->address_hi = addr.u.high_part; |
142 | |
143 | rm_packet->bitfields3.data_sel = |
144 | data_sel___release_mem__send_64_bit_data; |
145 | |
146 | rm_packet->bitfields3.int_sel = |
147 | int_sel___release_mem__send_data_after_write_confirm; |
148 | |
149 | rm_packet->bitfields3.dst_sel = |
150 | dst_sel___release_mem__memory_controller; |
151 | |
152 | rm_packet->data_lo = QUEUESTATE__ACTIVE; |
153 | |
154 | kq->ops.submit_packet(kq); |
155 | |
156 | /* Wait till CP writes sync code: */ |
157 | status = amdkfd_fence_wait_timeout( |
158 | (unsigned int *) rm_state, |
159 | QUEUESTATE__ACTIVE, 1500); |
160 | |
161 | kfd_gtt_sa_free(dbgdev->dev, mem_obj); |
162 | |
163 | return status; |
164 | } |
165 | |
166 | static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev) |
167 | { |
168 | /* |
169 | * no action is needed in this case, |
170 | * just make sure diq will not be used |
171 | */ |
172 | |
173 | dbgdev->kq = NULL; |
174 | |
175 | return 0; |
176 | } |
177 | |
178 | static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) |
179 | { |
180 | struct queue_properties properties; |
181 | unsigned int qid; |
182 | struct kernel_queue *kq = NULL; |
183 | int status; |
184 | |
185 | properties.type = KFD_QUEUE_TYPE_DIQ; |
186 | |
187 | status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL, |
188 | &properties, &qid); |
189 | |
190 | if (status) { |
191 | pr_err("Failed to create DIQ\n" ); |
192 | return status; |
193 | } |
194 | |
195 | pr_debug("DIQ Created with queue id: %d\n" , qid); |
196 | |
197 | kq = pqm_get_kernel_queue(dbgdev->pqm, qid); |
198 | |
199 | if (!kq) { |
200 | pr_err("Error getting DIQ\n" ); |
201 | pqm_destroy_queue(dbgdev->pqm, qid); |
202 | return -EFAULT; |
203 | } |
204 | |
205 | dbgdev->kq = kq; |
206 | |
207 | return status; |
208 | } |
209 | |
210 | static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev) |
211 | { |
212 | /* disable watch address */ |
213 | dbgdev_address_watch_disable_nodiq(dbgdev->dev); |
214 | return 0; |
215 | } |
216 | |
217 | static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev) |
218 | { |
219 | /* todo - disable address watch */ |
220 | int status; |
221 | |
222 | status = pqm_destroy_queue(dbgdev->pqm, |
223 | dbgdev->kq->queue->properties.queue_id); |
224 | dbgdev->kq = NULL; |
225 | |
226 | return status; |
227 | } |
228 | |
229 | static void dbgdev_address_watch_set_registers( |
230 | const struct dbg_address_watch_info *adw_info, |
231 | union TCP_WATCH_ADDR_H_BITS *addrHi, |
232 | union TCP_WATCH_ADDR_L_BITS *addrLo, |
233 | union TCP_WATCH_CNTL_BITS *cntl, |
234 | unsigned int index, unsigned int vmid) |
235 | { |
236 | union ULARGE_INTEGER addr; |
237 | |
238 | addr.quad_part = 0; |
239 | addrHi->u32All = 0; |
240 | addrLo->u32All = 0; |
241 | cntl->u32All = 0; |
242 | |
243 | if (adw_info->watch_mask) |
244 | cntl->bitfields.mask = |
245 | (uint32_t) (adw_info->watch_mask[index] & |
246 | ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK); |
247 | else |
248 | cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK; |
249 | |
250 | addr.quad_part = (unsigned long long) adw_info->watch_address[index]; |
251 | |
252 | addrHi->bitfields.addr = addr.u.high_part & |
253 | ADDRESS_WATCH_REG_ADDHIGH_MASK; |
254 | addrLo->bitfields.addr = |
255 | (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT); |
256 | |
257 | cntl->bitfields.mode = adw_info->watch_mode[index]; |
258 | cntl->bitfields.vmid = (uint32_t) vmid; |
259 | /* for now assume it is an ATC address */ |
260 | cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT; |
261 | |
262 | pr_debug("\t\t%20s %08x\n" , "set reg mask :" , cntl->bitfields.mask); |
263 | pr_debug("\t\t%20s %08x\n" , "set reg add high :" , |
264 | addrHi->bitfields.addr); |
265 | pr_debug("\t\t%20s %08x\n" , "set reg add low :" , |
266 | addrLo->bitfields.addr); |
267 | } |
268 | |
269 | static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, |
270 | struct dbg_address_watch_info *adw_info) |
271 | { |
272 | union TCP_WATCH_ADDR_H_BITS addrHi; |
273 | union TCP_WATCH_ADDR_L_BITS addrLo; |
274 | union TCP_WATCH_CNTL_BITS cntl; |
275 | struct kfd_process_device *pdd; |
276 | unsigned int i; |
277 | |
278 | /* taking the vmid for that process on the safe way using pdd */ |
279 | pdd = kfd_get_process_device_data(dbgdev->dev, |
280 | adw_info->process); |
281 | if (!pdd) { |
282 | pr_err("Failed to get pdd for wave control no DIQ\n" ); |
283 | return -EFAULT; |
284 | } |
285 | |
286 | addrHi.u32All = 0; |
287 | addrLo.u32All = 0; |
288 | cntl.u32All = 0; |
289 | |
290 | if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || |
291 | (adw_info->num_watch_points == 0)) { |
292 | pr_err("num_watch_points is invalid\n" ); |
293 | return -EINVAL; |
294 | } |
295 | |
296 | if (!adw_info->watch_mode || !adw_info->watch_address) { |
297 | pr_err("adw_info fields are not valid\n" ); |
298 | return -EINVAL; |
299 | } |
300 | |
301 | for (i = 0; i < adw_info->num_watch_points; i++) { |
302 | dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo, |
303 | &cntl, i, pdd->qpd.vmid); |
304 | |
305 | pr_debug("\t\t%30s\n" , "* * * * * * * * * * * * * * * * * *" ); |
306 | pr_debug("\t\t%20s %08x\n" , "register index :" , i); |
307 | pr_debug("\t\t%20s %08x\n" , "vmid is :" , pdd->qpd.vmid); |
308 | pr_debug("\t\t%20s %08x\n" , "Address Low is :" , |
309 | addrLo.bitfields.addr); |
310 | pr_debug("\t\t%20s %08x\n" , "Address high is :" , |
311 | addrHi.bitfields.addr); |
312 | pr_debug("\t\t%20s %08x\n" , "Address high is :" , |
313 | addrHi.bitfields.addr); |
314 | pr_debug("\t\t%20s %08x\n" , "Control Mask is :" , |
315 | cntl.bitfields.mask); |
316 | pr_debug("\t\t%20s %08x\n" , "Control Mode is :" , |
317 | cntl.bitfields.mode); |
318 | pr_debug("\t\t%20s %08x\n" , "Control Vmid is :" , |
319 | cntl.bitfields.vmid); |
320 | pr_debug("\t\t%20s %08x\n" , "Control atc is :" , |
321 | cntl.bitfields.atc); |
322 | pr_debug("\t\t%30s\n" , "* * * * * * * * * * * * * * * * * *" ); |
323 | |
324 | pdd->dev->kfd2kgd->address_watch_execute( |
325 | dbgdev->dev->kgd, |
326 | i, |
327 | cntl.u32All, |
328 | addrHi.u32All, |
329 | addrLo.u32All); |
330 | } |
331 | |
332 | return 0; |
333 | } |
334 | |
335 | static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, |
336 | struct dbg_address_watch_info *adw_info) |
337 | { |
338 | struct pm4__set_config_reg *packets_vec; |
339 | union TCP_WATCH_ADDR_H_BITS addrHi; |
340 | union TCP_WATCH_ADDR_L_BITS addrLo; |
341 | union TCP_WATCH_CNTL_BITS cntl; |
342 | struct kfd_mem_obj *mem_obj; |
343 | unsigned int aw_reg_add_dword; |
344 | uint32_t *packet_buff_uint; |
345 | unsigned int i; |
346 | int status; |
347 | size_t ib_size = sizeof(struct pm4__set_config_reg) * 4; |
348 | /* we do not control the vmid in DIQ mode, just a place holder */ |
349 | unsigned int vmid = 0; |
350 | |
351 | addrHi.u32All = 0; |
352 | addrLo.u32All = 0; |
353 | cntl.u32All = 0; |
354 | |
355 | if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || |
356 | (adw_info->num_watch_points == 0)) { |
357 | pr_err("num_watch_points is invalid\n" ); |
358 | return -EINVAL; |
359 | } |
360 | |
361 | if (!adw_info->watch_mode || !adw_info->watch_address) { |
362 | pr_err("adw_info fields are not valid\n" ); |
363 | return -EINVAL; |
364 | } |
365 | |
366 | status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); |
367 | |
368 | if (status) { |
369 | pr_err("Failed to allocate GART memory\n" ); |
370 | return status; |
371 | } |
372 | |
373 | packet_buff_uint = mem_obj->cpu_ptr; |
374 | |
375 | memset(packet_buff_uint, 0, ib_size); |
376 | |
377 | packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint); |
378 | |
379 | packets_vec[0].header.count = 1; |
380 | packets_vec[0].header.opcode = IT_SET_CONFIG_REG; |
381 | packets_vec[0].header.type = PM4_TYPE_3; |
382 | packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; |
383 | packets_vec[0].bitfields2.insert_vmid = 1; |
384 | packets_vec[1].ordinal1 = packets_vec[0].ordinal1; |
385 | packets_vec[1].bitfields2.insert_vmid = 0; |
386 | packets_vec[2].ordinal1 = packets_vec[0].ordinal1; |
387 | packets_vec[2].bitfields2.insert_vmid = 0; |
388 | packets_vec[3].ordinal1 = packets_vec[0].ordinal1; |
389 | packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; |
390 | packets_vec[3].bitfields2.insert_vmid = 1; |
391 | |
392 | for (i = 0; i < adw_info->num_watch_points; i++) { |
393 | dbgdev_address_watch_set_registers(adw_info, |
394 | &addrHi, |
395 | &addrLo, |
396 | &cntl, |
397 | i, |
398 | vmid); |
399 | |
400 | pr_debug("\t\t%30s\n" , "* * * * * * * * * * * * * * * * * *" ); |
401 | pr_debug("\t\t%20s %08x\n" , "register index :" , i); |
402 | pr_debug("\t\t%20s %08x\n" , "vmid is :" , vmid); |
403 | pr_debug("\t\t%20s %p\n" , "Add ptr is :" , |
404 | adw_info->watch_address); |
405 | pr_debug("\t\t%20s %08llx\n" , "Add is :" , |
406 | adw_info->watch_address[i]); |
407 | pr_debug("\t\t%20s %08x\n" , "Address Low is :" , |
408 | addrLo.bitfields.addr); |
409 | pr_debug("\t\t%20s %08x\n" , "Address high is :" , |
410 | addrHi.bitfields.addr); |
411 | pr_debug("\t\t%20s %08x\n" , "Control Mask is :" , |
412 | cntl.bitfields.mask); |
413 | pr_debug("\t\t%20s %08x\n" , "Control Mode is :" , |
414 | cntl.bitfields.mode); |
415 | pr_debug("\t\t%20s %08x\n" , "Control Vmid is :" , |
416 | cntl.bitfields.vmid); |
417 | pr_debug("\t\t%20s %08x\n" , "Control atc is :" , |
418 | cntl.bitfields.atc); |
419 | pr_debug("\t\t%30s\n" , "* * * * * * * * * * * * * * * * * *" ); |
420 | |
421 | aw_reg_add_dword = |
422 | dbgdev->dev->kfd2kgd->address_watch_get_offset( |
423 | dbgdev->dev->kgd, |
424 | i, |
425 | ADDRESS_WATCH_REG_CNTL); |
426 | |
427 | packets_vec[0].bitfields2.reg_offset = |
428 | aw_reg_add_dword - AMD_CONFIG_REG_BASE; |
429 | |
430 | packets_vec[0].reg_data[0] = cntl.u32All; |
431 | |
432 | aw_reg_add_dword = |
433 | dbgdev->dev->kfd2kgd->address_watch_get_offset( |
434 | dbgdev->dev->kgd, |
435 | i, |
436 | ADDRESS_WATCH_REG_ADDR_HI); |
437 | |
438 | packets_vec[1].bitfields2.reg_offset = |
439 | aw_reg_add_dword - AMD_CONFIG_REG_BASE; |
440 | packets_vec[1].reg_data[0] = addrHi.u32All; |
441 | |
442 | aw_reg_add_dword = |
443 | dbgdev->dev->kfd2kgd->address_watch_get_offset( |
444 | dbgdev->dev->kgd, |
445 | i, |
446 | ADDRESS_WATCH_REG_ADDR_LO); |
447 | |
448 | packets_vec[2].bitfields2.reg_offset = |
449 | aw_reg_add_dword - AMD_CONFIG_REG_BASE; |
450 | packets_vec[2].reg_data[0] = addrLo.u32All; |
451 | |
452 | /* enable watch flag if address is not zero*/ |
453 | if (adw_info->watch_address[i] > 0) |
454 | cntl.bitfields.valid = 1; |
455 | else |
456 | cntl.bitfields.valid = 0; |
457 | |
458 | aw_reg_add_dword = |
459 | dbgdev->dev->kfd2kgd->address_watch_get_offset( |
460 | dbgdev->dev->kgd, |
461 | i, |
462 | ADDRESS_WATCH_REG_CNTL); |
463 | |
464 | packets_vec[3].bitfields2.reg_offset = |
465 | aw_reg_add_dword - AMD_CONFIG_REG_BASE; |
466 | packets_vec[3].reg_data[0] = cntl.u32All; |
467 | |
468 | status = dbgdev_diq_submit_ib( |
469 | dbgdev, |
470 | adw_info->process->pasid, |
471 | mem_obj->gpu_addr, |
472 | packet_buff_uint, |
473 | ib_size); |
474 | |
475 | if (status) { |
476 | pr_err("Failed to submit IB to DIQ\n" ); |
477 | break; |
478 | } |
479 | } |
480 | |
481 | kfd_gtt_sa_free(dbgdev->dev, mem_obj); |
482 | return status; |
483 | } |
484 | |
485 | static int dbgdev_wave_control_set_registers( |
486 | struct dbg_wave_control_info *wac_info, |
487 | union SQ_CMD_BITS *in_reg_sq_cmd, |
488 | union GRBM_GFX_INDEX_BITS *in_reg_gfx_index) |
489 | { |
490 | int status = 0; |
491 | union SQ_CMD_BITS reg_sq_cmd; |
492 | union GRBM_GFX_INDEX_BITS reg_gfx_index; |
493 | struct HsaDbgWaveMsgAMDGen2 *pMsg; |
494 | |
495 | reg_sq_cmd.u32All = 0; |
496 | reg_gfx_index.u32All = 0; |
497 | pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2; |
498 | |
499 | switch (wac_info->mode) { |
500 | /* Send command to single wave */ |
501 | case HSA_DBG_WAVEMODE_SINGLE: |
502 | /* |
503 | * Limit access to the process waves only, |
504 | * by setting vmid check |
505 | */ |
506 | reg_sq_cmd.bits.check_vmid = 1; |
507 | reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD; |
508 | reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId; |
509 | reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE; |
510 | |
511 | reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray; |
512 | reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine; |
513 | reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU; |
514 | |
515 | break; |
516 | |
517 | /* Send command to all waves with matching VMID */ |
518 | case HSA_DBG_WAVEMODE_BROADCAST_PROCESS: |
519 | |
520 | reg_gfx_index.bits.sh_broadcast_writes = 1; |
521 | reg_gfx_index.bits.se_broadcast_writes = 1; |
522 | reg_gfx_index.bits.instance_broadcast_writes = 1; |
523 | |
524 | reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; |
525 | |
526 | break; |
527 | |
528 | /* Send command to all CU waves with matching VMID */ |
529 | case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU: |
530 | |
531 | reg_sq_cmd.bits.check_vmid = 1; |
532 | reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; |
533 | |
534 | reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray; |
535 | reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine; |
536 | reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU; |
537 | |
538 | break; |
539 | |
540 | default: |
541 | return -EINVAL; |
542 | } |
543 | |
544 | switch (wac_info->operand) { |
545 | case HSA_DBG_WAVEOP_HALT: |
546 | reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT; |
547 | break; |
548 | |
549 | case HSA_DBG_WAVEOP_RESUME: |
550 | reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME; |
551 | break; |
552 | |
553 | case HSA_DBG_WAVEOP_KILL: |
554 | reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL; |
555 | break; |
556 | |
557 | case HSA_DBG_WAVEOP_DEBUG: |
558 | reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG; |
559 | break; |
560 | |
561 | case HSA_DBG_WAVEOP_TRAP: |
562 | if (wac_info->trapId < MAX_TRAPID) { |
563 | reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP; |
564 | reg_sq_cmd.bits.trap_id = wac_info->trapId; |
565 | } else { |
566 | status = -EINVAL; |
567 | } |
568 | break; |
569 | |
570 | default: |
571 | status = -EINVAL; |
572 | break; |
573 | } |
574 | |
575 | if (status == 0) { |
576 | *in_reg_sq_cmd = reg_sq_cmd; |
577 | *in_reg_gfx_index = reg_gfx_index; |
578 | } |
579 | |
580 | return status; |
581 | } |
582 | |
583 | static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, |
584 | struct dbg_wave_control_info *wac_info) |
585 | { |
586 | |
587 | int status; |
588 | union SQ_CMD_BITS reg_sq_cmd; |
589 | union GRBM_GFX_INDEX_BITS reg_gfx_index; |
590 | struct kfd_mem_obj *mem_obj; |
591 | uint32_t *packet_buff_uint; |
592 | struct pm4__set_config_reg *packets_vec; |
593 | size_t ib_size = sizeof(struct pm4__set_config_reg) * 3; |
594 | |
595 | reg_sq_cmd.u32All = 0; |
596 | |
597 | status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd, |
598 | ®_gfx_index); |
599 | if (status) { |
600 | pr_err("Failed to set wave control registers\n" ); |
601 | return status; |
602 | } |
603 | |
604 | /* we do not control the VMID in DIQ, so reset it to a known value */ |
605 | reg_sq_cmd.bits.vm_id = 0; |
606 | |
607 | pr_debug("\t\t %30s\n" , "* * * * * * * * * * * * * * * * * *" ); |
608 | |
609 | pr_debug("\t\t mode is: %u\n" , wac_info->mode); |
610 | pr_debug("\t\t operand is: %u\n" , wac_info->operand); |
611 | pr_debug("\t\t trap id is: %u\n" , wac_info->trapId); |
612 | pr_debug("\t\t msg value is: %u\n" , |
613 | wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); |
614 | pr_debug("\t\t vmid is: N/A\n" ); |
615 | |
616 | pr_debug("\t\t chk_vmid is : %u\n" , reg_sq_cmd.bitfields.check_vmid); |
617 | pr_debug("\t\t command is : %u\n" , reg_sq_cmd.bitfields.cmd); |
618 | pr_debug("\t\t queue id is : %u\n" , reg_sq_cmd.bitfields.queue_id); |
619 | pr_debug("\t\t simd id is : %u\n" , reg_sq_cmd.bitfields.simd_id); |
620 | pr_debug("\t\t mode is : %u\n" , reg_sq_cmd.bitfields.mode); |
621 | pr_debug("\t\t vm_id is : %u\n" , reg_sq_cmd.bitfields.vm_id); |
622 | pr_debug("\t\t wave_id is : %u\n" , reg_sq_cmd.bitfields.wave_id); |
623 | |
624 | pr_debug("\t\t ibw is : %u\n" , |
625 | reg_gfx_index.bitfields.instance_broadcast_writes); |
626 | pr_debug("\t\t ii is : %u\n" , |
627 | reg_gfx_index.bitfields.instance_index); |
628 | pr_debug("\t\t sebw is : %u\n" , |
629 | reg_gfx_index.bitfields.se_broadcast_writes); |
630 | pr_debug("\t\t se_ind is : %u\n" , reg_gfx_index.bitfields.se_index); |
631 | pr_debug("\t\t sh_ind is : %u\n" , reg_gfx_index.bitfields.sh_index); |
632 | pr_debug("\t\t sbw is : %u\n" , |
633 | reg_gfx_index.bitfields.sh_broadcast_writes); |
634 | |
635 | pr_debug("\t\t %30s\n" , "* * * * * * * * * * * * * * * * * *" ); |
636 | |
637 | status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); |
638 | |
639 | if (status != 0) { |
640 | pr_err("Failed to allocate GART memory\n" ); |
641 | return status; |
642 | } |
643 | |
644 | packet_buff_uint = mem_obj->cpu_ptr; |
645 | |
646 | memset(packet_buff_uint, 0, ib_size); |
647 | |
648 | packets_vec = (struct pm4__set_config_reg *) packet_buff_uint; |
649 | packets_vec[0].header.count = 1; |
650 | packets_vec[0].header.opcode = IT_SET_UCONFIG_REG; |
651 | packets_vec[0].header.type = PM4_TYPE_3; |
652 | packets_vec[0].bitfields2.reg_offset = |
653 | GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE; |
654 | |
655 | packets_vec[0].bitfields2.insert_vmid = 0; |
656 | packets_vec[0].reg_data[0] = reg_gfx_index.u32All; |
657 | |
658 | packets_vec[1].header.count = 1; |
659 | packets_vec[1].header.opcode = IT_SET_CONFIG_REG; |
660 | packets_vec[1].header.type = PM4_TYPE_3; |
661 | packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE; |
662 | |
663 | packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET; |
664 | packets_vec[1].bitfields2.insert_vmid = 1; |
665 | packets_vec[1].reg_data[0] = reg_sq_cmd.u32All; |
666 | |
667 | /* Restore the GRBM_GFX_INDEX register */ |
668 | |
669 | reg_gfx_index.u32All = 0; |
670 | reg_gfx_index.bits.sh_broadcast_writes = 1; |
671 | reg_gfx_index.bits.instance_broadcast_writes = 1; |
672 | reg_gfx_index.bits.se_broadcast_writes = 1; |
673 | |
674 | |
675 | packets_vec[2].ordinal1 = packets_vec[0].ordinal1; |
676 | packets_vec[2].bitfields2.reg_offset = |
677 | GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE; |
678 | |
679 | packets_vec[2].bitfields2.insert_vmid = 0; |
680 | packets_vec[2].reg_data[0] = reg_gfx_index.u32All; |
681 | |
682 | status = dbgdev_diq_submit_ib( |
683 | dbgdev, |
684 | wac_info->process->pasid, |
685 | mem_obj->gpu_addr, |
686 | packet_buff_uint, |
687 | ib_size); |
688 | |
689 | if (status) |
690 | pr_err("Failed to submit IB to DIQ\n" ); |
691 | |
692 | kfd_gtt_sa_free(dbgdev->dev, mem_obj); |
693 | |
694 | return status; |
695 | } |
696 | |
697 | static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev, |
698 | struct dbg_wave_control_info *wac_info) |
699 | { |
700 | int status; |
701 | union SQ_CMD_BITS reg_sq_cmd; |
702 | union GRBM_GFX_INDEX_BITS reg_gfx_index; |
703 | struct kfd_process_device *pdd; |
704 | |
705 | reg_sq_cmd.u32All = 0; |
706 | |
707 | /* taking the VMID for that process on the safe way using PDD */ |
708 | pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process); |
709 | |
710 | if (!pdd) { |
711 | pr_err("Failed to get pdd for wave control no DIQ\n" ); |
712 | return -EFAULT; |
713 | } |
714 | status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd, |
715 | ®_gfx_index); |
716 | if (status) { |
717 | pr_err("Failed to set wave control registers\n" ); |
718 | return status; |
719 | } |
720 | |
721 | /* for non DIQ we need to patch the VMID: */ |
722 | |
723 | reg_sq_cmd.bits.vm_id = pdd->qpd.vmid; |
724 | |
725 | pr_debug("\t\t %30s\n" , "* * * * * * * * * * * * * * * * * *" ); |
726 | |
727 | pr_debug("\t\t mode is: %u\n" , wac_info->mode); |
728 | pr_debug("\t\t operand is: %u\n" , wac_info->operand); |
729 | pr_debug("\t\t trap id is: %u\n" , wac_info->trapId); |
730 | pr_debug("\t\t msg value is: %u\n" , |
731 | wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); |
732 | pr_debug("\t\t vmid is: %u\n" , pdd->qpd.vmid); |
733 | |
734 | pr_debug("\t\t chk_vmid is : %u\n" , reg_sq_cmd.bitfields.check_vmid); |
735 | pr_debug("\t\t command is : %u\n" , reg_sq_cmd.bitfields.cmd); |
736 | pr_debug("\t\t queue id is : %u\n" , reg_sq_cmd.bitfields.queue_id); |
737 | pr_debug("\t\t simd id is : %u\n" , reg_sq_cmd.bitfields.simd_id); |
738 | pr_debug("\t\t mode is : %u\n" , reg_sq_cmd.bitfields.mode); |
739 | pr_debug("\t\t vm_id is : %u\n" , reg_sq_cmd.bitfields.vm_id); |
740 | pr_debug("\t\t wave_id is : %u\n" , reg_sq_cmd.bitfields.wave_id); |
741 | |
742 | pr_debug("\t\t ibw is : %u\n" , |
743 | reg_gfx_index.bitfields.instance_broadcast_writes); |
744 | pr_debug("\t\t ii is : %u\n" , |
745 | reg_gfx_index.bitfields.instance_index); |
746 | pr_debug("\t\t sebw is : %u\n" , |
747 | reg_gfx_index.bitfields.se_broadcast_writes); |
748 | pr_debug("\t\t se_ind is : %u\n" , reg_gfx_index.bitfields.se_index); |
749 | pr_debug("\t\t sh_ind is : %u\n" , reg_gfx_index.bitfields.sh_index); |
750 | pr_debug("\t\t sbw is : %u\n" , |
751 | reg_gfx_index.bitfields.sh_broadcast_writes); |
752 | |
753 | pr_debug("\t\t %30s\n" , "* * * * * * * * * * * * * * * * * *" ); |
754 | |
755 | return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd, |
756 | reg_gfx_index.u32All, |
757 | reg_sq_cmd.u32All); |
758 | } |
759 | |
760 | int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) |
761 | { |
762 | int status = 0; |
763 | unsigned int vmid; |
764 | union SQ_CMD_BITS reg_sq_cmd; |
765 | union GRBM_GFX_INDEX_BITS reg_gfx_index; |
766 | struct kfd_process_device *pdd; |
767 | struct dbg_wave_control_info wac_info; |
768 | int first_vmid_to_scan = dev->vm_info.first_vmid_kfd; |
769 | int last_vmid_to_scan = dev->vm_info.last_vmid_kfd; |
770 | |
771 | reg_sq_cmd.u32All = 0; |
772 | status = 0; |
773 | |
774 | wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS; |
775 | wac_info.operand = HSA_DBG_WAVEOP_KILL; |
776 | |
777 | pr_debug("Killing all process wavefronts\n" ); |
778 | |
779 | /* Scan all registers in the range ATC_VMID8_PASID_MAPPING .. |
780 | * ATC_VMID15_PASID_MAPPING |
781 | * to check which VMID the current process is mapped to. |
782 | */ |
783 | |
784 | for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { |
785 | if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid |
786 | (dev->kgd, vmid)) { |
787 | if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid |
788 | (dev->kgd, vmid) == p->pasid) { |
789 | pr_debug("Killing wave fronts of vmid %d and pasid %d\n" , |
790 | vmid, p->pasid); |
791 | break; |
792 | } |
793 | } |
794 | } |
795 | |
796 | if (vmid > last_vmid_to_scan) { |
797 | pr_err("Didn't find vmid for pasid %d\n" , p->pasid); |
798 | return -EFAULT; |
799 | } |
800 | |
801 | /* taking the VMID for that process on the safe way using PDD */ |
802 | pdd = kfd_get_process_device_data(dev, p); |
803 | if (!pdd) |
804 | return -EFAULT; |
805 | |
806 | status = dbgdev_wave_control_set_registers(&wac_info, ®_sq_cmd, |
807 | ®_gfx_index); |
808 | if (status != 0) |
809 | return -EINVAL; |
810 | |
811 | /* for non DIQ we need to patch the VMID: */ |
812 | reg_sq_cmd.bits.vm_id = vmid; |
813 | |
814 | dev->kfd2kgd->wave_control_execute(dev->kgd, |
815 | reg_gfx_index.u32All, |
816 | reg_sq_cmd.u32All); |
817 | |
818 | return 0; |
819 | } |
820 | |
821 | void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, |
822 | enum DBGDEV_TYPE type) |
823 | { |
824 | pdbgdev->dev = pdev; |
825 | pdbgdev->kq = NULL; |
826 | pdbgdev->type = type; |
827 | pdbgdev->pqm = NULL; |
828 | |
829 | switch (type) { |
830 | case DBGDEV_TYPE_NODIQ: |
831 | pdbgdev->dbgdev_register = dbgdev_register_nodiq; |
832 | pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq; |
833 | pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq; |
834 | pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq; |
835 | break; |
836 | case DBGDEV_TYPE_DIQ: |
837 | default: |
838 | pdbgdev->dbgdev_register = dbgdev_register_diq; |
839 | pdbgdev->dbgdev_unregister = dbgdev_unregister_diq; |
840 | pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq; |
841 | pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq; |
842 | break; |
843 | } |
844 | |
845 | } |
846 | |