1// SPDX-License-Identifier: GPL-2.0
2
3/*
4 * Copyright 2016-2022 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8#define pr_fmt(fmt) "habanalabs: " fmt
9
10#include <uapi/drm/habanalabs_accel.h>
11#include "habanalabs.h"
12
13#include <linux/pci.h>
14#include <linux/hwmon.h>
15#include <linux/vmalloc.h>
16
17#include <drm/drm_accel.h>
18#include <drm/drm_drv.h>
19
20#include <trace/events/habanalabs.h>
21
22#define HL_RESET_DELAY_USEC 10000 /* 10ms */
23
24#define HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC 30
25
26enum dma_alloc_type {
27 DMA_ALLOC_COHERENT,
28 DMA_ALLOC_POOL,
29};
30
31#define MEM_SCRUB_DEFAULT_VAL 0x1122334455667788
32
33/*
34 * hl_set_dram_bar- sets the bar to allow later access to address
35 *
36 * @hdev: pointer to habanalabs device structure.
37 * @addr: the address the caller wants to access.
38 * @region: the PCI region.
39 * @new_bar_region_base: the new BAR region base address.
40 *
41 * @return: the old BAR base address on success, U64_MAX for failure.
42 * The caller should set it back to the old address after use.
43 *
44 * In case the bar space does not cover the whole address space,
45 * the bar base address should be set to allow access to a given address.
46 * This function can be called also if the bar doesn't need to be set,
47 * in that case it just won't change the base.
48 */
49static u64 hl_set_dram_bar(struct hl_device *hdev, u64 addr, struct pci_mem_region *region,
50 u64 *new_bar_region_base)
51{
52 struct asic_fixed_properties *prop = &hdev->asic_prop;
53 u64 bar_base_addr, old_base;
54
55 if (is_power_of_2(n: prop->dram_pci_bar_size))
56 bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull);
57 else
58 bar_base_addr = region->region_base +
59 div64_u64(dividend: (addr - region->region_base), divisor: prop->dram_pci_bar_size) *
60 prop->dram_pci_bar_size;
61
62 old_base = hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr);
63
64 /* in case of success we need to update the new BAR base */
65 if ((old_base != U64_MAX) && new_bar_region_base)
66 *new_bar_region_base = bar_base_addr;
67
68 return old_base;
69}
70
71int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val,
72 enum debugfs_access_type acc_type, enum pci_region region_type, bool set_dram_bar)
73{
74 struct pci_mem_region *region = &hdev->pci_mem_region[region_type];
75 u64 old_base = 0, rc, bar_region_base = region->region_base;
76 void __iomem *acc_addr;
77
78 if (set_dram_bar) {
79 old_base = hl_set_dram_bar(hdev, addr, region, new_bar_region_base: &bar_region_base);
80 if (old_base == U64_MAX)
81 return -EIO;
82 }
83
84 acc_addr = hdev->pcie_bar[region->bar_id] + region->offset_in_bar +
85 (addr - bar_region_base);
86
87 switch (acc_type) {
88 case DEBUGFS_READ8:
89 *val = readb(addr: acc_addr);
90 break;
91 case DEBUGFS_WRITE8:
92 writeb(val: *val, addr: acc_addr);
93 break;
94 case DEBUGFS_READ32:
95 *val = readl(addr: acc_addr);
96 break;
97 case DEBUGFS_WRITE32:
98 writel(val: *val, addr: acc_addr);
99 break;
100 case DEBUGFS_READ64:
101 *val = readq(addr: acc_addr);
102 break;
103 case DEBUGFS_WRITE64:
104 writeq(val: *val, addr: acc_addr);
105 break;
106 }
107
108 if (set_dram_bar) {
109 rc = hl_set_dram_bar(hdev, addr: old_base, region, NULL);
110 if (rc == U64_MAX)
111 return -EIO;
112 }
113
114 return 0;
115}
116
117static void *hl_dma_alloc_common(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
118 gfp_t flag, enum dma_alloc_type alloc_type,
119 const char *caller)
120{
121 void *ptr = NULL;
122
123 switch (alloc_type) {
124 case DMA_ALLOC_COHERENT:
125 ptr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, size, dma_handle, flag);
126 break;
127 case DMA_ALLOC_POOL:
128 ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, size, flag, dma_handle);
129 break;
130 }
131
132 if (trace_habanalabs_dma_alloc_enabled() && !ZERO_OR_NULL_PTR(ptr))
133 trace_habanalabs_dma_alloc(dev: hdev->dev, cpu_addr: (u64) (uintptr_t) ptr, dma_addr: *dma_handle, size,
134 caller);
135
136 return ptr;
137}
138
139static void hl_asic_dma_free_common(struct hl_device *hdev, size_t size, void *cpu_addr,
140 dma_addr_t dma_handle, enum dma_alloc_type alloc_type,
141 const char *caller)
142{
143 /* this is needed to avoid warning on using freed pointer */
144 u64 store_cpu_addr = (u64) (uintptr_t) cpu_addr;
145
146 switch (alloc_type) {
147 case DMA_ALLOC_COHERENT:
148 hdev->asic_funcs->asic_dma_free_coherent(hdev, size, cpu_addr, dma_handle);
149 break;
150 case DMA_ALLOC_POOL:
151 hdev->asic_funcs->asic_dma_pool_free(hdev, cpu_addr, dma_handle);
152 break;
153 }
154
155 trace_habanalabs_dma_free(dev: hdev->dev, cpu_addr: store_cpu_addr, dma_addr: dma_handle, size, caller);
156}
157
158void *hl_asic_dma_alloc_coherent_caller(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
159 gfp_t flag, const char *caller)
160{
161 return hl_dma_alloc_common(hdev, size, dma_handle, flag, alloc_type: DMA_ALLOC_COHERENT, caller);
162}
163
164void hl_asic_dma_free_coherent_caller(struct hl_device *hdev, size_t size, void *cpu_addr,
165 dma_addr_t dma_handle, const char *caller)
166{
167 hl_asic_dma_free_common(hdev, size, cpu_addr, dma_handle, alloc_type: DMA_ALLOC_COHERENT, caller);
168}
169
170void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t mem_flags,
171 dma_addr_t *dma_handle, const char *caller)
172{
173 return hl_dma_alloc_common(hdev, size, dma_handle, flag: mem_flags, alloc_type: DMA_ALLOC_POOL, caller);
174}
175
176void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr,
177 const char *caller)
178{
179 hl_asic_dma_free_common(hdev, size: 0, cpu_addr: vaddr, dma_handle: dma_addr, alloc_type: DMA_ALLOC_POOL, caller);
180}
181
182void *hl_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle)
183{
184 return hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
185}
186
187void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr)
188{
189 hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, size, vaddr);
190}
191
192int hl_dma_map_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt,
193 enum dma_data_direction dir, const char *caller)
194{
195 struct asic_fixed_properties *prop = &hdev->asic_prop;
196 struct scatterlist *sg;
197 int rc, i;
198
199 rc = hdev->asic_funcs->dma_map_sgtable(hdev, sgt, dir);
200 if (rc)
201 return rc;
202
203 if (!trace_habanalabs_dma_map_page_enabled())
204 return 0;
205
206 for_each_sgtable_dma_sg(sgt, sg, i)
207 trace_habanalabs_dma_map_page(dev: hdev->dev,
208 page_to_phys(sg_page(sg)),
209 dma_addr: sg->dma_address - prop->device_dma_offset_for_host_access,
210#ifdef CONFIG_NEED_SG_DMA_LENGTH
211 len: sg->dma_length,
212#else
213 sg->length,
214#endif
215 dir, caller);
216
217 return 0;
218}
219
220int hl_asic_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt,
221 enum dma_data_direction dir)
222{
223 struct asic_fixed_properties *prop = &hdev->asic_prop;
224 struct scatterlist *sg;
225 int rc, i;
226
227 rc = dma_map_sgtable(dev: &hdev->pdev->dev, sgt, dir, attrs: 0);
228 if (rc)
229 return rc;
230
231 /* Shift to the device's base physical address of host memory if necessary */
232 if (prop->device_dma_offset_for_host_access)
233 for_each_sgtable_dma_sg(sgt, sg, i)
234 sg->dma_address += prop->device_dma_offset_for_host_access;
235
236 return 0;
237}
238
239void hl_dma_unmap_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt,
240 enum dma_data_direction dir, const char *caller)
241{
242 struct asic_fixed_properties *prop = &hdev->asic_prop;
243 struct scatterlist *sg;
244 int i;
245
246 hdev->asic_funcs->dma_unmap_sgtable(hdev, sgt, dir);
247
248 if (trace_habanalabs_dma_unmap_page_enabled()) {
249 for_each_sgtable_dma_sg(sgt, sg, i)
250 trace_habanalabs_dma_unmap_page(dev: hdev->dev, page_to_phys(sg_page(sg)),
251 dma_addr: sg->dma_address - prop->device_dma_offset_for_host_access,
252#ifdef CONFIG_NEED_SG_DMA_LENGTH
253 len: sg->dma_length,
254#else
255 sg->length,
256#endif
257 dir, caller);
258 }
259}
260
261void hl_asic_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt,
262 enum dma_data_direction dir)
263{
264 struct asic_fixed_properties *prop = &hdev->asic_prop;
265 struct scatterlist *sg;
266 int i;
267
268 /* Cancel the device's base physical address of host memory if necessary */
269 if (prop->device_dma_offset_for_host_access)
270 for_each_sgtable_dma_sg(sgt, sg, i)
271 sg->dma_address -= prop->device_dma_offset_for_host_access;
272
273 dma_unmap_sgtable(dev: &hdev->pdev->dev, sgt, dir, attrs: 0);
274}
275
276/*
277 * hl_access_cfg_region - access the config region
278 *
279 * @hdev: pointer to habanalabs device structure
280 * @addr: the address to access
281 * @val: the value to write from or read to
282 * @acc_type: the type of access (read/write 64/32)
283 */
284int hl_access_cfg_region(struct hl_device *hdev, u64 addr, u64 *val,
285 enum debugfs_access_type acc_type)
286{
287 struct pci_mem_region *cfg_region = &hdev->pci_mem_region[PCI_REGION_CFG];
288 u32 val_h, val_l;
289
290 if (!IS_ALIGNED(addr, sizeof(u32))) {
291 dev_err(hdev->dev, "address %#llx not a multiple of %zu\n", addr, sizeof(u32));
292 return -EINVAL;
293 }
294
295 switch (acc_type) {
296 case DEBUGFS_READ32:
297 *val = RREG32(addr - cfg_region->region_base);
298 break;
299 case DEBUGFS_WRITE32:
300 WREG32(addr - cfg_region->region_base, *val);
301 break;
302 case DEBUGFS_READ64:
303 val_l = RREG32(addr - cfg_region->region_base);
304 val_h = RREG32(addr + sizeof(u32) - cfg_region->region_base);
305
306 *val = (((u64) val_h) << 32) | val_l;
307 break;
308 case DEBUGFS_WRITE64:
309 WREG32(addr - cfg_region->region_base, lower_32_bits(*val));
310 WREG32(addr + sizeof(u32) - cfg_region->region_base, upper_32_bits(*val));
311 break;
312 default:
313 dev_err(hdev->dev, "access type %d is not supported\n", acc_type);
314 return -EOPNOTSUPP;
315 }
316
317 return 0;
318}
319
320/*
321 * hl_access_dev_mem - access device memory
322 *
323 * @hdev: pointer to habanalabs device structure
324 * @region_type: the type of the region the address belongs to
325 * @addr: the address to access
326 * @val: the value to write from or read to
327 * @acc_type: the type of access (r/w, 32/64)
328 */
329int hl_access_dev_mem(struct hl_device *hdev, enum pci_region region_type,
330 u64 addr, u64 *val, enum debugfs_access_type acc_type)
331{
332 switch (region_type) {
333 case PCI_REGION_CFG:
334 return hl_access_cfg_region(hdev, addr, val, acc_type);
335 case PCI_REGION_SRAM:
336 case PCI_REGION_DRAM:
337 return hl_access_sram_dram_region(hdev, addr, val, acc_type,
338 region_type, set_dram_bar: (region_type == PCI_REGION_DRAM));
339 default:
340 return -EFAULT;
341 }
342
343 return 0;
344}
345
346void hl_engine_data_sprintf(struct engines_data *e, const char *fmt, ...)
347{
348 va_list args;
349 int str_size;
350
351 va_start(args, fmt);
352 /* Calculate formatted string length. Assuming each string is null terminated, hence
353 * increment result by 1
354 */
355 str_size = vsnprintf(NULL, size: 0, fmt, args) + 1;
356 va_end(args);
357
358 if ((e->actual_size + str_size) < e->allocated_buf_size) {
359 va_start(args, fmt);
360 vsnprintf(buf: e->buf + e->actual_size, size: str_size, fmt, args);
361 va_end(args);
362 }
363
364 /* Need to update the size even when not updating destination buffer to get the exact size
365 * of all input strings
366 */
367 e->actual_size += str_size;
368}
369
370enum hl_device_status hl_device_status(struct hl_device *hdev)
371{
372 enum hl_device_status status;
373
374 if (hdev->device_fini_pending) {
375 status = HL_DEVICE_STATUS_MALFUNCTION;
376 } else if (hdev->reset_info.in_reset) {
377 if (hdev->reset_info.in_compute_reset)
378 status = HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE;
379 else
380 status = HL_DEVICE_STATUS_IN_RESET;
381 } else if (hdev->reset_info.needs_reset) {
382 status = HL_DEVICE_STATUS_NEEDS_RESET;
383 } else if (hdev->disabled) {
384 status = HL_DEVICE_STATUS_MALFUNCTION;
385 } else if (!hdev->init_done) {
386 status = HL_DEVICE_STATUS_IN_DEVICE_CREATION;
387 } else {
388 status = HL_DEVICE_STATUS_OPERATIONAL;
389 }
390
391 return status;
392}
393
394bool hl_device_operational(struct hl_device *hdev,
395 enum hl_device_status *status)
396{
397 enum hl_device_status current_status;
398
399 current_status = hl_device_status(hdev);
400 if (status)
401 *status = current_status;
402
403 switch (current_status) {
404 case HL_DEVICE_STATUS_MALFUNCTION:
405 case HL_DEVICE_STATUS_IN_RESET:
406 case HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE:
407 case HL_DEVICE_STATUS_NEEDS_RESET:
408 return false;
409 case HL_DEVICE_STATUS_OPERATIONAL:
410 case HL_DEVICE_STATUS_IN_DEVICE_CREATION:
411 default:
412 return true;
413 }
414}
415
416bool hl_ctrl_device_operational(struct hl_device *hdev,
417 enum hl_device_status *status)
418{
419 enum hl_device_status current_status;
420
421 current_status = hl_device_status(hdev);
422 if (status)
423 *status = current_status;
424
425 switch (current_status) {
426 case HL_DEVICE_STATUS_MALFUNCTION:
427 return false;
428 case HL_DEVICE_STATUS_IN_RESET:
429 case HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE:
430 case HL_DEVICE_STATUS_NEEDS_RESET:
431 case HL_DEVICE_STATUS_OPERATIONAL:
432 case HL_DEVICE_STATUS_IN_DEVICE_CREATION:
433 default:
434 return true;
435 }
436}
437
438static void print_idle_status_mask(struct hl_device *hdev, const char *message,
439 u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE])
440{
441 if (idle_mask[3])
442 dev_err(hdev->dev, "%s (mask %#llx_%016llx_%016llx_%016llx)\n",
443 message, idle_mask[3], idle_mask[2], idle_mask[1], idle_mask[0]);
444 else if (idle_mask[2])
445 dev_err(hdev->dev, "%s (mask %#llx_%016llx_%016llx)\n",
446 message, idle_mask[2], idle_mask[1], idle_mask[0]);
447 else if (idle_mask[1])
448 dev_err(hdev->dev, "%s (mask %#llx_%016llx)\n",
449 message, idle_mask[1], idle_mask[0]);
450 else
451 dev_err(hdev->dev, "%s (mask %#llx)\n", message, idle_mask[0]);
452}
453
454static void hpriv_release(struct kref *ref)
455{
456 u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
457 bool reset_device, device_is_idle = true;
458 struct hl_fpriv *hpriv;
459 struct hl_device *hdev;
460
461 hpriv = container_of(ref, struct hl_fpriv, refcount);
462
463 hdev = hpriv->hdev;
464
465 hdev->asic_funcs->send_device_activity(hdev, false);
466
467 hl_debugfs_remove_file(hpriv);
468
469 mutex_destroy(lock: &hpriv->ctx_lock);
470 mutex_destroy(lock: &hpriv->restore_phase_mutex);
471
472 /* There should be no memory buffers at this point and handles IDR can be destroyed */
473 hl_mem_mgr_idr_destroy(mmg: &hpriv->mem_mgr);
474
475 /* Device should be reset if reset-upon-device-release is enabled, or if there is a pending
476 * reset that waits for device release.
477 */
478 reset_device = hdev->reset_upon_device_release || hdev->reset_info.watchdog_active;
479
480 /* Check the device idle status and reset if not idle.
481 * Skip it if already in reset, or if device is going to be reset in any case.
482 */
483 if (!hdev->reset_info.in_reset && !reset_device && !hdev->pldm)
484 device_is_idle = hdev->asic_funcs->is_device_idle(hdev, idle_mask,
485 HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL);
486 if (!device_is_idle) {
487 print_idle_status_mask(hdev, message: "device is not idle after user context is closed",
488 idle_mask);
489 reset_device = true;
490 }
491
492 /* We need to remove the user from the list to make sure the reset process won't
493 * try to kill the user process. Because, if we got here, it means there are no
494 * more driver/device resources that the user process is occupying so there is
495 * no need to kill it
496 *
497 * However, we can't set the compute_ctx to NULL at this stage. This is to prevent
498 * a race between the release and opening the device again. We don't want to let
499 * a user open the device while there a reset is about to happen.
500 */
501 mutex_lock(&hdev->fpriv_list_lock);
502 list_del(entry: &hpriv->dev_node);
503 mutex_unlock(lock: &hdev->fpriv_list_lock);
504
505 put_pid(pid: hpriv->taskpid);
506
507 if (reset_device) {
508 hl_device_reset(hdev, HL_DRV_RESET_DEV_RELEASE);
509 } else {
510 /* Scrubbing is handled within hl_device_reset(), so here need to do it directly */
511 int rc = hdev->asic_funcs->scrub_device_mem(hdev);
512
513 if (rc) {
514 dev_err(hdev->dev, "failed to scrub memory from hpriv release (%d)\n", rc);
515 hl_device_reset(hdev, HL_DRV_RESET_HARD);
516 }
517 }
518
519 /* Now we can mark the compute_ctx as not active. Even if a reset is running in a different
520 * thread, we don't care because the in_reset is marked so if a user will try to open
521 * the device it will fail on that, even if compute_ctx is false.
522 */
523 mutex_lock(&hdev->fpriv_list_lock);
524 hdev->is_compute_ctx_active = false;
525 mutex_unlock(lock: &hdev->fpriv_list_lock);
526
527 hdev->compute_ctx_in_release = 0;
528
529 /* release the eventfd */
530 if (hpriv->notifier_event.eventfd)
531 eventfd_ctx_put(ctx: hpriv->notifier_event.eventfd);
532
533 mutex_destroy(lock: &hpriv->notifier_event.lock);
534
535 kfree(objp: hpriv);
536}
537
538void hl_hpriv_get(struct hl_fpriv *hpriv)
539{
540 kref_get(kref: &hpriv->refcount);
541}
542
543int hl_hpriv_put(struct hl_fpriv *hpriv)
544{
545 return kref_put(kref: &hpriv->refcount, release: hpriv_release);
546}
547
548static void print_device_in_use_info(struct hl_device *hdev, const char *message)
549{
550 u32 active_cs_num, dmabuf_export_cnt;
551 bool unknown_reason = true;
552 char buf[128];
553 size_t size;
554 int offset;
555
556 size = sizeof(buf);
557 offset = 0;
558
559 active_cs_num = hl_get_active_cs_num(hdev);
560 if (active_cs_num) {
561 unknown_reason = false;
562 offset += scnprintf(buf: buf + offset, size: size - offset, fmt: " [%u active CS]", active_cs_num);
563 }
564
565 dmabuf_export_cnt = atomic_read(v: &hdev->dmabuf_export_cnt);
566 if (dmabuf_export_cnt) {
567 unknown_reason = false;
568 offset += scnprintf(buf: buf + offset, size: size - offset, fmt: " [%u exported dma-buf]",
569 dmabuf_export_cnt);
570 }
571
572 if (unknown_reason)
573 scnprintf(buf: buf + offset, size: size - offset, fmt: " [unknown reason]");
574
575 dev_notice(hdev->dev, "%s%s\n", message, buf);
576}
577
578/*
579 * hl_device_release() - release function for habanalabs device.
580 * @ddev: pointer to DRM device structure.
581 * @file: pointer to DRM file private data structure.
582 *
583 * Called when process closes an habanalabs device
584 */
585void hl_device_release(struct drm_device *ddev, struct drm_file *file_priv)
586{
587 struct hl_fpriv *hpriv = file_priv->driver_priv;
588 struct hl_device *hdev = to_hl_device(ddev);
589
590 if (!hdev) {
591 pr_crit("Closing FD after device was removed. Memory leak will occur and it is advised to reboot.\n");
592 put_pid(pid: hpriv->taskpid);
593 }
594
595 hl_ctx_mgr_fini(hdev, mgr: &hpriv->ctx_mgr);
596
597 /* Memory buffers might be still in use at this point and thus the handles IDR destruction
598 * is postponed to hpriv_release().
599 */
600 hl_mem_mgr_fini(mmg: &hpriv->mem_mgr);
601
602 hdev->compute_ctx_in_release = 1;
603
604 if (!hl_hpriv_put(hpriv)) {
605 print_device_in_use_info(hdev, message: "User process closed FD but device still in use");
606 hl_device_reset(hdev, HL_DRV_RESET_HARD);
607 }
608
609 hdev->last_open_session_duration_jif = jiffies - hdev->last_successful_open_jif;
610}
611
612static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
613{
614 struct hl_fpriv *hpriv = filp->private_data;
615 struct hl_device *hdev = hpriv->hdev;
616
617 filp->private_data = NULL;
618
619 if (!hdev) {
620 pr_err("Closing FD after device was removed\n");
621 goto out;
622 }
623
624 mutex_lock(&hdev->fpriv_ctrl_list_lock);
625 list_del(entry: &hpriv->dev_node);
626 mutex_unlock(lock: &hdev->fpriv_ctrl_list_lock);
627out:
628 put_pid(pid: hpriv->taskpid);
629
630 kfree(objp: hpriv);
631
632 return 0;
633}
634
635static int __hl_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
636{
637 struct hl_device *hdev = hpriv->hdev;
638 unsigned long vm_pgoff;
639
640 if (!hdev) {
641 pr_err_ratelimited("Trying to mmap after device was removed! Please close FD\n");
642 return -ENODEV;
643 }
644
645 vm_pgoff = vma->vm_pgoff;
646
647 switch (vm_pgoff & HL_MMAP_TYPE_MASK) {
648 case HL_MMAP_TYPE_BLOCK:
649 vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff);
650 return hl_hw_block_mmap(hpriv, vma);
651
652 case HL_MMAP_TYPE_CB:
653 case HL_MMAP_TYPE_TS_BUFF:
654 return hl_mem_mgr_mmap(mmg: &hpriv->mem_mgr, vma, NULL);
655 }
656 return -EINVAL;
657}
658
659/*
660 * hl_mmap - mmap function for habanalabs device
661 *
662 * @*filp: pointer to file structure
663 * @*vma: pointer to vm_area_struct of the process
664 *
665 * Called when process does an mmap on habanalabs device. Call the relevant mmap
666 * function at the end of the common code.
667 */
668int hl_mmap(struct file *filp, struct vm_area_struct *vma)
669{
670 struct drm_file *file_priv = filp->private_data;
671 struct hl_fpriv *hpriv = file_priv->driver_priv;
672
673 return __hl_mmap(hpriv, vma);
674}
675
676static const struct file_operations hl_ctrl_ops = {
677 .owner = THIS_MODULE,
678 .open = hl_device_open_ctrl,
679 .release = hl_device_release_ctrl,
680 .unlocked_ioctl = hl_ioctl_control,
681 .compat_ioctl = hl_ioctl_control
682};
683
684static void device_release_func(struct device *dev)
685{
686 kfree(objp: dev);
687}
688
689/*
690 * device_init_cdev - Initialize cdev and device for habanalabs device
691 *
692 * @hdev: pointer to habanalabs device structure
693 * @class: pointer to the class object of the device
694 * @minor: minor number of the specific device
695 * @fops: file operations to install for this device
696 * @name: name of the device as it will appear in the filesystem
697 * @cdev: pointer to the char device object that will be initialized
698 * @dev: pointer to the device object that will be initialized
699 *
700 * Initialize a cdev and a Linux device for habanalabs's device.
701 */
702static int device_init_cdev(struct hl_device *hdev, const struct class *class,
703 int minor, const struct file_operations *fops,
704 char *name, struct cdev *cdev,
705 struct device **dev)
706{
707 cdev_init(cdev, fops);
708 cdev->owner = THIS_MODULE;
709
710 *dev = kzalloc(size: sizeof(**dev), GFP_KERNEL);
711 if (!*dev)
712 return -ENOMEM;
713
714 device_initialize(dev: *dev);
715 (*dev)->devt = MKDEV(hdev->major, minor);
716 (*dev)->class = class;
717 (*dev)->release = device_release_func;
718 dev_set_drvdata(dev: *dev, data: hdev);
719 dev_set_name(dev: *dev, name: "%s", name);
720
721 return 0;
722}
723
724static int cdev_sysfs_debugfs_add(struct hl_device *hdev)
725{
726 const struct class *accel_class = hdev->drm.accel->kdev->class;
727 char name[32];
728 int rc;
729
730 hdev->cdev_idx = hdev->drm.accel->index;
731
732 /* Initialize cdev and device structures for the control device */
733 snprintf(buf: name, size: sizeof(name), fmt: "accel_controlD%d", hdev->cdev_idx);
734 rc = device_init_cdev(hdev, class: accel_class, minor: hdev->cdev_idx, fops: &hl_ctrl_ops, name,
735 cdev: &hdev->cdev_ctrl, dev: &hdev->dev_ctrl);
736 if (rc)
737 return rc;
738
739 rc = cdev_device_add(cdev: &hdev->cdev_ctrl, dev: hdev->dev_ctrl);
740 if (rc) {
741 dev_err(hdev->dev_ctrl,
742 "failed to add an accel control char device to the system\n");
743 goto free_ctrl_device;
744 }
745
746 rc = hl_sysfs_init(hdev);
747 if (rc) {
748 dev_err(hdev->dev, "failed to initialize sysfs\n");
749 goto delete_ctrl_cdev_device;
750 }
751
752 hl_debugfs_add_device(hdev);
753
754 hdev->cdev_sysfs_debugfs_created = true;
755
756 return 0;
757
758delete_ctrl_cdev_device:
759 cdev_device_del(cdev: &hdev->cdev_ctrl, dev: hdev->dev_ctrl);
760free_ctrl_device:
761 put_device(dev: hdev->dev_ctrl);
762 return rc;
763}
764
765static void cdev_sysfs_debugfs_remove(struct hl_device *hdev)
766{
767 if (!hdev->cdev_sysfs_debugfs_created)
768 return;
769
770 hl_sysfs_fini(hdev);
771
772 cdev_device_del(cdev: &hdev->cdev_ctrl, dev: hdev->dev_ctrl);
773 put_device(dev: hdev->dev_ctrl);
774}
775
776static void device_hard_reset_pending(struct work_struct *work)
777{
778 struct hl_device_reset_work *device_reset_work =
779 container_of(work, struct hl_device_reset_work, reset_work.work);
780 struct hl_device *hdev = device_reset_work->hdev;
781 u32 flags;
782 int rc;
783
784 flags = device_reset_work->flags | HL_DRV_RESET_FROM_RESET_THR;
785
786 rc = hl_device_reset(hdev, flags);
787
788 if ((rc == -EBUSY) && !hdev->device_fini_pending) {
789 struct hl_ctx *ctx = hl_get_compute_ctx(hdev);
790
791 if (ctx) {
792 /* The read refcount value should subtracted by one, because the read is
793 * protected with hl_get_compute_ctx().
794 */
795 dev_info(hdev->dev,
796 "Could not reset device (compute_ctx refcount %u). will try again in %u seconds",
797 kref_read(&ctx->refcount) - 1, HL_PENDING_RESET_PER_SEC);
798 hl_ctx_put(ctx);
799 } else {
800 dev_info(hdev->dev, "Could not reset device. will try again in %u seconds",
801 HL_PENDING_RESET_PER_SEC);
802 }
803
804 queue_delayed_work(wq: hdev->reset_wq, dwork: &device_reset_work->reset_work,
805 delay: msecs_to_jiffies(HL_PENDING_RESET_PER_SEC * 1000));
806 }
807}
808
809static void device_release_watchdog_func(struct work_struct *work)
810{
811 struct hl_device_reset_work *watchdog_work =
812 container_of(work, struct hl_device_reset_work, reset_work.work);
813 struct hl_device *hdev = watchdog_work->hdev;
814 u32 flags;
815
816 dev_dbg(hdev->dev, "Device wasn't released in time. Initiate hard-reset.\n");
817
818 flags = watchdog_work->flags | HL_DRV_RESET_HARD | HL_DRV_RESET_FROM_WD_THR;
819
820 hl_device_reset(hdev, flags);
821}
822
823/*
824 * device_early_init - do some early initialization for the habanalabs device
825 *
826 * @hdev: pointer to habanalabs device structure
827 *
828 * Install the relevant function pointers and call the early_init function,
829 * if such a function exists
830 */
831static int device_early_init(struct hl_device *hdev)
832{
833 int i, rc;
834 char workq_name[32];
835
836 switch (hdev->asic_type) {
837 case ASIC_GOYA:
838 goya_set_asic_funcs(hdev);
839 strscpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
840 break;
841 case ASIC_GAUDI:
842 gaudi_set_asic_funcs(hdev);
843 strscpy(hdev->asic_name, "GAUDI", sizeof(hdev->asic_name));
844 break;
845 case ASIC_GAUDI_SEC:
846 gaudi_set_asic_funcs(hdev);
847 strscpy(hdev->asic_name, "GAUDI SEC", sizeof(hdev->asic_name));
848 break;
849 case ASIC_GAUDI2:
850 gaudi2_set_asic_funcs(hdev);
851 strscpy(hdev->asic_name, "GAUDI2", sizeof(hdev->asic_name));
852 break;
853 case ASIC_GAUDI2B:
854 gaudi2_set_asic_funcs(hdev);
855 strscpy(hdev->asic_name, "GAUDI2B", sizeof(hdev->asic_name));
856 break;
857 case ASIC_GAUDI2C:
858 gaudi2_set_asic_funcs(hdev);
859 strscpy(hdev->asic_name, "GAUDI2C", sizeof(hdev->asic_name));
860 break;
861 default:
862 dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
863 hdev->asic_type);
864 return -EINVAL;
865 }
866
867 rc = hdev->asic_funcs->early_init(hdev);
868 if (rc)
869 return rc;
870
871 rc = hl_asid_init(hdev);
872 if (rc)
873 goto early_fini;
874
875 if (hdev->asic_prop.completion_queues_count) {
876 hdev->cq_wq = kcalloc(n: hdev->asic_prop.completion_queues_count,
877 size: sizeof(struct workqueue_struct *),
878 GFP_KERNEL);
879 if (!hdev->cq_wq) {
880 rc = -ENOMEM;
881 goto asid_fini;
882 }
883 }
884
885 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
886 snprintf(buf: workq_name, size: 32, fmt: "hl%u-free-jobs-%u", hdev->cdev_idx, (u32) i);
887 hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
888 if (hdev->cq_wq[i] == NULL) {
889 dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
890 rc = -ENOMEM;
891 goto free_cq_wq;
892 }
893 }
894
895 snprintf(buf: workq_name, size: 32, fmt: "hl%u-events", hdev->cdev_idx);
896 hdev->eq_wq = create_singlethread_workqueue(workq_name);
897 if (hdev->eq_wq == NULL) {
898 dev_err(hdev->dev, "Failed to allocate EQ workqueue\n");
899 rc = -ENOMEM;
900 goto free_cq_wq;
901 }
902
903 snprintf(buf: workq_name, size: 32, fmt: "hl%u-cs-completions", hdev->cdev_idx);
904 hdev->cs_cmplt_wq = alloc_workqueue(fmt: workq_name, flags: WQ_UNBOUND, max_active: 0);
905 if (!hdev->cs_cmplt_wq) {
906 dev_err(hdev->dev,
907 "Failed to allocate CS completions workqueue\n");
908 rc = -ENOMEM;
909 goto free_eq_wq;
910 }
911
912 snprintf(buf: workq_name, size: 32, fmt: "hl%u-ts-free-obj", hdev->cdev_idx);
913 hdev->ts_free_obj_wq = alloc_workqueue(fmt: workq_name, flags: WQ_UNBOUND, max_active: 0);
914 if (!hdev->ts_free_obj_wq) {
915 dev_err(hdev->dev,
916 "Failed to allocate Timestamp registration free workqueue\n");
917 rc = -ENOMEM;
918 goto free_cs_cmplt_wq;
919 }
920
921 snprintf(buf: workq_name, size: 32, fmt: "hl%u-prefetch", hdev->cdev_idx);
922 hdev->prefetch_wq = alloc_workqueue(fmt: workq_name, flags: WQ_UNBOUND, max_active: 0);
923 if (!hdev->prefetch_wq) {
924 dev_err(hdev->dev, "Failed to allocate MMU prefetch workqueue\n");
925 rc = -ENOMEM;
926 goto free_ts_free_wq;
927 }
928
929 hdev->hl_chip_info = kzalloc(size: sizeof(struct hwmon_chip_info), GFP_KERNEL);
930 if (!hdev->hl_chip_info) {
931 rc = -ENOMEM;
932 goto free_prefetch_wq;
933 }
934
935 rc = hl_mmu_if_set_funcs(hdev);
936 if (rc)
937 goto free_chip_info;
938
939 hl_mem_mgr_init(dev: hdev->dev, mmg: &hdev->kernel_mem_mgr);
940
941 snprintf(buf: workq_name, size: 32, fmt: "hl%u_device_reset", hdev->cdev_idx);
942 hdev->reset_wq = create_singlethread_workqueue(workq_name);
943 if (!hdev->reset_wq) {
944 rc = -ENOMEM;
945 dev_err(hdev->dev, "Failed to create device reset WQ\n");
946 goto free_cb_mgr;
947 }
948
949 INIT_DELAYED_WORK(&hdev->device_reset_work.reset_work, device_hard_reset_pending);
950 hdev->device_reset_work.hdev = hdev;
951 hdev->device_fini_pending = 0;
952
953 INIT_DELAYED_WORK(&hdev->device_release_watchdog_work.reset_work,
954 device_release_watchdog_func);
955 hdev->device_release_watchdog_work.hdev = hdev;
956
957 mutex_init(&hdev->send_cpu_message_lock);
958 mutex_init(&hdev->debug_lock);
959 INIT_LIST_HEAD(list: &hdev->cs_mirror_list);
960 spin_lock_init(&hdev->cs_mirror_lock);
961 spin_lock_init(&hdev->reset_info.lock);
962 INIT_LIST_HEAD(list: &hdev->fpriv_list);
963 INIT_LIST_HEAD(list: &hdev->fpriv_ctrl_list);
964 mutex_init(&hdev->fpriv_list_lock);
965 mutex_init(&hdev->fpriv_ctrl_list_lock);
966 mutex_init(&hdev->clk_throttling.lock);
967
968 return 0;
969
970free_cb_mgr:
971 hl_mem_mgr_fini(mmg: &hdev->kernel_mem_mgr);
972 hl_mem_mgr_idr_destroy(mmg: &hdev->kernel_mem_mgr);
973free_chip_info:
974 kfree(objp: hdev->hl_chip_info);
975free_prefetch_wq:
976 destroy_workqueue(wq: hdev->prefetch_wq);
977free_ts_free_wq:
978 destroy_workqueue(wq: hdev->ts_free_obj_wq);
979free_cs_cmplt_wq:
980 destroy_workqueue(wq: hdev->cs_cmplt_wq);
981free_eq_wq:
982 destroy_workqueue(wq: hdev->eq_wq);
983free_cq_wq:
984 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
985 if (hdev->cq_wq[i])
986 destroy_workqueue(wq: hdev->cq_wq[i]);
987 kfree(objp: hdev->cq_wq);
988asid_fini:
989 hl_asid_fini(hdev);
990early_fini:
991 if (hdev->asic_funcs->early_fini)
992 hdev->asic_funcs->early_fini(hdev);
993
994 return rc;
995}
996
997/*
998 * device_early_fini - finalize all that was done in device_early_init
999 *
1000 * @hdev: pointer to habanalabs device structure
1001 *
1002 */
1003static void device_early_fini(struct hl_device *hdev)
1004{
1005 int i;
1006
1007 mutex_destroy(lock: &hdev->debug_lock);
1008 mutex_destroy(lock: &hdev->send_cpu_message_lock);
1009
1010 mutex_destroy(lock: &hdev->fpriv_list_lock);
1011 mutex_destroy(lock: &hdev->fpriv_ctrl_list_lock);
1012
1013 mutex_destroy(lock: &hdev->clk_throttling.lock);
1014
1015 hl_mem_mgr_fini(mmg: &hdev->kernel_mem_mgr);
1016 hl_mem_mgr_idr_destroy(mmg: &hdev->kernel_mem_mgr);
1017
1018 kfree(objp: hdev->hl_chip_info);
1019
1020 destroy_workqueue(wq: hdev->prefetch_wq);
1021 destroy_workqueue(wq: hdev->ts_free_obj_wq);
1022 destroy_workqueue(wq: hdev->cs_cmplt_wq);
1023 destroy_workqueue(wq: hdev->eq_wq);
1024 destroy_workqueue(wq: hdev->reset_wq);
1025
1026 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1027 destroy_workqueue(wq: hdev->cq_wq[i]);
1028 kfree(objp: hdev->cq_wq);
1029
1030 hl_asid_fini(hdev);
1031
1032 if (hdev->asic_funcs->early_fini)
1033 hdev->asic_funcs->early_fini(hdev);
1034}
1035
1036static bool is_pci_link_healthy(struct hl_device *hdev)
1037{
1038 u16 device_id;
1039
1040 if (!hdev->pdev)
1041 return false;
1042
1043 pci_read_config_word(dev: hdev->pdev, PCI_DEVICE_ID, val: &device_id);
1044
1045 return (device_id == hdev->pdev->device);
1046}
1047
1048static int hl_device_eq_heartbeat_check(struct hl_device *hdev)
1049{
1050 struct asic_fixed_properties *prop = &hdev->asic_prop;
1051
1052 if (!prop->cpucp_info.eq_health_check_supported)
1053 return 0;
1054
1055 if (hdev->eq_heartbeat_received) {
1056 hdev->eq_heartbeat_received = false;
1057 } else {
1058 dev_err(hdev->dev, "EQ heartbeat event was not received!\n");
1059 return -EIO;
1060 }
1061
1062 return 0;
1063}
1064
1065static void hl_device_heartbeat(struct work_struct *work)
1066{
1067 struct hl_device *hdev = container_of(work, struct hl_device,
1068 work_heartbeat.work);
1069 struct hl_info_fw_err_info info = {0};
1070 u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
1071
1072 /* Start heartbeat checks only after driver has enabled events from FW */
1073 if (!hl_device_operational(hdev, NULL) || !hdev->init_done)
1074 goto reschedule;
1075
1076 /*
1077 * For EQ health check need to check if driver received the heartbeat eq event
1078 * in order to validate the eq is working.
1079 * Only if both the EQ is healthy and we managed to send the next heartbeat reschedule.
1080 */
1081 if ((!hl_device_eq_heartbeat_check(hdev)) && (!hdev->asic_funcs->send_heartbeat(hdev)))
1082 goto reschedule;
1083
1084 if (hl_device_operational(hdev, NULL))
1085 dev_err(hdev->dev, "Device heartbeat failed! PCI link is %s\n",
1086 is_pci_link_healthy(hdev) ? "healthy" : "broken");
1087
1088 info.err_type = HL_INFO_FW_HEARTBEAT_ERR;
1089 info.event_mask = &event_mask;
1090 hl_handle_fw_err(hdev, info: &info);
1091 hl_device_cond_reset(hdev, HL_DRV_RESET_HARD | HL_DRV_RESET_HEARTBEAT, event_mask);
1092
1093 return;
1094
1095reschedule:
1096 /*
1097 * prev_reset_trigger tracks consecutive fatal h/w errors until first
1098 * heartbeat immediately post reset.
1099 * If control reached here, then at least one heartbeat work has been
1100 * scheduled since last reset/init cycle.
1101 * So if the device is not already in reset cycle, reset the flag
1102 * prev_reset_trigger as no reset occurred with HL_DRV_RESET_FW_FATAL_ERR
1103 * status for at least one heartbeat. From this point driver restarts
1104 * tracking future consecutive fatal errors.
1105 */
1106 if (!hdev->reset_info.in_reset)
1107 hdev->reset_info.prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
1108
1109 schedule_delayed_work(dwork: &hdev->work_heartbeat,
1110 delay: usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
1111}
1112
1113/*
1114 * device_late_init - do late stuff initialization for the habanalabs device
1115 *
1116 * @hdev: pointer to habanalabs device structure
1117 *
1118 * Do stuff that either needs the device H/W queues to be active or needs
1119 * to happen after all the rest of the initialization is finished
1120 */
1121static int device_late_init(struct hl_device *hdev)
1122{
1123 int rc;
1124
1125 if (hdev->asic_funcs->late_init) {
1126 rc = hdev->asic_funcs->late_init(hdev);
1127 if (rc) {
1128 dev_err(hdev->dev,
1129 "failed late initialization for the H/W\n");
1130 return rc;
1131 }
1132 }
1133
1134 hdev->high_pll = hdev->asic_prop.high_pll;
1135
1136 if (hdev->heartbeat) {
1137 /*
1138 * Before scheduling the heartbeat driver will check if eq event has received.
1139 * for the first schedule we need to set the indication as true then for the next
1140 * one this indication will be true only if eq event was sent by FW.
1141 */
1142 hdev->eq_heartbeat_received = true;
1143
1144 INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
1145
1146 schedule_delayed_work(dwork: &hdev->work_heartbeat,
1147 delay: usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
1148 }
1149
1150 hdev->late_init_done = true;
1151
1152 return 0;
1153}
1154
1155/*
1156 * device_late_fini - finalize all that was done in device_late_init
1157 *
1158 * @hdev: pointer to habanalabs device structure
1159 *
1160 */
1161static void device_late_fini(struct hl_device *hdev)
1162{
1163 if (!hdev->late_init_done)
1164 return;
1165
1166 if (hdev->heartbeat)
1167 cancel_delayed_work_sync(dwork: &hdev->work_heartbeat);
1168
1169 if (hdev->asic_funcs->late_fini)
1170 hdev->asic_funcs->late_fini(hdev);
1171
1172 hdev->late_init_done = false;
1173}
1174
1175int hl_device_utilization(struct hl_device *hdev, u32 *utilization)
1176{
1177 u64 max_power, curr_power, dc_power, dividend, divisor;
1178 int rc;
1179
1180 max_power = hdev->max_power;
1181 dc_power = hdev->asic_prop.dc_power_default;
1182 divisor = max_power - dc_power;
1183 if (!divisor) {
1184 dev_warn(hdev->dev, "device utilization is not supported\n");
1185 return -EOPNOTSUPP;
1186 }
1187 rc = hl_fw_cpucp_power_get(hdev, power: &curr_power);
1188
1189 if (rc)
1190 return rc;
1191
1192 curr_power = clamp(curr_power, dc_power, max_power);
1193
1194 dividend = (curr_power - dc_power) * 100;
1195 *utilization = (u32) div_u64(dividend, divisor);
1196
1197 return 0;
1198}
1199
1200int hl_device_set_debug_mode(struct hl_device *hdev, struct hl_ctx *ctx, bool enable)
1201{
1202 int rc = 0;
1203
1204 mutex_lock(&hdev->debug_lock);
1205
1206 if (!enable) {
1207 if (!hdev->in_debug) {
1208 dev_err(hdev->dev,
1209 "Failed to disable debug mode because device was not in debug mode\n");
1210 rc = -EFAULT;
1211 goto out;
1212 }
1213
1214 if (!hdev->reset_info.hard_reset_pending)
1215 hdev->asic_funcs->halt_coresight(hdev, ctx);
1216
1217 hdev->in_debug = 0;
1218
1219 goto out;
1220 }
1221
1222 if (hdev->in_debug) {
1223 dev_err(hdev->dev,
1224 "Failed to enable debug mode because device is already in debug mode\n");
1225 rc = -EFAULT;
1226 goto out;
1227 }
1228
1229 hdev->in_debug = 1;
1230
1231out:
1232 mutex_unlock(lock: &hdev->debug_lock);
1233
1234 return rc;
1235}
1236
1237static void take_release_locks(struct hl_device *hdev)
1238{
1239 /* Flush anyone that is inside the critical section of enqueue
1240 * jobs to the H/W
1241 */
1242 hdev->asic_funcs->hw_queues_lock(hdev);
1243 hdev->asic_funcs->hw_queues_unlock(hdev);
1244
1245 /* Flush processes that are sending message to CPU */
1246 mutex_lock(&hdev->send_cpu_message_lock);
1247 mutex_unlock(lock: &hdev->send_cpu_message_lock);
1248
1249 /* Flush anyone that is inside device open */
1250 mutex_lock(&hdev->fpriv_list_lock);
1251 mutex_unlock(lock: &hdev->fpriv_list_lock);
1252 mutex_lock(&hdev->fpriv_ctrl_list_lock);
1253 mutex_unlock(lock: &hdev->fpriv_ctrl_list_lock);
1254}
1255
1256static void hl_abort_waiting_for_completions(struct hl_device *hdev)
1257{
1258 hl_abort_waiting_for_cs_completions(hdev);
1259
1260 /* Release all pending user interrupts, each pending user interrupt
1261 * holds a reference to a user context.
1262 */
1263 hl_release_pending_user_interrupts(hdev);
1264}
1265
1266static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset,
1267 bool skip_wq_flush)
1268{
1269 if (hard_reset)
1270 device_late_fini(hdev);
1271
1272 /*
1273 * Halt the engines and disable interrupts so we won't get any more
1274 * completions from H/W and we won't have any accesses from the
1275 * H/W to the host machine
1276 */
1277 hdev->asic_funcs->halt_engines(hdev, hard_reset, fw_reset);
1278
1279 /* Go over all the queues, release all CS and their jobs */
1280 hl_cs_rollback_all(hdev, skip_wq_flush);
1281
1282 /* flush the MMU prefetch workqueue */
1283 flush_workqueue(hdev->prefetch_wq);
1284
1285 hl_abort_waiting_for_completions(hdev);
1286}
1287
1288/*
1289 * hl_device_suspend - initiate device suspend
1290 *
1291 * @hdev: pointer to habanalabs device structure
1292 *
1293 * Puts the hw in the suspend state (all asics).
1294 * Returns 0 for success or an error on failure.
1295 * Called at driver suspend.
1296 */
1297int hl_device_suspend(struct hl_device *hdev)
1298{
1299 int rc;
1300
1301 pci_save_state(dev: hdev->pdev);
1302
1303 /* Block future CS/VM/JOB completion operations */
1304 spin_lock(lock: &hdev->reset_info.lock);
1305 if (hdev->reset_info.in_reset) {
1306 spin_unlock(lock: &hdev->reset_info.lock);
1307 dev_err(hdev->dev, "Can't suspend while in reset\n");
1308 return -EIO;
1309 }
1310 hdev->reset_info.in_reset = 1;
1311 spin_unlock(lock: &hdev->reset_info.lock);
1312
1313 /* This blocks all other stuff that is not blocked by in_reset */
1314 hdev->disabled = true;
1315
1316 take_release_locks(hdev);
1317
1318 rc = hdev->asic_funcs->suspend(hdev);
1319 if (rc)
1320 dev_err(hdev->dev,
1321 "Failed to disable PCI access of device CPU\n");
1322
1323 /* Shut down the device */
1324 pci_disable_device(dev: hdev->pdev);
1325 pci_set_power_state(dev: hdev->pdev, PCI_D3hot);
1326
1327 return 0;
1328}
1329
1330/*
1331 * hl_device_resume - initiate device resume
1332 *
1333 * @hdev: pointer to habanalabs device structure
1334 *
1335 * Bring the hw back to operating state (all asics).
1336 * Returns 0 for success or an error on failure.
1337 * Called at driver resume.
1338 */
1339int hl_device_resume(struct hl_device *hdev)
1340{
1341 int rc;
1342
1343 pci_set_power_state(dev: hdev->pdev, PCI_D0);
1344 pci_restore_state(dev: hdev->pdev);
1345 rc = pci_enable_device_mem(dev: hdev->pdev);
1346 if (rc) {
1347 dev_err(hdev->dev,
1348 "Failed to enable PCI device in resume\n");
1349 return rc;
1350 }
1351
1352 pci_set_master(dev: hdev->pdev);
1353
1354 rc = hdev->asic_funcs->resume(hdev);
1355 if (rc) {
1356 dev_err(hdev->dev, "Failed to resume device after suspend\n");
1357 goto disable_device;
1358 }
1359
1360
1361 /* 'in_reset' was set to true during suspend, now we must clear it in order
1362 * for hard reset to be performed
1363 */
1364 spin_lock(lock: &hdev->reset_info.lock);
1365 hdev->reset_info.in_reset = 0;
1366 spin_unlock(lock: &hdev->reset_info.lock);
1367
1368 rc = hl_device_reset(hdev, HL_DRV_RESET_HARD);
1369 if (rc) {
1370 dev_err(hdev->dev, "Failed to reset device during resume\n");
1371 goto disable_device;
1372 }
1373
1374 return 0;
1375
1376disable_device:
1377 pci_disable_device(dev: hdev->pdev);
1378
1379 return rc;
1380}
1381
1382static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool control_dev)
1383{
1384 struct task_struct *task = NULL;
1385 struct list_head *hpriv_list;
1386 struct hl_fpriv *hpriv;
1387 struct mutex *hpriv_lock;
1388 u32 pending_cnt;
1389
1390 hpriv_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
1391 hpriv_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
1392
1393 /* Giving time for user to close FD, and for processes that are inside
1394 * hl_device_open to finish
1395 */
1396 if (!list_empty(head: hpriv_list))
1397 ssleep(seconds: 1);
1398
1399 if (timeout) {
1400 pending_cnt = timeout;
1401 } else {
1402 if (hdev->process_kill_trial_cnt) {
1403 /* Processes have been already killed */
1404 pending_cnt = 1;
1405 goto wait_for_processes;
1406 } else {
1407 /* Wait a small period after process kill */
1408 pending_cnt = HL_PENDING_RESET_PER_SEC;
1409 }
1410 }
1411
1412 mutex_lock(hpriv_lock);
1413
1414 /* This section must be protected because we are dereferencing
1415 * pointers that are freed if the process exits
1416 */
1417 list_for_each_entry(hpriv, hpriv_list, dev_node) {
1418 task = get_pid_task(pid: hpriv->taskpid, PIDTYPE_PID);
1419 if (task) {
1420 dev_info(hdev->dev, "Killing user process pid=%d\n",
1421 task_pid_nr(task));
1422 send_sig(SIGKILL, task, 1);
1423 usleep_range(min: 1000, max: 10000);
1424
1425 put_task_struct(t: task);
1426 } else {
1427 dev_dbg(hdev->dev,
1428 "Can't get task struct for user process %d, process was killed from outside the driver\n",
1429 pid_nr(hpriv->taskpid));
1430 }
1431 }
1432
1433 mutex_unlock(lock: hpriv_lock);
1434
1435 /*
1436 * We killed the open users, but that doesn't mean they are closed.
1437 * It could be that they are running a long cleanup phase in the driver
1438 * e.g. MMU unmappings, or running other long teardown flow even before
1439 * our cleanup.
1440 * Therefore we need to wait again to make sure they are closed before
1441 * continuing with the reset.
1442 */
1443
1444wait_for_processes:
1445 while ((!list_empty(head: hpriv_list)) && (pending_cnt)) {
1446 dev_dbg(hdev->dev,
1447 "Waiting for all unmap operations to finish before hard reset\n");
1448
1449 pending_cnt--;
1450
1451 ssleep(seconds: 1);
1452 }
1453
1454 /* All processes exited successfully */
1455 if (list_empty(head: hpriv_list))
1456 return 0;
1457
1458 /* Give up waiting for processes to exit */
1459 if (hdev->process_kill_trial_cnt == HL_PENDING_RESET_MAX_TRIALS)
1460 return -ETIME;
1461
1462 hdev->process_kill_trial_cnt++;
1463
1464 return -EBUSY;
1465}
1466
1467static void device_disable_open_processes(struct hl_device *hdev, bool control_dev)
1468{
1469 struct list_head *hpriv_list;
1470 struct hl_fpriv *hpriv;
1471 struct mutex *hpriv_lock;
1472
1473 hpriv_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
1474 hpriv_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
1475
1476 mutex_lock(hpriv_lock);
1477 list_for_each_entry(hpriv, hpriv_list, dev_node)
1478 hpriv->hdev = NULL;
1479 mutex_unlock(lock: hpriv_lock);
1480}
1481
1482static void send_disable_pci_access(struct hl_device *hdev, u32 flags)
1483{
1484 /* If reset is due to heartbeat, device CPU is no responsive in
1485 * which case no point sending PCI disable message to it.
1486 */
1487 if ((flags & HL_DRV_RESET_HARD) &&
1488 !(flags & (HL_DRV_RESET_HEARTBEAT | HL_DRV_RESET_BYPASS_REQ_TO_FW))) {
1489 /* Disable PCI access from device F/W so he won't send
1490 * us additional interrupts. We disable MSI/MSI-X at
1491 * the halt_engines function and we can't have the F/W
1492 * sending us interrupts after that. We need to disable
1493 * the access here because if the device is marked
1494 * disable, the message won't be send. Also, in case
1495 * of heartbeat, the device CPU is marked as disable
1496 * so this message won't be sent
1497 */
1498 if (hl_fw_send_pci_access_msg(hdev, opcode: CPUCP_PACKET_DISABLE_PCI_ACCESS, value: 0x0)) {
1499 dev_warn(hdev->dev, "Failed to disable FW's PCI access\n");
1500 return;
1501 }
1502
1503 /* verify that last EQs are handled before disabled is set */
1504 if (hdev->cpu_queues_enable)
1505 synchronize_irq(irq: pci_irq_vector(dev: hdev->pdev,
1506 nr: hdev->asic_prop.eq_interrupt_id));
1507 }
1508}
1509
1510static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
1511{
1512 u32 cur_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
1513
1514 /* No consecutive mechanism when user context exists */
1515 if (hdev->is_compute_ctx_active)
1516 return;
1517
1518 /*
1519 * 'reset cause' is being updated here, because getting here
1520 * means that it's the 1st time and the last time we're here
1521 * ('in_reset' makes sure of it). This makes sure that
1522 * 'reset_cause' will continue holding its 1st recorded reason!
1523 */
1524 if (flags & HL_DRV_RESET_HEARTBEAT) {
1525 hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT;
1526 cur_reset_trigger = HL_DRV_RESET_HEARTBEAT;
1527 } else if (flags & HL_DRV_RESET_TDR) {
1528 hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_TDR;
1529 cur_reset_trigger = HL_DRV_RESET_TDR;
1530 } else if (flags & HL_DRV_RESET_FW_FATAL_ERR) {
1531 hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
1532 cur_reset_trigger = HL_DRV_RESET_FW_FATAL_ERR;
1533 } else {
1534 hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
1535 }
1536
1537 /*
1538 * If reset cause is same twice, then reset_trigger_repeated
1539 * is set and if this reset is due to a fatal FW error
1540 * device is set to an unstable state.
1541 */
1542 if (hdev->reset_info.prev_reset_trigger != cur_reset_trigger) {
1543 hdev->reset_info.prev_reset_trigger = cur_reset_trigger;
1544 hdev->reset_info.reset_trigger_repeated = 0;
1545 } else {
1546 hdev->reset_info.reset_trigger_repeated = 1;
1547 }
1548}
1549
1550/*
1551 * hl_device_reset - reset the device
1552 *
1553 * @hdev: pointer to habanalabs device structure
1554 * @flags: reset flags.
1555 *
1556 * Block future CS and wait for pending CS to be enqueued
1557 * Call ASIC H/W fini
1558 * Flush all completions
1559 * Re-initialize all internal data structures
1560 * Call ASIC H/W init, late_init
1561 * Test queues
1562 * Enable device
1563 *
1564 * Returns 0 for success or an error on failure.
1565 */
1566int hl_device_reset(struct hl_device *hdev, u32 flags)
1567{
1568 bool hard_reset, from_hard_reset_thread, fw_reset, reset_upon_device_release,
1569 schedule_hard_reset = false, delay_reset, from_dev_release, from_watchdog_thread;
1570 u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
1571 struct hl_ctx *ctx;
1572 int i, rc, hw_fini_rc;
1573
1574 if (!hdev->init_done) {
1575 dev_err(hdev->dev, "Can't reset before initialization is done\n");
1576 return 0;
1577 }
1578
1579 hard_reset = !!(flags & HL_DRV_RESET_HARD);
1580 from_hard_reset_thread = !!(flags & HL_DRV_RESET_FROM_RESET_THR);
1581 fw_reset = !!(flags & HL_DRV_RESET_BYPASS_REQ_TO_FW);
1582 from_dev_release = !!(flags & HL_DRV_RESET_DEV_RELEASE);
1583 delay_reset = !!(flags & HL_DRV_RESET_DELAY);
1584 from_watchdog_thread = !!(flags & HL_DRV_RESET_FROM_WD_THR);
1585 reset_upon_device_release = hdev->reset_upon_device_release && from_dev_release;
1586
1587 if (!hard_reset && (hl_device_status(hdev) == HL_DEVICE_STATUS_MALFUNCTION)) {
1588 dev_dbg(hdev->dev, "soft-reset isn't supported on a malfunctioning device\n");
1589 return 0;
1590 }
1591
1592 if (!hard_reset && !hdev->asic_prop.supports_compute_reset) {
1593 dev_dbg(hdev->dev, "asic doesn't support compute reset - do hard-reset instead\n");
1594 hard_reset = true;
1595 }
1596
1597 if (reset_upon_device_release) {
1598 if (hard_reset) {
1599 dev_crit(hdev->dev,
1600 "Aborting reset because hard-reset is mutually exclusive with reset-on-device-release\n");
1601 return -EINVAL;
1602 }
1603
1604 goto do_reset;
1605 }
1606
1607 if (!hard_reset && !hdev->asic_prop.allow_inference_soft_reset) {
1608 dev_dbg(hdev->dev,
1609 "asic doesn't allow inference soft reset - do hard-reset instead\n");
1610 hard_reset = true;
1611 }
1612
1613do_reset:
1614 /* Re-entry of reset thread */
1615 if (from_hard_reset_thread && hdev->process_kill_trial_cnt)
1616 goto kill_processes;
1617
1618 /*
1619 * Prevent concurrency in this function - only one reset should be
1620 * done at any given time. We need to perform this only if we didn't
1621 * get here from a dedicated hard reset thread.
1622 */
1623 if (!from_hard_reset_thread) {
1624 /* Block future CS/VM/JOB completion operations */
1625 spin_lock(lock: &hdev->reset_info.lock);
1626 if (hdev->reset_info.in_reset) {
1627 /* We allow scheduling of a hard reset only during a compute reset */
1628 if (hard_reset && hdev->reset_info.in_compute_reset)
1629 hdev->reset_info.hard_reset_schedule_flags = flags;
1630 spin_unlock(lock: &hdev->reset_info.lock);
1631 return 0;
1632 }
1633
1634 /* This still allows the completion of some KDMA ops
1635 * Update this before in_reset because in_compute_reset implies we are in reset
1636 */
1637 hdev->reset_info.in_compute_reset = !hard_reset;
1638
1639 hdev->reset_info.in_reset = 1;
1640
1641 spin_unlock(lock: &hdev->reset_info.lock);
1642
1643 /* Cancel the device release watchdog work if required.
1644 * In case of reset-upon-device-release while the release watchdog work is
1645 * scheduled due to a hard-reset, do hard-reset instead of compute-reset.
1646 */
1647 if ((hard_reset || from_dev_release) && hdev->reset_info.watchdog_active) {
1648 struct hl_device_reset_work *watchdog_work =
1649 &hdev->device_release_watchdog_work;
1650
1651 hdev->reset_info.watchdog_active = 0;
1652 if (!from_watchdog_thread)
1653 cancel_delayed_work_sync(dwork: &watchdog_work->reset_work);
1654
1655 if (from_dev_release && (watchdog_work->flags & HL_DRV_RESET_HARD)) {
1656 hdev->reset_info.in_compute_reset = 0;
1657 flags |= HL_DRV_RESET_HARD;
1658 flags &= ~HL_DRV_RESET_DEV_RELEASE;
1659 hard_reset = true;
1660 }
1661 }
1662
1663 if (delay_reset)
1664 usleep_range(HL_RESET_DELAY_USEC, HL_RESET_DELAY_USEC << 1);
1665
1666escalate_reset_flow:
1667 handle_reset_trigger(hdev, flags);
1668 send_disable_pci_access(hdev, flags);
1669
1670 /* This also blocks future CS/VM/JOB completion operations */
1671 hdev->disabled = true;
1672
1673 take_release_locks(hdev);
1674
1675 if (hard_reset)
1676 dev_info(hdev->dev, "Going to reset device\n");
1677 else if (reset_upon_device_release)
1678 dev_dbg(hdev->dev, "Going to reset device after release by user\n");
1679 else
1680 dev_dbg(hdev->dev, "Going to reset engines of inference device\n");
1681 }
1682
1683 if ((hard_reset) && (!from_hard_reset_thread)) {
1684 hdev->reset_info.hard_reset_pending = true;
1685
1686 hdev->process_kill_trial_cnt = 0;
1687
1688 hdev->device_reset_work.flags = flags;
1689
1690 /*
1691 * Because the reset function can't run from heartbeat work,
1692 * we need to call the reset function from a dedicated work.
1693 */
1694 queue_delayed_work(wq: hdev->reset_wq, dwork: &hdev->device_reset_work.reset_work, delay: 0);
1695
1696 return 0;
1697 }
1698
1699 cleanup_resources(hdev, hard_reset, fw_reset, skip_wq_flush: from_dev_release);
1700
1701kill_processes:
1702 if (hard_reset) {
1703 /* Kill processes here after CS rollback. This is because the
1704 * process can't really exit until all its CSs are done, which
1705 * is what we do in cs rollback
1706 */
1707 rc = device_kill_open_processes(hdev, timeout: 0, control_dev: false);
1708
1709 if (rc == -EBUSY) {
1710 if (hdev->device_fini_pending) {
1711 dev_crit(hdev->dev,
1712 "%s Failed to kill all open processes, stopping hard reset\n",
1713 dev_name(&(hdev)->pdev->dev));
1714 goto out_err;
1715 }
1716
1717 /* signal reset thread to reschedule */
1718 return rc;
1719 }
1720
1721 if (rc) {
1722 dev_crit(hdev->dev,
1723 "%s Failed to kill all open processes, stopping hard reset\n",
1724 dev_name(&(hdev)->pdev->dev));
1725 goto out_err;
1726 }
1727
1728 /* Flush the Event queue workers to make sure no other thread is
1729 * reading or writing to registers during the reset
1730 */
1731 flush_workqueue(hdev->eq_wq);
1732 }
1733
1734 /* Reset the H/W. It will be in idle state after this returns */
1735 hw_fini_rc = hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset);
1736
1737 if (hard_reset) {
1738 hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE;
1739
1740 /* Release kernel context */
1741 if (hdev->kernel_ctx && hl_ctx_put(ctx: hdev->kernel_ctx) == 1)
1742 hdev->kernel_ctx = NULL;
1743
1744 hl_vm_fini(hdev);
1745 hl_mmu_fini(hdev);
1746 hl_eq_reset(hdev, q: &hdev->event_queue);
1747 }
1748
1749 /* Re-initialize PI,CI to 0 in all queues (hw queue, cq) */
1750 hl_hw_queue_reset(hdev, hard_reset);
1751 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1752 hl_cq_reset(hdev, q: &hdev->completion_queue[i]);
1753
1754 /* Make sure the context switch phase will run again */
1755 ctx = hl_get_compute_ctx(hdev);
1756 if (ctx) {
1757 atomic_set(v: &ctx->thread_ctx_switch_token, i: 1);
1758 ctx->thread_ctx_switch_wait_token = 0;
1759 hl_ctx_put(ctx);
1760 }
1761
1762 if (hw_fini_rc) {
1763 rc = hw_fini_rc;
1764 goto out_err;
1765 }
1766 /* Finished tear-down, starting to re-initialize */
1767
1768 if (hard_reset) {
1769 hdev->device_cpu_disabled = false;
1770 hdev->reset_info.hard_reset_pending = false;
1771
1772 /*
1773 * Put the device in an unusable state if there are 2 back to back resets due to
1774 * fatal errors.
1775 */
1776 if (hdev->reset_info.reset_trigger_repeated &&
1777 (hdev->reset_info.prev_reset_trigger == HL_DRV_RESET_FW_FATAL_ERR ||
1778 hdev->reset_info.prev_reset_trigger ==
1779 HL_DRV_RESET_HEARTBEAT)) {
1780 dev_crit(hdev->dev,
1781 "%s Consecutive fatal errors, stopping hard reset\n",
1782 dev_name(&(hdev)->pdev->dev));
1783 rc = -EIO;
1784 goto out_err;
1785 }
1786
1787 if (hdev->kernel_ctx) {
1788 dev_crit(hdev->dev,
1789 "%s kernel ctx was alive during hard reset, something is terribly wrong\n",
1790 dev_name(&(hdev)->pdev->dev));
1791 rc = -EBUSY;
1792 goto out_err;
1793 }
1794
1795 rc = hl_mmu_init(hdev);
1796 if (rc) {
1797 dev_err(hdev->dev,
1798 "Failed to initialize MMU S/W after hard reset\n");
1799 goto out_err;
1800 }
1801
1802 /* Allocate the kernel context */
1803 hdev->kernel_ctx = kzalloc(size: sizeof(*hdev->kernel_ctx),
1804 GFP_KERNEL);
1805 if (!hdev->kernel_ctx) {
1806 rc = -ENOMEM;
1807 hl_mmu_fini(hdev);
1808 goto out_err;
1809 }
1810
1811 hdev->is_compute_ctx_active = false;
1812
1813 rc = hl_ctx_init(hdev, ctx: hdev->kernel_ctx, is_kernel_ctx: true);
1814 if (rc) {
1815 dev_err(hdev->dev,
1816 "failed to init kernel ctx in hard reset\n");
1817 kfree(objp: hdev->kernel_ctx);
1818 hdev->kernel_ctx = NULL;
1819 hl_mmu_fini(hdev);
1820 goto out_err;
1821 }
1822 }
1823
1824 /* The device must be enabled at this point because part of the
1825 * initialization requires communication with the device firmware
1826 * to get information that is needed for the initialization itself
1827 */
1828 hdev->disabled = false;
1829
1830 /* F/W security enabled indication might be updated after hard-reset */
1831 if (hard_reset) {
1832 rc = hl_fw_read_preboot_status(hdev);
1833 if (rc)
1834 goto out_err;
1835 }
1836
1837 rc = hdev->asic_funcs->hw_init(hdev);
1838 if (rc) {
1839 dev_err(hdev->dev, "failed to initialize the H/W after reset\n");
1840 goto out_err;
1841 }
1842
1843 /* If device is not idle fail the reset process */
1844 if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask,
1845 HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) {
1846 print_idle_status_mask(hdev, message: "device is not idle after reset", idle_mask);
1847 rc = -EIO;
1848 goto out_err;
1849 }
1850
1851 /* Check that the communication with the device is working */
1852 rc = hdev->asic_funcs->test_queues(hdev);
1853 if (rc) {
1854 dev_err(hdev->dev, "Failed to detect if device is alive after reset\n");
1855 goto out_err;
1856 }
1857
1858 if (hard_reset) {
1859 rc = device_late_init(hdev);
1860 if (rc) {
1861 dev_err(hdev->dev, "Failed late init after hard reset\n");
1862 goto out_err;
1863 }
1864
1865 rc = hl_vm_init(hdev);
1866 if (rc) {
1867 dev_err(hdev->dev, "Failed to init memory module after hard reset\n");
1868 goto out_err;
1869 }
1870
1871 if (!hdev->asic_prop.fw_security_enabled)
1872 hl_fw_set_max_power(hdev);
1873 } else {
1874 rc = hdev->asic_funcs->compute_reset_late_init(hdev);
1875 if (rc) {
1876 if (reset_upon_device_release)
1877 dev_err(hdev->dev,
1878 "Failed late init in reset after device release\n");
1879 else
1880 dev_err(hdev->dev, "Failed late init after compute reset\n");
1881 goto out_err;
1882 }
1883 }
1884
1885 rc = hdev->asic_funcs->scrub_device_mem(hdev);
1886 if (rc) {
1887 dev_err(hdev->dev, "scrub mem failed from device reset (%d)\n", rc);
1888 goto out_err;
1889 }
1890
1891 spin_lock(lock: &hdev->reset_info.lock);
1892 hdev->reset_info.in_compute_reset = 0;
1893
1894 /* Schedule a hard reset only if requested and if not already in hard reset.
1895 * We keep 'in_reset' enabled, so no other reset can start while the hard
1896 * reset is scheduled
1897 */
1898 if (!hard_reset && hdev->reset_info.hard_reset_schedule_flags)
1899 schedule_hard_reset = true;
1900 else
1901 hdev->reset_info.in_reset = 0;
1902
1903 spin_unlock(lock: &hdev->reset_info.lock);
1904
1905 hdev->reset_info.needs_reset = false;
1906
1907 if (hard_reset)
1908 dev_info(hdev->dev,
1909 "Successfully finished resetting the %s device\n",
1910 dev_name(&(hdev)->pdev->dev));
1911 else
1912 dev_dbg(hdev->dev,
1913 "Successfully finished resetting the %s device\n",
1914 dev_name(&(hdev)->pdev->dev));
1915
1916 if (hard_reset) {
1917 hdev->reset_info.hard_reset_cnt++;
1918
1919 /* After the reset is done, we are ready to receive events from
1920 * the F/W. We can't do it earlier because events would simply be
1921 * ignored, and if a fatal event were missed, the device would stay
1922 * operational although it shouldn't be
1923 */
1924 hdev->asic_funcs->enable_events_from_fw(hdev);
1925 } else {
1926 if (!reset_upon_device_release)
1927 hdev->reset_info.compute_reset_cnt++;
1928
1929 if (schedule_hard_reset) {
1930 dev_info(hdev->dev, "Performing hard reset scheduled during compute reset\n");
1931 flags = hdev->reset_info.hard_reset_schedule_flags;
1932 hdev->reset_info.hard_reset_schedule_flags = 0;
1933 hard_reset = true;
1934 goto escalate_reset_flow;
1935 }
1936 }
1937
1938 return 0;
1939
1940out_err:
1941 hdev->disabled = true;
1942
1943 spin_lock(lock: &hdev->reset_info.lock);
1944 hdev->reset_info.in_compute_reset = 0;
1945
1946 if (hard_reset) {
1947 dev_err(hdev->dev,
1948 "%s Failed to reset! Device is NOT usable\n",
1949 dev_name(&(hdev)->pdev->dev));
1950 hdev->reset_info.hard_reset_cnt++;
1951 } else {
1952 if (reset_upon_device_release) {
1953 dev_err(hdev->dev, "Failed to reset device after user release\n");
1954 flags &= ~HL_DRV_RESET_DEV_RELEASE;
1955 } else {
1956 dev_err(hdev->dev, "Failed to do compute reset\n");
1957 hdev->reset_info.compute_reset_cnt++;
1958 }
1959
1960 spin_unlock(lock: &hdev->reset_info.lock);
1961 flags |= HL_DRV_RESET_HARD;
1962 hard_reset = true;
1963 goto escalate_reset_flow;
1964 }
1965
1966 hdev->reset_info.in_reset = 0;
1967
1968 spin_unlock(lock: &hdev->reset_info.lock);
1969
1970 return rc;
1971}
1972
1973/*
1974 * hl_device_cond_reset() - conditionally reset the device.
1975 * @hdev: pointer to habanalabs device structure.
1976 * @flags: reset flags.
1977 * @event_mask: events to notify user about.
1978 *
1979 * Conditionally reset the device, or alternatively schedule a watchdog work to reset the device
1980 * unless another reset precedes it.
1981 */
1982int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask)
1983{
1984 struct hl_ctx *ctx = NULL;
1985
1986 /* F/W reset cannot be postponed */
1987 if (flags & HL_DRV_RESET_BYPASS_REQ_TO_FW)
1988 goto device_reset;
1989
1990 /* Device release watchdog is relevant only if user exists and gets a reset notification */
1991 if (!(event_mask & HL_NOTIFIER_EVENT_DEVICE_RESET)) {
1992 dev_err(hdev->dev, "Resetting device without a reset indication to user\n");
1993 goto device_reset;
1994 }
1995
1996 ctx = hl_get_compute_ctx(hdev);
1997 if (!ctx)
1998 goto device_reset;
1999
2000 /*
2001 * There is no point in postponing the reset if the user is not registered for events.
2002 * However, if no eventfd_ctx exists but the device release watchdog is already scheduled,
2003 * it just implies that the user has unregistered as part of handling a previous event. In
2004 * that case an immediate reset is not required.
2005 */
2006 if (!ctx->hpriv->notifier_event.eventfd && !hdev->reset_info.watchdog_active)
2007 goto device_reset;
2008
2009 /* Schedule the device release watchdog work, unless a reset is already in progress
2010 * or the work is already scheduled.
2011 */
2012 spin_lock(lock: &hdev->reset_info.lock);
2013 if (hdev->reset_info.in_reset) {
2014 spin_unlock(lock: &hdev->reset_info.lock);
2015 goto device_reset;
2016 }
2017
2018 if (hdev->reset_info.watchdog_active) {
2019 hdev->device_release_watchdog_work.flags |= flags;
2020 goto out;
2021 }
2022
2023 hdev->device_release_watchdog_work.flags = flags;
2024 dev_dbg(hdev->dev, "Device is going to be hard-reset in %u sec unless being released\n",
2025 hdev->device_release_watchdog_timeout_sec);
2026 schedule_delayed_work(dwork: &hdev->device_release_watchdog_work.reset_work,
2027 delay: msecs_to_jiffies(m: hdev->device_release_watchdog_timeout_sec * 1000));
2028 hdev->reset_info.watchdog_active = 1;
2029out:
2030 spin_unlock(lock: &hdev->reset_info.lock);
2031
2032 hl_notifier_event_send_all(hdev, event_mask);
2033
2034 hl_ctx_put(ctx);
2035
2036 hl_abort_waiting_for_completions(hdev);
2037
2038 return 0;
2039
2040device_reset:
2041 if (event_mask)
2042 hl_notifier_event_send_all(hdev, event_mask);
2043 if (ctx)
2044 hl_ctx_put(ctx);
2045
2046 return hl_device_reset(hdev, flags: flags | HL_DRV_RESET_HARD);
2047}
2048
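/*
 * hl_notifier_event_send - update a single user's pending events mask and
 *                          signal its eventfd, if one is registered
 *
 * @notifier_event: pointer to the user's notifier-event structure
 * @event_mask: the occurred event/s to accumulate in the pending mask
 */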
2049static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64 event_mask)
2050{
2051 mutex_lock(&notifier_event->lock);
2052 notifier_event->events_mask |= event_mask;
2053
2054 if (notifier_event->eventfd)
2055 eventfd_signal(ctx: notifier_event->eventfd);
2056
2057 mutex_unlock(lock: &notifier_event->lock);
2058}
2059
2060/*
2061 * hl_notifier_event_send_all - notify all user processes via eventfd
2062 *
2063 * @hdev: pointer to habanalabs device structure
2064 * @event_mask: mask of the event/s that occurred
2066 */
2067void hl_notifier_event_send_all(struct hl_device *hdev, u64 event_mask)
2068{
2069 struct hl_fpriv *hpriv;
2070
2071 if (!event_mask) {
2072 dev_warn(hdev->dev, "Skip sending zero event");
2073 return;
2074 }
2075
2076 mutex_lock(&hdev->fpriv_list_lock);
2077
2078 list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node)
2079 hl_notifier_event_send(notifier_event: &hpriv->notifier_event, event_mask);
2080
2081 mutex_unlock(lock: &hdev->fpriv_list_lock);
2082}
2083
2084/*
2085 * hl_device_init - main initialization function for habanalabs device
2086 *
2087 * @hdev: pointer to habanalabs device structure
2088 *
2089 * Allocate an id for the device, do early initialization and then call the
2090 * ASIC specific initialization functions. Finally, create the cdev and the
2091 * Linux device to expose it to the user
2092 */
2093int hl_device_init(struct hl_device *hdev)
2094{
2095 int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt;
2096 struct hl_ts_free_jobs *free_jobs_data;
2097 bool expose_interfaces_on_err = false;
2098 void *p;
2099
2100 /* Initialize ASIC function pointers and perform early init */
2101 rc = device_early_init(hdev);
2102 if (rc)
2103 goto out_disabled;
2104
2105 user_interrupt_cnt = hdev->asic_prop.user_dec_intr_count +
2106 hdev->asic_prop.user_interrupt_count;
2107
2108 if (user_interrupt_cnt) {
2109 hdev->user_interrupt = kcalloc(n: user_interrupt_cnt, size: sizeof(*hdev->user_interrupt),
2110 GFP_KERNEL);
2111 if (!hdev->user_interrupt) {
2112 rc = -ENOMEM;
2113 goto early_fini;
2114 }
2115
2116 /* Timestamp records are supported only if the device supports CQs */
2117 if (hdev->asic_prop.first_available_cq[0] != USHRT_MAX) {
2118 for (i = 0 ; i < user_interrupt_cnt ; i++) {
2119 p = vzalloc(TIMESTAMP_FREE_NODES_NUM *
2120 sizeof(struct timestamp_reg_free_node));
2121 if (!p) {
2122 rc = -ENOMEM;
2123 goto free_usr_intr_mem;
2124 }
2125 free_jobs_data = &hdev->user_interrupt[i].ts_free_jobs_data;
2126 free_jobs_data->free_nodes_pool = p;
2127 free_jobs_data->free_nodes_length = TIMESTAMP_FREE_NODES_NUM;
2128 free_jobs_data->next_avail_free_node_idx = 0;
2129 }
2130 }
2131 }
2132
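	/* Allocate the timestamp-registration free-nodes pool for the common user
	 * CQ interrupt, similarly to the per-interrupt pools allocated above
	 */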
2133 free_jobs_data = &hdev->common_user_cq_interrupt.ts_free_jobs_data;
2134 p = vzalloc(TIMESTAMP_FREE_NODES_NUM *
2135 sizeof(struct timestamp_reg_free_node));
2136 if (!p) {
2137 rc = -ENOMEM;
2138 goto free_usr_intr_mem;
2139 }
2140
2141 free_jobs_data->free_nodes_pool = p;
2142 free_jobs_data->free_nodes_length = TIMESTAMP_FREE_NODES_NUM;
2143 free_jobs_data->next_avail_free_node_idx = 0;
2144
2145 /*
2146 * Start calling ASIC initialization. First S/W then H/W and finally
2147 * late init
2148 */
2149 rc = hdev->asic_funcs->sw_init(hdev);
2150 if (rc)
2151 goto free_common_usr_intr_mem;
2152
2153
2154 /* initialize completion structure for multi CS wait */
2155 hl_multi_cs_completion_init(hdev);
2156
2157 /*
2158 * Initialize the H/W queues. Must be done before hw_init, because
2159 * that is where the addresses of the kernel queues are written to
2160 * the registers of the device
2161 */
2162 rc = hl_hw_queues_create(hdev);
2163 if (rc) {
2164 dev_err(hdev->dev, "failed to initialize kernel queues\n");
2165 goto sw_fini;
2166 }
2167
2168 cq_cnt = hdev->asic_prop.completion_queues_count;
2169
2170 /*
2171 * Initialize the completion queues. Must be done before hw_init,
2172 * because that is where the addresses of the completion queues are
2173 * passed as arguments to request_irq
2174 */
2175 if (cq_cnt) {
2176 hdev->completion_queue = kcalloc(n: cq_cnt,
2177 size: sizeof(*hdev->completion_queue),
2178 GFP_KERNEL);
2179
2180 if (!hdev->completion_queue) {
2181 dev_err(hdev->dev,
2182 "failed to allocate completion queues\n");
2183 rc = -ENOMEM;
2184 goto hw_queues_destroy;
2185 }
2186 }
2187
2188 for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) {
2189 rc = hl_cq_init(hdev, q: &hdev->completion_queue[i],
2190 hw_queue_id: hdev->asic_funcs->get_queue_id_for_cq(hdev, i));
2191 if (rc) {
2192 dev_err(hdev->dev,
2193 "failed to initialize completion queue\n");
2194 goto cq_fini;
2195 }
2196 hdev->completion_queue[i].cq_idx = i;
2197 }
2198
2199 hdev->shadow_cs_queue = kcalloc(n: hdev->asic_prop.max_pending_cs,
2200 size: sizeof(struct hl_cs *), GFP_KERNEL);
2201 if (!hdev->shadow_cs_queue) {
2202 rc = -ENOMEM;
2203 goto cq_fini;
2204 }
2205
2206 /*
2207 * Initialize the event queue. Must be done before hw_init,
2208 * because that is where the address of the event queue is
2209 * passed as an argument to request_irq
2210 */
2211 rc = hl_eq_init(hdev, q: &hdev->event_queue);
2212 if (rc) {
2213 dev_err(hdev->dev, "failed to initialize event queue\n");
2214 goto free_shadow_cs_queue;
2215 }
2216
2217 /* MMU S/W must be initialized before kernel context is created */
2218 rc = hl_mmu_init(hdev);
2219 if (rc) {
2220 dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n");
2221 goto eq_fini;
2222 }
2223
2224 /* Allocate the kernel context */
2225 hdev->kernel_ctx = kzalloc(size: sizeof(*hdev->kernel_ctx), GFP_KERNEL);
2226 if (!hdev->kernel_ctx) {
2227 rc = -ENOMEM;
2228 goto mmu_fini;
2229 }
2230
2231 hdev->is_compute_ctx_active = false;
2232
2233 hdev->asic_funcs->state_dump_init(hdev);
2234
2235 hdev->device_release_watchdog_timeout_sec = HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC;
2236
2237 hdev->memory_scrub_val = MEM_SCRUB_DEFAULT_VAL;
2238
2239 rc = hl_debugfs_device_init(hdev);
2240 if (rc) {
2241 dev_err(hdev->dev, "failed to initialize debugfs entry structure\n");
2242 kfree(objp: hdev->kernel_ctx);
2243 goto mmu_fini;
2244 }
2245
2246 /* The debugfs entry structure is accessed in hl_ctx_init(), so it must be called after
2247 * hl_debugfs_device_init().
2248 */
2249 rc = hl_ctx_init(hdev, ctx: hdev->kernel_ctx, is_kernel_ctx: true);
2250 if (rc) {
2251 dev_err(hdev->dev, "failed to initialize kernel context\n");
2252 kfree(objp: hdev->kernel_ctx);
2253 goto debugfs_device_fini;
2254 }
2255
2256 rc = hl_cb_pool_init(hdev);
2257 if (rc) {
2258 dev_err(hdev->dev, "failed to initialize CB pool\n");
2259 goto release_ctx;
2260 }
2261
2262 rc = hl_dec_init(hdev);
2263 if (rc) {
2264 dev_err(hdev->dev, "Failed to initialize the decoder module\n");
2265 goto cb_pool_fini;
2266 }
2267
2268 /*
2269 * From this point, override rc (=0) in case of an error to allow debugging
2270 * (by adding char devices and creating sysfs/debugfs files as part of the error flow).
2271 */
2272 expose_interfaces_on_err = true;
2273
2274 /* The device must be enabled at this point because part of the
2275 * initialization requires communication with the device firmware
2276 * to get information that is needed for the initialization itself
2277 */
2278 hdev->disabled = false;
2279
2280 rc = hdev->asic_funcs->hw_init(hdev);
2281 if (rc) {
2282 dev_err(hdev->dev, "failed to initialize the H/W\n");
2283 rc = 0;
2284 goto out_disabled;
2285 }
2286
2287 /* Check that the communication with the device is working */
2288 rc = hdev->asic_funcs->test_queues(hdev);
2289 if (rc) {
2290 dev_err(hdev->dev, "Failed to detect if device is alive\n");
2291 rc = 0;
2292 goto out_disabled;
2293 }
2294
2295 rc = device_late_init(hdev);
2296 if (rc) {
2297 dev_err(hdev->dev, "Failed late initialization\n");
2298 rc = 0;
2299 goto out_disabled;
2300 }
2301
2302 dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n",
2303 hdev->asic_name,
2304 hdev->asic_prop.dram_size / SZ_1G);
2305
2306 rc = hl_vm_init(hdev);
2307 if (rc) {
2308 dev_err(hdev->dev, "Failed to initialize memory module\n");
2309 rc = 0;
2310 goto out_disabled;
2311 }
2312
2313 /*
2314 * Expose devices and sysfs/debugfs files to user.
2315 * From here there is no need to expose them in case of an error.
2316 */
2317 expose_interfaces_on_err = false;
2318
2319 rc = drm_dev_register(dev: &hdev->drm, flags: 0);
2320 if (rc) {
2321 dev_err(hdev->dev, "Failed to register DRM device, rc %d\n", rc);
2322 rc = 0;
2323 goto out_disabled;
2324 }
2325
2326 rc = cdev_sysfs_debugfs_add(hdev);
2327 if (rc) {
2328 dev_err(hdev->dev, "Failed to add char devices and sysfs/debugfs files\n");
2329 rc = 0;
2330 goto out_disabled;
2331 }
2332
2333 /* Need to call this again because, for certain ASICs, the max power
2334 * might change depending on the card type
2335 */
2336 if (hdev->asic_prop.set_max_power_on_device_init &&
2337 !hdev->asic_prop.fw_security_enabled)
2338 hl_fw_set_max_power(hdev);
2339
2340 /*
2341 * hl_hwmon_init() must be called after device_late_init(), because only
2342 * there we get the information from the device about which
2343 * hwmon-related sensors the device supports.
2344 * Furthermore, it must be done after adding the device to the system.
2345 */
2346 rc = hl_hwmon_init(hdev);
2347 if (rc) {
2348 dev_err(hdev->dev, "Failed to initialize hwmon\n");
2349 rc = 0;
2350 goto out_disabled;
2351 }
2352
2353 dev_notice(hdev->dev,
2354 "Successfully added device %s to habanalabs driver\n",
2355 dev_name(&(hdev)->pdev->dev));
2356
2357 /* After initialization is done, we are ready to receive events from
2358 * the F/W. We can't do it earlier because events would simply be
2359 * ignored, and if a fatal event were missed, the device would remain
2360 * operational although it shouldn't be
2361 */
2362 hdev->asic_funcs->enable_events_from_fw(hdev);
2363
2364 hdev->init_done = true;
2365
2366 return 0;
2367
2368cb_pool_fini:
2369 hl_cb_pool_fini(hdev);
2370release_ctx:
2371 if (hl_ctx_put(ctx: hdev->kernel_ctx) != 1)
2372 dev_err(hdev->dev,
2373 "kernel ctx is still alive on initialization failure\n");
2374debugfs_device_fini:
2375 hl_debugfs_device_fini(hdev);
2376mmu_fini:
2377 hl_mmu_fini(hdev);
2378eq_fini:
2379 hl_eq_fini(hdev, q: &hdev->event_queue);
2380free_shadow_cs_queue:
2381 kfree(objp: hdev->shadow_cs_queue);
2382cq_fini:
2383 for (i = 0 ; i < cq_ready_cnt ; i++)
2384 hl_cq_fini(hdev, q: &hdev->completion_queue[i]);
2385 kfree(objp: hdev->completion_queue);
2386hw_queues_destroy:
2387 hl_hw_queues_destroy(hdev);
2388sw_fini:
2389 hdev->asic_funcs->sw_fini(hdev);
2390free_common_usr_intr_mem:
2391 vfree(addr: hdev->common_user_cq_interrupt.ts_free_jobs_data.free_nodes_pool);
2392free_usr_intr_mem:
2393 if (user_interrupt_cnt) {
2394 for (i = 0 ; i < user_interrupt_cnt ; i++) {
2395 if (!hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool)
2396 break;
2397 vfree(addr: hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool);
2398 }
2399 kfree(objp: hdev->user_interrupt);
2400 }
2401early_fini:
2402 device_early_fini(hdev);
2403out_disabled:
2404 hdev->disabled = true;
2405 if (expose_interfaces_on_err) {
2406 drm_dev_register(dev: &hdev->drm, flags: 0);
2407 cdev_sysfs_debugfs_add(hdev);
2408 }
2409
2410 pr_err("Failed to initialize accel%d. Device %s is NOT usable!\n",
2411 hdev->cdev_idx, dev_name(&hdev->pdev->dev));
2412
2413 return rc;
2414}
2415
2416/*
2417 * hl_device_fini - main tear-down function for habanalabs device
2418 *
2419 * @hdev: pointer to habanalabs device structure
2420 *
2421 * Destroy the device, call ASIC fini functions and release the id
2422 */
2423void hl_device_fini(struct hl_device *hdev)
2424{
2425 u32 user_interrupt_cnt;
2426 bool device_in_reset;
2427 ktime_t timeout;
2428 u64 reset_sec;
2429 int i, rc;
2430
2431 dev_info(hdev->dev, "Removing device %s\n", dev_name(&(hdev)->pdev->dev));
2432
2433 hdev->device_fini_pending = 1;
2434 flush_delayed_work(dwork: &hdev->device_reset_work.reset_work);
2435
2436 if (hdev->pldm)
2437 reset_sec = HL_PLDM_HARD_RESET_MAX_TIMEOUT;
2438 else
2439 reset_sec = HL_HARD_RESET_MAX_TIMEOUT;
2440
2441 /*
2442 * This function is competing with the reset function, so try to
2443 * take the reset atomic and, if we are already in the middle of a reset,
2444 * wait until the reset function is finished. The reset function is designed
2445 * to always finish. However, in Gaudi, because of all the network
2446 * ports, a hard reset could take between 10 and 30 seconds
2447 */
2448
2449 timeout = ktime_add_us(kt: ktime_get(), usec: reset_sec * 1000 * 1000);
2450
2451 spin_lock(lock: &hdev->reset_info.lock);
2452 device_in_reset = !!hdev->reset_info.in_reset;
2453 if (!device_in_reset)
2454 hdev->reset_info.in_reset = 1;
2455 spin_unlock(lock: &hdev->reset_info.lock);
2456
2457 while (device_in_reset) {
2458 usleep_range(min: 50, max: 200);
2459
2460 spin_lock(lock: &hdev->reset_info.lock);
2461 device_in_reset = !!hdev->reset_info.in_reset;
2462 if (!device_in_reset)
2463 hdev->reset_info.in_reset = 1;
2464 spin_unlock(lock: &hdev->reset_info.lock);
2465
2466 if (ktime_compare(cmp1: ktime_get(), cmp2: timeout) > 0) {
2467 dev_crit(hdev->dev,
2468 "%s Failed to remove device because reset function did not finish\n",
2469 dev_name(&(hdev)->pdev->dev));
2470 return;
2471 }
2472 }
2473
2474 cancel_delayed_work_sync(dwork: &hdev->device_release_watchdog_work.reset_work);
2475
2476 /* Disable PCI access from the device F/W so it won't send us additional
2477 * interrupts. We disable MSI/MSI-X at the halt_engines function and we
2478 * can't have the F/W sending us interrupts after that. We need to
2479 * disable the access here because once the device is marked as disabled,
2480 * the message won't be sent. Also, in case of a heartbeat failure, the
2481 * device CPU is marked as disabled, so this message won't be sent either
2482 */
2483 hl_fw_send_pci_access_msg(hdev, opcode: CPUCP_PACKET_DISABLE_PCI_ACCESS, value: 0x0);
2484
2485 /* Mark device as disabled */
2486 hdev->disabled = true;
2487
2488 take_release_locks(hdev);
2489
2490 hdev->reset_info.hard_reset_pending = true;
2491
2492 hl_hwmon_fini(hdev);
2493
2494 cleanup_resources(hdev, hard_reset: true, fw_reset: false, skip_wq_flush: false);
2495
2496 /* Kill processes here, after CS rollback. This is because a process
2497 * can't really exit until all of its CSs are done, and completing
2498 * them is exactly what CS rollback does
2499 */
2500 dev_info(hdev->dev,
2501 "Waiting for all processes to exit (timeout of %u seconds)",
2502 HL_WAIT_PROCESS_KILL_ON_DEVICE_FINI);
2503
2504 hdev->process_kill_trial_cnt = 0;
2505 rc = device_kill_open_processes(hdev, HL_WAIT_PROCESS_KILL_ON_DEVICE_FINI, control_dev: false);
2506 if (rc) {
2507 dev_crit(hdev->dev, "Failed to kill all open processes (%d)\n", rc);
2508 device_disable_open_processes(hdev, control_dev: false);
2509 }
2510
2511 hdev->process_kill_trial_cnt = 0;
2512 rc = device_kill_open_processes(hdev, timeout: 0, control_dev: true);
2513 if (rc) {
2514 dev_crit(hdev->dev, "Failed to kill all control device open processes (%d)\n", rc);
2515 device_disable_open_processes(hdev, control_dev: true);
2516 }
2517
2518 hl_cb_pool_fini(hdev);
2519
2520 /* Reset the H/W. It will be in idle state after this returns */
2521 rc = hdev->asic_funcs->hw_fini(hdev, true, false);
2522 if (rc)
2523 dev_err(hdev->dev, "hw_fini failed in device fini while removing device %d\n", rc);
2524
2525 hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE;
2526
2527 /* Release kernel context */
2528 if ((hdev->kernel_ctx) && (hl_ctx_put(ctx: hdev->kernel_ctx) != 1))
2529 dev_err(hdev->dev, "kernel ctx is still alive\n");
2530
2531 hl_dec_fini(hdev);
2532
2533 hl_vm_fini(hdev);
2534
2535 hl_mmu_fini(hdev);
2536
2537 vfree(addr: hdev->captured_err_info.page_fault_info.user_mappings);
2538
2539 hl_eq_fini(hdev, q: &hdev->event_queue);
2540
2541 kfree(objp: hdev->shadow_cs_queue);
2542
2543 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
2544 hl_cq_fini(hdev, q: &hdev->completion_queue[i]);
2545 kfree(objp: hdev->completion_queue);
2546
2547 user_interrupt_cnt = hdev->asic_prop.user_dec_intr_count +
2548 hdev->asic_prop.user_interrupt_count;
2549
2550 if (user_interrupt_cnt) {
2551 if (hdev->asic_prop.first_available_cq[0] != USHRT_MAX) {
2552 for (i = 0 ; i < user_interrupt_cnt ; i++)
2553 vfree(addr: hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool);
2554 }
2555
2556 kfree(objp: hdev->user_interrupt);
2557 }
2558
2559 vfree(addr: hdev->common_user_cq_interrupt.ts_free_jobs_data.free_nodes_pool);
2560
2561 hl_hw_queues_destroy(hdev);
2562
2563 /* Call ASIC S/W finalize function */
2564 hdev->asic_funcs->sw_fini(hdev);
2565
2566 device_early_fini(hdev);
2567
2568 /* Hide devices and sysfs/debugfs files from user */
2569 cdev_sysfs_debugfs_remove(hdev);
2570 drm_dev_unregister(dev: &hdev->drm);
2571
2572 hl_debugfs_device_fini(hdev);
2573
2574 pr_info("removed device successfully\n");
2575}
2576
2577/*
2578 * MMIO register access helper functions.
2579 */
2580
2581/*
2582 * hl_rreg - Read an MMIO register
2583 *
2584 * @hdev: pointer to habanalabs device structure
2585 * @reg: MMIO register offset (in bytes)
2586 *
2587 * Returns the value of the MMIO register we are asked to read
2588 *
2589 */
2590inline u32 hl_rreg(struct hl_device *hdev, u32 reg)
2591{
2592 u32 val = readl(addr: hdev->rmmio + reg);
2593
2594 if (unlikely(trace_habanalabs_rreg32_enabled()))
2595 trace_habanalabs_rreg32(dev: hdev->dev, addr: reg, val);
2596
2597 return val;
2598}
2599
2600/*
2601 * hl_wreg - Write to an MMIO register
2602 *
2603 * @hdev: pointer to habanalabs device structure
2604 * @reg: MMIO register offset (in bytes)
2605 * @val: 32-bit value
2606 *
2607 * Writes the 32-bit value into the MMIO register
2608 *
2609 */
2610inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val)
2611{
2612 if (unlikely(trace_habanalabs_wreg32_enabled()))
2613 trace_habanalabs_wreg32(dev: hdev->dev, addr: reg, val);
2614
2615 writel(val, addr: hdev->rmmio + reg);
2616}
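/*
 * Minimal usage sketch (the register offsets below are hypothetical). Most ASIC
 * code goes through the RREG32()/WREG32() macros in habanalabs.h, which dispatch
 * to the ASIC's rreg/wreg callbacks and typically end up here:
 *
 *	u32 sts = hl_rreg(hdev, some_status_reg_offset);
 *	hl_wreg(hdev, some_ctrl_reg_offset, sts | BIT(0));
 */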
2617
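/*
 * hl_capture_razwi - capture the parameters of a RAZWI event (an illegal
 *                    transaction for which reads return zero and writes are
 *                    ignored). Only the first RAZWI since the device was
 *                    opened is recorded.
 *
 * @hdev: pointer to habanalabs device structure
 * @addr: the address that triggered the RAZWI
 * @engine_id: array of ids of the engines that possibly initiated the access
 * @num_of_engines: number of entries in @engine_id, limited to
 *                  HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR
 * @flags: RAZWI flags, stored as-is in the captured information
 */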
2618void hl_capture_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_of_engines,
2619 u8 flags)
2620{
2621 struct razwi_info *razwi_info = &hdev->captured_err_info.razwi_info;
2622
2623 if (num_of_engines > HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR) {
2624 dev_err(hdev->dev,
2625 "Number of possible razwi initiators (%u) exceeded limit (%u)\n",
2626 num_of_engines, HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR);
2627 return;
2628 }
2629
2630 /* In case it's the first razwi since the device was opened, capture its parameters */
2631 if (atomic_cmpxchg(v: &hdev->captured_err_info.razwi_info.razwi_detected, old: 0, new: 1))
2632 return;
2633
2634 razwi_info->razwi.timestamp = ktime_to_ns(kt: ktime_get());
2635 razwi_info->razwi.addr = addr;
2636 razwi_info->razwi.num_of_possible_engines = num_of_engines;
2637 memcpy(&razwi_info->razwi.engine_id[0], &engine_id[0],
2638 num_of_engines * sizeof(u16));
2639 razwi_info->razwi.flags = flags;
2640
2641 razwi_info->razwi_info_available = true;
2642}
2643
2644void hl_handle_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_of_engines,
2645 u8 flags, u64 *event_mask)
2646{
2647 hl_capture_razwi(hdev, addr, engine_id, num_of_engines, flags);
2648
2649 if (event_mask)
2650 *event_mask |= HL_NOTIFIER_EVENT_RAZWI;
2651}
2652
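/*
 * hl_capture_user_mappings - snapshot the user mappings of the active compute
 *                            context into the captured page-fault information.
 *                            For a PMMU fault only host (userptr) mappings are
 *                            recorded, otherwise only device physical-pack
 *                            mappings are.
 *
 * @hdev: pointer to habanalabs device structure
 * @is_pmmu: true if the faulting MMU is the PMMU
 */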
2653static void hl_capture_user_mappings(struct hl_device *hdev, bool is_pmmu)
2654{
2655 struct page_fault_info *pgf_info = &hdev->captured_err_info.page_fault_info;
2656 struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
2657 struct hl_vm_hash_node *hnode;
2658 struct hl_userptr *userptr;
2659 enum vm_type *vm_type;
2660 struct hl_ctx *ctx;
2661 u32 map_idx = 0;
2662 int i;
2663
2664 /* Reset the count from the previous session */
2665 pgf_info->num_of_user_mappings = 0;
2666
2667 ctx = hl_get_compute_ctx(hdev);
2668 if (!ctx) {
2669 dev_err(hdev->dev, "Can't get user context for user mappings\n");
2670 return;
2671 }
2672
2673 mutex_lock(&ctx->mem_hash_lock);
2674 hash_for_each(ctx->mem_hash, i, hnode, node) {
2675 vm_type = hnode->ptr;
2676 if (((*vm_type == VM_TYPE_USERPTR) && is_pmmu) ||
2677 ((*vm_type == VM_TYPE_PHYS_PACK) && !is_pmmu))
2678 pgf_info->num_of_user_mappings++;
2679
2680 }
2681
2682 if (!pgf_info->num_of_user_mappings)
2683 goto finish;
2684
2685 /* In case we already allocated a buffer in a previous session, release it
2686 * before allocating a new one.
2687 */
2688 vfree(addr: pgf_info->user_mappings);
2689 pgf_info->user_mappings =
2690 vzalloc(size: pgf_info->num_of_user_mappings * sizeof(struct hl_user_mapping));
2691 if (!pgf_info->user_mappings) {
2692 pgf_info->num_of_user_mappings = 0;
2693 goto finish;
2694 }
2695
2696 hash_for_each(ctx->mem_hash, i, hnode, node) {
2697 vm_type = hnode->ptr;
2698 if ((*vm_type == VM_TYPE_USERPTR) && (is_pmmu)) {
2699 userptr = hnode->ptr;
2700 pgf_info->user_mappings[map_idx].dev_va = hnode->vaddr;
2701 pgf_info->user_mappings[map_idx].size = userptr->size;
2702 map_idx++;
2703 } else if ((*vm_type == VM_TYPE_PHYS_PACK) && (!is_pmmu)) {
2704 phys_pg_pack = hnode->ptr;
2705 pgf_info->user_mappings[map_idx].dev_va = hnode->vaddr;
2706 pgf_info->user_mappings[map_idx].size = phys_pg_pack->total_size;
2707 map_idx++;
2708 }
2709 }
2710finish:
2711 mutex_unlock(lock: &ctx->mem_hash_lock);
2712 hl_ctx_put(ctx);
2713}
2714
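/*
 * hl_capture_page_fault - record the parameters of a page fault, together with
 *                         the user mappings that were live at that time. Only
 *                         the first page fault is captured.
 *
 * @hdev: pointer to habanalabs device structure
 * @addr: the faulting address
 * @eng_id: id of the engine that triggered the fault
 * @is_pmmu: true if the fault originated in the PMMU
 */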
2715void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu)
2716{
2717 struct page_fault_info *pgf_info = &hdev->captured_err_info.page_fault_info;
2718
2719 /* Capture only the first page fault */
2720 if (atomic_cmpxchg(v: &pgf_info->page_fault_detected, old: 0, new: 1))
2721 return;
2722
2723 pgf_info->page_fault.timestamp = ktime_to_ns(kt: ktime_get());
2724 pgf_info->page_fault.addr = addr;
2725 pgf_info->page_fault.engine_id = eng_id;
2726 hl_capture_user_mappings(hdev, is_pmmu);
2727
2728 pgf_info->page_fault_info_available = true;
2729}
2730
2731void hl_handle_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu,
2732 u64 *event_mask)
2733{
2734 hl_capture_page_fault(hdev, addr, eng_id, is_pmmu);
2735
2736 if (event_mask)
2737 *event_mask |= HL_NOTIFIER_EVENT_PAGE_FAULT;
2738}
2739
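/*
 * hl_capture_hw_err - record the id and timestamp of a critical H/W error.
 *                     Only the first such error is captured.
 *
 * @hdev: pointer to habanalabs device structure
 * @event_id: id of the H/W error event reported by the device
 */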
2740static void hl_capture_hw_err(struct hl_device *hdev, u16 event_id)
2741{
2742 struct hw_err_info *info = &hdev->captured_err_info.hw_err;
2743
2744 /* Capture only the first HW err */
2745 if (atomic_cmpxchg(v: &info->event_detected, old: 0, new: 1))
2746 return;
2747
2748 info->event.timestamp = ktime_to_ns(kt: ktime_get());
2749 info->event.event_id = event_id;
2750
2751 info->event_info_available = true;
2752}
2753
2754void hl_handle_critical_hw_err(struct hl_device *hdev, u16 event_id, u64 *event_mask)
2755{
2756 hl_capture_hw_err(hdev, event_id);
2757
2758 if (event_mask)
2759 *event_mask |= HL_NOTIFIER_EVENT_CRITICL_HW_ERR;
2760}
2761
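/*
 * hl_capture_fw_err - record the type of a critical F/W error and, for errors
 *                     reported by the F/W itself, also the event id. Only the
 *                     first such error is captured.
 *
 * @hdev: pointer to habanalabs device structure
 * @fw_info: description of the F/W error
 */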
2762static void hl_capture_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *fw_info)
2763{
2764 struct fw_err_info *info = &hdev->captured_err_info.fw_err;
2765
2766 /* Capture only the first FW error */
2767 if (atomic_cmpxchg(v: &info->event_detected, old: 0, new: 1))
2768 return;
2769
2770 info->event.timestamp = ktime_to_ns(kt: ktime_get());
2771 info->event.err_type = fw_info->err_type;
2772 if (fw_info->err_type == HL_INFO_FW_REPORTED_ERR)
2773 info->event.event_id = fw_info->event_id;
2774
2775 info->event_info_available = true;
2776}
2777
2778void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info)
2779{
2780 hl_capture_fw_err(hdev, fw_info: info);
2781
2782 if (info->event_mask)
2783 *info->event_mask |= HL_NOTIFIER_EVENT_CRITICL_FW_ERR;
2784}
2785
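/*
 * hl_capture_engine_err - record the id and error count of an engine error.
 *                         Only the first engine error is captured.
 *
 * @hdev: pointer to habanalabs device structure
 * @engine_id: id of the engine that reported the error
 * @error_count: number of errors reported by the engine
 */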
2786void hl_capture_engine_err(struct hl_device *hdev, u16 engine_id, u16 error_count)
2787{
2788 struct engine_err_info *info = &hdev->captured_err_info.engine_err;
2789
2790 /* Capture only the first engine error */
2791 if (atomic_cmpxchg(v: &info->event_detected, old: 0, new: 1))
2792 return;
2793
2794 info->event.timestamp = ktime_to_ns(kt: ktime_get());
2795 info->event.engine_id = engine_id;
2796 info->event.error_count = error_count;
2797 info->event_info_available = true;
2798}
2799
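/*
 * hl_enable_err_info_capture - clear all previously captured error information
 *                              and re-arm the capturing of new errors
 *
 * @captured_err_info: pointer to the device's captured error information
 */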
2800void hl_enable_err_info_capture(struct hl_error_info *captured_err_info)
2801{
2802 vfree(addr: captured_err_info->page_fault_info.user_mappings);
2803 memset(captured_err_info, 0, sizeof(struct hl_error_info));
2804 atomic_set(v: &captured_err_info->cs_timeout.write_enable, i: 1);
2805 captured_err_info->undef_opcode.write_enable = true;
2806}
2807
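/*
 * hl_init_cpu_for_irq - build the device's IRQ affinity mask from the CPUs of
 *                       its NUMA node, keeping only one CPU per physical core
 *                       (HT siblings are dropped). Does nothing when CONFIG_NUMA
 *                       is not set or the device has no NUMA node.
 *
 * @hdev: pointer to habanalabs device structure
 */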
2808void hl_init_cpu_for_irq(struct hl_device *hdev)
2809{
2810#ifdef CONFIG_NUMA
2811 struct cpumask *available_mask = &hdev->irq_affinity_mask;
2812 int numa_node = hdev->pdev->dev.numa_node, i;
2813 static struct cpumask cpu_mask;
2814
2815 if (numa_node < 0)
2816 return;
2817
2818 if (!cpumask_and(dstp: &cpu_mask, src1p: cpumask_of_node(node: numa_node), cpu_online_mask)) {
2819 dev_err(hdev->dev, "No available affinities in current numa node\n");
2820 return;
2821 }
2822
2823 /* Remove HT siblings */
2824 for_each_cpu(i, &cpu_mask)
2825 cpumask_set_cpu(cpu: cpumask_first(topology_sibling_cpumask(i)), dstp: available_mask);
2826#endif
2827}
2828
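/*
 * hl_set_irq_affinity - apply the device's IRQ affinity mask, as prepared by
 *                       hl_init_cpu_for_irq(), to the given interrupt line
 *
 * @hdev: pointer to habanalabs device structure
 * @irq: the interrupt number to affinitize
 */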
2829void hl_set_irq_affinity(struct hl_device *hdev, int irq)
2830{
2831 if (cpumask_empty(srcp: &hdev->irq_affinity_mask)) {
2832 dev_dbg(hdev->dev, "affinity mask is empty\n");
2833 return;
2834 }
2835
2836 if (irq_set_affinity_and_hint(irq, m: &hdev->irq_affinity_mask))
2837 dev_err(hdev->dev, "Failed setting irq %d affinity\n", irq);
2838}
2839
