// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * SN Platform GRU Driver
 *
 * FILE OPERATIONS & DRIVER INITIALIZATION
 *
 * This file supports the user system call for file open, close, mmap, etc.
 * This also includes the driver initialization code.
 *
 * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
 * Copyright (c) 2008-2014 Silicon Graphics, Inc. All Rights Reserved.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/io.h>
#include <linux/spinlock.h>
#include <linux/device.h>
#include <linux/miscdevice.h>
#include <linux/interrupt.h>
#include <linux/proc_fs.h>
#include <linux/uaccess.h>
#ifdef CONFIG_X86_64
#include <asm/uv/uv_irq.h>
#endif
#include <asm/uv/uv.h>
#include "gru.h"
#include "grulib.h"
#include "grutables.h"

#include <asm/uv/uv_hub.h>
#include <asm/uv/uv_mmrs.h>

struct gru_blade_state *gru_base[GRU_MAX_BLADES] __read_mostly;
unsigned long gru_start_paddr __read_mostly;
void *gru_start_vaddr __read_mostly;
unsigned long gru_end_paddr __read_mostly;
unsigned int gru_max_gids __read_mostly;
struct gru_stats_s gru_stats;

/* Guaranteed user available resources on each node */
static int max_user_cbrs, max_user_dsr_bytes;

static struct miscdevice gru_miscdev;

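/*
 * The GRU is present only on UV1 and UV2 hubs; the revision check below
 * rejects UV3 and later as well as non-UV systems, making gru_init() a
 * no-op on such machines.
 */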
static int gru_supported(void)
{
	return is_uv_system() &&
		(uv_hub_info->hub_revision < UV3_HUB_REVISION_BASE);
}

/*
 * gru_vma_close
 *
 * Called when unmapping a device mapping. Frees all gru resources
 * and tables belonging to the vma.
 */
static void gru_vma_close(struct vm_area_struct *vma)
{
	struct gru_vma_data *vdata;
	struct gru_thread_state *gts;
	struct list_head *entry, *next;

	if (!vma->vm_private_data)
		return;

	vdata = vma->vm_private_data;
	vma->vm_private_data = NULL;
	gru_dbg(grudev, "vma %p, file %p, vdata %p\n", vma, vma->vm_file,
				vdata);
	list_for_each_safe(entry, next, &vdata->vd_head) {
		gts = list_entry(entry, struct gru_thread_state, ts_next);
		list_del(&gts->ts_next);
		mutex_lock(&gts->ts_ctxlock);
		if (gts->ts_gru)
			gru_unload_context(gts, 0);
		mutex_unlock(&gts->ts_ctxlock);
		gts_drop(gts);
	}
	kfree(vdata);
	STAT(vdata_free);
}

/*
 * gru_file_mmap
 *
 * Called when mmapping the device. Initializes the vma with a fault handler
 * and private data structure necessary to allocate, track, and free the
 * underlying pages.
 */
static int gru_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) != (VM_SHARED | VM_WRITE))
		return -EPERM;

	if (vma->vm_start & (GRU_GSEG_PAGESIZE - 1) ||
				vma->vm_end & (GRU_GSEG_PAGESIZE - 1))
		return -EINVAL;

	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_LOCKED |
			VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP);
	vma->vm_page_prot = PAGE_SHARED;
	vma->vm_ops = &gru_vm_ops;

	vma->vm_private_data = gru_alloc_vma_data(vma, 0);
	if (!vma->vm_private_data)
		return -ENOMEM;

	gru_dbg(grudev, "file %p, vaddr 0x%lx, vma %p, vdata %p\n",
		file, vma->vm_start, vma, vma->vm_private_data);
	return 0;
}

/*
 * Create a new GRU context
 */
static int gru_create_new_context(unsigned long arg)
{
	struct gru_create_context_req req;
	struct vm_area_struct *vma;
	struct gru_vma_data *vdata;
	int ret = -EINVAL;

	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
		return -EFAULT;

	if (req.data_segment_bytes > max_user_dsr_bytes)
		return -EINVAL;
	if (req.control_blocks > max_user_cbrs || !req.maximum_thread_count)
		return -EINVAL;

	if (!(req.options & GRU_OPT_MISS_MASK))
		req.options |= GRU_OPT_MISS_FMM_INTR;

	mmap_write_lock(current->mm);
	vma = gru_find_vma(req.gseg);
	if (vma) {
		vdata = vma->vm_private_data;
		vdata->vd_user_options = req.options;
		vdata->vd_dsr_au_count =
			GRU_DS_BYTES_TO_AU(req.data_segment_bytes);
		vdata->vd_cbr_au_count = GRU_CB_COUNT_TO_AU(req.control_blocks);
		vdata->vd_tlb_preload_count = req.tlb_preload_count;
		ret = 0;
	}
	mmap_write_unlock(current->mm);

	return ret;
}
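
/*
 * A minimal user-space sketch of the expected call sequence (illustrative
 * only; the request fields come from gru.h, and the mmap address/length
 * must be GRU_GSEG_PAGESIZE aligned for gru_file_mmap() to accept them):
 *
 *	int fd = open("/dev/gru", O_RDWR);
 *	void *gseg = mmap(NULL, GRU_GSEG_PAGESIZE, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, fd, 0);
 *	struct gru_create_context_req req = {
 *		.gseg			= (unsigned long)gseg,
 *		.data_segment_bytes	= GRU_NUM_DSR_BYTES,
 *		.control_blocks		= 1,
 *		.maximum_thread_count	= 1,
 *	};
 *	ioctl(fd, GRU_CREATE_CONTEXT, (unsigned long)&req);
 *
 * The mmap() reserves the GSEG virtual range and attaches this driver's
 * vm_ops; the ioctl then records the context options in the vma's
 * gru_vma_data so that the first fault can load a real GRU context.
 */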

/*
 * Get GRU configuration info (temp - for emulator testing)
 */
static long gru_get_config_info(unsigned long arg)
{
	struct gru_config_info info;
	int nodesperblade;

	if (num_online_nodes() > 1 &&
			(uv_node_to_blade_id(1) == uv_node_to_blade_id(0)))
		nodesperblade = 2;
	else
		nodesperblade = 1;
	memset(&info, 0, sizeof(info));
	info.cpus = num_online_cpus();
	info.nodes = num_online_nodes();
	info.blades = info.nodes / nodesperblade;
	info.chiplets = GRU_CHIPLETS_PER_BLADE * info.blades;

	if (copy_to_user((void __user *)arg, &info, sizeof(info)))
		return -EFAULT;
	return 0;
}

/*
 * gru_file_unlocked_ioctl
 *
 * Called to update file attributes via IOCTL calls.
 */
static long gru_file_unlocked_ioctl(struct file *file, unsigned int req,
				    unsigned long arg)
{
	int err = -EBADRQC;

	gru_dbg(grudev, "file %p, req 0x%x, 0x%lx\n", file, req, arg);

	switch (req) {
	case GRU_CREATE_CONTEXT:
		err = gru_create_new_context(arg);
		break;
	case GRU_SET_CONTEXT_OPTION:
		err = gru_set_context_option(arg);
		break;
	case GRU_USER_GET_EXCEPTION_DETAIL:
		err = gru_get_exception_detail(arg);
		break;
	case GRU_USER_UNLOAD_CONTEXT:
		err = gru_user_unload_context(arg);
		break;
	case GRU_USER_FLUSH_TLB:
		err = gru_user_flush_tlb(arg);
		break;
	case GRU_USER_CALL_OS:
		err = gru_handle_user_call_os(arg);
		break;
	case GRU_GET_GSEG_STATISTICS:
		err = gru_get_gseg_statistics(arg);
		break;
	case GRU_KTEST:
		err = gru_ktest(arg);
		break;
	case GRU_GET_CONFIG_INFO:
		err = gru_get_config_info(arg);
		break;
	case GRU_DUMP_CHIPLET_STATE:
		err = gru_dump_chiplet_request(arg);
		break;
	}
	return err;
}

/*
 * Called at init time to build tables for all GRUs that are present in the
 * system.
 */
static void gru_init_chiplet(struct gru_state *gru, unsigned long paddr,
			     void *vaddr, int blade_id, int chiplet_id)
{
	spin_lock_init(&gru->gs_lock);
	spin_lock_init(&gru->gs_asid_lock);
	gru->gs_gru_base_paddr = paddr;
	gru->gs_gru_base_vaddr = vaddr;
	gru->gs_gid = blade_id * GRU_CHIPLETS_PER_BLADE + chiplet_id;
	gru->gs_blade = gru_base[blade_id];
	gru->gs_blade_id = blade_id;
	gru->gs_chiplet_id = chiplet_id;
	gru->gs_cbr_map = (GRU_CBR_AU == 64) ? ~0 : (1UL << GRU_CBR_AU) - 1;
	gru->gs_dsr_map = (1UL << GRU_DSR_AU) - 1;
	gru->gs_asid_limit = MAX_ASID;
	gru_tgh_flush_init(gru);
	if (gru->gs_gid >= gru_max_gids)
		gru_max_gids = gru->gs_gid + 1;
	gru_dbg(grudev, "bid %d, gid %d, vaddr %p (0x%lx)\n",
		blade_id, gru->gs_gid, gru->gs_gru_base_vaddr,
		gru->gs_gru_base_paddr);
}

static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr)
{
	int pnode, nid, bid, chip;
	int cbrs, dsrbytes, n;
	int order = get_order(sizeof(struct gru_blade_state));
	struct page *page;
	struct gru_state *gru;
	unsigned long paddr;
	void *vaddr;

	max_user_cbrs = GRU_NUM_CB;
	max_user_dsr_bytes = GRU_NUM_DSR_BYTES;
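	/*
	 * Allocate and initialize a gru_blade_state for every possible
	 * blade, then shrink the guaranteed per-user limits
	 * (max_user_cbrs/max_user_dsr_bytes) to the smallest CBR/DSR
	 * totals found on any blade.
	 */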
	for_each_possible_blade(bid) {
		pnode = uv_blade_to_pnode(bid);
		nid = uv_blade_to_memory_nid(bid);/* -1 if no memory on blade */
		page = alloc_pages_node(nid, GFP_KERNEL, order);
		if (!page)
			goto fail;
		gru_base[bid] = page_address(page);
		memset(gru_base[bid], 0, sizeof(struct gru_blade_state));
		gru_base[bid]->bs_lru_gru = &gru_base[bid]->bs_grus[0];
		spin_lock_init(&gru_base[bid]->bs_lock);
		init_rwsem(&gru_base[bid]->bs_kgts_sema);

		dsrbytes = 0;
		cbrs = 0;
		for (gru = gru_base[bid]->bs_grus, chip = 0;
		     chip < GRU_CHIPLETS_PER_BLADE;
		     chip++, gru++) {
			paddr = gru_chiplet_paddr(gru_base_paddr, pnode, chip);
			vaddr = gru_chiplet_vaddr(gru_base_vaddr, pnode, chip);
			gru_init_chiplet(gru, paddr, vaddr, bid, chip);
			n = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
			cbrs = max(cbrs, n);
			n = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES;
			dsrbytes = max(dsrbytes, n);
		}
		max_user_cbrs = min(max_user_cbrs, cbrs);
		max_user_dsr_bytes = min(max_user_dsr_bytes, dsrbytes);
	}

	return 0;

fail:
	for (bid--; bid >= 0; bid--)
		free_pages((unsigned long)gru_base[bid], order);
	return -ENOMEM;
}

static void gru_free_tables(void)
{
	int bid;
	/* Must match the allocation order used in gru_init_tables() */
	int order = get_order(sizeof(struct gru_blade_state));

	for (bid = 0; bid < GRU_MAX_BLADES; bid++)
		free_pages((unsigned long)gru_base[bid], order);
}

static unsigned long gru_chiplet_cpu_to_mmr(int chiplet, int cpu, int *corep)
{
	unsigned long mmr = 0;
	int core;

	/*
	 * We target the cores of a blade and not the hyperthreads themselves.
	 * There is a max of 8 cores per socket and 2 sockets per blade,
	 * making for a max total of 16 cores (i.e., 16 CPUs without
	 * hyperthreading and 32 CPUs with hyperthreading).
	 */
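	/*
	 * Worked example (assuming UV_MAX_INT_CORES is 8): a CPU on
	 * socket 1, core 3, hyperthread 0 gets core index 3 + 8 * 1 = 11,
	 * which selects the corresponding per-core TLB interrupt MMR below.
	 */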
	core = uv_cpu_core_number(cpu) + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu);
	if (core >= GRU_NUM_TFM || uv_cpu_ht_number(cpu))
		return 0;

	if (chiplet == 0) {
		mmr = UVH_GR0_TLB_INT0_CONFIG +
			core * (UVH_GR0_TLB_INT1_CONFIG - UVH_GR0_TLB_INT0_CONFIG);
	} else if (chiplet == 1) {
		mmr = UVH_GR1_TLB_INT0_CONFIG +
			core * (UVH_GR1_TLB_INT1_CONFIG - UVH_GR1_TLB_INT0_CONFIG);
	} else {
		BUG();
	}

	*corep = core;
	return mmr;
}

static int gru_chiplet_setup_tlb_irq(int chiplet, char *irq_name,
			irq_handler_t irq_handler, int cpu, int blade)
{
	unsigned long mmr;
	int irq, core;
	int ret;

	mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core);
	if (mmr == 0)
		return 0;

	irq = uv_setup_irq(irq_name, cpu, blade, mmr, UV_AFFINITY_CPU);
	if (irq < 0) {
		printk(KERN_ERR "%s: uv_setup_irq failed, errno=%d\n",
		       GRU_DRIVER_ID_STR, -irq);
		return irq;
	}

	ret = request_irq(irq, irq_handler, 0, irq_name, NULL);
	if (ret) {
		uv_teardown_irq(irq);
		printk(KERN_ERR "%s: request_irq failed, errno=%d\n",
		       GRU_DRIVER_ID_STR, -ret);
		return ret;
	}
	gru_base[blade]->bs_grus[chiplet].gs_irq[core] = irq;
	return 0;
}

static void gru_chiplet_teardown_tlb_irq(int chiplet, int cpu, int blade)
{
	int irq, core;
	unsigned long mmr;

	mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core);
	if (mmr) {
		irq = gru_base[blade]->bs_grus[chiplet].gs_irq[core];
		if (irq) {
			free_irq(irq, NULL);
			uv_teardown_irq(irq);
		}
	}
}

static void gru_teardown_tlb_irqs(void)
{
	int blade;
	int cpu;

	for_each_online_cpu(cpu) {
		blade = uv_cpu_to_blade_id(cpu);
		gru_chiplet_teardown_tlb_irq(0, cpu, blade);
		gru_chiplet_teardown_tlb_irq(1, cpu, blade);
	}
	for_each_possible_blade(blade) {
		if (uv_blade_nr_possible_cpus(blade))
			continue;
		gru_chiplet_teardown_tlb_irq(0, 0, blade);
		gru_chiplet_teardown_tlb_irq(1, 0, blade);
	}
}

static int gru_setup_tlb_irqs(void)
{
	int blade;
	int cpu;
	int ret;

	for_each_online_cpu(cpu) {
		blade = uv_cpu_to_blade_id(cpu);
		ret = gru_chiplet_setup_tlb_irq(0, "GRU0_TLB", gru0_intr, cpu, blade);
		if (ret != 0)
			goto exit1;

		ret = gru_chiplet_setup_tlb_irq(1, "GRU1_TLB", gru1_intr, cpu, blade);
		if (ret != 0)
			goto exit1;
	}
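	/*
	 * Blades with no possible CPUs (e.g., memory-only blades) can
	 * still raise TLB-miss interrupts; route those to CPU 0 via the
	 * multi-blade handler.
	 */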
	for_each_possible_blade(blade) {
		if (uv_blade_nr_possible_cpus(blade))
			continue;
		ret = gru_chiplet_setup_tlb_irq(0, "GRU0_TLB", gru_intr_mblade, 0, blade);
		if (ret != 0)
			goto exit1;

		ret = gru_chiplet_setup_tlb_irq(1, "GRU1_TLB", gru_intr_mblade, 0, blade);
		if (ret != 0)
			goto exit1;
	}

	return 0;

exit1:
	gru_teardown_tlb_irqs();
	return ret;
}

/*
 * gru_init
 *
 * Called at boot or module load time to initialize the GRUs.
 */
static int __init gru_init(void)
{
	int ret;

	if (!gru_supported())
		return 0;

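	/*
	 * The GRU overlay config MMR supplies the physical base of the
	 * GRU address space; the low 47 bits hold the base address.
	 */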
	gru_start_paddr = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG) &
			  0x7fffffffffffUL;
	gru_start_vaddr = __va(gru_start_paddr);
	gru_end_paddr = gru_start_paddr + GRU_MAX_BLADES * GRU_SIZE;
	printk(KERN_INFO "GRU space: 0x%lx - 0x%lx\n",
	       gru_start_paddr, gru_end_paddr);
	ret = misc_register(&gru_miscdev);
	if (ret) {
		printk(KERN_ERR "%s: misc_register failed\n",
		       GRU_DRIVER_ID_STR);
		goto exit0;
	}

	ret = gru_proc_init();
	if (ret) {
		printk(KERN_ERR "%s: proc init failed\n", GRU_DRIVER_ID_STR);
		goto exit1;
	}

	ret = gru_init_tables(gru_start_paddr, gru_start_vaddr);
	if (ret) {
		printk(KERN_ERR "%s: init tables failed\n", GRU_DRIVER_ID_STR);
		goto exit2;
	}

	ret = gru_setup_tlb_irqs();
	if (ret != 0)
		goto exit3;

	gru_kservices_init();

	printk(KERN_INFO "%s: v%s\n", GRU_DRIVER_ID_STR,
	       GRU_DRIVER_VERSION_STR);
	return 0;

exit3:
	gru_free_tables();
exit2:
	gru_proc_exit();
exit1:
	misc_deregister(&gru_miscdev);
exit0:
	return ret;
}

static void __exit gru_exit(void)
{
	if (!gru_supported())
		return;

	gru_teardown_tlb_irqs();
	gru_kservices_exit();
	gru_free_tables();
	misc_deregister(&gru_miscdev);
	gru_proc_exit();
	mmu_notifier_synchronize();
}

static const struct file_operations gru_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = gru_file_unlocked_ioctl,
	.mmap = gru_file_mmap,
	.llseek = noop_llseek,
};

static struct miscdevice gru_miscdev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "gru",
	.fops = &gru_fops,
};

const struct vm_operations_struct gru_vm_ops = {
	.close = gru_vma_close,
	.fault = gru_fault,
};

#ifndef MODULE
fs_initcall(gru_init);
#else
module_init(gru_init);
#endif
module_exit(gru_exit);

module_param(gru_options, ulong, 0644);
MODULE_PARM_DESC(gru_options, "Various debug options");

MODULE_AUTHOR("Silicon Graphics, Inc.");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION(GRU_DRIVER_ID_STR GRU_DRIVER_VERSION_STR);
MODULE_VERSION(GRU_DRIVER_VERSION_STR);